diff --git a/src/assembler.cpp b/src/assembler.cpp
new file mode 100644
index 0000000..c768811
--- /dev/null
+++ b/src/assembler.cpp
@@ -0,0 +1,330 @@
+#include "assembler.hpp"
+#include "builder.hpp"
+#include "scanner.hpp"
+#include "vm.hpp"
+#include <array>
+#include <cstdlib>
+#include <expected>
+#include <filesystem>
+#include <format>
+#include <fstream>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <tuple>
+#include <unordered_map>
+#include <vector>
+
+using namespace vc5::tools;
+using namespace vc5::tools::asmer;
+
+namespace fs = std::filesystem;
+
+using TT = Tok::Ty;
+
+/// Reads the assembly source at `input_path`, parses it and writes a
+/// 65536-element program image to `output_path`.
+///
+/// Returns an empty expected on success. On failure the error string says
+/// which step went wrong: opening/reading the input, parsing, or opening the
+/// output.
+auto vc5::tools::assemble_file(fs::path input_path, fs::path output_path)
+    -> std::expected<void, std::string>
+{
+    auto text = std::string();
+
+    {
+        auto input_file = std::ifstream(input_path, std::ios_base::binary);
+        if (not input_file) {
+            return std::unexpected(
+                std::format("could not open file '{}' for reading",
+                    input_path.string()));
+        }
+
+        // Seek to the end to learn the file size, then read it in one go.
+        input_file.seekg(0, std::ios_base::end);
+        const std::streamoff size = input_file.tellg();
+        if (size < 0) {
+            // tellg() reports failure as -1; converting that to size_t would
+            // ask resize() for an absurd amount of memory.
+            return std::unexpected(
+                std::format("could not determine size of file '{}'",
+                    input_path.string()));
+        }
+        text.resize(static_cast<size_t>(size), '\0');
+        input_file.seekg(0);
+        input_file.read(text.data(), size);
+        if (not input_file) {
+            return std::unexpected(std::format(
+                "could not read file '{}'", input_path.string()));
+        }
+    }
+
+    auto parser = Parser(text);
+
+    // NOTE(review): element type assumed to be a byte; confirm against the
+    // pointer type Builder's constructor takes.
+    auto program = std::vector<uint8_t>(65536);
+    auto builder = Builder(program.data());
+
+    // NOTE(review): `assembler` is constructed but the instructions parsed
+    // below are dropped without being handed to assemble_ins().
+    auto assembler = Assembler(text, builder);
+
+    while (true) {
+        auto ins = parser.parse_ins();
+        if (not ins) {
+            break;
+        }
+    }
+
+    if (not parser.ok()) {
+        return std::unexpected("parsing failed");
+    }
+
+    {
+        auto output_file = std::ofstream(output_path, std::ios_base::binary);
+        if (not output_file) {
+            // Report the path that actually failed to open.
+            return std::unexpected(
+                std::format("could not open file '{}' for writing",
+                    output_path.string()));
+        }
+
+        output_file.write(reinterpret_cast<const char*>(program.data()),
+            static_cast<std::streamsize>(program.size()));
+    }
+
+    return {};
+}
+
+/// Records the builder's current position for every label attached to `ins`
+/// (a global label also clears the local-label table), then assembles the
+/// instruction itself.
+void Assembler::assemble_ins(Ins& ins)
+{
+    if (ins.labels) {
+        for (const auto& label : *ins.labels) {
+            if (label.is_local) {
+                m_local_labels[label.ident] = m_builder->ip();
+            } else {
+                m_local_labels.clear();
+                m_global_labels[label.ident] = m_builder->ip();
+            }
+        }
+    }
+
+    assemble_line(ins);
+}
+
+void
Assembler::assemble_line(Ins& ins)
+{
+    // Only `mov` is recognised so far and nothing is emitted for it yet: a
+    // `mov` with the right operand count still reaches the error below.
+    if (ins.ident == "mov") {
+        if (arg_count_wrong(ins, 2))
+            return;
+    }
+
+    error(ins.loc,
+        std::format("instruction '{}' not supported/implemented", ins.ident));
+}
+
+/// Reports an error and returns true when `ins` does not have exactly
+/// `count` operands; returns false when the operand count is right.
+bool Assembler::arg_count_wrong(Ins& ins, size_t count)
+{
+    if (ins.args.size() != count) {
+        error(ins.loc, std::format("expected {} operands", count));
+        return true;
+    }
+    return false;
+}
+
+/// Marks the assembler as failed and prints `message` at `loc`.
+void Assembler::error(Loc loc, std::string_view message)
+{
+    m_failed = true;
+    loc.print_error(m_text, message);
+}
+
+/// Parses one instruction: any number of `label:` / `.local:` prefixes
+/// followed by a mnemonic and a comma-separated operand list.
+///
+/// Returns nullptr at end of input or after reporting a syntax error.
+auto Parser::parse_ins() -> std::unique_ptr<Ins>
+{
+    auto loc = current_loc();
+
+    auto labels = Ins::Labels(nullptr);
+
+    auto ident = std::string_view();
+
+    while (true) {
+        while (eat('\n')) { }
+
+        // End of input: there is no further instruction to parse.
+        if (test(TT::Eof)) {
+            return nullptr;
+        }
+
+        // A leading '.' marks a local label. It has to be consumed exactly
+        // once, before looking at the identifier that follows it.
+        const auto start_loc = current_loc();
+        const bool is_local = eat('.');
+        if (not test(TT::Ident)) {
+            error(current_loc(), "expected ident");
+            return nullptr;
+        }
+        ident = m_tok.text;
+        step();
+
+        if (not eat(':')) {
+            if (not is_local) {
+                // Not a label: `ident` is the mnemonic of this line.
+                loc = start_loc;
+                break;
+            }
+
+            error(current_loc(), "expected ':'");
+            return nullptr;
+        }
+        if (not labels) {
+            labels = std::make_unique<std::vector<Label>>();
+        }
+        labels->push_back(Label { start_loc, ident, is_local });
+    }
+
+    auto args = Ins::Args();
+
+    // The loop ends either at end of input or right after consuming the
+    // newline that terminates the line, so no further line-ending check is
+    // needed (checking again would reject the first token of the next line).
+    auto first = true;
+    while (not test(TT::Eof) and not eat('\n')) {
+        if (not first and not eat(',')) {
+            error(current_loc(), "expected ','");
+            return nullptr;
+        }
+        first = false;
+
+        auto arg = parse_expr();
+        if (not arg) {
+            return nullptr;
+        }
+        args.push_back(std::move(arg));
+    }
+
+    return std::make_unique<Ins>(
+        loc, std::move(labels), ident, std::move(args));
+}
+
+/// Parses a full operand expression.
+auto Parser::parse_expr() -> std::unique_ptr<Expr>
+{
+    return parse_binary();
+}
+
+auto Parser::parse_binary(int prec) -> std::unique_ptr<Expr>
+{
+    using T = Expr::Ty;
+    using Op = std::tuple<TT, T, int>;
+
+    // Operator token, resulting expression type, precedence level. Operands
+    // of a level are parsed at the next lower level, so level 1 binds
+    // tightest.
+    constexpr auto ops = std::array {
+        Op { TT::Pipe, T::Or, 4 },
+        Op {
TT::Hat, T::Xor, 3 },
+        Op { TT::Ampersand, T::And, 2 },
+        Op { TT::Plus, T::Add, 1 },
+        Op { TT::Minus, T::Sub, 1 },
+    };
+
+    if (prec == 0) {
+        return parse_prefix();
+    }
+
+    auto loc = current_loc();
+
+    auto left = parse_binary(prec - 1);
+    if (not left) {
+        return nullptr;
+    }
+
+    // Keep folding `left op right` for as long as an operator handled at
+    // this level follows; this makes every operator left-associative.
+    auto matched = true;
+    while (matched) {
+        matched = false;
+        for (auto [op, ty, p] : ops) {
+            if (prec >= p and eat(op)) {
+                auto right = parse_binary(prec - 1);
+                if (not right) {
+                    return nullptr;
+                }
+                left = std::make_unique<Expr>(loc,
+                    ty,
+                    Expr::Binary { std::move(left), std::move(right) });
+                matched = true;
+                break;
+            }
+        }
+    }
+
+    return left;
+}
+
+/// Parses an operand preceded by any number of prefix `-` / `!` operators.
+auto Parser::parse_prefix() -> std::unique_ptr<Expr>
+{
+    auto loc = current_loc();
+
+    auto ty = Expr::Ty::Negate;
+    if (eat('-')) {
+        ty = Expr::Ty::Negate;
+    } else if (eat('!')) {
+        ty = Expr::Ty::Not;
+    } else {
+        return parse_operand();
+    }
+
+    // Recurse so that stacked prefixes such as `-!x` are accepted.
+    auto operand = parse_prefix();
+    if (not operand) {
+        return nullptr;
+    }
+    return std::make_unique<Expr>(loc, ty, std::move(operand));
+}
+
+/// Identifiers that name a register instead of a label or symbol.
+static const auto reg_idents = std::unordered_map<std::string_view, vc5::Reg> {
+    { "R0", vc5::Reg::R0 },
+    { "R1", vc5::Reg::R1 },
+    { "R2", vc5::Reg::R2 },
+    { "R3", vc5::Reg::R3 },
+    { "R4", vc5::Reg::R4 },
+    { "R5", vc5::Reg::R5 },
+    { "Rbp", vc5::Reg::Rbp },
+    { "Rsp", vc5::Reg::Rsp },
+    { "Rfl", vc5::Reg::Rfl },
+    { "Rip", vc5::Reg::Rip },
+};
+
+/// Parses a primary operand: register, identifier, integer literal
+/// (decimal, `0b…`, `0x…`), parenthesised expression or `[expr]` indirection.
+///
+/// Returns nullptr after reporting a syntax error.
+auto Parser::parse_operand() -> std::unique_ptr<Expr>
+{
+    auto loc = current_loc();
+    if (test(TT::Ident)) {
+        auto ident = m_tok.text;
+        step();
+        if (auto reg = reg_idents.find(ident); reg != reg_idents.end()) {
+            return std::make_unique<Expr>(loc, Expr::Ty::Reg, reg->second);
+        } else {
+            return std::make_unique<Expr>(loc, Expr::Ty::Ident, ident);
+        }
+    } else if (test(TT::Int)) {
+        auto text = std::string(m_tok.text);
+        auto value = std::strtol(text.c_str(), nullptr, 10);
+        // Consume the literal; otherwise the caller sees it again.
+        step();
+        return std::make_unique<Expr>(
+            loc, Expr::Ty::Int, static_cast<int>(value));
+    } else if (test(TT::Bin)) {
+        auto text = std::string(m_tok.text);
+        // Skip the "0b" prefix.
+        auto value = std::strtol(&text[2], nullptr, 2);
+        step();
+        return std::make_unique<Expr>(
+            loc, Expr::Ty::Int, static_cast<int>(value));
+    } else if (test(TT::Hex)) {
+        auto text = std::string(m_tok.text);
+        // Skip the "0x" prefix.
+        auto value = std::strtol(&text[2], nullptr, 16);
+        step();
+        return std::make_unique<Expr>(
+            loc, Expr::Ty::Int, static_cast<int>(value));
+    } else if (eat('(')) {
+        auto expr = parse_expr();
+        if (not expr) {
+            return nullptr;
+        }
+        if (not eat(')')) {
+            error(current_loc(), "expected ')'");
+            return nullptr;
+        }
+        return expr;
+    } else if (eat('[')) {
+        auto expr = parse_expr();
+        if (not expr) {
+            // Never wrap a failed sub-expression in an Indirection node.
+            return nullptr;
+        }
+        if (not eat(']')) {
+            error(current_loc(), "expected ']'");
+            return nullptr;
+        }
+        return std::make_unique<Expr>(
+            loc, Expr::Ty::Indirection, std::move(expr));
+    } else {
+        error(current_loc(), "expected expression");
+        return nullptr;
+    }
+}
+
+/// Consumes the current token and returns true if it has type `ty`;
+/// otherwise leaves it in place and returns false.
+auto Parser::eat(int ty) -> bool
+{
+    if (test(ty)) {
+        step();
+        return true;
+    }
+    return false;
+}
+
+/// Advances to the next token from the scanner.
+void Parser::step()
+{
+    m_tok = m_lexer.next();
+}
+
+/// Returns true if the current token has type `ty`.
+auto Parser::test(int ty) const -> bool
+{
+    return m_tok.ty == ty;
+}
+
+/// Location of the current (not yet consumed) token.
+auto Parser::current_loc() const -> Loc
+{
+    return m_tok.loc;
+}
+
+/// Marks the parse as failed and prints `message` at `loc`.
+void Parser::error(Loc loc, std::string_view message)
+{
+    m_error_occured = true;
+    loc.print_error(m_text, message);
+}
diff --git a/src/assembler.hpp b/src/assembler.hpp
new file mode 100644
index 0000000..27a520e
--- /dev/null
+++ b/src/assembler.hpp
@@ -0,0 +1,141 @@
+#pragma once
+
+#include "builder.hpp"
+#include "scanner.hpp"
+#include "vm.hpp"
+#include <expected>
+#include <filesystem>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <tuple>
+#include <unordered_map>
+#include <variant>
+#include <vector>
+
+namespace vc5::tools {
+
+namespace fs = std::filesystem;
+
+namespace asmer {
+
+    /// A `name:` (global) or `.name:` (local) label in front of an
+    /// instruction. `ident` is a view into the source text.
+    struct Label {
+        Loc loc;
+        std::string_view ident;
+        bool is_local;
+    };
+
+    /// Operand expression node; `ty` selects which alternative of `data`
+    /// is meaningful.
+    struct Expr {
+        enum class Ty {
+            Ident,
+            Reg,
+            Int,
+            Indirection,
+            Negate,
+            Not,
+            Or,
+            Xor,
+            And,
+            Add,
+            Sub,
+        };
+
+        using Ptr = std::unique_ptr<Expr>;
+        using Binary = std::tuple<Ptr, Ptr>;
+        using Data = std::variant<std::string_view, Reg, int, Ptr, Binary>;
+
+        // clang-format off
+        auto as_ident() -> std::string_view& { return std::get<std::string_view>(data); }
+        auto as_reg() -> Reg& { return std::get<Reg>(data); }
+        auto as_int() -> int& { return std::get<int>(data); }
+        auto as_unary() -> Ptr& { return std::get<Ptr>(data); }
+        auto as_binary() -> Binary& { return std::get<Binary>(data); }
+        //
clang-format on
+
+        Loc loc;
+        Ty ty;
+        Data data;
+    };
+
+    /// One parsed source line: optional labels, the mnemonic and its
+    /// operands. `labels` stays null when the line carries no label.
+    struct Ins {
+        using Labels = std::unique_ptr<std::vector<Label>>;
+        using Args = std::vector<std::unique_ptr<Expr>>;
+
+        Loc loc;
+        Labels labels;
+        std::string_view ident;
+        Args args;
+    };
+
+    /// Turns the token stream of a Scanner over `text` into Ins / Expr
+    /// values. The Parser keeps only a view of `text`.
+    class Parser {
+    public:
+        explicit Parser(std::string_view text)
+            : m_text(text)
+            , m_lexer(text)
+            , m_tok(m_lexer.next())
+        {
+        }
+
+        /// Parses the next instruction; see the definition for the null
+        /// return cases.
+        auto parse_ins() -> std::unique_ptr<Ins>;
+
+        /// True while neither the scanner nor the parser reported an error.
+        auto ok() const -> bool
+        {
+            return m_lexer.ok() and not m_error_occured;
+        }
+
+    private:
+        auto parse_expr() -> std::unique_ptr<Expr>;
+        auto parse_binary(int prec = 20) -> std::unique_ptr<Expr>;
+        auto parse_prefix() -> std::unique_ptr<Expr>;
+        auto parse_operand() -> std::unique_ptr<Expr>;
+
+        auto eat(int ty) -> bool;
+        void step();
+
+        auto test(int ty) const -> bool;
+        auto current_loc() const -> Loc;
+
+        void error(Loc loc, std::string_view message);
+
+        std::string_view m_text;
+        Scanner m_lexer;
+        Tok m_tok;
+        bool m_error_occured = false;
+    };
+
+    /// Consumes parsed instructions and drives `builder`; keeps a pointer
+    /// to the builder and a view of the source text for error reporting.
+    class Assembler {
+    public:
+        explicit Assembler(std::string_view text, Builder& builder)
+            : m_text(text)
+            , m_builder(&builder)
+        {
+        }
+
+        void assemble_ins(Ins& ins);
+
+        /// True while no assembly error has been reported.
+        auto ok() const -> bool
+        {
+            return not m_failed;
+        }
+
+    private:
+        void assemble_line(Ins& ins);
+
+        /// true means fail
+        bool arg_count_wrong(Ins& ins, size_t count);
+
+        void error(Loc loc, std::string_view message);
+
+        std::string_view m_text;
+        Builder* m_builder;
+        // Label name -> value of Builder::ip() where the label was seen.
+        // NOTE(review): mapped type assumed to be uint16_t; confirm against
+        // the return type of Builder::ip().
+        std::unordered_map<std::string_view, uint16_t> m_global_labels {};
+        std::unordered_map<std::string_view, uint16_t> m_local_labels {};
+        bool m_failed = false;
+    };
+
+}
+
+/// Assembles the file at `input_path` into `output_path`; the error string
+/// describes the failure.
+auto assemble_file(fs::path input_path, fs::path output_path)
+    -> std::expected<void, std::string>;
+
+}
diff --git a/src/block_device.hpp b/src/block_device.hpp
index 9809143..c925a1a 100644
--- a/src/block_device.hpp
+++ b/src/block_device.hpp
@@ -2,8 +2,10 @@
 #include
 #include
+#include <format>
 #include
 #include
+#include <stdexcept>
 #include
 
 namespace vc5 {
@@ -53,6 +55,10 @@ public:
     explicit FileDisk(fs::path file_path)
         : m_file(file_path, std::ios_base::binary)
     {
+        if (not m_file) {
+            throw
std::invalid_argument(std::format(
+                "unable to open file '{}'", file_path.string()));
+        }
     }
 
     auto block_count() -> uint16_t override;
diff --git a/src/builder.cpp b/src/builder.cpp
index 00d5f5a..c324ca8 100644
--- a/src/builder.cpp
+++ b/src/builder.cpp
@@ -4,7 +4,7 @@
 #include
 #include
 
-using namespace vc5;
+using namespace vc5::tools;
 
 namespace {
 
diff --git a/src/builder.hpp b/src/builder.hpp
index 2ecc4be..0d2aa9b 100644
--- a/src/builder.hpp
+++ b/src/builder.hpp
@@ -4,7 +4,7 @@
 #include
 #include
 
-namespace vc5 {
+namespace vc5::tools {
 
 using namespace vc5;
 
diff --git a/src/main.cpp b/src/main.cpp
index a4c1613..13abd9a 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -18,7 +18,7 @@ int main()
 
     auto disk = MemoryDisk(128);
 
-    auto l = Builder(disk.data());
+    auto l = tools::Builder(disk.data());
 
     l.mov_imm(rsp, 0x1000);
     l.mov_reg(rbp, rsp);
diff --git a/src/scanner.cpp b/src/scanner.cpp
new file mode 100644
index 0000000..6c67023
--- /dev/null
+++ b/src/scanner.cpp
@@ -0,0 +1,186 @@
+#include "scanner.hpp"
+#include <format>
+#include <print>
+#include <string>
+
+using namespace vc5::tools;
+using namespace std::literals;
+
+/// Returns the next token. Blanks (space, tab, CR) and `;` comments running
+/// to the end of the line are skipped; Tok::Eof is returned once the input
+/// is exhausted.
+auto Scanner::next() -> Tok
+{
+    auto loc = this->current_loc();
+    if (done()) {
+        return tok(Tok::Eof, loc);
+    }
+    if (test_in(" \t\r")) {
+        while (test_in(" \t\r")) {
+            step();
+        }
+        return next();
+    }
+    if (test(';')) {
+        // Comment: drop everything up to, but not including, the newline so
+        // the line terminator is still delivered as a token.
+        step();
+        while (not done() and not test('\n')) {
+            step();
+        }
+        return next();
+    }
+    if (test('_') or test_range('A', 'Z') or test_range('a', 'z')) {
+        while (test('_') or test_range('0', '9') or test_range('A', 'Z')
+            or test_range('a', 'z')) {
+            step();
+        }
+        return tok(Tok::Ident, loc);
+    }
+    if (test_range('1', '9')) {
+        while (test_range('0', '9')) {
+            step();
+        }
+        return tok(Tok::Int, loc);
+    }
+    if (test('0')) {
+        // A leading '0' is either a "0b"/"0x" prefix or the literal zero.
+        step();
+        if (test('b')) {
+            step();
+            while (test_in("01")) {
+                step();
+            }
+            return tok(Tok::Bin, loc);
+        } else if (test('x')) {
+            step();
+            while (test_range('0', '9') or test_range('a', 'f')
+                or test_range('A', 'F'))
{
+                step();
+            }
+            return tok(Tok::Hex, loc);
+        } else {
+            return tok(Tok::Int, loc);
+        }
+    }
+    // Single-character tokens use the character itself as their type. '&'
+    // and '.' are included because the parser consumes both.
+    if (test_in("\n.()[],:|^&+-!")) {
+        auto ty = static_cast<Tok::Ty>(current());
+        step();
+        return tok(ty, loc);
+    }
+    error(loc, std::format("illegal character '{}'", current()));
+    step();
+    return next();
+}
+
+/// Advances one character, keeping the line/column counters in sync.
+/// Does nothing at end of input.
+void Scanner::step()
+{
+    if (done())
+        return;
+
+    char ch = current();
+    m_idx += 1;
+
+    if (ch == '\n') {
+        m_line += 1;
+        m_col = 1;
+    } else {
+        m_col += 1;
+    }
+}
+
+/// Builds a token of type `ty` whose text runs from `loc` up to the current
+/// position.
+auto Scanner::tok(Tok::Ty ty, Loc loc) const -> Tok
+{
+    return Tok { m_text.substr(loc.idx, m_idx - loc.idx), loc, ty };
+}
+
+/// Current position as index, line and column.
+auto Scanner::current_loc() const -> Loc
+{
+    return Loc { m_idx, m_line, m_col };
+}
+
+/// True if the current character lies in [begin, end]; false at end of input.
+auto Scanner::test_range(char begin, char end) const -> bool
+{
+    return not done() and current() >= begin and current() <= end;
+}
+
+/// True if the current character is one of `chars`.
+auto Scanner::test_in(std::string_view chars) const -> bool
+{
+    for (auto ch : chars) {
+        if (test(ch)) {
+            return true;
+        }
+    }
+    return false;
+}
+
+/// True if the current character equals `ch`; false at end of input.
+auto Scanner::test(char ch) const -> bool
+{
+    return not done() and current() == ch;
+}
+
+/// Character at the current position; callers must check done() first.
+auto Scanner::current() const -> char
+{
+    return m_text[m_idx];
+}
+
+/// True once every character has been consumed.
+auto Scanner::done() const -> bool
+{
+    return m_idx >= m_text.size();
+}
+
+/// Marks the scan as failed and prints `message` at `loc`.
+void Scanner::error(Loc loc, std::string_view message)
+{
+    m_error_occured = true;
+    loc.print_error(m_text, message);
+}
+
+/// Prints `message` together with the source line of `text` that contains
+/// this location and a caret under column `col`, using ANSI colours.
+void Loc::print_error(std::string_view text, std::string_view message) const
+{
+    constexpr auto type = "error"sv;
+
+    constexpr auto clear = "\x1b[0m"sv;
+    constexpr auto bold_red = "\x1b[1;31m"sv;
+    constexpr auto bold_white = "\x1b[1;37m"sv;
+    constexpr auto cyan = "\x1b[0;36m"sv;
+    constexpr auto gray = "\x1b[0;37m"sv;
+    constexpr auto light_gray = "\x1b[1;30m"sv;
+
+    // Start of the line: one past the previous '\n'. The search begins
+    // before `idx` so that a location sitting on a '\n' selects the line
+    // that newline terminates instead of producing start > end.
+    auto start = size_t { 0 };
+    if (idx > 0) {
+        const auto prev = text.rfind('\n', idx - 1);
+        if (prev != std::string_view::npos) {
+            start = prev + 1;
+        }
+    }
+    auto end = text.find('\n', idx);
+
+    if (end == std::string_view::npos) {
+        end = text.size();
+    }
+
+    auto line_text = text.substr(start, end - start);
+
+    auto linenr_str = std::to_string(line);
+
+    // The caret line pads with col - 1 blanks directly after the '|' so the
+    // '^' lands under column `col` of the source line printed above it.
+    std::println(""
+                 "{0}{1}{2}: {3}\n"
+                 "    {4}--> {5}:{6}:{7}\n"
+                 "     {8: <{9}}{10}|\n"
+                 "    {11}{12}{13}|{14}{15}\n"
+                 "     {16: <{17}}"
+                 "{18}|{19: <{20}}{21}^ {22}{23}{24}",
+        bold_red,
+        type,
+        bold_white,
+        message,
+        cyan,
+        "",
+        line,
+        col,
+        "",
+        linenr_str.size(),
+        gray,
+        light_gray,
+        linenr_str,
+        gray,
+        light_gray,
+        line_text,
+        "",
+        linenr_str.size(),
+        gray,
+        "",
+        col - 1,
+        bold_red,
+        bold_white,
+        message,
+        clear);
+}
diff --git a/src/scanner.hpp b/src/scanner.hpp
new file mode 100644
index 0000000..7213eb8
--- /dev/null
+++ b/src/scanner.hpp
@@ -0,0 +1,77 @@
+#pragma once
+
+#include <cstddef>
+#include <string_view>
+
+namespace vc5::tools {
+
+/// Position in the source text: character index plus line and column, both
+/// of which the Scanner starts counting at 1.
+struct Loc {
+    size_t idx;
+    int line;
+    int col;
+
+    void print_error(std::string_view text, std::string_view message) const;
+};
+
+/// A token; `text` is a view into the scanned source.
+struct Tok {
+    // Unscoped on purpose: punctuation tokens use their character code as
+    // the enumerator value and are compared against plain ints/chars.
+    enum Ty {
+        Eof,
+        Ident,
+        Int,
+        Hex,
+        Bin,
+        Newline = '\n',
+        LParen = '(',
+        RParen = ')',
+        LBracket = '[',
+        RBracket = ']',
+        Comma = ',',
+        Colon = ':',
+        Dot = '.',
+        Pipe = '|',
+        Hat = '^',
+        Ampersand = '&',
+        Plus = '+',
+        Minus = '-',
+        Exclam = '!',
+    };
+
+    std::string_view text;
+    Loc loc;
+    Ty ty;
+};
+
+/// Splits source text into tokens on demand. Keeps only a view of `text`.
+class Scanner {
+public:
+    explicit Scanner(std::string_view text)
+        : m_text(text)
+    {
+    }
+
+    auto next() -> Tok;
+
+    /// True while no illegal character has been reported.
+    auto ok() const -> bool
+    {
+        return not m_error_occured;
+    }
+
+private:
+    void step();
+
+    auto tok(Tok::Ty ty, Loc loc) const -> Tok;
+    auto current_loc() const -> Loc;
+    auto test_range(char begin, char end) const -> bool;
+    auto test_in(std::string_view chars) const -> bool;
+    auto test(char ch) const -> bool;
+    auto current() const -> char;
+    auto done() const -> bool;
+
+    void error(Loc loc, std::string_view message);
+
+    std::string_view m_text;
+    size_t m_idx = 0;
+    int m_line = 1;
+    int m_col = 1;
+    bool m_error_occured = false;
+};
+
+}