assembler

This commit is contained in:
sfja 2026-01-15 22:06:47 +01:00
parent 2b423a40ed
commit d1c491c7c0
8 changed files with 743 additions and 3 deletions

330
src/assembler.cpp Normal file
View File

@ -0,0 +1,330 @@
#include "assembler.hpp"
#include "builder.hpp"
#include "scanner.hpp"
#include "vm.hpp"
#include <array>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <expected>
#include <format>
#include <fstream>
#include <ios>
#include <memory>
#include <string_view>
#include <unordered_map>
#include <vector>
using namespace vc5::tools;
using namespace vc5::tools::asmer;
namespace fs = std::filesystem;
using TT = Tok::Ty;
/// Assembles the source file at `input_path` and writes the resulting
/// program image to `output_path`.
///
/// Individual source diagnostics are printed by the parser and assembler as
/// they are encountered; the returned error only states which stage failed.
///
/// @param input_path  path of the assembly source to read
/// @param output_path path of the binary image to write
/// @return nothing on success, otherwise a human readable error message
auto vc5::tools::assemble_file(fs::path input_path, fs::path output_path)
    -> std::expected<void, std::string>
{
    // Size of the emitted image in bytes.
    constexpr auto program_size = size_t { 65536 };

    auto text = std::string();
    {
        auto input_file = std::ifstream(input_path, std::ios_base::binary);
        if (not input_file) {
            return std::unexpected(
                std::format("could not open file '{}' for reading",
                    input_path.string()));
        }

        input_file.seekg(0, std::ios_base::end);
        const auto size = static_cast<std::streamoff>(input_file.tellg());
        // tellg() signals failure with -1; casting that to size_t would
        // request an absurdly large allocation.
        if (size < 0) {
            return std::unexpected(std::format(
                "could not read file '{}'", input_path.string()));
        }

        text.resize(static_cast<size_t>(size), '\0');
        input_file.seekg(0);
        input_file.read(text.data(), static_cast<std::streamsize>(size));
        if (not input_file) {
            return std::unexpected(std::format(
                "could not read file '{}'", input_path.string()));
        }
    }

    auto parser = Parser(text);
    auto program = std::vector<uint8_t>(program_size);
    auto builder = Builder(program.data());
    auto assembler = Assembler(text, builder);

    // parse_ins() yields nullptr both at end of input and after a parse
    // error; parser.ok() below tells the two apart.
    while (auto ins = parser.parse_ins()) {
        // Keep assembling after an assembler error so that every
        // diagnostic is reported in one run.
        assembler.assemble_ins(*ins);
    }

    if (not parser.ok()) {
        return std::unexpected("parsing failed");
    }
    if (not assembler.ok()) {
        return std::unexpected("assembling failed");
    }

    {
        auto output_file = std::ofstream(output_path, std::ios_base::binary);
        if (not output_file) {
            return std::unexpected(
                std::format("could not open file '{}' for writing",
                    output_path.string()));
        }

        output_file.write(reinterpret_cast<const char*>(program.data()),
            static_cast<std::streamsize>(program.size()));
        if (not output_file) {
            return std::unexpected(std::format(
                "could not write file '{}'", output_path.string()));
        }
    }
    return {};
}
/// Registers every label attached to `ins` at the builder's current
/// instruction pointer and then assembles the instruction itself.
///
/// Defining a global label empties the local label table before the global
/// label is stored.
void Assembler::assemble_ins(Ins& ins)
{
    if (const auto* attached = ins.labels.get(); attached != nullptr) {
        for (const Label& entry : *attached) {
            if (not entry.is_local) {
                m_local_labels.clear();
            }
            auto& table = entry.is_local ? m_local_labels : m_global_labels;
            table[entry.ident] = m_builder->ip();
        }
    }

    assemble_line(ins);
}
/// Assembles the instruction part of `ins` (labels are handled by the
/// caller).
///
/// For `mov` the operand count is checked against 2 and the function returns
/// early when `arg_count_wrong` reports true. Every other path ends in a
/// "not supported/implemented" error; no machine code is emitted here.
void Assembler::assemble_line(Ins& ins)
{
    const bool is_mov = ins.ident == "mov";
    if (is_mov and arg_count_wrong(ins, 2)) {
        return;
    }

    const auto message = std::format(
        "instruction '{}' not supported/implemented", ins.ident);
    error(ins.loc, message);
}
/// Checks that `ins` has exactly `count` operands and reports an error at
/// the instruction's location when it does not.
///
/// @param ins   instruction whose operand list is checked
/// @param count required number of operands
/// @return true when the operand count is wrong (the check failed),
///         false when it matches
bool Assembler::arg_count_wrong(Ins& ins, size_t count)
{
    if (ins.args.size() == count) {
        return false;
    }
    error(ins.loc, std::format("expected {} operands", count));
    return true;
}
/// Prints `message` as an error at `loc` within the source text and marks
/// the assembler as failed, so that `ok()` returns false afterwards.
void Assembler::error(Loc loc, std::string_view message)
{
    loc.print_error(m_text, message);
    m_failed = true;
}
/// Parses one instruction line: any number of leading labels (`name:` or
/// `.name:`), a mnemonic, and a comma separated operand list terminated by a
/// newline or the end of input.
///
/// @return the parsed instruction, or nullptr at end of input or after an
///         error (errors are reported through `error`, so `ok()` tells the
///         two cases apart)
auto Parser::parse_ins() -> std::unique_ptr<Ins>
{
    auto loc = current_loc();
    auto labels = Ins::Labels(nullptr);
    auto ident = std::string_view();

    while (true) {
        // Blank lines between labels/instructions carry no meaning.
        while (eat('\n')) { }

        // Record where the label or mnemonic itself starts, so diagnostics
        // do not point at preceding blank lines.
        loc = current_loc();

        // A leading '.' marks a local label. Consume it exactly once and
        // remember the result; testing for it twice would lose the marker.
        const bool is_local = eat('.');
        if (not is_local and not test(TT::Ident)) {
            // Stopping silently is only correct at the end of the input;
            // anything else at the start of a line is a syntax error.
            // NOTE(review): labels collected before the end of input are
            // dropped here because there is no instruction to attach them to.
            if (not test(TT::Eof)) {
                error(current_loc(), "expected label or instruction");
            }
            return nullptr;
        }
        if (not test(TT::Ident)) {
            error(current_loc(), "expected ident");
            return nullptr;
        }
        ident = m_tok.text;
        step();

        if (eat(':')) {
            if (not labels) {
                labels = std::make_unique<std::vector<Label>>();
            }
            labels->push_back(Label { loc, ident, is_local });
            continue;
        }
        if (is_local) {
            // `.name` can only be a label, never a mnemonic.
            error(current_loc(), "expected ':'");
            return nullptr;
        }
        // A plain identifier without ':' is the instruction mnemonic.
        break;
    }

    auto args = Ins::Args();
    // Only *test* for the newline here: the terminator is consumed once,
    // after the loop, so the next line's first token is left untouched.
    while (not test(TT::Eof) and not test('\n')) {
        if (not args.empty() and not eat(',')) {
            error(current_loc(), "expected ','");
            return nullptr;
        }
        auto arg = parse_expr();
        if (not arg) {
            return nullptr;
        }
        args.push_back(std::move(arg));
    }
    // Consume the line terminator; there is none at the end of input.
    eat('\n');

    return std::make_unique<Ins>(
        loc, std::move(labels), ident, std::move(args));
}
/// Parses a full operand expression by entering the binary-operator parser
/// at its default precedence level.
///
/// @return the parsed expression, or whatever `parse_binary` returns on
///         failure
auto Parser::parse_expr() -> std::unique_ptr<Expr>
{
    auto expr = parse_binary();
    return expr;
}
/// Parses a left-associative binary expression by precedence climbing.
///
/// Each operator in the table below has a level; a lower level binds
/// tighter. A call at level `prec` parses its operands at level `prec - 1`
/// and then folds every operator of its own level. Level 0 is the prefix /
/// operand layer.
///
/// @param prec precedence level to parse at
/// @return the parsed expression, or nullptr if an operand failed to parse
auto Parser::parse_binary(int prec) -> std::unique_ptr<Expr>
{
    using T = Expr::Ty;
    using Op = std::tuple<TT, T, int>;

    constexpr auto ops = std::array {
        Op { TT::Pipe, T::Or, 4 },
        Op { TT::Hat, T::Xor, 3 },
        Op { TT::Ampersand, T::And, 2 },
        Op { TT::Plus, T::Add, 1 },
        Op { TT::Minus, T::Sub, 1 },
    };

    if (prec == 0) {
        return parse_prefix();
    }

    auto loc = current_loc();
    auto left = parse_binary(prec - 1);
    if (not left) {
        return nullptr;
    }

    // Keep folding while an operator of this level follows. The flag has to
    // start out true, otherwise the loop body never runs and no binary
    // operator is ever parsed.
    auto matched = true;
    while (matched) {
        matched = false;
        for (const auto& [op, ty, p] : ops) {
            // Operators of tighter levels were already consumed by the
            // recursive calls, so only this level's operators can match.
            if (prec != p or not eat(op)) {
                continue;
            }
            auto right = parse_binary(prec - 1);
            if (not right) {
                return nullptr;
            }
            left = std::make_unique<Expr>(loc,
                ty,
                Expr::Binary { std::move(left), std::move(right) });
            matched = true;
        }
    }
    return left;
}
/// Parses an optional prefix operator (`-` negate, `!` not) followed by its
/// operand. Prefix operators may be stacked, e.g. `-!x`.
///
/// @return the parsed expression, or nullptr if the operand failed to parse
auto Parser::parse_prefix() -> std::unique_ptr<Expr>
{
    auto loc = current_loc();

    // The operator has to be recognised *before* the operand is parsed;
    // otherwise `-x` reaches parse_operand with '-' as the current token.
    auto ty = Expr::Ty::Negate;
    if (eat('-')) {
        ty = Expr::Ty::Negate;
    } else if (eat('!')) {
        ty = Expr::Ty::Not;
    } else {
        return parse_operand();
    }

    auto operand = parse_prefix();
    if (not operand) {
        return nullptr;
    }
    return std::make_unique<Expr>(loc, ty, std::move(operand));
}
static const auto reg_idents = std::unordered_map<std::string_view, vc5::Reg> {
{ "R0", vc5::Reg::R0 },
{ "R1", vc5::Reg::R1 },
{ "R2", vc5::Reg::R2 },
{ "R3", vc5::Reg::R3 },
{ "R4", vc5::Reg::R4 },
{ "R5", vc5::Reg::R5 },
{ "Rbp", vc5::Reg::Rbp },
{ "Rsp", vc5::Reg::Rsp },
{ "Rfl", vc5::Reg::Rfl },
{ "Rip", vc5::Reg::Rip },
};
/// Parses a primary operand: a register or identifier, an integer literal
/// (decimal, `0b` binary or `0x` hexadecimal), a parenthesised expression,
/// or a bracketed indirection `[expr]`.
///
/// @return the parsed expression, or nullptr after reporting an error
auto Parser::parse_operand() -> std::unique_ptr<Expr>
{
    auto loc = current_loc();

    // Converts the current numeric token to an Int expression and consumes
    // it. `prefix_len` is the length of the radix prefix to skip ("0b" and
    // "0x" are two characters, decimal has none).
    auto parse_int = [&](size_t prefix_len, int base) -> std::unique_ptr<Expr> {
        // strtol needs a NUL terminated string, a string_view is not one.
        const auto digits = std::string(m_tok.text.substr(prefix_len));
        // The token must be consumed, otherwise the caller sees the same
        // literal again and reports a bogus "expected ','".
        step();
        const auto value = std::strtol(digits.c_str(), nullptr, base);
        return std::make_unique<Expr>(
            loc, Expr::Ty::Int, static_cast<int>(value));
    };

    if (test(TT::Ident)) {
        auto ident = m_tok.text;
        step();
        if (auto reg = reg_idents.find(ident); reg != reg_idents.end()) {
            return std::make_unique<Expr>(loc, Expr::Ty::Reg, reg->second);
        }
        return std::make_unique<Expr>(loc, Expr::Ty::Ident, ident);
    }
    if (test(TT::Int)) {
        return parse_int(0, 10);
    }
    if (test(TT::Bin)) {
        return parse_int(2, 2);
    }
    if (test(TT::Hex)) {
        return parse_int(2, 16);
    }
    if (eat('(')) {
        auto expr = parse_expr();
        if (not expr) {
            // The inner error was already reported; avoid a follow-up one.
            return nullptr;
        }
        if (not eat(')')) {
            error(current_loc(), "expected ')'");
            return nullptr;
        }
        return expr;
    }
    if (eat('[')) {
        auto expr = parse_expr();
        if (not expr) {
            return nullptr;
        }
        if (not eat(']')) {
            error(current_loc(), "expected ']'");
            return nullptr;
        }
        return std::make_unique<Expr>(
            loc, Expr::Ty::Indirection, std::move(expr));
    }

    error(current_loc(), "expected expression");
    return nullptr;
}
/// Consumes the current token if it has type `ty`.
///
/// @return true if a token was consumed, false if the current token has a
///         different type (in which case nothing changes)
auto Parser::eat(int ty) -> bool
{
    if (not test(ty)) {
        return false;
    }
    step();
    return true;
}
/// Advances to the next token by pulling one from the scanner.
void Parser::step()
{
    const auto upcoming = m_lexer.next();
    m_tok = upcoming;
}
/// Reports whether the current token has type `ty`, without consuming it.
auto Parser::test(int ty) const -> bool
{
    const int current_ty = m_tok.ty;
    return current_ty == ty;
}
/// Returns the source location of the current (not yet consumed) token.
auto Parser::current_loc() const -> Loc
{
    const Loc here = m_tok.loc;
    return here;
}
/// Prints `message` as an error at `loc` within the source text and records
/// that parsing failed, so that `ok()` returns false afterwards.
void Parser::error(Loc loc, std::string_view message)
{
    loc.print_error(m_text, message);
    m_error_occured = true;
}

141
src/assembler.hpp Normal file
View File

@ -0,0 +1,141 @@
#pragma once
#include "builder.hpp"
#include "scanner.hpp"
#include "vm.hpp"
#include <cstdint>
#include <expected>
#include <filesystem>
#include <memory>
#include <string>
#include <string_view>
#include <unordered_map>
#include <variant>
#include <vector>
namespace vc5::tools {
namespace fs = std::filesystem;
namespace asmer {
/// A label attached to an instruction.
/// Member order matters: the parser builds this with aggregate
/// initialisation (`Label { loc, ident, is_local }`).
struct Label {
// Source location recorded for the label.
Loc loc;
// Label name without the ':' (and without a leading '.'); a non-owning
// view taken from the token text.
std::string_view ident;
// Marks a local label. The assembler keeps local labels in a separate
// table that is cleared whenever a global label is defined.
bool is_local;
};
/// Node of an operand expression tree.
/// `ty` selects the kind of node and determines which alternative of `data`
/// the parser stores.
struct Expr {
enum class Ty {
// Leaf nodes: identifier (string_view), register (Reg), integer (int).
Ident,
Reg,
Int,
// Unary nodes: the single operand is stored as a Ptr.
Indirection,
Negate,
Not,
// Binary nodes: both operands are stored as a Binary tuple (left, right).
Or,
Xor,
And,
Add,
Sub,
};
// Owning pointer to a child expression.
using Ptr = std::unique_ptr<Expr>;
// Left and right operand of a binary node.
using Binary = std::tuple<Ptr, Ptr>;
using Data = std::variant<std::string_view, Reg, int, Ptr, Binary>;
// Typed accessors for `data`. Each one uses std::get, which throws
// std::bad_variant_access when `data` holds a different alternative.
// clang-format off
auto as_ident() -> std::string_view& { return std::get<std::string_view>(data); }
auto as_reg() -> Reg& { return std::get<Reg>(data); }
auto as_int() -> int& { return std::get<int>(data); }
auto as_unary() -> Ptr& { return std::get<Ptr>(data); }
auto as_binary() -> Binary& { return std::get<Binary>(data); }
// clang-format on
// Source location recorded when parsing of this expression started.
Loc loc;
Ty ty;
Data data;
};
/// One parsed instruction line: optional labels, the mnemonic and its
/// operands. Member order matters: the parser constructs this positionally
/// as (loc, labels, ident, args).
struct Ins {
// Null when the instruction has no labels; the parser only allocates the
// vector once the first label is seen.
using Labels = std::unique_ptr<std::vector<Label>>;
using Args = std::vector<std::unique_ptr<Expr>>;
// Source location recorded by the parser for this instruction.
Loc loc;
Labels labels;
// Instruction mnemonic; a non-owning view taken from the token text.
std::string_view ident;
// Operand expressions in source order.
Args args;
};
/// Recursive-descent parser that turns assembly source text into `Ins`
/// values, one instruction per call to `parse_ins`.
class Parser {
public:
// `text` is stored as a view and also handed to the scanner, so the
// underlying buffer must outlive the parser. The first token is fetched
// immediately.
explicit Parser(std::string_view text)
: m_text(text)
, m_lexer(text)
, m_tok(m_lexer.next())
{
}
// Parses the next instruction. Returns nullptr both at end of input and
// after an error; use ok() to tell the two apart.
auto parse_ins() -> std::unique_ptr<Ins>;
// True while neither the scanner nor the parser has reported an error.
auto ok() const -> bool
{
return m_lexer.ok() and not m_error_occured;
}
private:
// Expression grammar, loosest to tightest: expr -> binary -> prefix -> operand.
auto parse_expr() -> std::unique_ptr<Expr>;
auto parse_binary(int prec = 20) -> std::unique_ptr<Expr>;
auto parse_prefix() -> std::unique_ptr<Expr>;
auto parse_operand() -> std::unique_ptr<Expr>;
// Consumes the current token if it has type `ty`; returns whether it did.
auto eat(int ty) -> bool;
// Replaces the current token with the next one from the scanner.
void step();
// Checks the type of the current token without consuming it.
auto test(int ty) const -> bool;
// Location of the current token.
auto current_loc() const -> Loc;
// Prints a diagnostic and marks the parser as failed.
void error(Loc loc, std::string_view message);
std::string_view m_text;
// m_lexer must stay declared before m_tok: members are initialised in
// declaration order and m_tok is initialised from m_lexer.next().
Scanner m_lexer;
// The current lookahead token.
Tok m_tok;
bool m_error_occured = false;
};
/// Consumes parsed instructions one at a time, tracking label addresses
/// through a `Builder`.
class Assembler {
public:
// `text` is the source text used when printing diagnostics. `builder` is
// stored as a pointer, so it must outlive the assembler.
explicit Assembler(std::string_view text, Builder& builder)
: m_text(text)
, m_builder(&builder)
{
}
// Records the labels of `ins` at the builder's current instruction pointer
// and then assembles the instruction.
void assemble_ins(Ins& ins);
// True while no assembler error has been reported.
auto ok() const -> bool
{
return not m_failed;
}
private:
// Assembles the instruction part of `ins`, without its labels.
void assemble_line(Ins& ins);
/// Checks that `ins` has exactly `count` operands.
/// true means fail
bool arg_count_wrong(Ins& ins, size_t count);
// Prints a diagnostic and marks the assembler as failed.
void error(Loc loc, std::string_view message);
std::string_view m_text;
Builder* m_builder;
// Label name -> address taken from Builder::ip() when the label was seen.
std::unordered_map<std::string_view, uint16_t> m_global_labels {};
// Same for local labels; cleared whenever a global label is defined.
std::unordered_map<std::string_view, uint16_t> m_local_labels {};
bool m_failed = false;
};
}
/// Reads the assembly source at `input_path`, processes it and writes a
/// binary image to `output_path`.
/// Returns an error message when a file cannot be opened or parsing fails.
auto assemble_file(fs::path input_path, fs::path output_path)
-> std::expected<void, std::string>;
}

View File

@ -2,8 +2,10 @@
#include <cstdint>
#include <filesystem>
#include <format>
#include <fstream>
#include <ios>
#include <stdexcept>
#include <vector>
namespace vc5 {
@ -53,6 +55,10 @@ public:
/// Opens `file_path` in binary mode as the backing file of this disk.
///
/// @throws std::invalid_argument if the stream is not usable after opening
explicit FileDisk(fs::path file_path)
    : m_file(file_path, std::ios_base::binary)
{
    if (m_file) {
        return;
    }
    const auto path_text = std::string(file_path);
    throw std::invalid_argument(
        std::format("unable to open file '{}'", path_text));
}
auto block_count() -> uint16_t override;

View File

@ -4,7 +4,7 @@
#include <print>
#include <utility>
using namespace vc5;
using namespace vc5::tools;
namespace {

View File

@ -4,7 +4,7 @@
#include <cstdint>
#include <print>
namespace vc5 {
namespace vc5::tools {
using namespace vc5;

View File

@ -18,7 +18,7 @@ int main()
auto disk = MemoryDisk(128);
auto l = Builder(disk.data());
auto l = tools::Builder(disk.data());
l.mov_imm(rsp, 0x1000);
l.mov_reg(rbp, rsp);

186
src/scanner.cpp Normal file
View File

@ -0,0 +1,186 @@
#include "scanner.hpp"
#include <print>
#include <string>
#include <string_view>
using namespace vc5::tools;
using namespace std::literals;
/// Scans and returns the next token.
///
/// Blanks (space, tab, carriage return) and `;` comments running to the end
/// of the line are skipped. Newlines are significant and returned as tokens.
/// At the end of the text an `Eof` token is returned. Illegal characters are
/// reported through `error` and skipped.
auto Scanner::next() -> Tok
{
    // Loop instead of recursing for skipped input: recursion would add one
    // stack frame per illegal character.
    while (true) {
        const auto loc = current_loc();
        if (done()) {
            return tok(Tok::Eof, loc);
        }

        if (test_in(" \t\r")) {
            while (test_in(" \t\r")) {
                step();
            }
            continue;
        }

        if (test(';')) {
            step();
            // The terminating newline is left in place; it is a token.
            while (not done() and not test('\n')) {
                step();
            }
            continue;
        }

        if (test('_') or test_range('A', 'Z') or test_range('a', 'z')) {
            while (test('_') or test_range('0', '9') or test_range('A', 'Z')
                or test_range('a', 'z')) {
                step();
            }
            return tok(Tok::Ident, loc);
        }

        if (test_range('1', '9')) {
            while (test_range('0', '9')) {
                step();
            }
            return tok(Tok::Int, loc);
        }

        if (test('0')) {
            step();
            if (test('b')) {
                step();
                while (test_in("01")) {
                    step();
                }
                return tok(Tok::Bin, loc);
            }
            if (test('x')) {
                step();
                while (test_range('0', '9') or test_range('a', 'f')
                    or test_range('A', 'F')) {
                    step();
                }
                return tok(Tok::Hex, loc);
            }
            // A lone zero.
            return tok(Tok::Int, loc);
        }

        // Single-character tokens use their character code as token type.
        // '&' (bitwise and) and '.' (local label marker) are included
        // because the parser matches on both.
        if (test_in("\n()[],:|^&+-!.")) {
            const auto ty = static_cast<Tok::Ty>(current());
            step();
            return tok(ty, loc);
        }

        error(loc, std::format("illegal character '{}'", current()));
        step();
    }
}
/// Advances by one character and keeps the line/column counters in sync.
/// Does nothing at the end of the text.
void Scanner::step()
{
    if (done()) {
        return;
    }

    const bool at_newline = current() == '\n';
    m_idx += 1;

    if (at_newline) {
        // The character after a newline is column 1 of the next line.
        m_line += 1;
        m_col = 1;
        return;
    }
    m_col += 1;
}
/// Builds a token of type `ty` whose text spans from `loc` up to (but not
/// including) the current position.
auto Scanner::tok(Tok::Ty ty, Loc loc) const -> Tok
{
    const auto length = m_idx - loc.idx;
    const auto lexeme = m_text.substr(loc.idx, length);
    return Tok { lexeme, loc, ty };
}
/// Returns the current position as a `Loc` (index, line, column).
auto Scanner::current_loc() const -> Loc
{
    const auto here = Loc { m_idx, m_line, m_col };
    return here;
}
/// Reports whether the current character lies in the inclusive range
/// [`begin`, `end`]. Always false at the end of the text.
auto Scanner::test_range(char begin, char end) const -> bool
{
    if (done()) {
        return false;
    }
    const char ch = current();
    return begin <= ch and ch <= end;
}
auto Scanner::test_in(std::string_view chars) const -> bool
{
for (auto ch : chars) {
if (test(ch)) {
return true;
}
}
return false;
}
/// Reports whether the current character equals `ch`.
/// Always false at the end of the text.
auto Scanner::test(char ch) const -> bool
{
    if (done()) {
        return false;
    }
    return current() == ch;
}
/// Returns the character at the current position.
/// The index is not checked here; callers test `done()` first.
auto Scanner::current() const -> char
{
    const char ch = m_text[m_idx];
    return ch;
}
/// Reports whether the whole text has been consumed.
auto Scanner::done() const -> bool
{
    const auto length = m_text.size();
    return not (m_idx < length);
}
/// Prints `message` as an error at `loc` within the source text and records
/// that scanning failed, so that `ok()` returns false afterwards.
void Scanner::error(Loc loc, std::string_view message)
{
    loc.print_error(m_text, message);
    m_error_occured = true;
}
/// Prints a coloured diagnostic for this location: the message, the
/// position (`<file>:line:col`), the offending source line, and a caret
/// under the offending column followed by the message again.
///
/// @param text    the complete source text this location refers to
/// @param message the diagnostic message
void Loc::print_error(std::string_view text, std::string_view message) const
{
    constexpr auto type = "error"sv;
    constexpr auto clear = "\x1b[0m"sv;
    constexpr auto bold_red = "\x1b[1;31m"sv;
    constexpr auto bold_white = "\x1b[1;37m"sv;
    constexpr auto cyan = "\x1b[0;36m"sv;
    constexpr auto gray = "\x1b[0;37m"sv;
    constexpr auto light_gray = "\x1b[1;30m"sv;

    // The line starts after the last newline strictly *before* idx. When
    // idx itself points at a newline (e.g. an error at the end of a line),
    // that newline terminates the line and must not be mistaken for the
    // one preceding it; otherwise start would end up past end.
    const auto prev_newline = idx == 0
        ? std::string_view::npos
        : text.rfind('\n', idx - 1);
    const auto start
        = prev_newline == std::string_view::npos ? 0 : prev_newline + 1;

    auto end = text.find('\n', idx);
    if (end == std::string_view::npos) {
        end = text.size();
    }

    const auto line_text = text.substr(start, end - start);
    const auto linenr_str = std::to_string(line);

    // The caret line pads with col - 1 blanks directly after the '|' so the
    // caret lines up with the source line printed above it.
    std::println(""
                 "{0}{1}{2}: {3}\n"
                 " {4}--> {5}:{6}:{7}\n"
                 " {8: <{9}}{10}|\n"
                 " {11}{12}{13}|{14}{15}\n"
                 " {16: <{17}}"
                 "{18}|{19: <{20}}{21}^ {22}{23}{24}",
        bold_red,
        type,
        bold_white,
        message,
        cyan,
        "<file>",
        line,
        col,
        "",
        linenr_str.size(),
        gray,
        light_gray,
        linenr_str,
        gray,
        light_gray,
        line_text,
        "",
        linenr_str.size(),
        gray,
        "",
        col - 1,
        bold_red,
        bold_white,
        message,
        clear);
}

77
src/scanner.hpp Normal file
View File

@ -0,0 +1,77 @@
#pragma once
#include <cstddef>
#include <string_view>
namespace vc5::tools {
/// A position in the source text.
/// Member order matters: the scanner constructs this as
/// `Loc { idx, line, col }`.
struct Loc {
// Index of the character within the source text (starts at 0).
size_t idx;
// Line number; the scanner starts counting at 1.
int line;
// Column number; the scanner starts counting at 1 on every line.
int col;
// Prints `message` as an error for this position, together with the
// matching line taken from `text`.
void print_error(std::string_view text, std::string_view message) const;
};
/// A single token produced by the Scanner.
/// Member order matters: the scanner constructs this as
/// `Tok { text, loc, ty }`.
struct Tok {
    /// Token kinds.
    ///
    /// Deliberately an unscoped enum: every single-character punctuation
    /// kind has the value of its character, so the scanner can cast the
    /// character to `Ty` and the parser can match with e.g. `eat(',')`.
    enum Ty {
        Eof,
        Ident,
        Int,
        Hex,
        Bin,
        Newline = '\n',
        LParen = '(',
        RParen = ')',
        LBracket = '[',
        RBracket = ']',
        Comma = ',',
        Colon = ':',
        Pipe = '|',
        Hat = '^',
        Ampersand = '&',
        Plus = '+',
        Minus = '-',
        Exclam = '!',
        // Local label marker; the parser matches it with eat('.').
        Dot = '.',
    };

    /// The characters the token was scanned from; a non-owning view.
    std::string_view text;
    /// Position of the token's first character.
    Loc loc;
    Ty ty;
};
/// Splits assembly source text into tokens, one per call to `next`.
class Scanner {
public:
// `text` is stored as a view; the underlying buffer must outlive the
// scanner and every token it hands out.
explicit Scanner(std::string_view text)
: m_text(text)
{
}
// Scans and returns the next token; yields an Eof token at the end of the
// text.
auto next() -> Tok;
// True while no illegal character has been reported.
auto ok() const -> bool
{
return not m_error_occured;
}
private:
// Advances by one character, updating line and column.
void step();
// Builds a token spanning from `loc` to the current position.
auto tok(Tok::Ty ty, Loc loc) const -> Tok;
auto current_loc() const -> Loc;
// Character tests on the current character; all are false at the end of
// the text.
auto test_range(char begin, char end) const -> bool;
auto test_in(std::string_view chars) const -> bool;
auto test(char ch) const -> bool;
// Character at the current position; not bounds-checked.
auto current() const -> char;
// True once the whole text has been consumed.
auto done() const -> bool;
// Prints a diagnostic and marks the scanner as failed.
void error(Loc loc, std::string_view message);
std::string_view m_text;
// Index of the current character in m_text.
size_t m_idx = 0;
// Line and column of the current character, both starting at 1.
int m_line = 1;
int m_col = 1;
bool m_error_occured = false;
};
}