more assembler

This commit is contained in:
sfja 2026-01-18 22:00:31 +01:00
parent d1c491c7c0
commit 2f99ef56d7
5 changed files with 364 additions and 43 deletions

View File

@ -13,6 +13,7 @@
#include <memory> #include <memory>
#include <string_view> #include <string_view>
#include <unordered_map> #include <unordered_map>
#include <utility>
#include <vector> #include <vector>
using namespace vc5::tools; using namespace vc5::tools;
@ -22,6 +23,9 @@ namespace fs = std::filesystem;
using TT = Tok::Ty; using TT = Tok::Ty;
using EO = EvaledOperand;
using EOT = EO::Ty;
auto vc5::tools::assemble_file(fs::path input_path, fs::path output_path) auto vc5::tools::assemble_file(fs::path input_path, fs::path output_path)
-> std::expected<void, std::string> -> std::expected<void, std::string>
{ {
@ -49,6 +53,8 @@ auto vc5::tools::assemble_file(fs::path input_path, fs::path output_path)
auto assembler = Assembler(text, builder); auto assembler = Assembler(text, builder);
auto lines = std::vector<Line>();
while (true) { while (true) {
auto ins = parser.parse_ins(); auto ins = parser.parse_ins();
if (not ins) { if (not ins) {
@ -75,10 +81,35 @@ auto vc5::tools::assemble_file(fs::path input_path, fs::path output_path)
return {}; return {};
} }
void Assembler::assemble_ins(Ins& ins) auto Assembler::assemble_file(std::string_view text)
-> std::expected<std::vector<uint8_t>, std::string>
{ {
if (ins.labels) { auto parser = Parser(text);
for (const auto& label : *ins.labels) {
auto program = std::vector<uint8_t>(65536);
auto builder = Builder(program.data());
auto assembler = Assembler(text, builder);
auto lines = std::vector<Line>();
while (true) {
auto ins = parser.parse_ins();
if (not ins) {
break;
}
}
if (not parser.ok()) {
return std::unexpected("parsing failed");
}
return { std::move(program) };
}
void Assembler::assemble_define_labels(const std::vector<Label>& labels)
{
for (const auto& label : labels) {
if (label.is_local) { if (label.is_local) {
m_local_labels[label.ident] = m_builder->ip(); m_local_labels[label.ident] = m_builder->ip();
} else { } else {
@ -86,13 +117,13 @@ void Assembler::assemble_ins(Ins& ins)
m_global_labels[label.ident] = m_builder->ip(); m_global_labels[label.ident] = m_builder->ip();
} }
} }
}
assemble_line(ins);
} }
void Assembler::assemble_line(Ins& ins) void Assembler::assemble_line(const Line& ins)
{ {
if (ins.ident == "db") {
for (const auto& arg : ins.args) { }
}
if (ins.ident == "mov") { if (ins.ident == "mov") {
if (arg_count_wrong(ins, 2)) if (arg_count_wrong(ins, 2))
return; return;
@ -102,7 +133,88 @@ void Assembler::assemble_line(Ins& ins)
std::format("instruction '{}' not supported/implemented", ins.ident)); std::format("instruction '{}' not supported/implemented", ins.ident));
} }
bool Assembler::arg_count_wrong(Ins& ins, size_t count) auto Assembler::eval_operand(const Expr& expr) -> std::unique_ptr<EvaledOperand>
{
auto loc = expr.loc;
switch (expr.ty) {
case Expr::Ty::Ident:
case Expr::Ty::SubLabel:
case Expr::Ty::Reg:
return std::make_unique<EO>(loc, EOT::Reg, expr.as_reg());
case Expr::Ty::Int:
case Expr::Ty::Str:
return std::make_unique<EO>(loc, EOT::Str, expr.as_str());
case Expr::Ty::Mem:
case Expr::Ty::MemByte:
case Expr::Ty::MemWord:
case Expr::Ty::Negate:
case Expr::Ty::Not:
case Expr::Ty::Or:
case Expr::Ty::Xor:
case Expr::Ty::And:
case Expr::Ty::Add:
case Expr::Ty::Sub:
break;
}
std::unreachable();
}
/// Evaluates the expression of a memory operand.
///
/// No expression kind is handled yet: every case leaves the switch without
/// producing a value. Instead of running into `std::unreachable()` (which is
/// undefined behaviour when actually reached), an error is reported at the
/// expression's location and `nullptr` is returned.
///
/// @param expr  The expression to evaluate.
/// @return      Always `nullptr` for now, after reporting an error.
auto Assembler::eval_operand_mem(const Expr& expr)
    -> std::unique_ptr<EvaledOperand>
{
    // The switch is kept exhaustive so the compiler can flag newly added
    // expression kinds once real handling is filled in.
    switch (expr.ty) {
        case Expr::Ty::Ident:
        case Expr::Ty::SubLabel:
        case Expr::Ty::Reg:
        case Expr::Ty::Int:
        case Expr::Ty::Str:
        case Expr::Ty::Mem:
        case Expr::Ty::MemByte:
        case Expr::Ty::MemWord:
        case Expr::Ty::Negate:
        case Expr::Ty::Not:
        case Expr::Ty::Or:
        case Expr::Ty::Xor:
        case Expr::Ty::And:
        case Expr::Ty::Add:
        case Expr::Ty::Sub:
            break;
    }
    error(expr.loc, "memory operand not supported/implemented");
    return nullptr;
}
/// Evaluates `expr` as an immediate operand.
///
/// Integer expressions produce an `Imm` operand whose value is the integer
/// converted with `static_cast<uint16_t>`. Registers, identifiers,
/// sub-labels, strings and memory indirections report an error and yield
/// `nullptr`. Unary and binary operators are not handled yet and are also
/// reported as an error.
///
/// @param expr  The expression to evaluate.
/// @return      The evaluated operand, or `nullptr` if an error was reported.
auto Assembler::eval_operand_to_imm(const Expr& expr)
    -> std::unique_ptr<EvaledOperand>
{
    switch (expr.ty) {
        // NOTE(review): Ident and SubLabel share the register diagnostic —
        // confirm this is intended.
        case Expr::Ty::Ident:
        case Expr::Ty::SubLabel:
        case Expr::Ty::Reg:
            error(expr.loc, "registers cannot be part of an expression");
            // Must return here: falling through into the Int case would call
            // as_int() on an expression that does not hold an int.
            return nullptr;
        case Expr::Ty::Int:
            return std::make_unique<EO>(
                expr.loc, EOT::Imm, static_cast<uint16_t>(expr.as_int()));
        case Expr::Ty::Str:
            error(expr.loc, "strings cannot be part of an expression");
            return nullptr;
        case Expr::Ty::Mem:
        case Expr::Ty::MemByte:
        case Expr::Ty::MemWord:
            error(expr.loc, "indirections cannot be part of an expression");
            return nullptr;
        case Expr::Ty::Negate:
        case Expr::Ty::Not:
        case Expr::Ty::Or:
        case Expr::Ty::Xor:
        case Expr::Ty::And:
        case Expr::Ty::Add:
        case Expr::Ty::Sub:
            break;
    }
    // Operator expressions end up here. Report them instead of reaching
    // std::unreachable(), which would be undefined behaviour.
    error(expr.loc, "expression not supported/implemented");
    return nullptr;
}
bool Assembler::arg_count_wrong(const Line& ins, size_t count)
{ {
if (ins.args.size() != count) { if (ins.args.size() != count) {
error(ins.loc, std::format("expected {} operands", count)); error(ins.loc, std::format("expected {} operands", count));
@ -117,11 +229,11 @@ void Assembler::error(Loc loc, std::string_view message)
loc.print_error(m_text, message); loc.print_error(m_text, message);
} }
auto Parser::parse_ins() -> std::unique_ptr<Ins> auto Parser::parse_ins() -> std::unique_ptr<Line>
{ {
auto loc = current_loc(); auto loc = current_loc();
auto labels = Ins::Labels(nullptr); auto labels = Line::Labels(nullptr);
auto ident = std::string_view(); auto ident = std::string_view();
@ -158,7 +270,7 @@ auto Parser::parse_ins() -> std::unique_ptr<Ins>
} }
} }
auto args = Ins::Args(); auto args = Line::Args();
auto first = true; auto first = true;
while (not test(TT::Eof) and not eat('\n')) { while (not test(TT::Eof) and not eat('\n')) {
@ -179,7 +291,7 @@ auto Parser::parse_ins() -> std::unique_ptr<Ins>
error(current_loc(), "expected line ending"); error(current_loc(), "expected line ending");
} }
return std::make_unique<Ins>( return std::make_unique<Line>(
loc, std::move(labels), ident, std::move(args)); loc, std::move(labels), ident, std::move(args));
} }
@ -238,7 +350,9 @@ auto Parser::parse_prefix() -> std::unique_ptr<Expr>
} }
} }
static const auto reg_idents = std::unordered_map<std::string_view, vc5::Reg> { namespace {
const auto reg_idents = std::unordered_map<std::string_view, vc5::Reg> {
{ "R0", vc5::Reg::R0 }, { "R0", vc5::Reg::R0 },
{ "R1", vc5::Reg::R1 }, { "R1", vc5::Reg::R1 },
{ "R2", vc5::Reg::R2 }, { "R2", vc5::Reg::R2 },
@ -251,6 +365,25 @@ static const auto reg_idents = std::unordered_map<std::string_view, vc5::Reg> {
{ "Rip", vc5::Reg::Rip }, { "Rip", vc5::Reg::Rip },
}; };
/// Translates the character that follows a backslash in a literal into the
/// character it denotes: `n`, `r`, `t` and `0` become newline, carriage
/// return, tab and NUL respectively. Any other character is returned as-is.
char escape_char(char ch)
{
    if (ch == 'n') {
        return '\n';
    }
    if (ch == 'r') {
        return '\r';
    }
    if (ch == 't') {
        return '\t';
    }
    if (ch == '0') {
        return '\0';
    }
    return ch;
}
}
auto Parser::parse_operand() -> std::unique_ptr<Expr> auto Parser::parse_operand() -> std::unique_ptr<Expr>
{ {
auto loc = current_loc(); auto loc = current_loc();
@ -263,21 +396,56 @@ auto Parser::parse_operand() -> std::unique_ptr<Expr>
} else { } else {
return std::make_unique<Expr>(loc, Expr::Ty::Ident, ident); return std::make_unique<Expr>(loc, Expr::Ty::Ident, ident);
} }
} else if (eat('.')) {
if (!test(TT::Ident)) {
error(current_loc(), "expected ')'");
return nullptr;
}
auto value
= std::string_view(m_tok.text.data() - 1, m_tok.text.size() + 1);
} else if (test(TT::Int)) { } else if (test(TT::Int)) {
auto text = std::string(m_tok.text); auto text = std::string(m_tok.text);
step();
auto value = std::strtol(text.c_str(), nullptr, 10); auto value = std::strtol(text.c_str(), nullptr, 10);
return std::make_unique<Expr>( return std::make_unique<Expr>(
loc, Expr::Ty::Int, static_cast<int>(value)); loc, Expr::Ty::Int, static_cast<int>(value));
} else if (test(TT::Bin)) { } else if (test(TT::Bin)) {
auto text = std::string(m_tok.text); auto text = std::string(m_tok.text);
step();
auto value = std::strtol(&text[2], nullptr, 2); auto value = std::strtol(&text[2], nullptr, 2);
return std::make_unique<Expr>( return std::make_unique<Expr>(
loc, Expr::Ty::Int, static_cast<int>(value)); loc, Expr::Ty::Int, static_cast<int>(value));
} else if (test(TT::Hex)) { } else if (test(TT::Hex)) {
auto text = std::string(m_tok.text); auto text = std::string(m_tok.text);
step();
auto value = std::strtol(&text[2], nullptr, 16); auto value = std::strtol(&text[2], nullptr, 16);
return std::make_unique<Expr>( return std::make_unique<Expr>(
loc, Expr::Ty::Int, static_cast<int>(value)); loc, Expr::Ty::Int, static_cast<int>(value));
} else if (test(TT::Char)) {
auto text = std::string(m_tok.text);
step();
int value = text.at(1) == '\\' ? escape_char(text.at(2)) : text.at(1);
return std::make_unique<Expr>(
loc, Expr::Ty::Int, static_cast<int>(value));
} else if (test(TT::Str)) {
auto text = std::string(m_tok.text);
step();
auto value = std::string();
size_t i = 1;
while (i < text.size() - 1) {
if (text.at(i) == '\\') {
i += 1;
value.push_back(escape_char(text.at(i)));
} else {
value.push_back(text.at(i));
}
i += 1;
}
return std::make_unique<Expr>(loc, Expr::Ty::Str, std::move(value));
} else if (eat('(')) { } else if (eat('(')) {
auto expr = parse_expr(); auto expr = parse_expr();
if (not eat(')')) { if (not eat(')')) {
@ -291,8 +459,31 @@ auto Parser::parse_operand() -> std::unique_ptr<Expr>
error(current_loc(), "expected ']'"); error(current_loc(), "expected ']'");
return nullptr; return nullptr;
} }
return std::make_unique<Expr>( return std::make_unique<Expr>(loc, Expr::Ty::Mem, std::move(expr));
loc, Expr::Ty::Indirection, std::move(expr)); } else if (test(TT::Ident) and m_tok.text == "byte") {
step();
if (not eat('[')) {
error(current_loc(), "expected '['");
return nullptr;
}
auto expr = parse_expr();
if (not eat(']')) {
error(current_loc(), "expected ']'");
return nullptr;
}
return std::make_unique<Expr>(loc, Expr::Ty::MemByte, std::move(expr));
} else if (test(TT::Ident) and m_tok.text == "word") {
step();
if (not eat('[')) {
error(current_loc(), "expected '['");
return nullptr;
}
auto expr = parse_expr();
if (not eat(']')) {
error(current_loc(), "expected ']'");
return nullptr;
}
return std::make_unique<Expr>(loc, Expr::Ty::MemWord, std::move(expr));
} else { } else {
error(current_loc(), "expected expression"); error(current_loc(), "expected expression");
return nullptr; return nullptr;
@ -300,6 +491,11 @@ auto Parser::parse_operand() -> std::unique_ptr<Expr>
} }
auto Parser::eat(int ty) -> bool auto Parser::eat(int ty) -> bool
{
return eat(static_cast<Tok::Ty>(ty));
}
auto Parser::eat(Tok::Ty ty) -> bool
{ {
if (test(ty)) { if (test(ty)) {
step(); step();
@ -314,6 +510,11 @@ void Parser::step()
} }
auto Parser::test(int ty) const -> bool auto Parser::test(int ty) const -> bool
{
return m_tok.ty == static_cast<Tok::Ty>(ty);
}
auto Parser::test(Tok::Ty ty) const -> bool
{ {
return m_tok.ty == ty; return m_tok.ty == ty;
} }

View File

@ -28,9 +28,13 @@ namespace asmer {
struct Expr { struct Expr {
enum class Ty { enum class Ty {
Ident, Ident,
SubLabel,
Reg, Reg,
Int, Int,
Indirection, Str,
Mem,
MemByte,
MemWord,
Negate, Negate,
Not, Not,
Or, Or,
@ -42,22 +46,40 @@ namespace asmer {
using Ptr = std::unique_ptr<Expr>; using Ptr = std::unique_ptr<Expr>;
using Binary = std::tuple<Ptr, Ptr>; using Binary = std::tuple<Ptr, Ptr>;
using Data = std::variant<std::string_view, Reg, int, Ptr, Binary>; using Data = std::
variant<std::string_view, std::string, Reg, int, Ptr, Binary>;
// clang-format off auto as_ident() const -> const std::string_view&
auto as_ident() -> std::string_view& { return std::get<std::string_view>(data); } {
auto as_reg() -> Reg& { return std::get<Reg>(data); } return std::get<std::string_view>(data);
auto as_int() -> int& { return std::get<int>(data); } }
auto as_unary() -> Ptr& { return std::get<Ptr>(data); } auto as_str() const -> const std::string&
auto as_binary() -> Binary& { return std::get<Binary>(data); } {
// clang-format on return std::get<std::string>(data);
}
auto as_reg() const -> Reg
{
return std::get<Reg>(data);
}
auto as_int() const -> int
{
return std::get<int>(data);
}
auto as_unary() const -> const Ptr&
{
return std::get<Ptr>(data);
}
auto as_binary() const -> const Binary&
{
return std::get<Binary>(data);
}
Loc loc; Loc loc;
Ty ty; Ty ty;
Data data; Data data;
}; };
struct Ins { struct Line {
using Labels = std::unique_ptr<std::vector<Label>>; using Labels = std::unique_ptr<std::vector<Label>>;
using Args = std::vector<std::unique_ptr<Expr>>; using Args = std::vector<std::unique_ptr<Expr>>;
@ -76,7 +98,7 @@ namespace asmer {
{ {
} }
auto parse_ins() -> std::unique_ptr<Ins>; auto parse_ins() -> std::unique_ptr<Line>;
auto ok() const -> bool auto ok() const -> bool
{ {
@ -90,9 +112,11 @@ namespace asmer {
auto parse_operand() -> std::unique_ptr<Expr>; auto parse_operand() -> std::unique_ptr<Expr>;
auto eat(int ty) -> bool; auto eat(int ty) -> bool;
auto eat(Tok::Ty ty) -> bool;
void step(); void step();
auto test(int ty) const -> bool; auto test(int ty) const -> bool;
auto test(Tok::Ty ty) const -> bool;
auto current_loc() const -> Loc; auto current_loc() const -> Loc;
void error(Loc loc, std::string_view message); void error(Loc loc, std::string_view message);
@ -103,6 +127,31 @@ namespace asmer {
bool m_error_occured = false; bool m_error_occured = false;
}; };
/// An operand after evaluation: its source location, a kind tag and the
/// payload for that kind.
///
/// The payload variant can hold a register, a 16-bit immediate or a string.
/// Which alternative accompanies the `Mem*` kinds is not fixed by this
/// struct — presumably the `*Imm` kinds carry a `uint16_t` and the `*Reg`
/// kinds a `Reg`; TODO confirm against the code that produces them.
///
/// Every accessor uses `std::get`, which throws `std::bad_variant_access`
/// when `data` holds a different alternative.
struct EvaledOperand {
    enum class Ty {
        Reg,
        Imm,
        Str,
        MemByteImm,
        MemByteReg,
        MemWordImm,
        MemWordReg,
    };

    auto as_reg() -> Reg&
    {
        return std::get<Reg>(data);
    }
    auto as_reg() const -> Reg
    {
        return std::get<Reg>(data);
    }
    auto as_imm() -> uint16_t&
    {
        return std::get<uint16_t>(data);
    }
    auto as_imm() const -> uint16_t
    {
        return std::get<uint16_t>(data);
    }
    /// Accessor for the string alternative, which previously had none.
    auto as_str() -> std::string&
    {
        return std::get<std::string>(data);
    }
    auto as_str() const -> const std::string&
    {
        return std::get<std::string>(data);
    }

    Loc loc;
    Ty ty;
    std::variant<Reg, uint16_t, std::string> data;
};
class Assembler { class Assembler {
public: public:
explicit Assembler(std::string_view text, Builder& builder) explicit Assembler(std::string_view text, Builder& builder)
@ -111,7 +160,11 @@ namespace asmer {
{ {
} }
void assemble_ins(Ins& ins); auto assemble_file(std::string_view text)
-> std::expected<std::vector<uint8_t>, std::string>;
void assemble_define_labels(const std::vector<Label>& labels);
void assemble_line(const Line& line);
auto ok() const -> bool auto ok() const -> bool
{ {
@ -119,10 +172,14 @@ namespace asmer {
} }
private: private:
void assemble_line(Ins& ins); auto eval_operand(const Expr& expr) -> std::unique_ptr<EvaledOperand>;
auto eval_operand_mem(const Expr& expr)
-> std::unique_ptr<EvaledOperand>;
auto eval_operand_to_imm(const Expr& expr)
-> std::unique_ptr<EvaledOperand>;
/// true means fail /// true means fail
bool arg_count_wrong(Ins& ins, size_t count); bool arg_count_wrong(const Line& ins, size_t count);
void error(Loc loc, std::string_view message); void error(Loc loc, std::string_view message);

View File

@ -71,13 +71,26 @@ public:
m_ip = ip; m_ip = ip;
} }
inline void push(uint16_t v) void push(uint16_t v)
{ {
m_data[m_ip] = v >> 8; m_data[m_ip] = v >> 8;
m_data[m_ip + 1] = v & 0xff; m_data[m_ip + 1] = v & 0xff;
m_ip += 2; m_ip += 2;
} }
/// Stores `v` at index `m_ip` of `m_data`, then advances `m_ip` by one.
void push_byte(uint8_t v)
{
    m_data[m_ip++] = v;
}
/// Bumps `m_ip` by one when it is odd; leaves it untouched when it is even.
void align_word()
{
    // `m_ip & 1` is 1 exactly when m_ip is odd, 0 otherwise.
    m_ip += m_ip & 1;
}
private: private:
void binary_reg(Reg dst, Reg op1, Reg op2, Op op); void binary_reg(Reg dst, Reg op1, Reg op2, Op op);
void binary_imm(Reg dst, Reg op1, uint16_t op2, Op op); void binary_imm(Reg dst, Reg op1, uint16_t op2, Op op);

View File

@ -6,11 +6,13 @@
using namespace vc5::tools; using namespace vc5::tools;
using namespace std::literals; using namespace std::literals;
using TT = Tok::Ty;
auto Scanner::next() -> Tok auto Scanner::next() -> Tok
{ {
auto loc = this->current_loc(); auto loc = this->current_loc();
if (done()) { if (done()) {
return tok(Tok::Eof, loc); return tok(TT::Eof, loc);
} }
if (test_in(" \t\r")) { if (test_in(" \t\r")) {
while (test_in(" \t\r")) { while (test_in(" \t\r")) {
@ -30,13 +32,13 @@ auto Scanner::next() -> Tok
or test_range('a', 'z')) { or test_range('a', 'z')) {
step(); step();
} }
return tok(Tok::Ident, loc); return tok(TT::Ident, loc);
} }
if (test_range('1', '9')) { if (test_range('1', '9')) {
while (test_range('0', '9')) { while (test_range('0', '9')) {
step(); step();
} }
return tok(Tok::Int, loc); return tok(TT::Int, loc);
} }
if (test('0')) { if (test('0')) {
step(); step();
@ -45,20 +47,66 @@ auto Scanner::next() -> Tok
while (test_in("01")) { while (test_in("01")) {
step(); step();
} }
return tok(Tok::Bin, loc); return tok(TT::Bin, loc);
} else if (test('x')) { } else if (test('x')) {
step(); step();
while (test_range('0', '9') or test_range('a', 'f') while (test_range('0', '9') or test_range('a', 'f')
or test_range('A', 'F')) { or test_range('A', 'F')) {
step(); step();
} }
return tok(Tok::Hex, loc); return tok(TT::Hex, loc);
} else { } else {
return tok(Tok::Int, loc); return tok(TT::Int, loc);
} }
} }
if (test('\'')) {
step();
if (done()) {
error(loc, "malformed literal");
return next();
}
if (test('\\')) {
step();
if (done()) {
error(loc, "malformed literal");
return next();
}
step();
} else {
if (test('\'')) {
error(loc, "malformed literal");
return next();
}
step();
}
if (done() or not test('\'')) {
error(loc, "malformed literal");
return next();
}
step();
return tok(TT::Char, loc);
}
if (test('"')) {
step();
while (not done() and not test('"')) {
if (test('\\')) {
step();
if (done()) {
error(loc, "malformed literal");
return next();
}
}
step();
}
if (done() or not test('"')) {
error(loc, "malformed literal");
return next();
}
step();
return tok(TT::Str, loc);
}
if (test_in("\n()[],:|^+-!")) { if (test_in("\n()[],:|^+-!")) {
auto ty = static_cast<Tok::Ty>(current()); auto ty = static_cast<TT>(current());
step(); step();
return tok(ty, loc); return tok(ty, loc);
} }
@ -83,7 +131,7 @@ void Scanner::step()
} }
} }
auto Scanner::tok(Tok::Ty ty, Loc loc) const -> Tok auto Scanner::tok(TT ty, Loc loc) const -> Tok
{ {
return Tok { m_text.substr(loc.idx, m_idx - loc.idx), loc, ty }; return Tok { m_text.substr(loc.idx, m_idx - loc.idx), loc, ty };
} }

View File

@ -14,12 +14,14 @@ struct Loc {
}; };
struct Tok { struct Tok {
enum Ty { enum class Ty {
Eof, Eof,
Ident, Ident,
Int, Int,
Hex, Hex,
Bin, Bin,
Char,
Str,
Newline = '\n', Newline = '\n',
LParen = '(', LParen = '(',
RParen = ')', RParen = ')',