Compare commits

...

2 Commits

Author SHA1 Message Date
c9d2fad4c8 add const keyword 2026-01-19 19:43:54 +01:00
2f99ef56d7 more assembler 2026-01-18 22:00:31 +01:00
6 changed files with 400 additions and 44 deletions

View File

@ -1,7 +1,7 @@
MAKEFLAGS += -j16
CXXFLAGS := -std=c++23 -Wall -Wextra -pedantic-errors -fsanitize=address,undefined
CXXFLAGS := -std=c++23 -Wall -Wextra -pedantic-errors -fsanitize=address
LDFLAGS :=
CXXFLAGS += $(shell pkgconf sdl2 --cflags)

View File

@ -13,6 +13,7 @@
#include <memory>
#include <string_view>
#include <unordered_map>
#include <utility>
#include <vector>
using namespace vc5::tools;
@ -22,6 +23,9 @@ namespace fs = std::filesystem;
using TT = Tok::Ty;
using EO = EvaledOperand;
using EOT = EO::Ty;
auto vc5::tools::assemble_file(fs::path input_path, fs::path output_path)
-> std::expected<void, std::string>
{
@ -49,6 +53,8 @@ auto vc5::tools::assemble_file(fs::path input_path, fs::path output_path)
auto assembler = Assembler(text, builder);
auto lines = std::vector<Line>();
while (true) {
auto ins = parser.parse_ins();
if (not ins) {
@ -75,10 +81,35 @@ auto vc5::tools::assemble_file(fs::path input_path, fs::path output_path)
return {};
}
void Assembler::assemble_ins(Ins& ins)
auto Assembler::assemble_file(std::string_view text)
-> std::expected<std::vector<uint8_t>, std::string>
{
if (ins.labels) {
for (const auto& label : *ins.labels) {
auto parser = Parser(text);
auto program = std::vector<uint8_t>(65536);
auto builder = Builder(program.data());
auto assembler = Assembler(text, builder);
auto lines = std::vector<Line>();
while (true) {
auto ins = parser.parse_ins();
if (not ins) {
break;
}
}
if (not parser.ok()) {
return std::unexpected("parsing failed");
}
return { std::move(program) };
}
void Assembler::assemble_define_labels(const std::vector<Label>& labels)
{
for (const auto& label : labels) {
if (label.is_local) {
m_local_labels[label.ident] = m_builder->ip();
} else {
@ -86,13 +117,13 @@ void Assembler::assemble_ins(Ins& ins)
m_global_labels[label.ident] = m_builder->ip();
}
}
}
assemble_line(ins);
}
void Assembler::assemble_line(Ins& ins)
void Assembler::assemble_line(const Line& ins)
{
if (ins.ident == "db") {
for (const auto& arg : ins.args) { }
}
if (ins.ident == "mov") {
if (arg_count_wrong(ins, 2))
return;
@ -102,7 +133,88 @@ void Assembler::assemble_line(Ins& ins)
std::format("instruction '{}' not supported/implemented", ins.ident));
}
bool Assembler::arg_count_wrong(Ins& ins, size_t count)
/// Evaluates a parsed operand expression into an `EvaledOperand`.
///
/// Register and string expressions are converted directly; integer literals
/// are delegated to `eval_operand_to_imm`. All other expression kinds are not
/// handled yet: an error is reported at the expression's location and
/// `nullptr` is returned.
///
/// Every case ends in an explicit `return` on purpose: falling through into
/// the `Reg`/`Str` cases would call `as_reg()`/`as_str()` on a variant that
/// holds a different alternative (which throws `std::bad_variant_access`),
/// and falling out of the switch would reach `std::unreachable()`.
auto Assembler::eval_operand(const Expr& expr) -> std::unique_ptr<EvaledOperand>
{
    const auto loc = expr.loc;
    switch (expr.ty) {
    case Expr::Ty::Reg:
        return std::make_unique<EO>(loc, EOT::Reg, expr.as_reg());
    case Expr::Ty::Int:
        return eval_operand_to_imm(expr);
    case Expr::Ty::Str:
        return std::make_unique<EO>(loc, EOT::Str, expr.as_str());
    case Expr::Ty::Ident:
    case Expr::Ty::SubLabel:
    case Expr::Ty::Mem:
    case Expr::Ty::MemByte:
    case Expr::Ty::MemWord:
    case Expr::Ty::Negate:
    case Expr::Ty::Not:
    case Expr::Ty::Or:
    case Expr::Ty::Xor:
    case Expr::Ty::And:
    case Expr::Ty::Add:
    case Expr::Ty::Sub:
        // Not evaluated yet; fail with a diagnostic instead of invoking
        // undefined behaviour.
        error(loc, "operand not supported/implemented");
        return nullptr;
    }
    // Only reachable if `expr.ty` holds a value outside the enumeration.
    std::unreachable();
}
/// Placeholder for evaluating memory operands.
///
/// NOTE(review): judging by the name and the `MemByte*`/`MemWord*` operand
/// kinds, this is meant to evaluate the expression inside `[...]` — confirm
/// once implemented.
///
/// No expression kind is handled yet. Instead of running into
/// `std::unreachable()` (undefined behaviour) on every call, an error is
/// reported at the expression's location and `nullptr` is returned. The
/// exhaustive case list is kept so the compiler can flag newly added
/// `Expr::Ty` values.
auto Assembler::eval_operand_mem(const Expr& expr)
    -> std::unique_ptr<EvaledOperand>
{
    switch (expr.ty) {
    case Expr::Ty::Ident:
    case Expr::Ty::SubLabel:
    case Expr::Ty::Reg:
    case Expr::Ty::Int:
    case Expr::Ty::Str:
    case Expr::Ty::Mem:
    case Expr::Ty::MemByte:
    case Expr::Ty::MemWord:
    case Expr::Ty::Negate:
    case Expr::Ty::Not:
    case Expr::Ty::Or:
    case Expr::Ty::Xor:
    case Expr::Ty::And:
    case Expr::Ty::Add:
    case Expr::Ty::Sub:
        error(expr.loc, "memory operand not supported/implemented");
        return nullptr;
    }
    // Only reachable if `expr.ty` holds a value outside the enumeration.
    std::unreachable();
}
/// Evaluates an expression that must reduce to an immediate value.
///
/// Integer literals become an `Imm` operand, truncated to 16 bits. Registers,
/// strings and memory indirections are rejected with a specific error.
/// Identifiers, sub-labels and the unary/binary operators are not evaluated
/// yet and are rejected with a generic "not implemented" error.
///
/// Returns `nullptr` whenever an error was reported.
auto Assembler::eval_operand_to_imm(const Expr& expr)
    -> std::unique_ptr<EvaledOperand>
{
    switch (expr.ty) {
    case Expr::Ty::Int:
        return std::make_unique<EO>(
            expr.loc, EOT::Imm, static_cast<uint16_t>(expr.as_int()));
    case Expr::Ty::Reg:
        error(expr.loc, "registers cannot be part of an expression");
        // Must return here: falling into the Int case would call `as_int()`
        // on a variant that does not hold an int.
        return nullptr;
    case Expr::Ty::Str:
        error(expr.loc, "strings cannot be part of an expression");
        return nullptr;
    case Expr::Ty::Mem:
    case Expr::Ty::MemByte:
    case Expr::Ty::MemWord:
        error(expr.loc, "indirections cannot be part of an expression");
        return nullptr;
    case Expr::Ty::Ident:
    case Expr::Ty::SubLabel:
    case Expr::Ty::Negate:
    case Expr::Ty::Not:
    case Expr::Ty::Or:
    case Expr::Ty::Xor:
    case Expr::Ty::And:
    case Expr::Ty::Add:
    case Expr::Ty::Sub:
        // Not evaluated yet; fail with a diagnostic instead of reaching
        // std::unreachable().
        error(expr.loc, "expression not supported/implemented");
        return nullptr;
    }
    // Only reachable if `expr.ty` holds a value outside the enumeration.
    std::unreachable();
}
bool Assembler::arg_count_wrong(const Line& ins, size_t count)
{
if (ins.args.size() != count) {
error(ins.loc, std::format("expected {} operands", count));
@ -117,11 +229,11 @@ void Assembler::error(Loc loc, std::string_view message)
loc.print_error(m_text, message);
}
auto Parser::parse_ins() -> std::unique_ptr<Ins>
auto Parser::parse_ins() -> std::unique_ptr<Line>
{
auto loc = current_loc();
auto labels = Ins::Labels(nullptr);
auto labels = Line::Labels(nullptr);
auto ident = std::string_view();
@ -158,7 +270,7 @@ auto Parser::parse_ins() -> std::unique_ptr<Ins>
}
}
auto args = Ins::Args();
auto args = Line::Args();
auto first = true;
while (not test(TT::Eof) and not eat('\n')) {
@ -179,7 +291,7 @@ auto Parser::parse_ins() -> std::unique_ptr<Ins>
error(current_loc(), "expected line ending");
}
return std::make_unique<Ins>(
return std::make_unique<Line>(
loc, std::move(labels), ident, std::move(args));
}
@ -238,7 +350,9 @@ auto Parser::parse_prefix() -> std::unique_ptr<Expr>
}
}
static const auto reg_idents = std::unordered_map<std::string_view, vc5::Reg> {
namespace {
const auto reg_idents = std::unordered_map<std::string_view, vc5::Reg> {
{ "R0", vc5::Reg::R0 },
{ "R1", vc5::Reg::R1 },
{ "R2", vc5::Reg::R2 },
@ -251,6 +365,8 @@ static const auto reg_idents = std::unordered_map<std::string_view, vc5::Reg> {
{ "Rip", vc5::Reg::Rip },
};
}
auto Parser::parse_operand() -> std::unique_ptr<Expr>
{
auto loc = current_loc();
@ -263,21 +379,47 @@ auto Parser::parse_operand() -> std::unique_ptr<Expr>
} else {
return std::make_unique<Expr>(loc, Expr::Ty::Ident, ident);
}
} else if (eat('.')) {
if (!test(TT::Ident)) {
error(current_loc(), "expected ')'");
return nullptr;
}
auto value
= std::string_view(m_tok.text.data() - 1, m_tok.text.size() + 1);
} else if (test(TT::Int)) {
auto text = std::string(m_tok.text);
step();
auto value = std::strtol(text.c_str(), nullptr, 10);
return std::make_unique<Expr>(
loc, Expr::Ty::Int, static_cast<int>(value));
} else if (test(TT::Bin)) {
auto text = std::string(m_tok.text);
step();
auto value = std::strtol(&text[2], nullptr, 2);
return std::make_unique<Expr>(
loc, Expr::Ty::Int, static_cast<int>(value));
} else if (test(TT::Hex)) {
auto text = std::string(m_tok.text);
step();
auto value = std::strtol(&text[2], nullptr, 16);
return std::make_unique<Expr>(
loc, Expr::Ty::Int, static_cast<int>(value));
} else if (test(TT::Char)) {
auto text = std::string(m_tok.text);
step();
int value = text.at(1) == '\\' ? unescape_escape_char(text.at(2))
: text.at(1);
return std::make_unique<Expr>(
loc, Expr::Ty::Int, static_cast<int>(value));
} else if (test(TT::Str)) {
auto text = std::string(m_tok.text);
step();
auto value = unescape_string(text.substr(1, text.size() - 2));
return std::make_unique<Expr>(loc, Expr::Ty::Str, std::move(value));
} else if (eat('(')) {
auto expr = parse_expr();
if (not eat(')')) {
@ -291,8 +433,31 @@ auto Parser::parse_operand() -> std::unique_ptr<Expr>
error(current_loc(), "expected ']'");
return nullptr;
}
return std::make_unique<Expr>(
loc, Expr::Ty::Indirection, std::move(expr));
return std::make_unique<Expr>(loc, Expr::Ty::Mem, std::move(expr));
} else if (test(TT::Ident) and m_tok.text == "byte") {
step();
if (not eat('[')) {
error(current_loc(), "expected '['");
return nullptr;
}
auto expr = parse_expr();
if (not eat(']')) {
error(current_loc(), "expected ']'");
return nullptr;
}
return std::make_unique<Expr>(loc, Expr::Ty::MemByte, std::move(expr));
} else if (test(TT::Ident) and m_tok.text == "word") {
step();
if (not eat('[')) {
error(current_loc(), "expected '['");
return nullptr;
}
auto expr = parse_expr();
if (not eat(']')) {
error(current_loc(), "expected ']'");
return nullptr;
}
return std::make_unique<Expr>(loc, Expr::Ty::MemWord, std::move(expr));
} else {
error(current_loc(), "expected expression");
return nullptr;
@ -300,6 +465,11 @@ auto Parser::parse_operand() -> std::unique_ptr<Expr>
}
auto Parser::eat(int ty) -> bool
{
return eat(static_cast<Tok::Ty>(ty));
}
auto Parser::eat(Tok::Ty ty) -> bool
{
if (test(ty)) {
step();
@ -314,6 +484,11 @@ void Parser::step()
}
/// Overload of `test` taking a raw integer token code (e.g. a character
/// literal); reports whether the current token has that type.
auto Parser::test(int ty) const -> bool
{
    const auto kind = static_cast<Tok::Ty>(ty);
    return test(kind);
}
/// Reports whether the current token is of type `ty`; does not consume it.
auto Parser::test(Tok::Ty ty) const -> bool
{
    const bool matches = (ty == m_tok.ty);
    return matches;
}

View File

@ -28,9 +28,13 @@ namespace asmer {
struct Expr {
enum class Ty {
Ident,
SubLabel,
Reg,
Int,
Indirection,
Str,
Mem,
MemByte,
MemWord,
Negate,
Not,
Or,
@ -42,22 +46,40 @@ namespace asmer {
using Ptr = std::unique_ptr<Expr>;
using Binary = std::tuple<Ptr, Ptr>;
using Data = std::variant<std::string_view, Reg, int, Ptr, Binary>;
using Data = std::
variant<std::string_view, std::string, Reg, int, Ptr, Binary>;
// clang-format off
auto as_ident() -> std::string_view& { return std::get<std::string_view>(data); }
auto as_reg() -> Reg& { return std::get<Reg>(data); }
auto as_int() -> int& { return std::get<int>(data); }
auto as_unary() -> Ptr& { return std::get<Ptr>(data); }
auto as_binary() -> Binary& { return std::get<Binary>(data); }
// clang-format on
auto as_ident() const -> const std::string_view&
{
return std::get<std::string_view>(data);
}
auto as_str() const -> const std::string&
{
return std::get<std::string>(data);
}
auto as_reg() const -> Reg
{
return std::get<Reg>(data);
}
auto as_int() const -> int
{
return std::get<int>(data);
}
auto as_unary() const -> const Ptr&
{
return std::get<Ptr>(data);
}
auto as_binary() const -> const Binary&
{
return std::get<Binary>(data);
}
Loc loc;
Ty ty;
Data data;
};
struct Ins {
struct Line {
using Labels = std::unique_ptr<std::vector<Label>>;
using Args = std::vector<std::unique_ptr<Expr>>;
@ -67,6 +89,18 @@ namespace asmer {
Args args;
};
/// Node holding a source location, a name and an owned expression.
/// NOTE(review): presumably the parsed form of a `const` definition (the
/// scanner has a `KwConst` token); nothing visible here constructs it yet —
/// confirm.
struct Const {
// Source location associated with this node.
Loc loc;
// Name; a non-owning view, so the viewed text must outlive this node.
std::string_view ident;
// Owned expression attached to the name.
std::unique_ptr<Expr> expr;
};
/// Node holding a source location, a name and an owned expression.
/// NOTE(review): presumably the parsed form of an alignment directive;
/// nothing visible here constructs it yet — confirm.
struct Align {
// Source location associated with this node.
Loc loc;
// NOTE(review): same field as in `Const`; unclear whether an alignment
// directive needs a name — confirm. Non-owning view.
std::string_view ident;
// Owned expression attached to this node.
std::unique_ptr<Expr> expr;
};
class Parser {
public:
explicit Parser(std::string_view text)
@ -76,7 +110,7 @@ namespace asmer {
{
}
auto parse_ins() -> std::unique_ptr<Ins>;
auto parse_ins() -> std::unique_ptr<Line>;
auto ok() const -> bool
{
@ -90,9 +124,11 @@ namespace asmer {
auto parse_operand() -> std::unique_ptr<Expr>;
auto eat(int ty) -> bool;
auto eat(Tok::Ty ty) -> bool;
void step();
auto test(int ty) const -> bool;
auto test(Tok::Ty ty) const -> bool;
auto current_loc() const -> Loc;
void error(Loc loc, std::string_view message);
@ -103,6 +139,31 @@ namespace asmer {
bool m_error_occured = false;
};
/// Result of evaluating an operand expression.
///
/// `ty` tags the operand kind and `data` carries the payload. The assembler
/// stores a `Reg` for `Ty::Reg`, a `uint16_t` for `Ty::Imm` and a
/// `std::string` for `Ty::Str`.
/// NOTE(review): the payload used by the `Mem*Imm`/`Mem*Reg` kinds is not
/// established by any visible code — presumably `uint16_t`/`Reg`; confirm.
///
/// The accessors use `std::get`, so calling one that does not match the
/// stored alternative throws `std::bad_variant_access`.
struct EvaledOperand {
    enum class Ty {
        Reg,
        Imm,
        Str,
        MemByteImm,
        MemByteReg,
        MemWordImm,
        MemWordReg,
    };

    /// Mutable access to the register payload.
    auto as_reg() -> Reg&
    {
        return std::get<Reg>(data);
    }
    /// Read-only access to the register payload.
    auto as_reg() const -> Reg
    {
        return std::get<Reg>(data);
    }

    /// Mutable access to the immediate payload.
    auto as_imm() -> uint16_t&
    {
        return std::get<uint16_t>(data);
    }
    /// Read-only access to the immediate payload.
    auto as_imm() const -> uint16_t
    {
        return std::get<uint16_t>(data);
    }

    /// Read-only access to the string payload.
    auto as_str() const -> const std::string&
    {
        return std::get<std::string>(data);
    }

    Loc loc;
    Ty ty;
    std::variant<Reg, uint16_t, std::string> data;
};
class Assembler {
public:
explicit Assembler(std::string_view text, Builder& builder)
@ -111,7 +172,11 @@ namespace asmer {
{
}
void assemble_ins(Ins& ins);
auto assemble_file(std::string_view text)
-> std::expected<std::vector<uint8_t>, std::string>;
void assemble_define_labels(const std::vector<Label>& labels);
void assemble_line(const Line& line);
auto ok() const -> bool
{
@ -119,10 +184,14 @@ namespace asmer {
}
private:
void assemble_line(Ins& ins);
auto eval_operand(const Expr& expr) -> std::unique_ptr<EvaledOperand>;
auto eval_operand_mem(const Expr& expr)
-> std::unique_ptr<EvaledOperand>;
auto eval_operand_to_imm(const Expr& expr)
-> std::unique_ptr<EvaledOperand>;
/// true means fail
bool arg_count_wrong(Ins& ins, size_t count);
bool arg_count_wrong(const Line& ins, size_t count);
void error(Loc loc, std::string_view message);

View File

@ -71,13 +71,26 @@ public:
m_ip = ip;
}
inline void push(uint16_t v)
void push(uint16_t v)
{
m_data[m_ip] = v >> 8;
m_data[m_ip + 1] = v & 0xff;
m_ip += 2;
}
/// Writes the single byte `v` at the current position and advances the
/// position by one.
void push_byte(uint8_t v)
{
    m_data[m_ip++] = v;
}
/// Advances the current position by one when it is odd, so that it ends up
/// even; nothing is written to the skipped byte.
void align_word()
{
    const bool is_odd = (m_ip & 1) != 0;
    if (not is_odd) {
        return;
    }
    ++m_ip;
}
private:
void binary_reg(Reg dst, Reg op1, Reg op2, Op op);
void binary_imm(Reg dst, Reg op1, uint16_t op2, Op op);

View File

@ -1,16 +1,20 @@
#include "scanner.hpp"
#include <cassert>
#include <print>
#include <string>
#include <string_view>
#include <unordered_map>
using namespace vc5::tools;
using namespace std::literals;
using TT = Tok::Ty;
auto Scanner::next() -> Tok
{
auto loc = this->current_loc();
if (done()) {
return tok(Tok::Eof, loc);
return tok(TT::Eof, loc);
}
if (test_in(" \t\r")) {
while (test_in(" \t\r")) {
@ -30,13 +34,24 @@ auto Scanner::next() -> Tok
or test_range('a', 'z')) {
step();
}
return tok(Tok::Ident, loc);
auto ident_tok = tok(TT::Ident, loc);
static const auto keywords = std::unordered_map<std::string_view, TT> {
{ "const", TT::KwConst },
};
if (keywords.contains(ident_tok.text)) {
return tok(keywords.at(ident_tok.text), loc);
}
return ident_tok;
}
if (test_range('1', '9')) {
while (test_range('0', '9')) {
step();
}
return tok(Tok::Int, loc);
return tok(TT::Int, loc);
}
if (test('0')) {
step();
@ -45,20 +60,66 @@ auto Scanner::next() -> Tok
while (test_in("01")) {
step();
}
return tok(Tok::Bin, loc);
return tok(TT::Bin, loc);
} else if (test('x')) {
step();
while (test_range('0', '9') or test_range('a', 'f')
or test_range('A', 'F')) {
step();
}
return tok(Tok::Hex, loc);
return tok(TT::Hex, loc);
} else {
return tok(Tok::Int, loc);
return tok(TT::Int, loc);
}
}
if (test('\'')) {
step();
if (done()) {
error(loc, "malformed literal");
return next();
}
if (test('\\')) {
step();
if (done()) {
error(loc, "malformed literal");
return next();
}
step();
} else {
if (test('\'')) {
error(loc, "malformed literal");
return next();
}
step();
}
if (done() or not test('\'')) {
error(loc, "malformed literal");
return next();
}
step();
return tok(TT::Char, loc);
}
if (test('"')) {
step();
while (not done() and not test('"')) {
if (test('\\')) {
step();
if (done()) {
error(loc, "malformed literal");
return next();
}
}
step();
}
if (done() or not test('"')) {
error(loc, "malformed literal");
return next();
}
step();
return tok(TT::Str, loc);
}
if (test_in("\n()[],:|^+-!")) {
auto ty = static_cast<Tok::Ty>(current());
auto ty = static_cast<TT>(current());
step();
return tok(ty, loc);
}
@ -83,7 +144,7 @@ void Scanner::step()
}
}
auto Scanner::tok(Tok::Ty ty, Loc loc) const -> Tok
auto Scanner::tok(TT ty, Loc loc) const -> Tok
{
return Tok { m_text.substr(loc.idx, m_idx - loc.idx), loc, ty };
}
@ -184,3 +245,35 @@ void Loc::print_error(std::string_view text, std::string_view message) const
message,
clear);
}
/// Maps the character following a backslash to the character it denotes:
/// `n`, `r`, `t` and `0` become newline, carriage return, tab and NUL.
/// Any other character stands for itself.
auto vc5::tools::unescape_escape_char(char ch) -> char
{
    if (ch == 'n') {
        return '\n';
    }
    if (ch == 'r') {
        return '\r';
    }
    if (ch == 't') {
        return '\t';
    }
    if (ch == '0') {
        return '\0';
    }
    return ch;
}
/// Returns a copy of `value` with backslash escape sequences resolved.
///
/// Each backslash together with the character after it is replaced by the
/// result of `unescape_escape_char` for that character. A backslash that is
/// the very last character has nothing to escape and is copied unchanged.
auto vc5::tools::unescape_string(std::string_view value) -> std::string
{
    auto result = std::string();
    result.reserve(value.size());
    size_t i = 0;
    while (i < value.size()) {
        // Inspect the character at the current position (not the first
        // one), and only treat it as an escape when a character follows so
        // the read below stays in bounds.
        const bool is_escape = value[i] == '\\' and i + 1 < value.size();
        if (is_escape) {
            i += 1;
            result += unescape_escape_char(value[i]);
        } else {
            result += value[i];
        }
        i += 1;
    }
    return result;
}

View File

@ -14,12 +14,15 @@ struct Loc {
};
struct Tok {
enum Ty {
enum class Ty {
Eof,
Ident,
Int,
Hex,
Bin,
Char,
Str,
KwConst,
Newline = '\n',
LParen = '(',
RParen = ')',
@ -74,4 +77,7 @@ private:
bool m_error_occured = false;
};
auto unescape_escape_char(char ch) -> char;
auto unescape_string(std::string_view value) -> std::string;
}