assemble operands and instructions

This commit is contained in:
sfja 2026-01-21 22:13:57 +01:00
parent c9d2fad4c8
commit 8cfcc54e12
4 changed files with 600 additions and 88 deletions

View File

@ -19,6 +19,7 @@
using namespace vc5::tools;
using namespace vc5::tools::asmer;
using namespace std::literals;
namespace fs = std::filesystem;
using TT = Tok::Ty;
@ -33,6 +34,7 @@ auto vc5::tools::assemble_file(fs::path input_path, fs::path output_path)
{
auto input_file = std::ifstream(input_path, std::ios_base::binary);
if (not input_file) {
return std::unexpected(
std::format("could not open file '{}' for reading",
@ -46,28 +48,14 @@ auto vc5::tools::assemble_file(fs::path input_path, fs::path output_path)
input_file.read(&text[0], size);
}
auto parser = Parser(text);
auto assembler = Assembler(text);
auto result = assembler.assemble_file();
auto program = std::vector<uint8_t>(65536);
auto builder = Builder(program.data());
auto assembler = Assembler(text, builder);
auto lines = std::vector<Line>();
while (true) {
auto ins = parser.parse_ins();
if (not ins) {
break;
}
}
if (not parser.ok()) {
return std::unexpected("parsing failed");
}
auto& program = *result;
{
auto output_file = std::ofstream(output_path, std::ios_base::binary);
if (not output_file) {
return std::unexpected(
std::format("could not open file '{}' for writing",
@ -81,22 +69,75 @@ auto vc5::tools::assemble_file(fs::path input_path, fs::path output_path)
return {};
}
auto Assembler::assemble_file(std::string_view text)
auto Assembler::assemble_file()
-> std::expected<std::vector<uint8_t>, std::string>
{
auto parser = Parser(text);
auto program = std::vector<uint8_t>(65536);
auto builder = Builder(program.data());
auto assembler = Assembler(text, builder);
auto lines = std::vector<Line>();
auto parser = Parser(m_text);
auto lines = std::vector<std::unique_ptr<Line>>();
while (true) {
auto ins = parser.parse_ins();
if (not ins) {
break;
if (parser.next_is_const()) {
auto stmt = parser.parse_const();
if (not stmt) {
return std::unexpected("parsing failed");
}
auto value = eval_operand_to_imm(*stmt->expr);
if (not value) {
return std::unexpected("assembling failed");
}
assert(value->ty == EOT::Imm);
m_syms[stmt->ident] = value->as_imm();
} else if (parser.next_is_align()) {
auto stmt = parser.parse_align();
if (not stmt) {
return std::unexpected("parsing failed");
}
auto value = eval_operand_to_imm(*stmt->expr);
if (not value) {
return std::unexpected("assembling failed");
}
assert(value->ty == EOT::Imm);
uint16_t bits = value->as_imm();
if (bits == 0 or bits % 8 != 0) {
error(stmt->loc, std::format("cannot align to {}", bits));
return std::unexpected("assembling failed");
}
uint16_t align_bit = bits / 8;
uint16_t align_mask = align_bit - 1;
uint16_t ip = m_builder.ip();
if ((ip & align_mask) != 0) {
ip &= ~align_mask;
ip += align_bit;
}
uint16_t delta = ip - m_builder.ip();
if (delta == 0)
continue;
auto ops = Line::Args();
for (uint16_t i = 0; i < delta; ++i) {
auto op = std::make_unique<Expr>(stmt->loc, Expr::Ty::Int, 0);
ops.push_back(std::move(op));
}
lines.push_back(std::make_unique<Line>(
stmt->loc, nullptr, "db", std::move(ops)));
} else {
auto line = parser.parse_line();
if (not line) {
break;
}
lines.push_back(std::move(line));
}
}
@ -104,59 +145,268 @@ auto Assembler::assemble_file(std::string_view text)
return std::unexpected("parsing failed");
}
return { std::move(program) };
m_builder.set_ip(0);
m_second_pass = true;
return std::move(m_program);
}
/// Binds every label in `labels` to the builder's current instruction
/// pointer (`ip()`).
///
/// A label with `is_local` set is stored in a sub-label table. Any other
/// label first clears the sub-label table and is then stored in the
/// symbol table.
///
/// NOTE(review): each assignment and each `clear()` appears twice in this
/// span, once against `m_local_labels`/`m_global_labels` through
/// `m_builder->` and once against `m_sublabels`/`m_syms` through
/// `m_builder.`. This looks like two revisions overlaid; confirm which pair
/// is live against the real file.
void Assembler::assemble_define_labels(const std::vector<Label>& labels)
{
for (const auto& label : labels) {
if (label.is_local) {
m_local_labels[label.ident] = m_builder->ip();
m_sublabels[label.ident] = m_builder.ip();
} else {
// A non-local label empties the sub-label table before it is recorded.
m_local_labels.clear();
m_global_labels[label.ident] = m_builder->ip();
m_sublabels.clear();
m_syms[label.ident] = m_builder.ip();
}
}
}
void Assembler::assemble_line(const Line& ins)
{
if (ins.ident == "db") {
for (const auto& arg : ins.args) { }
}
if (ins.ident == "mov") {
if (arg_count_wrong(ins, 2))
return;
}
namespace {
error(ins.loc,
std::format("instruction '{}' not supported/implemented", ins.ident));
/// Every mnemonic the assembler knows about: the two data directives
/// (`db`, `dw`) followed by the machine instructions.
enum class Mnemonic {
    // clang-format off
    db,     dw,     nop,
    hlt,    jmp,    jnz,
    mov,    cmp,
    or_,    and_,   xor_,
    shl,    rshl,   shr,
    rshr,   add,    sub,
    rsub,   reti,   lvcd,
    lkbd,   dskr,   dskw,
    // clang-format on
};

using M = Mnemonic;

/// Maps a mnemonic as it is spelled in assembly source to its `Mnemonic`.
///
/// The enumerators `or_`, `and_` and `xor_` carry a trailing underscore only
/// because `or`, `and` and `xor` are reserved alternative tokens in C++; the
/// keys here use the plain source spelling so that those instructions can be
/// looked up at all.
static const auto mnemonic_map
    = std::unordered_map<std::string_view, Mnemonic> {
          // clang-format off
          { "db",   M::db },    { "dw",   M::dw },
          { "nop",  M::nop },   { "hlt",  M::hlt },
          { "jmp",  M::jmp },   { "jnz",  M::jnz },
          { "mov",  M::mov },   { "cmp",  M::cmp },
          { "or",   M::or_ },   { "and",  M::and_ },
          { "xor",  M::xor_ },  { "shl",  M::shl },
          { "rshl", M::rshl },  { "shr",  M::shr },
          { "rshr", M::rshr },  { "add",  M::add },
          { "sub",  M::sub },   { "rsub", M::rsub },
          { "reti", M::reti },  { "lvcd", M::lvcd },
          { "lkbd", M::lkbd },  { "dskr", M::dskr },
          { "dskw", M::dskw },
          // clang-format on
      };
}
/// Assembles a single parsed line by emitting its encoding through
/// `m_builder`.
///
/// The mnemonic is resolved through `mnemonic_map`, each operand is evaluated
/// with `eval_operand`, and the evaluated operand kinds select which builder
/// call is made. Every problem is reported through `error()` and stops
/// processing of the line at that point (bytes already pushed by `db`/`dw`
/// stay pushed).
///
/// @param line  The line to assemble; `line.ident` is the mnemonic and
///              `line.args` holds the operand expressions.
void Assembler::assemble_line(const Line& line)
{
    auto loc = line.loc;

    constexpr auto Reg = EOT::Reg;
    constexpr auto Imm = EOT::Imm;
    constexpr auto Str = EOT::Str;
    constexpr auto MemByteImm = EOT::MemByteImm;
    constexpr auto MemByteReg = EOT::MemByteReg;
    constexpr auto MemWordImm = EOT::MemWordImm;
    constexpr auto MemWordReg = EOT::MemWordReg;

    auto& l = m_builder;

    // Single lookup instead of contains() followed by at().
    const auto entry = mnemonic_map.find(line.ident);
    if (entry == mnemonic_map.end()) {
        error(
            line.loc, std::format("unrecognized mnemonic \"{}\"", line.ident));
        return;
    }
    const auto m = entry->second;

    auto operand_not_supported
        = [&]() { error(loc, "operand not supported"); };

    // Used for mnemonics that are recognised but have no encoding emitted
    // here; staying silent would drop the instruction from the output.
    auto not_implemented = [&]() {
        error(loc,
            std::format(
                "instruction '{}' not supported/implemented", line.ident));
    };

    switch (m) {
        case M::db:
            for (const auto& arg : line.args) {
                auto op = eval_operand(*arg);
                if (not op)
                    return;
                if (op->ty == Imm) {
                    if (op->as_imm() >= 256) {
                        error(arg->loc, "exceeds capacity of a byte");
                        return;
                    }
                    l.push_byte(op->as_imm() & 0xff);
                } else if (op->ty == Str) {
                    for (char ch : op->as_str()) {
                        l.push_byte(static_cast<uint8_t>(ch));
                    }
                } else {
                    error(arg->loc, "operand not supported");
                    return;
                }
            }
            break;
        case M::dw:
            for (const auto& arg : line.args) {
                auto op = eval_operand(*arg);
                if (not op)
                    return;
                if (op->ty == Imm) {
                    l.push(op->as_imm());
                } else {
                    error(arg->loc, "operand not supported");
                    return;
                }
            }
            break;
        case M::nop:
            l.nop();
            break;
        case M::hlt:
            l.hlt();
            break;
        case M::jmp: {
            if (arg_count_wrong(line, 1))
                return;
            auto op = eval_operand(*line.args[0]);
            // A null operand means evaluation failed; do not dereference it.
            if (not op)
                return;
            if (op->is_reg()) {
                l.jmp_reg(op->as_reg());
            } else if (op->is_imm()) {
                l.jmp_imm(op->as_imm());
            } else {
                operand_not_supported();
            }
            break;
        }
        case M::jnz: {
            // jnz reads two operands (condition register, target), so the
            // count check has to cover both before args[1] is touched.
            if (arg_count_wrong(line, 2))
                return;
            auto op1 = eval_operand(*line.args[0]);
            auto op2 = eval_operand(*line.args[1]);
            if (not op1 or not op2)
                return;
            if (not op1->is_reg()) {
                // Must stop here: as_reg() below is only valid on a register.
                operand_not_supported();
                return;
            }
            if (op2->is_reg()) {
                l.jnz_reg(op1->as_reg(), op2->as_reg());
            } else if (op2->is_imm()) {
                l.jnz_imm(op1->as_reg(), op2->as_imm());
            } else {
                operand_not_supported();
            }
            break;
        }
        case M::mov: {
            if (arg_count_wrong(line, 2))
                return;
            auto dst = eval_operand(*line.args[0]);
            auto src = eval_operand(*line.args[1]);
            if (not dst or not src)
                return;

            auto dst_ty = dst->ty;
            auto src_ty = src->ty;

            if (dst_ty == Reg and src_ty == Reg) {
                l.mov_reg(dst->as_reg(), src->as_reg());
            } else if (dst_ty == Reg and src_ty == Imm) {
                l.mov_imm(dst->as_reg(), src->as_imm());
            } else if (dst_ty == Reg and src_ty == MemWordReg) {
                auto [reg, offset] = src->as_reg_imm_pair();
                l.load_word_reg(dst->as_reg(), reg, offset);
            } else if (dst_ty == Reg and src_ty == MemWordImm) {
                l.load_word_imm(dst->as_reg(), src->as_imm());
            } else if (dst_ty == MemWordReg and src_ty == Reg) {
                auto [reg, offset] = dst->as_reg_imm_pair();
                l.store_word_reg(reg, offset, src->as_reg());
            } else if (dst_ty == MemWordImm and src_ty == Reg) {
                l.store_word_imm(dst->as_imm(), src->as_reg());
            } else if (dst_ty == Reg and src_ty == MemByteReg) {
                auto [reg, offset] = src->as_reg_imm_pair();
                l.load_byte_reg(dst->as_reg(), reg, offset);
            } else if (dst_ty == Reg and src_ty == MemByteImm) {
                l.load_byte_imm(dst->as_reg(), src->as_imm());
            } else if (dst_ty == MemByteReg and src_ty == Reg) {
                auto [reg, offset] = dst->as_reg_imm_pair();
                l.store_byte_reg(reg, offset, src->as_reg());
            } else if (dst_ty == MemByteImm and src_ty == Reg) {
                l.store_byte_imm(dst->as_imm(), src->as_reg());
            } else {
                operand_not_supported();
            }
            break;
        }
        case M::cmp: {
            if (arg_count_wrong(line, 2))
                return;
            auto op1 = eval_operand(*line.args[0]);
            auto op2 = eval_operand(*line.args[1]);
            if (not op1 or not op2)
                return;
            if (not op2->is_reg() and not op2->is_imm()) {
                operand_not_supported();
                return;
            }
            // TODO: emit the register/immediate encodings. Until then the
            // instruction is reported rather than silently omitted.
            not_implemented();
            break;
        }
        case M::or_:
        case M::and_:
        case M::xor_:
        case M::shl:
        case M::rshl:
        case M::shr:
        case M::rshr:
        case M::add:
        case M::sub:
        case M::rsub:
        case M::reti:
        case M::lvcd:
        case M::lkbd:
        case M::dskr:
        case M::dskw:
            // Recognised mnemonics without an encoding yet.
            not_implemented();
            break;
    }
}
/// Evaluates an operand expression into an `EvaledOperand`, dispatching on
/// `expr.ty`.
///
/// NOTE(review): several case labels occur twice in this span (`Ident`,
/// `SubLabel`, `Int`, `MemByte`) and both `std::unreachable()` and
/// `assert(false ...)` close the function. This looks like two revisions
/// overlaid; confirm which labels are live against the real file before
/// relying on the notes below.
auto Assembler::eval_operand(const Expr& expr) -> std::unique_ptr<EvaledOperand>
{
auto loc = expr.loc;
switch (expr.ty) {
case Expr::Ty::Ident:
case Expr::Ty::SubLabel:
case Expr::Ty::Reg:
return std::make_unique<EO>(loc, EOT::Reg, expr.as_reg());
case Expr::Ty::Int:
case Expr::Ty::Str:
return std::make_unique<EO>(loc, EOT::Str, expr.as_str());
case Expr::Ty::Mem:
case Expr::Ty::MemByte:
case Expr::Ty::MemWord:
return eval_operand_mem(expr);
case Expr::Ty::MemByte: {
// A byte access is evaluated like any memory operand, then a word-typed
// result is retagged as the matching byte-typed kind.
// NOTE(review): `evaled` is dereferenced without a null check.
auto evaled = eval_operand_mem(expr);
switch (evaled->ty) {
case EOT::MemWordImm:
evaled->ty = EOT::MemByteImm;
break;
case EOT::MemWordReg:
evaled->ty = EOT::MemByteReg;
break;
default:
break;
}
return evaled;
}
// NOTE(review): the labels below only `break`, so control leaves the switch
// and reaches the unreachable/assert lines without any value being returned.
case Expr::Ty::Int:
case Expr::Ty::Ident:
case Expr::Ty::SubLabel:
case Expr::Ty::Negate:
case Expr::Ty::Not:
case Expr::Ty::Or:
case Expr::Ty::Xor:
case Expr::Ty::And:
case Expr::Ty::Shl:
case Expr::Ty::Shr:
case Expr::Ty::Add:
case Expr::Ty::Sub:
break;
}
std::unreachable();
assert(false && "unexhaustive");
}
auto Assembler::eval_operand_mem(const Expr& expr)
@ -165,53 +415,201 @@ auto Assembler::eval_operand_mem(const Expr& expr)
switch (expr.ty) {
case Expr::Ty::Ident:
case Expr::Ty::SubLabel:
case Expr::Ty::Reg:
case Expr::Ty::Int:
case Expr::Ty::Str:
case Expr::Ty::Mem:
case Expr::Ty::MemByte:
case Expr::Ty::MemWord:
case Expr::Ty::Negate:
case Expr::Ty::Not:
case Expr::Ty::Or:
case Expr::Ty::Xor:
case Expr::Ty::And:
case Expr::Ty::Add:
case Expr::Ty::Sub:
break;
case Expr::Ty::Shl:
case Expr::Ty::Shr: {
auto op = eval_operand_to_imm(expr);
if (not op)
return nullptr;
return std::make_unique<EO>(
expr.loc, EOT::MemWordImm, op->as_imm());
}
case Expr::Ty::Reg:
return std::make_unique<EO>(expr.loc, EOT::Reg, expr.as_reg());
case Expr::Ty::Str:
case Expr::Ty::Mem:
case Expr::Ty::MemByte:
case Expr::Ty::MemWord:
error(expr.loc, "operand cannot be used in expressions");
return nullptr;
case Expr::Ty::Add: {
auto& [expr_left, expr_right] = expr.as_binary();
if (expr_left->ty == Expr::Ty::Reg) {
auto right = eval_operand_to_imm(*expr_right);
if (not right)
return nullptr;
return std::make_unique<EO>(expr.loc,
EOT::MemWordReg,
EO::RegImmPair { expr_left->as_reg(), right->as_imm() });
}
if (expr_right->ty == Expr::Ty::Reg) {
auto left = eval_operand_to_imm(*expr_left);
if (not left)
return nullptr;
return std::make_unique<EO>(expr.loc,
EOT::MemWordReg,
EO::RegImmPair { expr_right->as_reg(), left->as_imm() });
}
auto evaled = eval_operand_to_imm(expr);
if (not evaled)
return nullptr;
return std::make_unique<EO>(
expr.loc, EOT::MemWordImm, evaled->as_imm());
}
case Expr::Ty::Sub: {
auto& [expr_left, expr_right] = expr.as_binary();
if (expr_left->ty == Expr::Ty::Reg) {
auto right = eval_operand_to_imm(*expr_right);
if (not right)
return nullptr;
return std::make_unique<EO>(expr.loc,
EOT::MemWordReg,
EO::RegImmPair { expr_left->as_reg(), -right->as_imm() });
}
if (expr_right->ty == Expr::Ty::Reg) {
auto left = eval_operand_to_imm(*expr_left);
if (not left)
return nullptr;
return std::make_unique<EO>(expr.loc,
EOT::MemWordReg,
EO::RegImmPair { expr_right->as_reg(), -left->as_imm() });
}
auto evaled = eval_operand_to_imm(expr);
if (not evaled)
return nullptr;
return std::make_unique<EO>(
expr.loc, EOT::MemWordImm, evaled->as_imm());
}
}
std::unreachable();
assert(false && "unexhaustive");
}
/// Evaluates `expr` to an immediate (`EOT::Imm`) operand.
///
/// Returns the evaluated operand, or `nullptr` after reporting through
/// `error()` when the expression cannot yield an immediate.
///
/// NOTE(review): some labels and statements appear twice in this span (for
/// example `case Expr::Ty::Ident`, the `error(...)` calls and
/// `case Expr::Ty::Sub`). This looks like two revisions overlaid; confirm
/// against the real file.
auto Assembler::eval_operand_to_imm(const Expr& expr)
-> std::unique_ptr<EvaledOperand>
{
auto loc = expr.loc;
switch (expr.ty) {
case Expr::Ty::Ident:
case Expr::Ty::SubLabel:
case Expr::Ty::Ident: {
// An unknown symbol is an error only when m_second_pass is set; otherwise
// 0 is returned as a placeholder value.
if (!m_syms.contains(expr.as_ident())) {
if (m_second_pass) {
error(expr.loc,
std::format(
"symbol \"{}\" not defined", expr.as_ident()));
return nullptr;
} else {
return std::make_unique<EO>(loc, EOT::Imm, uint16_t { 0 });
}
}
auto value = m_syms[expr.as_ident()];
return std::make_unique<EO>(loc, EOT::Imm, value);
}
case Expr::Ty::SubLabel: {
// Same scheme as for Ident, but resolved against m_sublabels.
if (!m_sublabels.contains(expr.as_ident())) {
if (m_second_pass) {
error(expr.loc,
std::format(
"symbol \"{}\" not defined", expr.as_ident()));
return nullptr;
} else {
return std::make_unique<EO>(loc, EOT::Imm, uint16_t { 0 });
}
}
auto value = m_sublabels[expr.as_ident()];
return std::make_unique<EO>(loc, EOT::Imm, value);
}
case Expr::Ty::Reg:
// NOTE(review): no `return nullptr;` follows the error here (the Str and
// Mem cases below have one), so control falls through into the Int case
// and `expr.as_int()` is called on a register expression.
error(expr.loc, "registers cannot be part of an expression");
error(loc, "registers cannot be part of an expression");
case Expr::Ty::Int:
return std::make_unique<EO>(
expr.loc, EOT::Imm, static_cast<uint16_t>(expr.as_int()));
case Expr::Ty::Str:
error(expr.loc, "strings cannot be part of an expression");
error(loc, "strings cannot be part of an expression");
return nullptr;
case Expr::Ty::Mem:
case Expr::Ty::MemByte:
case Expr::Ty::MemWord:
error(expr.loc, "indirections cannot be part of an expression");
error(loc, "indirections cannot be part of an expression");
return nullptr;
// NOTE(review): Negate and Not share the branch below, which unpacks the
// expression with as_binary() and forwards expr.ty to binary_op; the
// binary_op shown in this file has no case for either of them.
case Expr::Ty::Negate:
case Expr::Ty::Not:
case Expr::Ty::Or:
case Expr::Ty::Xor:
case Expr::Ty::And:
case Expr::Ty::Shl:
case Expr::Ty::Shr:
case Expr::Ty::Add:
case Expr::Ty::Sub:
case Expr::Ty::Sub: {
// Both sides are evaluated recursively and must each be an immediate
// before the operator is applied.
auto& [left_expr, right_expr] = expr.as_binary();
auto left = eval_operand_to_imm(*left_expr);
if (not left)
return nullptr;
if (left->ty != EOT::Imm) {
error(loc, "operand cannot be used in expressions");
return nullptr;
}
auto right = eval_operand_to_imm(*right_expr);
if (not right)
return nullptr;
if (right->ty != EOT::Imm) {
error(loc, "operand cannot be used in expressions");
return nullptr;
}
return std::make_unique<EO>(loc,
EOT::Imm,
binary_op(expr.ty, left->as_imm(), right->as_imm()));
break;
}
}
assert(false && "unexhaustive");
}
/// Applies the binary operator denoted by `exprTy` to two 16-bit operands
/// and returns the result truncated to 16 bits.
///
/// @param exprTy  One of `Or`, `Xor`, `And`, `Shl`, `Shr`, `Add`, `Sub`; any
///                other value trips the assertion in the default branch.
/// @param left    Left-hand operand.
/// @param right   Right-hand operand; for shifts this is the shift count.
/// @return        The 16-bit result. Shifts by 16 or more yield 0.
auto Assembler::binary_op(Expr::Ty exprTy, uint16_t left, uint16_t right)
    -> uint16_t
{
    // Operands are promoted to int before shifting, and a shift count at or
    // above the width of int is undefined behaviour. `right` can be as large
    // as 65535, so counts that clear all 16 bits are answered directly.
    constexpr uint16_t word_bits = 16;

    switch (exprTy) {
        case Expr::Ty::Or:
            return static_cast<uint16_t>(left | right);
        case Expr::Ty::Xor:
            return static_cast<uint16_t>(left ^ right);
        case Expr::Ty::And:
            return static_cast<uint16_t>(left & right);
        case Expr::Ty::Shl:
            if (right >= word_bits)
                return 0;
            // Shift in unsigned 32-bit so no signed overflow can occur.
            return static_cast<uint16_t>(static_cast<uint32_t>(left) << right);
        case Expr::Ty::Shr:
            if (right >= word_bits)
                return 0;
            return static_cast<uint16_t>(left >> right);
        case Expr::Ty::Add:
            return static_cast<uint16_t>(
                static_cast<int16_t>(left) + static_cast<int16_t>(right));
        case Expr::Ty::Sub:
            return static_cast<uint16_t>(
                static_cast<int16_t>(left) - static_cast<int16_t>(right));
            // taken from vc3
            // case Expr::Ty::Mul:
            //     return (uint16_t)((int16_t)left * (int16_t)right);
            // case Expr::Ty::Div:
            //     return (uint16_t)((int16_t)left / (int16_t)right);
            // case Expr::Ty::Mod:
            //     return (uint16_t)((int16_t)left % (int16_t)right);
        default:
            assert(false && "unexhaustive");
    }
    std::unreachable();
}
bool Assembler::arg_count_wrong(const Line& ins, size_t count)
@ -229,7 +627,45 @@ void Assembler::error(Loc loc, std::string_view message)
loc.print_error(m_text, message);
}
auto Parser::parse_ins() -> std::unique_ptr<Line>
/// Reports whether the current token is the `const` keyword, i.e. whether a
/// `const` statement begins at this point.
auto Parser::next_is_const() const -> bool
{
    const bool at_const_keyword = test(TT::KwConst);
    return at_const_keyword;
}
/// Parses a `const` statement: the `const` keyword, an identifier, then an
/// expression.
///
/// Must only be called when `next_is_const()` holds.
///
/// @return The parsed statement, or `nullptr` when the identifier is missing
///         (reported through `error()`) or the expression failed to parse.
auto Parser::parse_const() -> std::unique_ptr<Const>
{
    assert(test(TT::KwConst));
    auto loc = m_tok.loc;
    // Consume the keyword. Without this the current token is still `const`
    // and the identifier test below can never succeed.
    step();

    if (!test(TT::Ident)) {
        error(current_loc(), "expected identifier");
        return nullptr;
    }
    auto ident = m_tok.text;
    step();

    auto expr = parse_expr();
    // Callers dereference the statement's expression, so a failed parse must
    // not be wrapped into a statement.
    if (not expr)
        return nullptr;

    return std::make_unique<Const>(loc, ident, std::move(expr));
}
/// Reports whether the current token is the `align` keyword, i.e. whether an
/// `align` statement begins at this point.
auto Parser::next_is_align() const -> bool
{
    const bool at_align_keyword = test(TT::KwAlign);
    return at_align_keyword;
}
/// Parses an `align` statement: the `align` keyword, an identifier, then an
/// expression.
///
/// Must only be called when `next_is_align()` holds.
///
/// NOTE(review): the identifier requirement mirrors `parse_const`; confirm
/// against the `Align` declaration that an identifier is really part of the
/// `align` syntax.
///
/// @return The parsed statement, or `nullptr` when the identifier is missing
///         (reported through `error()`) or the expression failed to parse.
auto Parser::parse_align() -> std::unique_ptr<Align>
{
    assert(test(TT::KwAlign));
    auto loc = m_tok.loc;
    // Consume the keyword. Without this the current token is still `align`
    // and the identifier test below can never succeed.
    step();

    if (!test(TT::Ident)) {
        error(current_loc(), "expected identifier");
        return nullptr;
    }
    auto ident = m_tok.text;
    step();

    auto expr = parse_expr();
    // Callers dereference the statement's expression, so a failed parse must
    // not be wrapped into a statement.
    if (not expr)
        return nullptr;

    return std::make_unique<Align>(loc, ident, std::move(expr));
}
auto Parser::parse_line() -> std::unique_ptr<Line>
{
auto loc = current_loc();
@ -306,9 +742,11 @@ auto Parser::parse_binary(int prec) -> std::unique_ptr<Expr>
using Op = std::tuple<TT, T, int>;
constexpr auto ops = std::array {
Op { TT::Pipe, T::Or, 4 },
Op { TT::Hat, T::Xor, 3 },
Op { TT::Ampersand, T::And, 2 },
Op { TT::Pipe, T::Or, 5 },
Op { TT::Hat, T::Xor, 4 },
Op { TT::Ampersand, T::And, 3 },
Op { TT::LtLt, T::Shl, 2 },
Op { TT::GtGt, T::Shr, 2 },
Op { TT::Plus, T::Add, 1 },
Op { TT::Minus, T::Sub, 1 },
};
@ -381,11 +819,13 @@ auto Parser::parse_operand() -> std::unique_ptr<Expr>
}
} else if (eat('.')) {
if (!test(TT::Ident)) {
error(current_loc(), "expected ')'");
error(current_loc(), "expected identifier");
return nullptr;
}
auto value
= std::string_view(m_tok.text.data() - 1, m_tok.text.size() + 1);
return std::make_unique<Expr>(loc, Expr::Ty::SubLabel, value);
} else if (test(TT::Int)) {
auto text = std::string(m_tok.text);
step();

View File

@ -40,6 +40,8 @@ namespace asmer {
Or,
Xor,
And,
Shl,
Shr,
Add,
Sub,
};
@ -105,12 +107,18 @@ namespace asmer {
public:
explicit Parser(std::string_view text)
: m_text(text)
, m_lexer(text)
, m_lexer(text, Scanner::Kind::Assembler)
, m_tok(m_lexer.next())
{
}
auto parse_ins() -> std::unique_ptr<Line>;
auto next_is_const() const -> bool;
auto parse_const() -> std::unique_ptr<Const>;
auto next_is_align() const -> bool;
auto parse_align() -> std::unique_ptr<Align>;
auto parse_line() -> std::unique_ptr<Line>;
auto ok() const -> bool
{
@ -150,6 +158,8 @@ namespace asmer {
MemWordReg,
};
using RegImmPair = std::pair<Reg, uint16_t>;
auto as_reg() -> Reg&
{
return std::get<Reg>(data);
@ -158,21 +168,39 @@ namespace asmer {
{
return std::get<uint16_t>(data);
}
/// Accesses the payload as a register/immediate pair. `std::get` throws
/// `std::bad_variant_access` when `data` holds a different alternative.
auto as_reg_imm_pair() -> RegImmPair&
{
    auto& pair = std::get<RegImmPair>(data);
    return pair;
}
/// Accesses the payload as a string. `std::get` throws
/// `std::bad_variant_access` when `data` holds a different alternative.
auto as_str() -> std::string&
{
    auto& text = std::get<std::string>(data);
    return text;
}
/// Reports whether this operand is tagged as a register.
auto is_reg() const -> bool
{
    const bool tagged_reg = (ty == Ty::Reg);
    return tagged_reg;
}
/// Reports whether this operand is tagged as an immediate.
auto is_imm() const -> bool
{
    const bool tagged_imm = (ty == Ty::Imm);
    return tagged_imm;
}
Loc loc;
Ty ty;
std::variant<Reg, uint16_t, std::string> data;
std::variant<Reg, uint16_t, RegImmPair, std::string> data;
};
class Assembler {
public:
explicit Assembler(std::string_view text, Builder& builder)
explicit Assembler(std::string_view text)
: m_text(text)
, m_builder(&builder)
, m_program(65536)
, m_builder(m_program.data())
{
}
auto assemble_file(std::string_view text)
auto assemble_file()
-> std::expected<std::vector<uint8_t>, std::string>;
void assemble_define_labels(const std::vector<Label>& labels);
@ -190,15 +218,20 @@ namespace asmer {
auto eval_operand_to_imm(const Expr& expr)
-> std::unique_ptr<EvaledOperand>;
static auto binary_op(Expr::Ty exprTy, uint16_t left, uint16_t right)
-> uint16_t;
/// true means fail
bool arg_count_wrong(const Line& ins, size_t count);
void error(Loc loc, std::string_view message);
std::string_view m_text;
Builder* m_builder;
std::unordered_map<std::string_view, uint16_t> m_global_labels {};
std::unordered_map<std::string_view, uint16_t> m_local_labels {};
std::vector<uint8_t> m_program;
Builder m_builder;
std::unordered_map<std::string_view, uint16_t> m_syms {};
std::unordered_map<std::string_view, uint16_t> m_sublabels {};
bool m_second_pass = false;
bool m_failed = false;
};

View File

@ -22,7 +22,7 @@ auto Scanner::next() -> Tok
}
return next();
}
if (test(';')) {
if (is_assembler() and test(';')) {
step();
while (not done() and not test('\n')) {
step();
@ -37,12 +37,8 @@ auto Scanner::next() -> Tok
auto ident_tok = tok(TT::Ident, loc);
static const auto keywords = std::unordered_map<std::string_view, TT> {
{ "const", TT::KwConst },
};
if (keywords.contains(ident_tok.text)) {
return tok(keywords.at(ident_tok.text), loc);
if (m_keywords->contains(ident_tok.text)) {
return tok(m_keywords->at(ident_tok.text), loc);
}
return ident_tok;
@ -118,6 +114,22 @@ auto Scanner::next() -> Tok
step();
return tok(TT::Str, loc);
}
if (test('<')) {
step();
if (test('<')) {
step();
return tok(TT::LtLt, loc);
}
return tok(TT::Lt, loc);
}
if (test('>')) {
step();
if (test('>')) {
step();
return tok(TT::GtGt, loc);
}
return tok(TT::Gt, loc);
}
if (test_in("\n()[],:|^+-!")) {
auto ty = static_cast<TT>(current());
step();
@ -128,6 +140,11 @@ auto Scanner::next() -> Tok
return next();
}
const auto Scanner::keywords_kind_assembler = KeywordMap {
{ "const", TT::KwConst },
{ "align", TT::KwAlign },
};
void Scanner::step()
{
if (done())
@ -184,6 +201,11 @@ auto Scanner::done() const -> bool
return m_idx >= m_text.size();
}
/// Reports whether this scanner was constructed with `Kind::Assembler`.
auto Scanner::is_assembler() const -> bool
{
    const bool assembler_kind = (m_kind == Kind::Assembler);
    return assembler_kind;
}
void Scanner::error(Loc loc, std::string_view message)
{
m_error_occured = true;

View File

@ -2,6 +2,7 @@
#include <cstddef>
#include <string_view>
#include <unordered_map>
namespace vc5::tools {
@ -23,6 +24,7 @@ struct Tok {
Char,
Str,
KwConst,
KwAlign,
Newline = '\n',
LParen = '(',
RParen = ')',
@ -36,6 +38,10 @@ struct Tok {
Plus = '+',
Minus = '-',
Exclam = '!',
Lt = '<',
Gt = '>',
LtLt,
GtGt,
};
std::string_view text;
@ -45,8 +51,12 @@ struct Tok {
class Scanner {
public:
explicit Scanner(std::string_view text)
enum class Kind { Assembler };
explicit Scanner(std::string_view text, Kind kind)
: m_text(text)
, m_keywords(&keywords_kind_assembler)
, m_kind(kind)
{
}
@ -68,12 +78,19 @@ private:
auto current() const -> char;
auto done() const -> bool;
auto is_assembler() const -> bool;
void error(Loc loc, std::string_view message);
using KeywordMap = std::unordered_map<std::string_view, Tok::Ty>;
static const KeywordMap keywords_kind_assembler;
std::string_view m_text;
const KeywordMap* m_keywords;
size_t m_idx = 0;
int m_line = 1;
int m_col = 1;
Kind m_kind;
bool m_error_occured = false;
};