ethos/src/front/parse.ts
sfja 9691fc38cb
All checks were successful
Check / Explore-Gitea-Actions (push) Successful in 8s
pointers and arrays work
2026-03-17 16:00:49 +01:00

475 lines
14 KiB
TypeScript

import * as ast from "../ast.ts";
import { FileReporter, Loc } from "../diagnostics.ts";
/**
 * Parses the source text of one file into its AST.
 *
 * @param text Source code to parse.
 * @param reporter Diagnostics sink handed to the lexer and the parser.
 * @returns The node produced by `Parser.parseFile`.
 */
export function parse(text: string, reporter: FileReporter): ast.Node {
    const parser = new Parser(text, reporter);
    return parser.parseFile();
}
/**
 * Recursive-descent parser over the token list produced by `tokenize`.
 *
 * Every `parse*` method consumes tokens starting at the current position and
 * returns the `ast.Node` it built. Syntax errors are reported through the
 * `FileReporter`, after which `reporter.abort()` is called.
 * NOTE(review): the code is written as if `abort()` never returns (see the
 * unreachable `throw`s after `mustEat("<expression>")`) — confirm.
 */
export class Parser {
    /**
     * Loosest binary precedence level, used as the entry level of
     * `parseBinary`. Must equal the largest level listed in `binaryOps`,
     * otherwise the operators above it can never be parsed.
     */
    private static readonly maxBinaryPrec = 9;

    /**
     * Binary operators as `[token type, AST operator, precedence level]`.
     * A smaller level binds tighter; level 0 is reserved for prefix
     * expressions.
     */
    private static readonly binaryOps: ReadonlyArray<
        [Tok["type"], ast.BinaryOp, number]
    > = [
        ["or", "Or", 9],
        ["and", "And", 8],
        ["==", "Eq", 7],
        ["!=", "Ne", 7],
        ["<", "Lt", 7],
        [">", "Gt", 7],
        ["<=", "Lte", 7],
        [">=", "Gte", 7],
        ["|", "BitOr", 6],
        ["^", "BitXor", 5],
        ["&", "BitAnd", 4],
        ["<<", "Shl", 3],
        [">>", "Shr", 3],
        ["+", "Add", 2],
        ["-", "Subtract", 2],
        ["*", "Multiply", 1],
        ["/", "Divide", 1],
        ["%", "Remainder", 1],
    ];

    /** Single-token prefix operators as `[token type, AST operator]`. */
    private static readonly prefixOps: ReadonlyArray<
        [Tok["type"], ast.UnaryOp]
    > = [
        ["not", "Not"],
        ["-", "Negate"],
        ["*", "Deref"],
    ];

    private toks: Tok[];
    /** Index of the current (not yet consumed) token in `toks`. */
    private idx = 0;
    /** Location of the current token, or of the last token once at EOF. */
    private currentLoc: Loc = { idx: 0, line: 1, col: 1 };
    /** Most recently consumed token, `null` before the first `step()`. */
    private prevTok: Tok | null = null;

    /**
     * Tokenizes `text` eagerly.
     *
     * @param text Source code to parse.
     * @param reporter Diagnostics sink for lexer and parser errors.
     */
    constructor(
        private text: string,
        private reporter: FileReporter,
    ) {
        this.toks = tokenize(this.text, this.reporter);
        // `step()` only syncs `currentLoc` after advancing, so sync it once
        // here; otherwise everything located at the first token would point
        // at 1:1 whenever the file starts with whitespace or comments.
        if (!this.done) {
            this.currentLoc = this.current.loc;
        }
    }

    /** Parses statements until the end of input and wraps them in a `File` node. */
    parseFile(): ast.Node {
        // The file node is anchored at the very start of the text.
        const loc: Loc = { idx: 0, line: 1, col: 1 };
        const stmts: ast.Node[] = [];
        while (!this.done) {
            stmts.push(this.parseStmt());
        }
        return ast.Node.create(loc, "File", { stmts });
    }

    /** Parses `{ stmt* }` into a `Block` node. */
    parseBlock(): ast.Node {
        const loc = this.loc();
        this.mustEat("{");
        const stmts: ast.Node[] = [];
        while (!this.done && !this.test("}")) {
            stmts.push(this.parseStmt());
        }
        this.mustEat("}");
        return ast.Node.create(loc, "Block", { stmts });
    }

    /**
     * Parses one statement, dispatching on the leading keyword. Anything
     * else is an expression followed by either `= expr ;` (`AssignStmt`)
     * or `;` (`ExprStmt`).
     */
    parseStmt(): ast.Node {
        if (this.test("fn")) {
            return this.parseFnStmt();
        }
        if (this.test("return")) {
            return this.parseReturnStmt();
        }
        if (this.test("let")) {
            return this.parseLetStmt();
        }
        if (this.test("if")) {
            return this.parseIfStmt();
        }
        if (this.test("while")) {
            return this.parseWhileStmt();
        }
        if (this.test("break")) {
            return this.parseBreakStmt();
        }

        const loc = this.loc();
        const place = this.parseExpr();
        if (this.eat("=")) {
            const expr = this.parseExpr();
            this.mustEat(";");
            return ast.Node.create(loc, "AssignStmt", { place, expr });
        }
        this.mustEat(";");
        return ast.Node.create(loc, "ExprStmt", { expr: place });
    }

    /** Parses `fn ident ( params ) [-> ty] block`; the current token must be `fn`. */
    parseFnStmt(): ast.Node {
        const loc = this.loc();
        this.step(); // skip `fn`
        const ident = this.mustEat("ident").value;
        this.mustEat("(");
        const params = this.parseCommaList(")", () => this.parseParam());
        this.mustEat(")");
        const retTy = this.eat("->") ? this.parseTy() : null;
        const body = this.parseBlock();
        return ast.Node.create(loc, "FnStmt", { ident, params, retTy, body });
    }

    /** Parses `return [expr] ;`; the current token must be `return`. */
    parseReturnStmt(): ast.Node {
        const loc = this.loc();
        this.step(); // skip `return`
        const expr = this.test(";") ? null : this.parseExpr();
        this.mustEat(";");
        return ast.Node.create(loc, "ReturnStmt", { expr });
    }

    /** Parses `let param = expr ;`; the current token must be `let`. */
    parseLetStmt(): ast.Node {
        const loc = this.loc();
        this.step(); // skip `let`
        const param = this.parseParam();
        this.mustEat("=");
        const expr = this.parseExpr();
        this.mustEat(";");
        return ast.Node.create(loc, "LetStmt", { param, expr });
    }

    /**
     * Parses `if expr block [else block]`; the current token must be `if`.
     * The `else` branch must be a block, so `else if` is not accepted here.
     */
    parseIfStmt(): ast.Node {
        const loc = this.loc();
        this.step(); // skip `if`
        const cond = this.parseExpr();
        const truthy = this.parseBlock();
        const falsy = this.eat("else") ? this.parseBlock() : null;
        return ast.Node.create(loc, "IfStmt", { cond, truthy, falsy });
    }

    /** Parses `while expr block`; the current token must be `while`. */
    parseWhileStmt(): ast.Node {
        const loc = this.loc();
        this.step(); // skip `while`
        const cond = this.parseExpr();
        const body = this.parseBlock();
        return ast.Node.create(loc, "WhileStmt", { cond, body });
    }

    /** Parses `break ;`; the current token must be `break`. */
    parseBreakStmt(): ast.Node {
        const loc = this.loc();
        this.step(); // skip `break`
        this.mustEat(";");
        return ast.Node.create(loc, "BreakStmt", {});
    }

    /** Parses `ident [: ty]` into a `Param` node (`ty` is `null` when omitted). */
    parseParam(): ast.Node {
        const loc = this.loc();
        const ident = this.mustEat("ident").value;
        const ty = this.eat(":") ? this.parseTy() : null;
        return ast.Node.create(loc, "Param", { ident, ty });
    }

    /** Parses a full expression; ranges are the loosest-binding form. */
    parseExpr(): ast.Node {
        return this.parseRange();
    }

    /**
     * Parses `[begin] (..|..=) [end]`, or a plain binary expression when no
     * range operator follows.
     */
    parseRange(): ast.Node {
        const loc = this.loc();
        if (this.eat("..") || this.eat("..=")) {
            // `prevTok` is the range operator that was just consumed.
            return this.parseRangeTail(loc, null, this.prevTok!.type);
        }
        const begin = this.parseBinary();
        if (this.eat("..") || this.eat("..=")) {
            return this.parseRangeTail(loc, begin, this.prevTok!.type);
        }
        return begin;
    }

    /**
     * Parses what follows a range operator and builds the `RangeExpr`.
     *
     * @param loc Location where the range expression started.
     * @param begin Lower bound, or `null` for a range without one.
     * @param tok Type of the consumed operator: `".."` is exclusive, anything
     *   else is treated as inclusive.
     */
    parseRangeTail(loc: Loc, begin: ast.Node | null, tok: string): ast.Node {
        const limit: ast.RangeLimit = tok === ".." ? "Exclusive" : "Inclusive";
        // The upper bound is omitted when the range is directly followed by
        // one of these closing tokens.
        const closers = [";", ",", ")", "]"];
        let end: ast.Node | null = null;
        if (!closers.some((closer) => this.test(closer))) {
            end = this.parseBinary();
        }
        // Built with `ast.Node.create`, like every other node in this class.
        return ast.Node.create(loc, "RangeExpr", { begin, end, limit });
    }

    /**
     * Precedence-climbing parser for left-associative binary operators.
     *
     * @param prec Loosest precedence level to accept. Defaults to the top
     *   level so that `and`/`or` are reachable from `parseExpr`; level 0
     *   delegates to `parsePrefix`.
     */
    parseBinary(prec = Parser.maxBinaryPrec): ast.Node {
        const loc = this.loc();
        if (prec === 0) {
            return this.parsePrefix();
        }
        let left = this.parseBinary(prec - 1);
        let matched = true;
        while (matched) {
            matched = false;
            for (const [tok, op, p] of Parser.binaryOps) {
                if (prec >= p && this.eat(tok)) {
                    // Parsing the right operand one level tighter makes the
                    // operators of this level left-associative.
                    const right = this.parseBinary(prec - 1);
                    left = ast.Node.create(
                        loc,
                        "BinaryExpr",
                        { op, left, right, tok },
                    );
                    matched = true;
                    break;
                }
            }
        }
        return left;
    }

    /**
     * Parses prefix operators (`not`, `-`, `*`, `&`, `& mut`), which nest
     * right-to-left, then falls through to `parsePostfix`.
     */
    parsePrefix(): ast.Node {
        const loc = this.loc();
        for (const [tok, op] of Parser.prefixOps) {
            if (this.eat(tok)) {
                const expr = this.parsePrefix();
                return ast.Node.create(loc, "UnaryExpr", { op, expr, tok });
            }
        }
        if (this.eat("&")) {
            // `&` and `mut` are separate tokens; they are merged into a
            // single "&mut" operator spelling on the node.
            const op: ast.UnaryOp = this.eat("mut") ? "RefMut" : "Ref";
            const expr = this.parsePrefix();
            const tok = op === "Ref" ? "&" : "&mut";
            return ast.Node.create(loc, "UnaryExpr", { op, expr, tok });
        }
        return this.parsePostfix();
    }

    /**
     * Parses an operand followed by any number of postfix forms:
     * `.*` (deref), `[expr]` (index) and `(args)` (call).
     */
    parsePostfix(): ast.Node {
        let expr = this.parseOperand();
        while (true) {
            // Each postfix node is located at its own operator token.
            const loc = this.loc();
            if (this.eat(".*")) {
                // use unary because it's already there
                // TODO: consider making a separate node type
                expr = ast.Node
                    .create(loc, "UnaryExpr", { expr, op: "Deref", tok: ".*" });
            } else if (this.eat("[")) {
                const arg = this.parseExpr();
                this.mustEat("]");
                expr = ast.Node.create(loc, "IndexExpr", { value: expr, arg });
            } else if (this.eat("(")) {
                const args = this.parseCommaList(")", () => this.parseExpr());
                this.mustEat(")");
                expr = ast.Node.create(loc, "CallExpr", { value: expr, args });
            } else {
                break;
            }
        }
        return expr;
    }

    /**
     * Parses a primary expression: identifier, integer literal,
     * parenthesized expression or array literal. Reports
     * "expected '<expression>'" for anything else.
     */
    parseOperand(): ast.Node {
        const loc = this.loc();
        if (this.test("ident")) {
            const ident = this.current.value;
            this.step();
            return ast.Node.create(loc, "IdentExpr", { ident });
        }
        if (this.test("int")) {
            const value = Number(this.current.value);
            this.step();
            return ast.Node.create(loc, "IntExpr", { value });
        }
        if (this.eat("(")) {
            // Parentheses only group; no node is created for them.
            const expr = this.parseExpr();
            this.mustEat(")");
            return expr;
        }
        if (this.eat("[")) {
            const values = this.parseCommaList("]", () => this.parseExpr());
            this.mustEat("]");
            return ast.Node.create(loc, "ArrayExpr", { values });
        }
        // "<expression>" is never a real token type, so this always reports.
        this.mustEat("<expression>");
        throw new Error();
    }

    /**
     * Parses a type: `ident`, `*ty`, `*mut ty`, `[ty]` (slice) or
     * `[ty; expr]` (array). Reports "expected '<type>'" for anything else.
     */
    parseTy(): ast.Node {
        const loc = this.loc();
        if (this.test("ident")) {
            const ident = this.current.value;
            this.step();
            return ast.Node.create(loc, "IdentTy", { ident });
        }
        if (this.eat("*")) {
            const mutable = this.eat("mut");
            const ty = this.parseTy();
            return ast.Node.create(loc, mutable ? "PtrMutTy" : "PtrTy", { ty });
        }
        if (this.eat("[")) {
            const ty = this.parseTy();
            if (this.eat(";")) {
                const length = this.parseExpr();
                this.mustEat("]");
                return ast.Node.create(loc, "ArrayTy", { ty, length });
            }
            this.mustEat("]");
            return ast.Node.create(loc, "SliceTy", { ty });
        }
        // "<type>" is never a real token type, so this always reports.
        this.mustEat("<type>");
        throw new Error();
    }

    /**
     * Parses zero or more comma-separated items up to, but not including,
     * the `close` token. One trailing comma is allowed. The caller consumes
     * `close` itself.
     */
    private parseCommaList(
        close: string,
        parseItem: () => ast.Node,
    ): ast.Node[] {
        const items: ast.Node[] = [];
        if (this.done || this.test(close)) {
            return items;
        }
        items.push(parseItem());
        while (this.eat(",")) {
            if (this.done || this.test(close)) {
                break;
            }
            items.push(parseItem());
        }
        return items;
    }

    /**
     * Consumes and returns the current token if it has the given type;
     * otherwise reports an error (plus a "try adding" hint for a missing
     * `;`) and calls `reporter.abort()`.
     *
     * @param type Required token type.
     * @param loc Location used for the error; defaults to the current one.
     */
    private mustEat(type: string, loc = this.loc()): Tok {
        const tok = this.current;
        // At end of input `current` is undefined, so test `done` first
        // instead of dereferencing it.
        if (this.done || tok.type !== type) {
            const got = this.done ? "eof" : tok.type;
            this.reporter.error(loc, `expected '${type}', got '${got}'`);
            if (type === ";" && this.idx > 0) {
                // Point the hint at the token the `;` should follow.
                this.reporter.info(
                    this.toks[this.idx - 1].loc,
                    `try adding '${type}' here`,
                );
            }
            this.reporter.abort();
        }
        this.step();
        return tok;
    }

    /** Consumes the current token and returns `true` iff it has the given type. */
    private eat(type: string): boolean {
        if (this.test(type)) {
            this.step();
            return true;
        }
        return false;
    }

    /**
     * Advances one token. `prevTok` records the token being left, and
     * `currentLoc` follows the new current token; at end of input it keeps
     * the location of the last token.
     */
    private step() {
        if (!this.done) {
            this.prevTok = this.current;
        }
        this.idx += 1;
        if (!this.done) {
            this.currentLoc = this.current.loc;
        }
    }

    /** Whether there is a current token and it has the given type. */
    private test(type: string): boolean {
        return !this.done && this.current.type === type;
    }

    private loc(): Loc {
        return this.currentLoc;
    }

    /** Current token; `undefined` at runtime once `done` is true. */
    private get current(): Tok {
        return this.toks[this.idx];
    }

    /** Whether every token has been consumed. */
    private get done(): boolean {
        return this.idx >= this.toks.length;
    }
}
/**
 * A lexed token.
 *
 * As produced by `tokenize`: `type` is the lexeme itself for operators,
 * punctuation and keywords, `"ident"` for identifiers and `"int"` for integer
 * literals. `value` is the matched source text and `loc` is the position of
 * its first character.
 */
export type Tok = { type: string; value: string; loc: Loc };
/**
 * Matches a word that is exactly one of the reserved keywords.
 *
 * The pattern is anchored at both ends: `tokenize` tests it against a whole
 * identifier-shaped lexeme, so without the trailing `$` any identifier that
 * merely starts with a keyword (`fnord`, `letter`, `order`, ...) would be
 * classified as a keyword.
 */
const keywordPattern =
    /^(?:fn|return|let|if|else|while|break|or|and|not|mut)$/;
/**
 * Matches one operator or punctuation token.
 *
 * Regex alternation takes the first alternative that matches, so every
 * multi-character operator must be listed before any operator that is a
 * prefix of it. In particular `..=` has to precede `..`, and all of them
 * precede the single-character class at the end.
 */
const operatorPattern2 =
    /((?:\->)|(?:==)|(?:!=)|(?:<=)|(?:>=)|(?:<<)|(?:>>)|(?:\.\*)|(?:\.\.=)|(?:\.\.)|[\n\(\)\{\}\[\]\,\.\;\:\!\=\<\>\&\^\|\+\-\*\/\%])/g;
/**
 * Splits source text into tokens.
 *
 * Rules are tried in the order they are registered: whitespace and `//`
 * comments are dropped, then operators/punctuation, identifiers and keywords,
 * and integer literals are emitted. Any other character is reported as
 * illegal through `reporter` and skipped.
 *
 * @param text Source code to tokenize.
 * @param reporter Receives an error for every illegal character.
 * @returns The tokens in source order.
 */
export function tokenize(text: string, reporter: FileReporter): Tok[] {
    const skip = (): null => null;
    const lexer = new Lexer();

    lexer.add(/[ \t\r\n]+/, skip);
    lexer.add(/\/\/[^\n]*/, skip);

    // Operators and punctuation use their own spelling as the token type.
    lexer.add(operatorPattern2, (loc, value) => {
        return { type: value, value, loc };
    });

    // Words are keywords when the keyword pattern accepts them, otherwise
    // plain identifiers.
    lexer.add(/[a-zA-Z_][a-zA-Z0-9_]*/, (loc, value) => {
        if (keywordPattern.test(value)) {
            return { type: value, value, loc };
        }
        return { type: "ident", value, loc };
    });

    lexer.add(/0|(?:[1-9][0-9]*)/, (loc, value) => ({
        type: "int",
        value,
        loc,
    }));

    // Catch-all: report the character (escaped for readability) and drop it.
    lexer.add(/./, (loc, value) => {
        const quoted = JSON.stringify(value[0]);
        const escapedChar = quoted.slice(1, -1);
        reporter.error(loc, `illegal character '${escapedChar}'`);
        return null;
    });

    return lexer.lex(text);
}
/** One lexer rule as stored by `Lexer`: a pattern and the action run on a match. */
type LexRule = {
// Regex tested against the not yet consumed input; `Lexer.add` stores it
// anchored with `^`.
pattern: RegExp;
// Called with the location and text of the match.
action: LexAction;
};
/**
 * Callback run when a rule matches. Receives the location where the match
 * starts and the matched text; returns the token to emit, or `null` to emit
 * nothing for this match.
 */
type LexAction = (loc: Loc, match: string) => Tok | null;
/**
 * Minimal rule-based lexer.
 *
 * Rules are tried in registration order at the current input position; the
 * first rule with a non-empty match wins, its text is consumed and its
 * action decides whether a token is emitted.
 */
class Lexer {
    private rules: LexRule[] = [];

    /**
     * Registers a rule.
     *
     * Only `pattern.source` is used: the stored regex is anchored with `^`
     * and the flags of `pattern` are dropped.
     *
     * @param pattern Regex describing the lexeme.
     * @param action Called for each match; returns a token or `null`.
     * @returns `this`, so calls can be chained.
     */
    add(pattern: RegExp, action: LexAction): this {
        this.rules.push({
            pattern: new RegExp(`^(?:${pattern.source})`),
            action,
        });
        return this;
    }

    /**
     * Runs the rules over `text` and collects the emitted tokens.
     *
     * Locations are 1-based in `line`/`col` and 0-based in `idx`; all three
     * count UTF-16 code units, and only `"\n"` starts a new line.
     *
     * @param text Input to lex.
     * @returns Tokens in input order.
     * @throws Error when no rule has a non-empty match at some position.
     */
    lex(text: string): Tok[] {
        const toks: Tok[] = [];
        let idx = 0;
        let line = 1;
        let col = 1;
        while (idx < text.length) {
            // Slice once per position instead of once per rule.
            const rest = text.slice(idx);

            let action: LexAction | null = null;
            let lexeme = "";
            for (const rule of this.rules) {
                const match = rule.pattern.exec(rest);
                // An empty match would consume nothing and make this loop
                // spin forever, so it counts as "no match" and the next
                // rule gets a chance.
                if (match !== null && match[0].length > 0) {
                    action = rule.action;
                    lexeme = match[0];
                    break;
                }
            }
            if (action === null) {
                throw new Error(`no rule for character '${text[idx]}'`);
            }

            const loc: Loc = { idx, line, col };
            for (let i = 0; i < lexeme.length; ++i) {
                if (lexeme[i] === "\n") {
                    line += 1;
                    col = 1;
                } else {
                    col += 1;
                }
            }
            idx += lexeme.length;

            const tok = action(loc, lexeme);
            if (tok) {
                toks.push(tok);
            }
        }
        return toks;
    }
}