ethos/src/front/parse.ts
sfja 1c92a3c077
All checks were successful
Check / Explore-Gitea-Actions (push) Successful in 8s
work on arrays and slices
2026-03-16 01:04:35 +01:00

415 lines
12 KiB
TypeScript

import * as ast from "../ast.ts";
import { printDiagnostics } from "../diagnostics.ts";
/**
 * Parses the source text of one file into its AST.
 *
 * Convenience wrapper that builds a `Parser` and runs `parseFile` on it.
 *
 * @param filename Name of the file being parsed; handed to the `Parser`,
 *   which uses it when printing diagnostics.
 * @param text Complete source text of the file.
 * @returns The `File` node produced by `Parser.parseFile`.
 * @throws Error when the parser hits a syntax error.
 */
export function parse(
    filename: string,
    text: string,
): ast.Node {
    const parser = new Parser(filename, text);
    return parser.parseFile();
}
/**
 * Recursive-descent parser that builds `ast.Node` trees from source text.
 *
 * The constructor tokenizes the whole text up front; the `parse*` methods
 * then consume tokens left to right. Each node's `loc` is the line number
 * of the token the parser was looking at when it started that node.
 *
 * Syntax errors are printed through `printDiagnostics` and then thrown as
 * an `Error`; there is no error recovery.
 */
export class Parser {
    /**
     * Loosest binary precedence level. `parseBinary` must start here,
     * otherwise the operators on the highest levels (`and`, `or`) are
     * never considered.
     */
    private static readonly maxBinaryPrec = 9;

    /**
     * Binary operators as `[token type, AST operator, precedence level]`.
     * A lower level binds tighter; level 0 is reserved for prefix
     * expressions.
     */
    private static readonly binaryOps: readonly (readonly [
        Tok["type"],
        ast.BinaryOp,
        number,
    ])[] = [
        ["or", "Or", 9],
        ["and", "And", 8],
        ["==", "Eq", 7],
        ["!=", "Ne", 7],
        ["<", "Lt", 7],
        [">", "Gt", 7],
        ["<=", "Lte", 7],
        [">=", "Gte", 7],
        ["|", "BitOr", 6],
        ["^", "BitXor", 5],
        ["&", "BitAnd", 4],
        ["<<", "Shl", 3],
        [">>", "Shr", 3],
        ["+", "Add", 2],
        ["-", "Subtract", 2],
        ["*", "Multiply", 1],
        ["/", "Divide", 1],
        ["%", "Remainder", 1],
    ];

    /** Single-token prefix operators as `[token type, AST operator]`. */
    private static readonly prefixOps: readonly (readonly [
        Tok["type"],
        ast.UnaryOp,
    ])[] = [
        ["not", "Not"],
        ["-", "Negate"],
        ["*", "Deref"],
    ];

    private toks: Tok[];
    /** Index of the current token in `toks`. */
    private idx = 0;
    /** Line of the current token (or of the last token once at EOF). */
    private currentLine = 1;

    /**
     * @param filename File name forwarded to `printDiagnostics`.
     * @param text Source text; tokenized immediately.
     */
    constructor(
        private filename: string,
        private text: string,
    ) {
        this.toks = tokenize(text);
        // Start on the first token's line so that nodes and diagnostics
        // created before the first `step()` do not all claim line 1.
        const first = this.toks[0];
        if (first !== undefined) {
            this.currentLine = first.line;
        }
    }

    /** Parses statements until end of input and wraps them in a `File` node. */
    parseFile(): ast.Node {
        const loc = this.loc();
        const stmts: ast.Node[] = [];
        while (!this.done) {
            stmts.push(this.parseStmt());
        }
        return ast.Node.create(loc, "File", { stmts });
    }

    /** Parses `"{" stmt* "}"` into a `Block` node. */
    parseBlock(): ast.Node {
        const loc = this.loc();
        this.mustEat("{");
        const stmts: ast.Node[] = [];
        while (!this.done && !this.test("}")) {
            stmts.push(this.parseStmt());
        }
        this.mustEat("}");
        return ast.Node.create(loc, "Block", { stmts });
    }

    /**
     * Parses one statement. Dispatches on the leading keyword; anything
     * else is an expression followed by either `= expr ;` (`AssignStmt`)
     * or `;` (`ExprStmt`).
     */
    parseStmt(): ast.Node {
        if (this.test("fn")) {
            return this.parseFnStmt();
        }
        if (this.test("return")) {
            return this.parseReturnStmt();
        }
        if (this.test("let")) {
            return this.parseLetStmt();
        }
        if (this.test("if")) {
            return this.parseIfStmt();
        }
        const loc = this.loc();
        const place = this.parseExpr();
        if (this.eat("=")) {
            const expr = this.parseExpr();
            this.mustEat(";");
            return ast.Node.create(loc, "AssignStmt", { place, expr });
        }
        this.mustEat(";");
        return ast.Node.create(loc, "ExprStmt", { expr: place });
    }

    /**
     * Parses `fn ident "(" params ")" ("->" ty)? block`.
     * The current token (the `fn` keyword) is skipped unconditionally.
     */
    parseFnStmt(): ast.Node {
        const loc = this.loc();
        this.step();
        const ident = this.mustEat("ident").value;
        this.mustEat("(");
        const params = this.parseCommaList(")", () => this.parseParam());
        this.mustEat(")");
        const retTy = this.eat("->") ? this.parseTy() : null;
        const body = this.parseBlock();
        return ast.Node.create(loc, "FnStmt", { ident, params, retTy, body });
    }

    /**
     * Parses `return expr? ";"`.
     * The current token (the `return` keyword) is skipped unconditionally.
     */
    parseReturnStmt(): ast.Node {
        const loc = this.loc();
        this.step();
        const expr = this.test(";") ? null : this.parseExpr();
        this.mustEat(";");
        return ast.Node.create(loc, "ReturnStmt", { expr });
    }

    /**
     * Parses `let param "=" expr ";"`.
     * The current token (the `let` keyword) is skipped unconditionally.
     */
    parseLetStmt(): ast.Node {
        const loc = this.loc();
        this.step();
        const param = this.parseParam();
        this.mustEat("=");
        const expr = this.parseExpr();
        this.mustEat(";");
        return ast.Node.create(loc, "LetStmt", { param, expr });
    }

    /**
     * Parses `if expr block ("else" block)?`.
     * The current token (the `if` keyword) is skipped unconditionally.
     */
    parseIfStmt(): ast.Node {
        const loc = this.loc();
        this.step();
        const cond = this.parseExpr();
        const truthy = this.parseBlock();
        const falsy = this.eat("else") ? this.parseBlock() : null;
        return ast.Node.create(loc, "IfStmt", { cond, truthy, falsy });
    }

    /** Parses `ident (":" ty)?` into a `Param` node; `ty` is null if omitted. */
    parseParam(): ast.Node {
        const loc = this.loc();
        const ident = this.mustEat("ident").value;
        const ty = this.eat(":") ? this.parseTy() : null;
        return ast.Node.create(loc, "Param", { ident, ty });
    }

    /** Parses any expression; the loosest form is a range expression. */
    parseExpr(): ast.Node {
        return this.parseRange();
    }

    /**
     * Parses an optional range around a binary expression:
     * `begin? (".." | "..=") end?`. Without a range operator the plain
     * binary expression is returned.
     */
    parseRange(): ast.Node {
        const loc = this.loc();
        const leading = this.eatRangeOp();
        if (leading !== null) {
            return this.parseRangeTail(loc, null, leading);
        }
        const begin = this.parseBinary();
        const infix = this.eatRangeOp();
        if (infix !== null) {
            return this.parseRangeTail(loc, begin, infix);
        }
        return begin;
    }

    /**
     * Parses what follows an already-consumed range operator.
     *
     * @param loc Location to give the resulting node.
     * @param begin Lower bound, or null for an open start.
     * @param tok The consumed operator; `".."` is exclusive, anything else
     *   is treated as inclusive.
     */
    parseRangeTail(loc: number, begin: ast.Node | null, tok: string): ast.Node {
        const limit: ast.RangeLimit = tok === ".." ? "Exclusive" : "Inclusive";
        // One of these right after the operator means the upper bound
        // was left out.
        const terminators = [";", ",", ")", "]"];
        const end = terminators.some((t) => this.test(t))
            ? null
            : this.parseBinary();
        return ast.Node.create(loc, "RangeExpr", { begin, end, limit });
    }

    /**
     * Parses a left-associative binary expression by precedence climbing.
     *
     * @param prec Loosest precedence level to accept. Defaults to the
     *   loosest level in the operator table so that `and` / `or` are
     *   reachable (the previous default of 7 silently excluded them).
     */
    parseBinary(prec = Parser.maxBinaryPrec): ast.Node {
        const loc = this.loc();
        if (prec <= 0) {
            return this.parsePrefix();
        }
        let left = this.parseBinary(prec - 1);
        let matched = true;
        while (matched) {
            matched = false;
            for (const [tok, op, level] of Parser.binaryOps) {
                // Tighter operators were already consumed by the recursive
                // calls, so in practice only operators of this level match.
                if (prec >= level && this.eat(tok)) {
                    const right = this.parseBinary(prec - 1);
                    left = ast.Node.create(
                        loc,
                        "BinaryExpr",
                        { op, left, right, tok },
                    );
                    matched = true;
                    break;
                }
            }
        }
        return left;
    }

    /**
     * Parses prefix operators (`not`, `-`, `*`, `&`, `& mut`), which nest
     * to the right, and falls through to a postfix expression.
     */
    parsePrefix(): ast.Node {
        const loc = this.loc();
        for (const [tok, op] of Parser.prefixOps) {
            if (this.eat(tok)) {
                const expr = this.parsePrefix();
                return ast.Node.create(loc, "UnaryExpr", { op, expr, tok });
            }
        }
        if (this.eat("&")) {
            const op: ast.UnaryOp = this.eat("mut") ? "RefMut" : "Ref";
            const expr = this.parsePrefix();
            const tok = op === "Ref" ? "&" : "&mut";
            return ast.Node.create(loc, "UnaryExpr", { op, expr, tok });
        }
        return this.parsePostfix();
    }

    /**
     * Parses an operand followed by any number of postfix forms:
     * `.*` (deref), `[expr]` (index) and `(args)` (call).
     */
    parsePostfix(): ast.Node {
        let expr = this.parseOperand();
        while (true) {
            const loc = this.loc();
            if (this.eat(".*")) {
                // use unary because it's already there
                // TODO: consider making a separate node type
                expr = ast.Node
                    .create(loc, "UnaryExpr", { expr, op: "Deref", tok: ".*" });
            } else if (this.eat("[")) {
                const arg = this.parseExpr();
                this.mustEat("]");
                expr = ast.Node.create(loc, "IndexExpr", { expr, arg });
            } else if (this.eat("(")) {
                const args = this.parseCommaList(")", () => this.parseExpr());
                this.mustEat(")");
                expr = ast.Node.create(loc, "CallExpr", { expr, args });
            } else {
                break;
            }
        }
        return expr;
    }

    /**
     * Parses a primary expression: identifier, integer literal,
     * parenthesized expression or array literal `[a, b, ...]`.
     */
    parseOperand(): ast.Node {
        const loc = this.loc();
        if (this.test("ident")) {
            const ident = this.current.value;
            this.step();
            return ast.Node.create(loc, "IdentExpr", { ident });
        }
        if (this.test("int")) {
            const value = Number(this.current.value);
            this.step();
            return ast.Node.create(loc, "IntExpr", { value });
        }
        if (this.eat("(")) {
            // Parentheses only group; they produce no node of their own.
            const expr = this.parseExpr();
            this.mustEat(")");
            return expr;
        }
        if (this.eat("[")) {
            const values = this.parseCommaList("]", () => this.parseExpr());
            this.mustEat("]");
            return ast.Node.create(loc, "ArrayExpr", { values });
        }
        return this.expected("<expression>");
    }

    /**
     * Parses a type: `ident`, `* mut? ty` (pointer), `[ty; expr]` (array)
     * or `[ty]` (slice).
     */
    parseTy(): ast.Node {
        const loc = this.loc();
        if (this.test("ident")) {
            const ident = this.current.value;
            this.step();
            return ast.Node.create(loc, "IdentTy", { ident });
        }
        if (this.eat("*")) {
            const mutable = this.eat("mut");
            const ty = this.parseTy();
            return ast.Node.create(loc, mutable ? "PtrMutTy" : "PtrTy", { ty });
        }
        if (this.eat("[")) {
            const ty = this.parseTy();
            if (this.eat(";")) {
                const length = this.parseExpr();
                this.mustEat("]");
                return ast.Node.create(loc, "ArrayTy", { ty, length });
            }
            this.mustEat("]");
            return ast.Node.create(loc, "SliceTy", { ty });
        }
        return this.expected("<type>");
    }

    /**
     * Parses zero or more comma-separated items up to (not including) the
     * `close` token; a trailing comma is allowed. The caller consumes
     * `close` itself, which also reports an unterminated list.
     */
    private parseCommaList(
        close: string,
        parseItem: () => ast.Node,
    ): ast.Node[] {
        const items: ast.Node[] = [];
        while (!this.done && !this.test(close)) {
            items.push(parseItem());
            if (!this.eat(",")) {
                break;
            }
        }
        return items;
    }

    /** Consumes `..` or `..=` and returns which one, or null if neither. */
    private eatRangeOp(): string | null {
        for (const op of ["..", "..="]) {
            if (this.eat(op)) {
                return op;
            }
        }
        return null;
    }

    /**
     * Consumes and returns the current token, which must have the given
     * type; otherwise reports `expected '<type>', got '<actual>'`, where
     * the actual type is `eof` at end of input.
     *
     * @param type Required token type.
     * @param loc Line to attach to the diagnostic.
     * @throws Error on a mismatch or at end of input.
     */
    private mustEat(type: string, loc: number = this.loc()): Tok {
        // `done` is checked first: at end of input there is no current
        // token to inspect.
        if (this.done || this.current.type !== type) {
            this.expected(type, loc);
        }
        const tok = this.current;
        this.step();
        return tok;
    }

    /** Reports that `what` was expected in place of the current token. */
    private expected(what: string, loc: number = this.loc()): never {
        const got = this.done ? "eof" : this.current.type;
        return this.error(`expected '${what}', got '${got}'`, loc);
    }

    /** Prints an error diagnostic for line `loc`, then throws. */
    private error(message: string, loc: number): never {
        printDiagnostics(this.filename, loc, "error", message, this.text);
        throw new Error(message);
    }

    /** Consumes the current token if it has the given type. */
    private eat(type: string): boolean {
        if (this.test(type)) {
            this.step();
            return true;
        }
        return false;
    }

    /** Advances to the next token and tracks its line. */
    private step() {
        this.idx += 1;
        // At end of input the line of the last token is kept.
        if (!this.done) {
            this.currentLine = this.current.line;
        }
    }

    /** True when there is a current token and it has the given type. */
    private test(type: string): boolean {
        return !this.done && this.current.type === type;
    }

    /** Location (line number) used for nodes and diagnostics. */
    private loc(): number {
        return this.currentLine;
    }

    /** The current token; only meaningful while `done` is false. */
    private get current(): Tok {
        return this.toks[this.idx];
    }

    /** True once every token has been consumed. */
    private get done(): boolean {
        return this.idx >= this.toks.length;
    }
}
/**
 * A lexical token produced by `tokenize`.
 *
 * - `type`: token kind. `"ident"` for identifiers, `"int"` for integer
 *   literals; for everything else it is the token text itself.
 * - `value`: the token text as it appeared in the source.
 * - `line`: line number of the token, counted from 1.
 */
export type Tok = { type: string; value: string; line: number };
/**
 * Matches a word that is exactly one of the language keywords.
 *
 * The alternatives share a single group so that `^` and `$` apply to all
 * of them. With the anchors outside a group, `^` binds only to the first
 * alternative and `$` only to the last, which made any identifier that
 * merely contains a keyword (e.g. `format`, `letter`) match.
 */
const keywordPattern =
    /^(?:fn|return|let|if|else|while|break|or|and|not|mut)$/;
/**
 * Matches one operator, punctuation character or newline; used to pad
 * them with spaces before splitting.
 *
 * Alternatives are tried left to right, so a longer operator must come
 * before any operator that is a prefix of it: `..=` before `..`.
 */
const operatorPattern =
    /((?:\->)|(?:==)|(?:!=)|(?:<=)|(?:>=)|(?:<<)|(?:>>)|(?:\.\*)|(?:\.\.=)|(?:\.\.)|[\n\(\)\{\}\[\]\,\.\;\:\!\=\<\>\&\^\|\+\-\*\/\%])/g;
/**
 * Splits source text into tokens.
 *
 * Line comments (`//` to end of line) are removed, operators and newlines
 * are padded with spaces via `operatorPattern`, and the text is split on
 * spaces, tabs and carriage returns. Newlines are not emitted as tokens;
 * they only advance the line counter.
 *
 * Classification of each remaining piece:
 * - identifier-shaped words get the type `"ident"`, unless
 *   `keywordPattern` matches, in which case the type is the word itself;
 * - decimal integers (`0` or no leading zero) get the type `"int"`;
 * - everything else uses its own text as the type.
 *
 * @param text Source text to tokenize.
 * @returns The tokens in source order, each with its 1-based line number.
 */
export function tokenize(text: string): Tok[] {
    const identPattern = /^[a-zA-Z_][a-zA-Z0-9_]*$/;
    const intPattern = /^(?:0|(?:[1-9][0-9]*))$/;

    const pieces = text
        .replace(/\/\/[^\n]*/g, "")
        .replace(operatorPattern, " $1 ")
        .split(/[ \t\r]/);

    // A single pass that appends in place; rebuilding the token array for
    // every token made tokenizing quadratic in the number of tokens.
    const toks: Tok[] = [];
    let line = 1;
    for (const value of pieces) {
        if (value === "") {
            continue;
        }
        if (value === "\n") {
            line += 1;
            continue;
        }
        let type = value;
        if (identPattern.test(value)) {
            type = keywordPattern.test(value) ? value : "ident";
        }
        if (intPattern.test(value)) {
            type = "int";
        }
        toks.push({ type, value, line });
    }
    return toks;
}