import * as ast from "../ast.ts";
import { printDiagnostics } from "../diagnostics.ts";

/**
 * Tokenizes and parses `text`, returning the root "File" node.
 *
 * @param filename Name passed along to diagnostics when a syntax error occurs.
 * @param text Full source text to parse.
 * @returns The "File" AST node holding the top-level statements.
 * @throws Error when the source contains a syntax error (after the
 *   diagnostic has been printed).
 */
export function parse(
  filename: string,
  text: string,
): ast.Node {
  return new Parser(filename, text).parseFile();
}

/**
 * Binary operators as `[token type, AST operator, precedence level]`.
 * A lower level binds tighter; `parseBinary` descends from the highest level
 * down to level 0, where prefix expressions are parsed.
 */
const binaryOps: ReadonlyArray<[Tok["type"], ast.BinaryOp, number]> = [
  ["or", "Or", 9],
  ["and", "And", 8],
  ["==", "Eq", 7],
  ["!=", "Ne", 7],
  ["<", "Lt", 7],
  [">", "Gt", 7],
  ["<=", "Lte", 7],
  [">=", "Gte", 7],
  ["|", "BitOr", 6],
  ["^", "BitXor", 5],
  ["&", "BitAnd", 4],
  ["<<", "Shl", 3],
  [">>", "Shr", 3],
  ["+", "Add", 2],
  ["-", "Subtract", 2],
  ["*", "Multiply", 1],
  ["/", "Divide", 1],
  ["%", "Remainder", 1],
];

/**
 * Loosest precedence level in `binaryOps`. Parsing must start here, otherwise
 * the operators above the starting level (`and`, `or`) are never consumed.
 */
const maxBinaryPrec = 9;

/** Prefix operators as `[token type, AST operator]` (`&`/`&mut` are handled separately). */
const prefixOps: ReadonlyArray<[Tok["type"], ast.UnaryOp]> = [
  ["not", "Not"],
  ["-", "Negate"],
  ["*", "Deref"],
];

/**
 * Recursive-descent parser over the token list produced by `tokenize`.
 *
 * Every `parse*` method consumes tokens starting at the current position and
 * returns the AST node it built; node locations are the line number of the
 * node's first token. Syntax errors are reported through `printDiagnostics`
 * and then thrown as an `Error`.
 */
export class Parser {
  private toks: Tok[];
  private idx = 0;
  private currentLine = 1;

  constructor(
    private filename: string,
    private text: string,
  ) {
    this.toks = tokenize(text);
    // Seed the line from the first token so that a file starting with blank
    // or comment-only lines does not attribute its first node to line 1.
    if (this.toks.length > 0) {
      this.currentLine = this.toks[0].line;
    }
  }

  /** Parses statements until the end of input and wraps them in a "File" node. */
  parseFile(): ast.Node {
    const loc = this.loc();
    const stmts: ast.Node[] = [];
    while (!this.done) {
      stmts.push(this.parseStmt());
    }
    return ast.Node.create(loc, "File", { stmts });
  }

  /** Parses `{ stmt* }` into a "Block" node. */
  parseBlock(): ast.Node {
    const loc = this.loc();
    this.mustEat("{");
    const stmts: ast.Node[] = [];
    while (!this.done && !this.test("}")) {
      stmts.push(this.parseStmt());
    }
    this.mustEat("}");
    return ast.Node.create(loc, "Block", { stmts });
  }

  /**
   * Parses one statement: `fn`, `return`, `let`, `if`, or otherwise an
   * expression that is either assigned to (`place = expr;`) or evaluated on
   * its own (`expr;`).
   */
  parseStmt(): ast.Node {
    if (this.test("fn")) {
      return this.parseFnStmt();
    }
    if (this.test("return")) {
      return this.parseReturnStmt();
    }
    if (this.test("let")) {
      return this.parseLetStmt();
    }
    if (this.test("if")) {
      return this.parseIfStmt();
    }

    const loc = this.loc();
    const place = this.parseExpr();
    if (this.eat("=")) {
      const expr = this.parseExpr();
      this.mustEat(";");
      return ast.Node.create(loc, "AssignStmt", { place, expr });
    }
    this.mustEat(";");
    return ast.Node.create(loc, "ExprStmt", { expr: place });
  }

  /**
   * Parses `fn ident(params) [-> ty] { ... }` into a "FnStmt" node.
   * Expects the current token to be `fn`. A trailing comma after the last
   * parameter is accepted; `retTy` is `null` when no `->` is present.
   */
  parseFnStmt(): ast.Node {
    const loc = this.loc();
    this.step(); // skip `fn`
    const ident = this.mustEat("ident").value;
    this.mustEat("(");
    const params = this.parseCommaList(")", () => this.parseParam());
    this.mustEat(")");
    const retTy = this.eat("->") ? this.parseTy() : null;
    const body = this.parseBlock();
    return ast.Node.create(loc, "FnStmt", { ident, params, retTy, body });
  }

  /**
   * Parses `return [expr];` into a "ReturnStmt" node (`expr` is `null` for a
   * bare `return;`). Expects the current token to be `return`.
   */
  parseReturnStmt(): ast.Node {
    const loc = this.loc();
    this.step(); // skip `return`
    const expr = this.test(";") ? null : this.parseExpr();
    this.mustEat(";");
    return ast.Node.create(loc, "ReturnStmt", { expr });
  }

  /**
   * Parses `let param = expr;` into a "LetStmt" node.
   * Expects the current token to be `let`.
   */
  parseLetStmt(): ast.Node {
    const loc = this.loc();
    this.step(); // skip `let`
    const param = this.parseParam();
    this.mustEat("=");
    const expr = this.parseExpr();
    this.mustEat(";");
    return ast.Node.create(loc, "LetStmt", { param, expr });
  }

  /**
   * Parses `if cond { ... } [else { ... }]` into an "IfStmt" node (`falsy` is
   * `null` without an `else`). Expects the current token to be `if`.
   */
  parseIfStmt(): ast.Node {
    const loc = this.loc();
    this.step(); // skip `if`
    const cond = this.parseExpr();
    const truthy = this.parseBlock();
    const falsy = this.eat("else") ? this.parseBlock() : null;
    return ast.Node.create(loc, "IfStmt", { cond, truthy, falsy });
  }

  /** Parses `ident [: ty]` into a "Param" node (`ty` is `null` when omitted). */
  parseParam(): ast.Node {
    const loc = this.loc();
    const ident = this.mustEat("ident").value;
    const ty = this.eat(":") ? this.parseTy() : null;
    return ast.Node.create(loc, "Param", { ident, ty });
  }

  /** Parses an expression; the loosest-binding form is a range. */
  parseExpr(): ast.Node {
    return this.parseRange();
  }

  /**
   * Parses a range (`a..b`, `a..=b`, `..b`, `a..`, ...) or, when no range
   * operator is present, just the binary expression.
   */
  parseRange(): ast.Node {
    const loc = this.loc();

    const leadingOp = this.eatRangeOp();
    if (leadingOp !== null) {
      return this.parseRangeTail(loc, null, leadingOp);
    }

    const begin = this.parseBinary();
    const op = this.eatRangeOp();
    return op === null ? begin : this.parseRangeTail(loc, begin, op);
  }

  /**
   * Parses the part of a range after its operator and builds the "RangeExpr".
   *
   * @param loc Line of the range's first token.
   * @param begin Already-parsed lower bound, or `null` when the range starts
   *   with the operator.
   * @param tok Token type of the consumed range operator; `".."` is
   *   exclusive, anything else is treated as inclusive.
   */
  parseRangeTail(loc: number, begin: ast.Node | null, tok: string): ast.Node {
    const limit: ast.RangeLimit = tok === ".." ? "Exclusive" : "Inclusive";
    // The upper bound is optional: these tokens end the enclosing construct,
    // so seeing one of them means the range is open-ended.
    const terminators = [";", ",", ")", "]"];
    const openEnded = terminators.some((terminator) => this.test(terminator));
    const end = openEnded ? null : this.parseBinary();
    return ast.Node.create(loc, "RangeExpr", { begin, end, limit });
  }

  /**
   * Parses a left-associative chain of binary operators by precedence
   * climbing.
   *
   * @param prec Loosest precedence level to accept; defaults to the loosest
   *   level in the operator table so that `and`/`or` are included. Level 0
   *   parses a prefix expression.
   */
  parseBinary(prec = maxBinaryPrec): ast.Node {
    if (prec === 0) {
      return this.parsePrefix();
    }
    const loc = this.loc();
    let left = this.parseBinary(prec - 1);
    for (
      let matched = this.eatBinaryOp(prec);
      matched !== null;
      matched = this.eatBinaryOp(prec)
    ) {
      const [tok, op] = matched;
      const right = this.parseBinary(prec - 1);
      left = ast.Node.create(
        loc,
        "BinaryExpr",
        { op, left, right, tok },
      );
    }
    return left;
  }

  /**
   * Parses prefix operators (`not`, `-`, `*`, `&`, `&mut`) into "UnaryExpr"
   * nodes, falling through to a postfix expression when none is present.
   */
  parsePrefix(): ast.Node {
    const loc = this.loc();
    for (const [tok, op] of prefixOps) {
      if (this.eat(tok)) {
        const expr = this.parsePrefix();
        return ast.Node.create(loc, "UnaryExpr", { op, expr, tok });
      }
    }
    if (this.eat("&")) {
      const mutable = this.eat("mut");
      const op: ast.UnaryOp = mutable ? "RefMut" : "Ref";
      const tok = mutable ? "&mut" : "&";
      const expr = this.parsePrefix();
      return ast.Node.create(loc, "UnaryExpr", { op, expr, tok });
    }
    return this.parsePostfix();
  }

  /**
   * Parses an operand followed by any number of postfix forms: `.*`
   * (dereference), `[expr]` (index) and `(args)` (call, trailing comma
   * allowed).
   */
  parsePostfix(): ast.Node {
    let expr = this.parseOperand();
    while (true) {
      const loc = this.loc();
      if (this.eat(".*")) {
        // use unary because it's already there
        // TODO: consider making a separate node type
        expr = ast.Node
          .create(loc, "UnaryExpr", { expr, op: "Deref", tok: ".*" });
      } else if (this.eat("[")) {
        const arg = this.parseExpr();
        this.mustEat("]");
        expr = ast.Node.create(loc, "IndexExpr", { expr, arg });
      } else if (this.eat("(")) {
        const args = this.parseCommaList(")", () => this.parseExpr());
        this.mustEat(")");
        expr = ast.Node.create(loc, "CallExpr", { expr, args });
      } else {
        return expr;
      }
    }
  }

  /**
   * Parses a primary expression: identifier, integer literal, parenthesised
   * expression, or array literal `[a, b, ...]` (trailing comma allowed).
   * Reports a syntax error for any other token.
   */
  parseOperand(): ast.Node {
    const loc = this.loc();
    if (this.test("ident")) {
      const ident = this.current.value;
      this.step();
      return ast.Node.create(loc, "IdentExpr", { ident });
    }
    if (this.test("int")) {
      const value = Number(this.current.value);
      this.step();
      return ast.Node.create(loc, "IntExpr", { value });
    }
    if (this.eat("(")) {
      // Parentheses only group; they produce no node of their own.
      const expr = this.parseExpr();
      this.mustEat(")");
      return expr;
    }
    if (this.eat("[")) {
      const values = this.parseCommaList("]", () => this.parseExpr());
      this.mustEat("]");
      return ast.Node.create(loc, "ArrayExpr", { values });
    }
    return this.unexpected("expression");
  }

  /**
   * Parses a type: a named type, a pointer `*ty` / `*mut ty`, an array
   * `[ty; length]` or a slice `[ty]`. Reports a syntax error otherwise.
   */
  parseTy(): ast.Node {
    const loc = this.loc();
    if (this.test("ident")) {
      const ident = this.current.value;
      this.step();
      return ast.Node.create(loc, "IdentTy", { ident });
    }
    if (this.eat("*")) {
      const mutable = this.eat("mut");
      const ty = this.parseTy();
      return ast.Node.create(loc, mutable ? "PtrMutTy" : "PtrTy", { ty });
    }
    if (this.eat("[")) {
      const ty = this.parseTy();
      if (this.eat(";")) {
        const length = this.parseExpr();
        this.mustEat("]");
        return ast.Node.create(loc, "ArrayTy", { ty, length });
      }
      this.mustEat("]");
      return ast.Node.create(loc, "SliceTy", { ty });
    }
    return this.unexpected("type");
  }

  /**
   * Parses zero or more comma-separated items up to (not including) the
   * `close` token, allowing a trailing comma. Stops at end of input so the
   * caller's `mustEat(close)` reports the missing closing token.
   */
  private parseCommaList(
    close: string,
    parseItem: () => ast.Node,
  ): ast.Node[] {
    const items: ast.Node[] = [];
    if (this.done || this.test(close)) {
      return items;
    }
    items.push(parseItem());
    while (this.eat(",")) {
      if (this.done || this.test(close)) {
        break;
      }
      items.push(parseItem());
    }
    return items;
  }

  /** Consumes a range operator if one is next and returns its token type. */
  private eatRangeOp(): string | null {
    if (this.eat("..")) {
      return "..";
    }
    if (this.eat("..=")) {
      return "..=";
    }
    return null;
  }

  /**
   * Consumes the next token if it is a binary operator whose level is at most
   * `prec`, returning its token type and AST operator; `null` otherwise.
   */
  private eatBinaryOp(prec: number): [string, ast.BinaryOp] | null {
    for (const [tok, op, level] of binaryOps) {
      if (prec >= level && this.eat(tok)) {
        return [tok, op];
      }
    }
    return null;
  }

  /**
   * Consumes and returns the current token, which must have type `type`;
   * otherwise reports a syntax error at `loc`.
   */
  private mustEat(type: string, loc: number = this.loc()): Tok {
    // `done` has to be checked first: past the end `current` is undefined and
    // reading its `type` would raise a TypeError instead of a diagnostic.
    if (this.done || this.current.type !== type) {
      const got = this.done ? "eof" : this.current.type;
      this.error(`expected '${type}', got '${got}'`, loc);
    }
    const tok = this.current;
    this.step();
    return tok;
  }

  /** Reports that `what` was expected in place of the current token. */
  private unexpected(what: string): never {
    const got = this.done ? "eof" : this.current.type;
    this.error(`expected ${what}, got '${got}'`, this.loc());
  }

  /** Prints an error diagnostic for line `loc` and aborts parsing by throwing. */
  private error(message: string, loc: number): never {
    printDiagnostics(this.filename, loc, "error", message, this.text);
    throw new Error(message);
  }

  /** Consumes the current token if it has type `type`; reports whether it did. */
  private eat(type: string): boolean {
    if (this.test(type)) {
      this.step();
      return true;
    }
    return false;
  }

  /**
   * Advances to the next token. `currentLine` keeps the last token's line
   * once the end is reached, so end-of-input errors point at the last line
   * that had a token.
   */
  private step() {
    this.idx += 1;
    if (!this.done) {
      this.currentLine = this.current.line;
    }
  }

  /** Whether there is a current token and it has type `type`. */
  private test(type: string): boolean {
    return !this.done && this.current.type === type;
  }

  /** Location (line number) used for the node about to be parsed. */
  private loc(): number {
    return this.currentLine;
  }

  /** The token at the current position; only valid while `done` is false. */
  private get current(): Tok {
    return this.toks[this.idx];
  }

  /** Whether every token has been consumed. */
  private get done(): boolean {
    return this.idx >= this.toks.length;
  }
}

/**
 * A lexical token. `type` is `"ident"`, `"int"`, or - for keywords, operators
 * and anything unrecognised - the token text itself; `value` is always the
 * token text and `line` its 1-based source line.
 */
export type Tok = { type: string; value: string; line: number };

// The alternation is grouped so that `^`/`$` apply to every keyword; without
// the group only the first and last alternatives are anchored and identifiers
// that merely contain a keyword (e.g. `format` contains `or`) are misclassified.
const keywordPattern =
  /^(?:fn|return|let|if|else|while|break|or|and|not|mut)$/;

// Longer operators are listed before their prefixes (`..=` before `..`, and
// all multi-character operators before the single-character class) because
// regex alternation takes the first alternative that matches.
const operatorPattern =
  /(->|==|!=|<=|>=|<<|>>|\.\*|\.\.=|\.\.|[\n(){}\[\],.;:!=<>&^|+\-*\/%])/g;

const identPattern = /^[a-zA-Z_][a-zA-Z0-9_]*$/;
const intPattern = /^(?:0|(?:[1-9][0-9]*))$/;

/**
 * Splits source text into tokens.
 *
 * Line comments (`//` to end of line) are dropped, operators and newlines
 * are padded with spaces, and the text is split on spaces, tabs and carriage
 * returns. Newlines only advance the line counter and produce no token.
 *
 * @param text Source text.
 * @returns Tokens in source order, each tagged with its 1-based line.
 */
export function tokenize(text: string): Tok[] {
  const words = text
    .replace(/\/\/[^\n]*/g, "")
    .replace(operatorPattern, " $1 ")
    .split(/[ \t\r]/)
    .filter((word) => word !== "");

  const toks: Tok[] = [];
  let line = 1;
  for (const value of words) {
    if (value === "\n") {
      line += 1;
      continue;
    }
    toks.push({ type: tokenType(value), value, line });
  }
  return toks;
}

/** Classifies token text as a keyword (its own text), `"ident"`, `"int"`, or itself. */
function tokenType(value: string): string {
  if (identPattern.test(value)) {
    return keywordPattern.test(value) ? value : "ident";
  }
  if (intPattern.test(value)) {
    return "int";
  }
  return value;
}