import * as ast from "../ast.ts";
import { FileReporter, Loc } from "../diagnostics.ts";

/**
 * Tokenizes and parses `text` into a `File` AST node.
 *
 * @param text Source code to parse.
 * @param reporter Receives lexer and parser diagnostics.
 * @returns The root `File` node.
 */
export function parse(
    text: string,
    reporter: FileReporter,
): ast.Node {
    return new Parser(text, reporter).parseFile();
}

/**
 * Binary operators as `[token type, AST operator, precedence level]`.
 * A higher level binds more loosely; all operators are left-associative.
 */
const BINARY_OPS: readonly (readonly [Tok["type"], ast.BinaryOp, number])[] = [
    ["or", "Or", 9],
    ["and", "And", 8],
    ["==", "Eq", 7],
    ["!=", "Ne", 7],
    ["<", "Lt", 7],
    [">", "Gt", 7],
    ["<=", "Lte", 7],
    [">=", "Gte", 7],
    ["|", "BitOr", 6],
    ["^", "BitXor", 5],
    ["&", "BitAnd", 4],
    ["<<", "Shl", 3],
    [">>", "Shr", 3],
    ["+", "Add", 2],
    ["-", "Subtract", 2],
    ["*", "Multiply", 1],
    ["/", "Divide", 1],
    ["%", "Remainder", 1],
];

/**
 * Loosest precedence level in `BINARY_OPS`. Derived from the table so the
 * entry level of `parseBinary` can never fall below an operator's level
 * (which would make that operator unparseable).
 */
const MAX_BINARY_PREC = Math.max(...BINARY_OPS.map(([, , prec]) => prec));

/** Prefix operators that map one-to-one onto a token type (`&` is handled separately). */
const PREFIX_OPS: readonly (readonly [Tok["type"], ast.UnaryOp])[] = [
    ["not", "Not"],
    ["-", "Negate"],
    ["*", "Deref"],
];

/** Token types that end an open-ended range such as `a..`. */
const RANGE_END_TOKENS: readonly string[] = [";", ",", ")", "]"];

/** Shape of an `int` token: group 1 is the digits, group 2 the optional type suffix. */
const INT_LITERAL = /^(0|(?:[1-9][0-9]*))([iu](?:8|16|32|64|size))?$/;

/**
 * Recursive-descent parser over the token list produced by `tokenize`.
 *
 * Syntax errors are reported through the `FileReporter`, followed by
 * `reporter.abort()`.
 * NOTE(review): the code assumes `abort()` does not return — confirm in
 * `diagnostics.ts`.
 */
export class Parser {
    private toks: Tok[];
    /** Index of the current (not yet consumed) token. */
    private idx = 0;
    /** Location of the current token, or of the last token once input is exhausted. */
    private currentLoc: Loc = { idx: 0, line: 1, col: 1 };
    /** Most recently consumed token. */
    private prevTok: Tok | null = null;

    constructor(
        private text: string,
        private reporter: FileReporter,
    ) {
        this.toks = tokenize(this.text, this.reporter);
        // Start at the first token's real location. `step()` only updates
        // `currentLoc` after a token has been consumed, so without this every
        // node beginning at the first token would be placed at 1:1 even when
        // the file starts with whitespace or comments.
        const first = this.toks[0];
        if (first) {
            this.currentLoc = first.loc;
        }
    }

    /** Parses statements until end of input. The `File` node is located at the start of the file. */
    parseFile(): ast.Node {
        const loc: Loc = { idx: 0, line: 1, col: 1 };
        const stmts: ast.Node[] = [];
        while (!this.done) {
            stmts.push(this.parseStmt());
        }
        return ast.Node.create(loc, "File", { stmts });
    }

    /** Parses `{ stmt* }`. */
    parseBlock(): ast.Node {
        const loc = this.loc();
        this.mustEat("{");
        const stmts: ast.Node[] = [];
        while (!this.done && !this.test("}")) {
            stmts.push(this.parseStmt());
        }
        this.mustEat("}");
        return ast.Node.create(loc, "Block", { stmts });
    }

    /**
     * Parses one statement. Keyword-led statements are dispatched on the
     * current token; anything else is `expr;` or `place = expr;`.
     */
    parseStmt(): ast.Node {
        const loc = this.loc();
        if (this.test("fn")) {
            return this.parseFnStmt();
        }
        if (this.test("return")) {
            return this.parseReturnStmt();
        }
        if (this.test("let")) {
            return this.parseLetStmt();
        }
        if (this.test("if")) {
            return this.parseIfStmt();
        }
        if (this.test("while")) {
            return this.parseWhileStmt();
        }
        if (this.test("break")) {
            return this.parseBreakStmt();
        }

        const place = this.parseExpr();
        if (this.eat("=")) {
            const expr = this.parseExpr();
            this.mustEat(";");
            return ast.Node.create(loc, "AssignStmt", { place, expr });
        }
        this.mustEat(";");
        return ast.Node.create(loc, "ExprStmt", { expr: place });
    }

    /** Parses `fn ident(params) [-> ty] block`. A trailing comma after the last parameter is accepted. */
    parseFnStmt(): ast.Node {
        const loc = this.loc();
        this.step(); // `fn`
        const ident = this.mustEat("ident").value;
        this.mustEat("(");
        const params: ast.Node[] = [];
        if (!this.test(")")) {
            params.push(this.parseParam());
            while (this.eat(",")) {
                if (this.test(")")) {
                    break;
                }
                params.push(this.parseParam());
            }
        }
        this.mustEat(")");
        let retTy: ast.Node | null = null;
        if (this.eat("->")) {
            retTy = this.parseTy();
        }
        const body = this.parseBlock();
        return ast.Node.create(loc, "FnStmt", { ident, params, retTy, body });
    }

    /** Parses `return [expr];`. */
    parseReturnStmt(): ast.Node {
        const loc = this.loc();
        this.step(); // `return`
        let expr: ast.Node | null = null;
        if (!this.test(";")) {
            expr = this.parseExpr();
        }
        this.mustEat(";");
        return ast.Node.create(loc, "ReturnStmt", { expr });
    }

    /** Parses `let param = expr;`. */
    parseLetStmt(): ast.Node {
        const loc = this.loc();
        this.step(); // `let`
        const param = this.parseParam();
        this.mustEat("=");
        const expr = this.parseExpr();
        this.mustEat(";");
        return ast.Node.create(loc, "LetStmt", { param, expr });
    }

    /** Parses `if expr block [else block]`. */
    parseIfStmt(): ast.Node {
        const loc = this.loc();
        this.step(); // `if`
        const cond = this.parseExpr();
        const truthy = this.parseBlock();
        let falsy: ast.Node | null = null;
        if (this.eat("else")) {
            falsy = this.parseBlock();
        }
        return ast.Node.create(loc, "IfStmt", { cond, truthy, falsy });
    }

    /** Parses `while expr block`. */
    parseWhileStmt(): ast.Node {
        const loc = this.loc();
        this.step(); // `while`
        const cond = this.parseExpr();
        const body = this.parseBlock();
        return ast.Node.create(loc, "WhileStmt", { cond, body });
    }

    /** Parses `break;`. */
    parseBreakStmt(): ast.Node {
        const loc = this.loc();
        this.step(); // `break`
        this.mustEat(";");
        return ast.Node.create(loc, "BreakStmt", {});
    }

    /** Parses `ident [: ty]`, used for function parameters and `let` bindings. */
    parseParam(): ast.Node {
        const loc = this.loc();
        const ident = this.mustEat("ident").value;
        let ty: ast.Node | null = null;
        if (this.eat(":")) {
            ty = this.parseTy();
        }
        return ast.Node.create(loc, "Param", { ident, ty });
    }

    /** Parses an expression; ranges are the loosest-binding form. */
    parseExpr(): ast.Node {
        return this.parseRange();
    }

    /** Parses `[begin] (..|..=) [end]`, or a plain binary expression when no range operator follows. */
    parseRange(): ast.Node {
        const loc = this.loc();
        if (this.eat("..") || this.eat("..=")) {
            return this.parseRangeTail(loc, null, this.prevTok!.type);
        }
        const begin = this.parseBinary();
        if (this.eat("..") || this.eat("..=")) {
            return this.parseRangeTail(loc, begin, this.prevTok!.type);
        }
        return begin;
    }

    /**
     * Parses the optional end of a range whose operator was just consumed.
     *
     * @param loc Location of the start of the range expression.
     * @param begin Lower bound, or `null` for `..end`.
     * @param tok Type of the consumed range operator (`".."` or `"..="`).
     */
    parseRangeTail(loc: Loc, begin: ast.Node | null, tok: string): ast.Node {
        const limit: ast.RangeLimit = tok === ".." ? "Exclusive" : "Inclusive";
        let end: ast.Node | null = null;
        const atRangeEnd = RANGE_END_TOKENS.some((type) => this.test(type));
        if (!atRangeEnd) {
            end = this.parseBinary();
        }
        return ast.Node.create(loc, "RangeExpr", { begin, end, limit });
    }

    /**
     * Precedence-climbing parser for left-associative binary operators.
     *
     * @param prec Loosest precedence level to accept. Defaults to the
     *     loosest level in the operator table so that every operator,
     *     including `or` and `and`, is reachable.
     */
    parseBinary(prec = MAX_BINARY_PREC): ast.Node {
        const loc = this.loc();
        if (prec === 0) {
            return this.parsePrefix();
        }
        let left = this.parseBinary(prec - 1);
        let matchedOp = true;
        while (matchedOp) {
            matchedOp = false;
            for (const [tok, op, opPrec] of BINARY_OPS) {
                if (prec >= opPrec && this.eat(tok)) {
                    // Parsing the right operand one level tighter makes
                    // operators of the same level associate to the left.
                    const right = this.parseBinary(prec - 1);
                    left = ast.Node.create(
                        loc,
                        "BinaryExpr",
                        { op, left, right, tok },
                    );
                    matchedOp = true;
                    break;
                }
            }
        }
        return left;
    }

    /** Parses prefix operators `not`, `-`, `*`, `&` and `&mut`, which may be stacked. */
    parsePrefix(): ast.Node {
        const loc = this.loc();
        for (const [tok, op] of PREFIX_OPS) {
            if (this.eat(tok)) {
                const expr = this.parsePrefix();
                return ast.Node.create(loc, "UnaryExpr", { op, expr, tok });
            }
        }
        if (this.eat("&")) {
            const op: ast.UnaryOp = this.eat("mut") ? "RefMut" : "Ref";
            const expr = this.parsePrefix();
            const tok = op === "Ref" ? "&" : "&mut";
            return ast.Node.create(loc, "UnaryExpr", { op, expr, tok });
        }
        return this.parsePostfix();
    }

    /**
     * Parses an operand followed by any number of postfix forms:
     * `.*` (deref), `[expr]` (index) and `(args)` (call). Each postfix
     * node is located at its own operator token.
     */
    parsePostfix(): ast.Node {
        let expr = this.parseOperand();
        while (true) {
            const loc = this.loc();
            if (this.eat(".*")) {
                // use unary because it's already there
                // TODO: consider making a separate node type
                expr = ast.Node
                    .create(loc, "UnaryExpr", { expr, op: "Deref", tok: ".*" });
            } else if (this.eat("[")) {
                const arg = this.parseExpr();
                this.mustEat("]");
                expr = ast.Node.create(loc, "IndexExpr", { value: expr, arg });
            } else if (this.eat("(")) {
                const args: ast.Node[] = [];
                if (!this.test(")")) {
                    args.push(this.parseExpr());
                    while (this.eat(",")) {
                        if (this.done || this.test(")")) {
                            break;
                        }
                        args.push(this.parseExpr());
                    }
                }
                this.mustEat(")");
                expr = ast.Node.create(loc, "CallExpr", { value: expr, args });
            } else {
                break;
            }
        }
        return expr;
    }

    /** Parses an identifier, integer, string, parenthesized expression or array literal. */
    parseOperand(): ast.Node {
        const loc = this.loc();
        if (this.test("ident")) {
            const ident = this.current.value;
            this.step();
            return ast.Node.create(loc, "IdentExpr", { ident });
        }
        if (this.test("int")) {
            const match = this.current.value.match(INT_LITERAL);
            if (!match) {
                // The lexer only emits `int` tokens of this shape.
                throw new Error(
                    `malformed int token '${this.current.value}'`,
                );
            }
            const value = Number(match[1]);
            // The pattern admits only valid suffixes; unsuffixed literals are `i32`.
            const intTy = (match[2] ?? "i32") as ast.IntTy;
            this.step();
            return ast.Node.create(loc, "IntExpr", { value, intTy });
        }
        if (this.test("str")) {
            const value = this.current.value;
            this.step();
            return ast.Node.create(loc, "StrExpr", { value });
        }
        if (this.eat("(")) {
            const expr = this.parseExpr();
            this.mustEat(")");
            return expr;
        }
        if (this.eat("[")) {
            const values: ast.Node[] = [];
            if (!this.done && !this.test("]")) {
                values.push(this.parseExpr());
                while (this.eat(",")) {
                    if (this.test("]")) {
                        break;
                    }
                    values.push(this.parseExpr());
                }
            }
            this.mustEat("]");
            return ast.Node.create(loc, "ArrayExpr", { values });
        }
        // No token has the empty type, so this always reports an error.
        this.mustEat("");
        throw new Error("unreachable: mustEat should have aborted");
    }

    /** Parses a type: `ident`, `*ty`, `*mut ty`, `[ty]` (slice) or `[ty; expr]` (array). */
    parseTy(): ast.Node {
        const loc = this.loc();
        if (this.test("ident")) {
            const ident = this.current.value;
            this.step();
            return ast.Node.create(loc, "IdentTy", { ident });
        }
        if (this.eat("*")) {
            const mutable = this.eat("mut");
            const ty = this.parseTy();
            return ast.Node.create(loc, mutable ? "PtrMutTy" : "PtrTy", { ty });
        }
        if (this.eat("[")) {
            const ty = this.parseTy();
            if (this.eat(";")) {
                const length = this.parseExpr();
                this.mustEat("]");
                return ast.Node.create(loc, "ArrayTy", { ty, length });
            }
            this.mustEat("]");
            return ast.Node.create(loc, "SliceTy", { ty });
        }
        // No token has the empty type, so this always reports an error.
        this.mustEat("");
        throw new Error("unreachable: mustEat should have aborted");
    }

    /**
     * Consumes and returns the current token if it has type `type`;
     * otherwise reports `expected ..., got ...` at `loc` and aborts.
     * End of input is reported as `'eof'`.
     */
    private mustEat(type: string, loc = this.loc()): Tok {
        if (!this.test(type)) {
            const got = this.done ? "eof" : this.current.type;
            this.reporter.error(loc, `expected '${type}', got '${got}'`);
            // Point at the previous token: that is where the ';' is missing.
            if (type === ";" && this.prevTok) {
                this.reporter.info(
                    this.prevTok.loc,
                    `try adding '${type}' here`,
                );
            }
            this.reporter.abort();
        }
        const tok = this.current;
        this.step();
        return tok;
    }

    /** Consumes the current token if it has type `type`; returns whether it did. */
    private eat(type: string): boolean {
        if (this.test(type)) {
            this.step();
            return true;
        }
        return false;
    }

    /** Advances to the next token, updating `prevTok` and `currentLoc`. */
    private step() {
        if (!this.done) {
            this.prevTok = this.current;
        }
        this.idx += 1;
        if (!this.done) {
            this.currentLoc = this.current.loc;
        }
    }

    /** Returns whether the current token exists and has type `type`. */
    private test(type: string): boolean {
        return !this.done && this.current.type === type;
    }

    private loc(): Loc {
        return this.currentLoc;
    }

    private get current(): Tok {
        return this.toks[this.idx];
    }

    private get done(): boolean {
        return this.idx >= this.toks.length;
    }
}

/**
 * A lexed token. `type` is `"ident"`, `"int"`, `"str"`, or the literal
 * spelling of a keyword or operator.
 */
export type Tok = { type: string; value: string; loc: Loc };

// Anchored at both ends so that identifiers which merely start with a
// keyword (`iffy`, `letter`, `order`) are still lexed as `ident`.
const keywordPattern =
    /^(?:fn|return|let|if|else|while|break|or|and|not|mut)$/;

// Alternation is tried left to right, so longer operators come first:
// `..=` must precede `..`, and every multi-character operator must precede
// the single-character class.
const operatorPattern2 =
    /(?:->|==|!=|<=|>=|<<|>>|\.\*|\.\.=|\.\.|[(){}\[\],.;:!=<>&^|+\-*\/%])/;

/** Characters with a special meaning after `\` in a string literal; any other character stands for itself. */
const STRING_ESCAPES: ReadonlyMap<string, string> = new Map([
    ["0", "\0"],
    ["t", "\t"],
    ["r", "\r"],
    ["n", "\n"],
]);

/**
 * Splits `text` into tokens. Whitespace and `//` comments are skipped.
 * An unrecognized character is reported through `reporter` and skipped,
 * so lexing continues after it.
 */
export function tokenize(text: string, reporter: FileReporter): Tok[] {
    // Rule order matters: the first matching rule wins.
    return new Lexer()
        .add(/[ \t\r\n]+/, (_) => null)
        .add(/\/\/[^\n]*/, (_) => null)
        .add(operatorPattern2, (loc, value) => ({ type: value, value, loc }))
        .add(/[a-zA-Z_][a-zA-Z0-9_]*/, (loc, value) => {
            const type = keywordPattern.test(value) ? value : "ident";
            return { type, value, loc };
        })
        .add(
            /(?:0|(?:[1-9][0-9]*))(?:[iu](?:8|16|32|64|size))?/,
            (loc, value) => ({ type: "int", value, loc }),
        )
        .add(/"(?:[^\\"]|\\.)*"/, (loc, literal) => {
            // Drop the surrounding quotes and resolve escape sequences.
            let value = "";
            for (let i = 1; i < literal.length - 1; i += 1) {
                let ch = literal[i];
                if (ch === "\\") {
                    i += 1;
                    ch = literal[i];
                    ch = STRING_ESCAPES.get(ch) ?? ch;
                }
                value += ch;
            }
            return { type: "str", value, loc };
        })
        // `[\s\S]` rather than `.` so that line separators such as U+2028
        // are also reported instead of falling through every rule.
        .add(/[\s\S]/, (loc, value) => {
            const escapedChar = JSON.stringify(value[0]).slice(1, -1);
            reporter.error(loc, `illegal character '${escapedChar}'`);
            return null;
        })
        .lex(text);
}

type LexRule = {
    pattern: RegExp;
    action: LexAction;
};

/** Builds a token from a match, or returns `null` to discard the match. */
type LexAction = (loc: Loc, match: string) => Tok | null;

/** Ordered list of regex rules; at each position the first rule that matches wins. */
class Lexer {
    private rules: LexRule[] = [];

    /**
     * Appends a rule. Only the pattern's source is used (its flags are
     * dropped); it is recompiled as a sticky regex so it matches exactly
     * at the current position without re-slicing the input.
     */
    add(pattern: RegExp, action: LexAction): this {
        this.rules.push({
            pattern: new RegExp(`(?:${pattern.source})`, "y"),
            action,
        });
        return this;
    }

    /**
     * Lexes `text` from start to end, tracking 1-based line and column.
     *
     * @throws Error if no rule matches at some position.
     */
    lex(text: string): Tok[] {
        const toks: Tok[] = [];
        let idx = 0;
        let line = 1;
        let col = 1;
        outer_loop:
        while (idx < text.length) {
            for (const { pattern, action } of this.rules) {
                pattern.lastIndex = idx;
                const match = pattern.exec(text);
                // An empty match would never advance `idx`; treat it as no match.
                if (!match || match[0].length === 0) {
                    continue;
                }
                const lexeme = match[0];
                const loc: Loc = { idx, line, col };
                for (let i = 0; i < lexeme.length; ++i) {
                    if (text[idx] === "\n") {
                        line += 1;
                        col = 1;
                    } else {
                        col += 1;
                    }
                    idx += 1;
                }
                const tok = action(loc, lexeme);
                if (tok) {
                    toks.push(tok);
                }
                continue outer_loop;
            }
            throw new Error(`no rule for character '${text[idx]}'`);
        }
        return toks;
    }
}