// 453 lines, 14 KiB, TypeScript
import * as ast from "../ast.ts";
|
|
import { Loc, printDiagnostics } from "../diagnostics.ts";
|
|
|
|
/**
 * Parses one source file into its AST.
 *
 * @param filename Name of the file, forwarded to the parser for diagnostics.
 * @param text Full source text to parse.
 * @returns The node produced by `Parser.parseFile()`.
 */
export function parse(filename: string, text: string): ast.Node {
  const parser = new Parser(filename, text);
  return parser.parseFile();
}
|
|
|
|
/**
 * Recursive-descent parser that turns the tokens of one source file into an
 * `ast.Node` tree.
 *
 * The constructor tokenizes the whole input up front; the `parse*` methods
 * then walk the token array with a single cursor. A syntax error is reported
 * through `printDiagnostics` and aborts parsing by throwing an `Error`.
 */
export class Parser {
  /**
   * Binary operators as `[token type, AST operator, precedence level]`.
   * Level 1 binds tightest. `maxBinaryPrec` must equal the largest level
   * listed here, otherwise the loosest operators are never parsed.
   */
  private static readonly binaryOps: ReadonlyArray<
    readonly [Tok["type"], ast.BinaryOp, number]
  > = [
    ["or", "Or", 9],
    ["and", "And", 8],
    ["==", "Eq", 7],
    ["!=", "Ne", 7],
    ["<", "Lt", 7],
    [">", "Gt", 7],
    ["<=", "Lte", 7],
    [">=", "Gte", 7],
    ["|", "BitOr", 6],
    ["^", "BitXor", 5],
    ["&", "BitAnd", 4],
    ["<<", "Shl", 3],
    [">>", "Shr", 3],
    ["+", "Add", 2],
    ["-", "Subtract", 2],
    ["*", "Multiply", 1],
    ["/", "Divide", 1],
    ["%", "Remainder", 1],
  ];

  /** Loosest precedence level in `binaryOps`; the entry level for `parseBinary`. */
  private static readonly maxBinaryPrec = 9;

  /** Single-token prefix operators as `[token type, AST operator]`. */
  private static readonly prefixOps: ReadonlyArray<
    readonly [Tok["type"], ast.UnaryOp]
  > = [
    ["not", "Not"],
    ["-", "Negate"],
    ["*", "Deref"],
  ];

  private readonly toks: Tok[];
  private idx = 0;
  /** Location of the current token; keeps the last token's location at EOF. */
  private currentLoc: Loc = { idx: 0, line: 1, col: 1 };

  /**
   * @param filename Name used when printing diagnostics.
   * @param text Source text; tokenized immediately.
   */
  constructor(
    private readonly filename: string,
    private readonly text: string,
  ) {
    this.toks = tokenize(text);
    // Start at the first token's real location so that leading whitespace or
    // comments do not make the first statement report line 1, column 1.
    const first = this.toks[0];
    if (first) {
      this.currentLoc = first.loc;
    }
  }

  /** Parses statements until the tokens run out and wraps them in a `File` node. */
  parseFile(): ast.Node {
    // The file node itself is anchored at the very start of the text.
    const loc: Loc = { idx: 0, line: 1, col: 1 };
    const stmts: ast.Node[] = [];
    while (!this.done) {
      stmts.push(this.parseStmt());
    }
    return ast.Node.create(loc, "File", { stmts });
  }

  /** Parses `{ stmt* }` into a `Block` node. */
  parseBlock(): ast.Node {
    const loc = this.loc();
    this.mustEat("{");
    const stmts: ast.Node[] = [];
    while (!this.done && !this.test("}")) {
      stmts.push(this.parseStmt());
    }
    this.mustEat("}");
    return ast.Node.create(loc, "Block", { stmts });
  }

  /**
   * Parses one statement. `fn`, `return`, `let` and `if` dispatch on their
   * keyword; anything else is an expression that becomes an `AssignStmt` when
   * followed by `=`, or an `ExprStmt` otherwise. Both forms require `;`.
   */
  parseStmt(): ast.Node {
    if (this.test("fn")) {
      return this.parseFnStmt();
    }
    if (this.test("return")) {
      return this.parseReturnStmt();
    }
    if (this.test("let")) {
      return this.parseLetStmt();
    }
    if (this.test("if")) {
      return this.parseIfStmt();
    }

    const loc = this.loc();
    const place = this.parseExpr();
    if (this.eat("=")) {
      const expr = this.parseExpr();
      this.mustEat(";");
      return ast.Node.create(loc, "AssignStmt", { place, expr });
    }
    this.mustEat(";");
    return ast.Node.create(loc, "ExprStmt", { expr: place });
  }

  /** Parses `fn ident ( params ) [-> ty] block`; the current token must be `fn`. */
  parseFnStmt(): ast.Node {
    const loc = this.loc();
    this.step(); // consume `fn`
    const ident = this.mustEat("ident").value;
    this.mustEat("(");
    const params = this.parseCommaList(")", () => this.parseParam());
    const retTy: ast.Node | null = this.eat("->") ? this.parseTy() : null;
    const body = this.parseBlock();
    return ast.Node.create(loc, "FnStmt", { ident, params, retTy, body });
  }

  /** Parses `return [expr] ;`; the current token must be `return`. */
  parseReturnStmt(): ast.Node {
    const loc = this.loc();
    this.step(); // consume `return`
    const expr: ast.Node | null = this.test(";") ? null : this.parseExpr();
    this.mustEat(";");
    return ast.Node.create(loc, "ReturnStmt", { expr });
  }

  /** Parses `let param = expr ;`; the current token must be `let`. */
  parseLetStmt(): ast.Node {
    const loc = this.loc();
    this.step(); // consume `let`
    const param = this.parseParam();
    this.mustEat("=");
    const expr = this.parseExpr();
    this.mustEat(";");
    return ast.Node.create(loc, "LetStmt", { param, expr });
  }

  /** Parses `if expr block [else block]`; the current token must be `if`. */
  parseIfStmt(): ast.Node {
    const loc = this.loc();
    this.step(); // consume `if`
    const cond = this.parseExpr();
    const truthy = this.parseBlock();
    const falsy: ast.Node | null = this.eat("else") ? this.parseBlock() : null;
    return ast.Node.create(loc, "IfStmt", { cond, truthy, falsy });
  }

  /** Parses `ident [: ty]` into a `Param` node (`ty` is `null` when omitted). */
  parseParam(): ast.Node {
    const loc = this.loc();
    const ident = this.mustEat("ident").value;
    const ty: ast.Node | null = this.eat(":") ? this.parseTy() : null;
    return ast.Node.create(loc, "Param", { ident, ty });
  }

  /** Entry point for expressions; range expressions are the loosest form. */
  parseExpr(): ast.Node {
    return this.parseRange();
  }

  /**
   * Parses an optional range around a binary expression: `..end`, `begin..`,
   * `begin..end` and the `..=` variants. Without a range operator the plain
   * binary expression is returned.
   */
  parseRange(): ast.Node {
    const loc = this.loc();
    const leading = this.eatRangeOp();
    if (leading !== null) {
      return this.parseRangeTail(loc, null, leading);
    }
    const begin = this.parseBinary();
    const infix = this.eatRangeOp();
    return infix === null ? begin : this.parseRangeTail(loc, begin, infix);
  }

  /**
   * Parses what follows an already-consumed range operator.
   *
   * @param loc Location of the start of the range expression.
   * @param begin Lower bound, or `null` for a range with no lower bound.
   * @param tok The consumed operator; `".."` is exclusive, anything else inclusive.
   */
  parseRangeTail(loc: Loc, begin: ast.Node | null, tok: string): ast.Node {
    const limit: ast.RangeLimit = tok === ".." ? "Exclusive" : "Inclusive";
    // The upper bound is omitted when the range is directly followed by a
    // token that closes the surrounding construct.
    const closers = [";", ",", ")", "]"];
    const end: ast.Node | null = closers.some((closer) => this.test(closer))
      ? null
      : this.parseBinary();
    // Built with `ast.Node.create` like every other node in this class.
    return ast.Node.create(loc, "RangeExpr", { begin, end, limit });
  }

  /**
   * Precedence-climbing parser for left-associative binary operators.
   *
   * @param prec Loosest precedence level this call may consume. Defaults to
   *   the loosest level in the table so that `or` / `and` are reachable from
   *   `parseExpr`; level 0 falls through to prefix expressions.
   */
  parseBinary(prec = Parser.maxBinaryPrec): ast.Node {
    if (prec === 0) {
      return this.parsePrefix();
    }
    const loc = this.loc();
    let left = this.parseBinary(prec - 1);
    for (;;) {
      const hit = this.eatBinaryOp(prec);
      if (hit === null) {
        return left;
      }
      const [tok, op] = hit;
      const right = this.parseBinary(prec - 1);
      left = ast.Node.create(loc, "BinaryExpr", { op, left, right, tok });
    }
  }

  /** Parses prefix operators (`not`, `-`, `*`, `&`, `&mut`), then a postfix expression. */
  parsePrefix(): ast.Node {
    const loc = this.loc();
    for (const [tok, op] of Parser.prefixOps) {
      if (this.eat(tok)) {
        const expr = this.parsePrefix();
        return ast.Node.create(loc, "UnaryExpr", { op, expr, tok });
      }
    }
    if (this.eat("&")) {
      const mutable = this.eat("mut");
      const expr = this.parsePrefix();
      const op: ast.UnaryOp = mutable ? "RefMut" : "Ref";
      const tok = mutable ? "&mut" : "&";
      return ast.Node.create(loc, "UnaryExpr", { op, expr, tok });
    }
    return this.parsePostfix();
  }

  /** Parses an operand followed by any number of `.*`, `[index]` or `(args)` suffixes. */
  parsePostfix(): ast.Node {
    let expr = this.parseOperand();
    for (;;) {
      // Each suffix node is located at the suffix token, not at the operand.
      const loc = this.loc();
      if (this.eat(".*")) {
        // use unary because it's already there
        // TODO: consider making a separate node type
        expr = ast.Node.create(loc, "UnaryExpr", {
          expr,
          op: "Deref",
          tok: ".*",
        });
      } else if (this.eat("[")) {
        const arg = this.parseExpr();
        this.mustEat("]");
        expr = ast.Node.create(loc, "IndexExpr", { expr, arg });
      } else if (this.eat("(")) {
        const args = this.parseCommaList(")", () => this.parseExpr());
        expr = ast.Node.create(loc, "CallExpr", { expr, args });
      } else {
        return expr;
      }
    }
  }

  /**
   * Parses a primary expression: identifier, integer literal, parenthesized
   * expression (returned without a wrapper node) or `[a, b, ...]` array.
   */
  parseOperand(): ast.Node {
    const loc = this.loc();
    if (this.test("ident")) {
      const ident = this.current.value;
      this.step();
      return ast.Node.create(loc, "IdentExpr", { ident });
    }
    if (this.test("int")) {
      const value = Number(this.current.value);
      this.step();
      return ast.Node.create(loc, "IntExpr", { value });
    }
    if (this.eat("(")) {
      const expr = this.parseExpr();
      this.mustEat(")");
      return expr;
    }
    if (this.eat("[")) {
      const values = this.parseCommaList("]", () => this.parseExpr());
      return ast.Node.create(loc, "ArrayExpr", { values });
    }
    return this.failExpected("<expression>");
  }

  /** Parses a type: `ident`, `*ty`, `*mut ty`, `[ty]` or `[ty; length]`. */
  parseTy(): ast.Node {
    const loc = this.loc();
    if (this.test("ident")) {
      const ident = this.current.value;
      this.step();
      return ast.Node.create(loc, "IdentTy", { ident });
    }
    if (this.eat("*")) {
      const mutable = this.eat("mut");
      const ty = this.parseTy();
      return ast.Node.create(loc, mutable ? "PtrMutTy" : "PtrTy", { ty });
    }
    if (this.eat("[")) {
      const ty = this.parseTy();
      if (this.eat(";")) {
        const length = this.parseExpr();
        this.mustEat("]");
        return ast.Node.create(loc, "ArrayTy", { ty, length });
      }
      this.mustEat("]");
      return ast.Node.create(loc, "SliceTy", { ty });
    }
    return this.failExpected("<type>");
  }

  /**
   * Parses `item (, item)* [,]` up to and including the `close` token.
   * A trailing comma before `close` is accepted.
   */
  private parseCommaList(
    close: string,
    parseItem: () => ast.Node,
  ): ast.Node[] {
    const items: ast.Node[] = [];
    while (!this.done && !this.test(close)) {
      items.push(parseItem());
      if (!this.eat(",")) {
        break;
      }
    }
    this.mustEat(close);
    return items;
  }

  /** Consumes `..` or `..=` if present and returns which one, else `null`. */
  private eatRangeOp(): ".." | "..=" | null {
    if (this.eat("..")) {
      return "..";
    }
    if (this.eat("..=")) {
      return "..=";
    }
    return null;
  }

  /**
   * Consumes the first operator in `binaryOps` whose level is at most `prec`
   * and whose token is current. Returns its token and AST operator, or `null`.
   */
  private eatBinaryOp(prec: number): [Tok["type"], ast.BinaryOp] | null {
    for (const [tok, op, level] of Parser.binaryOps) {
      if (prec >= level && this.eat(tok)) {
        return [tok, op];
      }
    }
    return null;
  }

  /**
   * Consumes and returns the current token, which must have the given type.
   * Reports a diagnostic and throws otherwise, including at end of input.
   */
  private mustEat(type: string, loc = this.loc()): Tok {
    if (!this.test(type)) {
      return this.failExpected(type, loc);
    }
    const tok = this.current;
    this.step();
    return tok;
  }

  /** Reports "expected X, got Y" for the current token (or `eof`) and throws. */
  private failExpected(expected: string, loc = this.loc()): never {
    const got = this.done ? "eof" : this.current.type;
    return this.error(`expected '${expected}', got '${got}'`, loc);
  }

  /**
   * Prints an error diagnostic for `loc` and aborts parsing.
   *
   * @throws Error always; the message repeats the diagnostic with its location.
   */
  private error(message: string, loc: Loc): never {
    printDiagnostics(this.filename, loc, "error", message, this.text);
    throw new Error(`${this.filename}:${loc.line}:${loc.col}: ${message}`);
  }

  /** Consumes the current token if it has the given type; reports whether it did. */
  private eat(type: string): boolean {
    if (!this.test(type)) {
      return false;
    }
    this.step();
    return true;
  }

  /** Advances the cursor by one token and refreshes the current location. */
  private step(): void {
    this.idx += 1;
    if (!this.done) {
      this.currentLoc = this.current.loc;
    }
  }

  /** True when a current token exists and has the given type. */
  private test(type: string): boolean {
    return !this.done && this.current.type === type;
  }

  /** Location of the current token (the last token's location at EOF). */
  private loc(): Loc {
    return this.currentLoc;
  }

  /** Token under the cursor; only meaningful while `done` is false. */
  private get current(): Tok {
    return this.toks[this.idx];
  }

  /** True once the cursor has moved past the last token. */
  private get done(): boolean {
    return this.idx >= this.toks.length;
  }
}
|
|
|
|
/**
 * A lexical token.
 *
 * - `type`: the tag the parser matches on. `tokenize` uses the matched text
 *   itself for operators and keywords, `"ident"` for identifiers and `"int"`
 *   for integer literals.
 * - `value`: the exact source text that was matched.
 * - `loc`: position of the token's first character.
 */
export type Tok = { type: string; value: string; loc: Loc };
|
|
|
|
/**
 * Matches a string that is exactly one reserved word.
 *
 * Anchored at both ends: it is tested against complete identifier lexemes, so
 * identifiers that merely start with a keyword (`fnord`, `iffy`, `letter`,
 * `order`, `notify`, ...) must not be classified as keywords.
 */
const keywordPattern =
  /^(?:fn|return|let|if|else|while|break|or|and|not|mut)$/;
|
|
|
|
/**
 * Matches one operator or punctuation token.
 *
 * Regex alternation takes the first alternative that matches, so every
 * multi-character operator is listed before any operator that is a prefix of
 * it. In particular `..=` comes before `..`, otherwise an inclusive range
 * would always be split into `..` followed by `=`. Single characters come
 * last.
 */
const operatorPattern2 =
  /(->|==|!=|<=|>=|<<|>>|\.\*|\.\.=|\.\.|[\n(){}\[\],.;:!=<>&^|+\-*\/%])/g;
|
|
|
|
/**
 * Splits source text into tokens.
 *
 * Rules are tried in registration order: whitespace and `//` line comments
 * are skipped; operators and punctuation use their own text as the token
 * type; identifier-shaped words become keyword tokens (type = the word) or
 * `"ident"`; decimal integers become `"int"`.
 *
 * NOTE(review): the final catch-all rule discards any single character that
 * no earlier rule matched, without reporting it. Confirm this is intended.
 *
 * @param text Source text to tokenize.
 * @returns The tokens in source order.
 */
export function tokenize(text: string): Tok[] {
  const skip = (): null => null;

  const lexer = new Lexer();
  lexer.add(/[ \t\r\n]+/, skip);
  lexer.add(/\/\/[^\n]*/, skip);
  lexer.add(operatorPattern2, (loc, value) => ({ type: value, value, loc }));
  lexer.add(/[a-zA-Z_][a-zA-Z0-9_]*/, (loc, value) => ({
    type: keywordPattern.test(value) ? value : "ident",
    value,
    loc,
  }));
  lexer.add(/0|(?:[1-9][0-9]*)/, (loc, value) => ({
    type: "int",
    value,
    loc,
  }));
  lexer.add(/./, skip);

  return lexer.lex(text);
}
|
|
|
|
/**
 * Callback run for every match of a lexer rule.
 *
 * @param loc Position of the first character of the match.
 * @param match The matched text.
 * @returns The token to emit, or `null` to emit nothing for this match.
 */
type LexAction = (loc: Loc, match: string) => Tok | null;

/** One lexer rule: the pattern to try and the action to run when it matches. */
type LexRule = {
  pattern: RegExp;
  action: LexAction;
};
|
|
|
|
/**
 * Minimal rule-based lexer. Rules are tried in the order they were added and
 * the first one that matches at the current position wins.
 */
class Lexer {
  private rules: LexRule[] = [];

  /**
   * Registers a rule.
   *
   * Only `pattern.source` is used; the pattern's own flags are ignored. The
   * rule is compiled as a sticky regex so it can be matched in place at any
   * offset without slicing the input. Patterns are expected to describe the
   * lexeme itself (no `^`, lookbehind or leading `\b`).
   *
   * @param pattern Pattern describing the lexeme.
   * @param action Callback producing the token, or `null` to skip the match.
   * @returns `this`, for chaining.
   */
  add(pattern: RegExp, action: LexAction): this {
    this.rules.push({
      pattern: new RegExp(`(?:${pattern.source})`, "y"),
      action,
    });
    return this;
  }

  /**
   * Tokenizes `text`.
   *
   * Tracks a 0-based character index and 1-based line/column; each action
   * receives the location of the first character of its match.
   *
   * @param text Input to tokenize.
   * @returns The tokens returned by the rule actions, in input order.
   * @throws Error if no rule matches at some position.
   */
  lex(text: string): Tok[] {
    const toks: Tok[] = [];
    let idx = 0;
    let line = 1;
    let col = 1;

    while (idx < text.length) {
      let matched = false;

      for (const rule of this.rules) {
        rule.pattern.lastIndex = idx;
        const match = rule.pattern.exec(text);
        // An empty match would never advance `idx`; treat it as no match so
        // lexing always makes progress.
        if (match === null || match[0].length === 0) {
          continue;
        }

        const lexeme = match[0];
        const loc: Loc = { idx, line, col };
        // Advance one UTF-16 code unit at a time to keep line/col in sync.
        for (let i = 0; i < lexeme.length; ++i) {
          if (text[idx] === "\n") {
            line += 1;
            col = 1;
          } else {
            col += 1;
          }
          idx += 1;
        }

        const tok = rule.action(loc, lexeme);
        if (tok) {
          toks.push(tok);
        }
        matched = true;
        break;
      }

      if (!matched) {
        throw new Error(`no rule for character '${text[idx]}'`);
      }
    }

    return toks;
  }
}
|