415 lines
12 KiB
TypeScript
415 lines
12 KiB
TypeScript
import * as ast from "../ast.ts";
|
|
import { printDiagnostics } from "../diagnostics.ts";
|
|
|
|
/**
 * Parses a complete source text into a `File` AST node.
 *
 * @param filename Name of the source, forwarded to the parser for diagnostics.
 * @param text Full source text to tokenize and parse.
 * @returns The root node produced by `Parser.parseFile`.
 */
export function parse(filename: string, text: string): ast.Node {
    const parser = new Parser(filename, text);
    return parser.parseFile();
}
|
|
|
|
/**
 * Recursive-descent parser over the token list produced by `tokenize`.
 *
 * Every `parse*` method consumes tokens starting at the current position and
 * returns the `ast.Node` it built. Syntax errors are reported through
 * `printDiagnostics` and then abort parsing by throwing.
 */
export class Parser {
    /**
     * Loosest binary precedence level. `parseBinary` starts here by default so
     * that every operator in `binaryOps` (including `or` / `and`) is reachable.
     */
    private static readonly topBinaryPrec = 9;

    /**
     * Binary operators as `[token type, AST operator, precedence level]`.
     * Lower levels bind tighter; level 0 is the prefix-expression level.
     */
    private static readonly binaryOps: ReadonlyArray<
        readonly [Tok["type"], ast.BinaryOp, number]
    > = [
        ["or", "Or", 9],
        ["and", "And", 8],
        ["==", "Eq", 7],
        ["!=", "Ne", 7],
        ["<", "Lt", 7],
        [">", "Gt", 7],
        ["<=", "Lte", 7],
        [">=", "Gte", 7],
        ["|", "BitOr", 6],
        ["^", "BitXor", 5],
        ["&", "BitAnd", 4],
        ["<<", "Shl", 3],
        [">>", "Shr", 3],
        ["+", "Add", 2],
        ["-", "Subtract", 2],
        ["*", "Multiply", 1],
        ["/", "Divide", 1],
        ["%", "Remainder", 1],
    ];

    /** Simple prefix operators as `[token type, AST operator]`. */
    private static readonly prefixOps: ReadonlyArray<
        readonly [Tok["type"], ast.UnaryOp]
    > = [
        ["not", "Not"],
        ["-", "Negate"],
        ["*", "Deref"],
    ];

    private readonly toks: Tok[];
    /** Index of the current token in `toks`. */
    private idx = 0;
    /** Line of the current token; keeps the last token's line at end of input. */
    private currentLine = 1;
    /** Most recently consumed token, or `null` before the first `step`. */
    private prevTok: Tok | null = null;

    /**
     * @param filename Name of the source, passed to `printDiagnostics`.
     * @param text Source text; tokenized immediately.
     */
    constructor(
        private readonly filename: string,
        private readonly text: string,
    ) {
        this.toks = tokenize(text);
        // Start on the first token's line so locations are right even when the
        // source begins with blank lines or comments.
        const first = this.toks[0];
        if (first !== undefined) {
            this.currentLine = first.line;
        }
    }

    /** Parses statements until end of input and wraps them in a `File` node. */
    parseFile(): ast.Node {
        const loc = this.loc();
        const stmts: ast.Node[] = [];
        while (!this.done) {
            stmts.push(this.parseStmt());
        }
        return ast.Node.create(loc, "File", { stmts });
    }

    /** Parses `{ stmt* }` into a `Block` node. */
    parseBlock(): ast.Node {
        const loc = this.loc();
        this.mustEat("{");
        const stmts: ast.Node[] = [];
        while (!this.done && !this.test("}")) {
            stmts.push(this.parseStmt());
        }
        this.mustEat("}");
        return ast.Node.create(loc, "Block", { stmts });
    }

    /**
     * Parses one statement, dispatching on the leading keyword. Anything else
     * is an expression followed by `;`, or `place = expr;` for assignment.
     */
    parseStmt(): ast.Node {
        const loc = this.loc();
        if (this.test("fn")) {
            return this.parseFnStmt();
        }
        if (this.test("return")) {
            return this.parseReturnStmt();
        }
        if (this.test("let")) {
            return this.parseLetStmt();
        }
        if (this.test("if")) {
            return this.parseIfStmt();
        }

        const place = this.parseExpr();
        if (this.eat("=")) {
            const expr = this.parseExpr();
            this.mustEat(";");
            return ast.Node.create(loc, "AssignStmt", { place, expr });
        }
        this.mustEat(";");
        return ast.Node.create(loc, "ExprStmt", { expr: place });
    }

    /**
     * Parses `fn ident(params) [-> type] block` into a `FnStmt` node.
     * Expects the current token to be `fn`. A trailing comma is allowed.
     */
    parseFnStmt(): ast.Node {
        const loc = this.loc();
        this.step(); // skip `fn`
        const ident = this.mustEat("ident").value;
        this.mustEat("(");
        const params = this.parseCommaList(")", () => this.parseParam());
        this.mustEat(")");
        const retTy = this.eat("->") ? this.parseTy() : null;
        const body = this.parseBlock();
        return ast.Node.create(loc, "FnStmt", { ident, params, retTy, body });
    }

    /**
     * Parses `return [expr];` into a `ReturnStmt` node (`expr` is `null` when
     * omitted). Expects the current token to be `return`.
     */
    parseReturnStmt(): ast.Node {
        const loc = this.loc();
        this.step(); // skip `return`
        const expr = this.test(";") ? null : this.parseExpr();
        this.mustEat(";");
        return ast.Node.create(loc, "ReturnStmt", { expr });
    }

    /**
     * Parses `let param = expr;` into a `LetStmt` node.
     * Expects the current token to be `let`.
     */
    parseLetStmt(): ast.Node {
        const loc = this.loc();
        this.step(); // skip `let`
        const param = this.parseParam();
        this.mustEat("=");
        const expr = this.parseExpr();
        this.mustEat(";");
        return ast.Node.create(loc, "LetStmt", { param, expr });
    }

    /**
     * Parses `if cond block [else block]` into an `IfStmt` node (`falsy` is
     * `null` without an `else`). Expects the current token to be `if`.
     */
    parseIfStmt(): ast.Node {
        const loc = this.loc();
        this.step(); // skip `if`
        const cond = this.parseExpr();
        const truthy = this.parseBlock();
        const falsy = this.eat("else") ? this.parseBlock() : null;
        return ast.Node.create(loc, "IfStmt", { cond, truthy, falsy });
    }

    /** Parses `ident [: type]` into a `Param` node (`ty` is `null` if absent). */
    parseParam(): ast.Node {
        const loc = this.loc();
        const ident = this.mustEat("ident").value;
        const ty = this.eat(":") ? this.parseTy() : null;
        return ast.Node.create(loc, "Param", { ident, ty });
    }

    /** Parses an expression; the loosest form is a range expression. */
    parseExpr(): ast.Node {
        return this.parseRange();
    }

    /**
     * Parses an optional range: `a..b`, `a..=b`, `..b`, `a..`, or `..`.
     * Returns the plain binary expression when no range operator follows.
     */
    parseRange(): ast.Node {
        const loc = this.loc();
        // `prevTok` is non-null right after a successful `eat`, hence the `!`.
        if (this.eat("..") || this.eat("..=")) {
            return this.parseRangeTail(loc, null, this.prevTok!.type);
        }
        const begin = this.parseBinary();
        if (this.eat("..") || this.eat("..=")) {
            return this.parseRangeTail(loc, begin, this.prevTok!.type);
        }
        return begin;
    }

    /**
     * Finishes a range after its operator has been consumed.
     *
     * @param loc Location of the start of the range expression.
     * @param begin Lower bound, or `null` for an open start.
     * @param tok The consumed operator; `".."` is exclusive, anything else
     *     is treated as inclusive.
     * @returns A `RangeExpr` node; `end` is `null` when the next token is one
     *     of `;`, `,`, `)`, `]`.
     */
    parseRangeTail(loc: number, begin: ast.Node | null, tok: string): ast.Node {
        const limit: ast.RangeLimit = tok === ".." ? "Exclusive" : "Inclusive";
        const terminators = [";", ",", ")", "]"];
        const atTerminator = terminators.some((type) => this.test(type));
        const end = atTerminator ? null : this.parseBinary();
        return ast.Node.create(loc, "RangeExpr", { begin, end, limit });
    }

    /**
     * Parses a left-associative binary expression by precedence climbing.
     *
     * @param prec Loosest precedence level accepted at this call. Level 0
     *     delegates to `parsePrefix`. Defaults to the loosest level in the
     *     operator table so that `or` and `and` are parsed.
     */
    parseBinary(prec: number = Parser.topBinaryPrec): ast.Node {
        const loc = this.loc();
        if (prec === 0) {
            return this.parsePrefix();
        }

        let left = this.parseBinary(prec - 1);

        // Keep folding operators into `left` until none matches.
        let matched = true;
        while (matched) {
            matched = false;
            for (const [tok, op, level] of Parser.binaryOps) {
                if (prec >= level && this.eat(tok)) {
                    const right = this.parseBinary(prec - 1);
                    left = ast.Node.create(
                        loc,
                        "BinaryExpr",
                        { op, left, right, tok },
                    );
                    matched = true;
                    break;
                }
            }
        }
        return left;
    }

    /**
     * Parses prefix operators (`not`, `-`, `*`, `&`, `&mut`) into `UnaryExpr`
     * nodes, recursing for stacked prefixes; otherwise parses a postfix
     * expression.
     */
    parsePrefix(): ast.Node {
        const loc = this.loc();
        for (const [tok, op] of Parser.prefixOps) {
            if (this.eat(tok)) {
                const expr = this.parsePrefix();
                return ast.Node.create(loc, "UnaryExpr", { op, expr, tok });
            }
        }
        if (this.eat("&")) {
            const op: ast.UnaryOp = this.eat("mut") ? "RefMut" : "Ref";
            const expr = this.parsePrefix();
            const tok = op === "Ref" ? "&" : "&mut";
            return ast.Node.create(loc, "UnaryExpr", { op, expr, tok });
        }
        return this.parsePostfix();
    }

    /**
     * Parses an operand followed by any number of postfix forms:
     * `.*` (deref), `[index]`, and `(args)` with an optional trailing comma.
     */
    parsePostfix(): ast.Node {
        let expr = this.parseOperand();
        while (true) {
            const loc = this.loc();
            if (this.eat(".*")) {
                // use unary because it's already there
                // TODO: consider making a separate node type
                expr = ast.Node
                    .create(loc, "UnaryExpr", { expr, op: "Deref", tok: ".*" });
            } else if (this.eat("[")) {
                const arg = this.parseExpr();
                this.mustEat("]");
                expr = ast.Node.create(loc, "IndexExpr", { expr, arg });
            } else if (this.eat("(")) {
                const args = this.parseCommaList(")", () => this.parseExpr());
                this.mustEat(")");
                expr = ast.Node.create(loc, "CallExpr", { expr, args });
            } else {
                break;
            }
        }
        return expr;
    }

    /**
     * Parses a primary expression: identifier, integer literal, parenthesized
     * expression (returned unwrapped), or array literal `[a, b, ...]`.
     * Reports `expected '<expression>'` for anything else.
     */
    parseOperand(): ast.Node {
        const loc = this.loc();
        if (this.test("ident")) {
            const ident = this.current.value;
            this.step();
            return ast.Node.create(loc, "IdentExpr", { ident });
        }
        if (this.test("int")) {
            const value = Number(this.current.value);
            this.step();
            return ast.Node.create(loc, "IntExpr", { value });
        }
        if (this.eat("(")) {
            const expr = this.parseExpr();
            this.mustEat(")");
            return expr;
        }
        if (this.eat("[")) {
            const values = this.parseCommaList("]", () => this.parseExpr());
            this.mustEat("]");
            return ast.Node.create(loc, "ArrayExpr", { values });
        }
        return this.failExpected("<expression>");
    }

    /**
     * Parses a type: `ident`, `*T` / `*mut T`, `[T; length]`, or `[T]`.
     * Reports `expected '<type>'` for anything else.
     */
    parseTy(): ast.Node {
        const loc = this.loc();
        if (this.test("ident")) {
            const ident = this.current.value;
            this.step();
            return ast.Node.create(loc, "IdentTy", { ident });
        }
        if (this.eat("*")) {
            const mutable = this.eat("mut");
            const ty = this.parseTy();
            return ast.Node.create(loc, mutable ? "PtrMutTy" : "PtrTy", { ty });
        }
        if (this.eat("[")) {
            const ty = this.parseTy();
            if (this.eat(";")) {
                const length = this.parseExpr();
                this.mustEat("]");
                return ast.Node.create(loc, "ArrayTy", { ty, length });
            }
            this.mustEat("]");
            return ast.Node.create(loc, "SliceTy", { ty });
        }
        return this.failExpected("<type>");
    }

    /**
     * Parses comma-separated items up to (not including) the `close` token.
     * A trailing comma is accepted. Stops at end of input so the caller's
     * `mustEat(close)` reports the missing delimiter.
     */
    private parseCommaList(
        close: string,
        parseItem: () => ast.Node,
    ): ast.Node[] {
        const items: ast.Node[] = [];
        while (!this.done && !this.test(close)) {
            items.push(parseItem());
            if (!this.eat(",")) {
                break;
            }
        }
        return items;
    }

    /**
     * Consumes and returns the current token if it has the given type;
     * otherwise reports `expected '<type>', got '<actual>'` (with `eof` at end
     * of input) and throws.
     */
    private mustEat(type: string, loc: number = this.loc()): Tok {
        // Check `done` first: at end of input there is no current token.
        if (this.done || this.current.type !== type) {
            this.failExpected(type, loc);
        }
        const tok = this.current;
        this.step();
        return tok;
    }

    /** Reports an "expected X, got Y" error at `loc` and throws. */
    private failExpected(expected: string, loc: number = this.loc()): never {
        const got = this.done ? "eof" : this.current.type;
        this.error(`expected '${expected}', got '${got}'`, loc);
    }

    /** Prints an error diagnostic for `loc` and aborts by throwing. */
    private error(message: string, loc: number): never {
        printDiagnostics(this.filename, loc, "error", message, this.text);
        throw new Error();
    }

    /** Consumes the current token if it has the given type. */
    private eat(type: string): boolean {
        if (this.test(type)) {
            this.step();
            return true;
        }
        return false;
    }

    /** Advances one token, recording the previous token and the new line. */
    private step() {
        if (!this.done) {
            this.prevTok = this.current;
        }
        this.idx += 1;
        if (!this.done) {
            this.currentLine = this.current.line;
        }
    }

    /** True when there is a current token and it has the given type. */
    private test(type: string): boolean {
        return !this.done && this.current.type === type;
    }

    /** Location (line number) used for nodes and diagnostics. */
    private loc(): number {
        return this.currentLine;
    }

    /** The token at the current position; only valid while `done` is false. */
    private get current(): Tok {
        return this.toks[this.idx];
    }

    /** True once every token has been consumed. */
    private get done(): boolean {
        return this.idx >= this.toks.length;
    }
}
|
|
|
|
/** A single lexical token produced by `tokenize`. */
export type Tok = {
    /**
     * Token category: `"ident"`, `"int"`, the keyword itself, or the raw text
     * for operators and punctuation.
     */
    type: string;
    /** The token's source text. */
    value: string;
    /** Line the token appears on, counted from 1. */
    line: number;
};
|
|
|
|
/**
 * Matches a word that is exactly one of the reserved keywords.
 *
 * The alternatives are grouped so that `^` and `$` apply to every keyword;
 * without the group, identifiers that merely contain a keyword (for example
 * `format` contains `or`) would be classified as keywords.
 */
const keywordPattern =
    /^(?:fn|return|let|if|else|while|break|or|and|not|mut)$/;
|
|
/**
 * Matches every operator / punctuation token, plus the newline character
 * (kept so the tokenizer can count lines).
 *
 * Multi-character operators come before their prefixes because regex
 * alternation takes the first alternative that matches: `..=` must precede
 * `..`, and both must precede the single `.` in the character class.
 */
const operatorPattern =
    /((?:\->)|(?:==)|(?:!=)|(?:<=)|(?:>=)|(?:<<)|(?:>>)|(?:\.\*)|(?:\.\.=)|(?:\.\.)|[\n\(\)\{\}\[\]\,\.\;\:\!\=\<\>\&\^\|\+\-\*\/\%])/g;
|
|
|
|
/**
 * Splits source text into tokens.
 *
 * Steps: strip `//` line comments, pad every operator (and newline) matched
 * by `operatorPattern` with spaces, split on spaces / tabs / carriage
 * returns, then classify each piece. Newline pieces are not emitted; they
 * only advance the line counter, which starts at 1.
 *
 * Classification:
 * - identifier-shaped words become `"ident"`, unless `keywordPattern`
 *   matches, in which case the type is the keyword itself;
 * - `0` or a decimal number without a leading zero becomes `"int"`;
 * - anything else uses its own text as the type.
 *
 * @param text Source text to tokenize.
 * @returns Tokens in source order, each tagged with its line number.
 */
export function tokenize(text: string): Tok[] {
    const identPattern = /^[a-zA-Z_][a-zA-Z0-9_]*$/;
    const intPattern = /^(?:0|(?:[1-9][0-9]*))$/;

    const pieces = text
        .replace(/\/\/[^\n]*/g, "")
        .replace(operatorPattern, " $1 ")
        .split(/[ \t\r]/);

    // Single pass with in-place pushes; copying the accumulated array for
    // every token would make this quadratic in the number of tokens.
    const toks: Tok[] = [];
    let line = 1;
    for (const value of pieces) {
        if (value === "") {
            continue;
        }
        if (value === "\n") {
            line += 1;
            continue;
        }

        let type = value;
        if (identPattern.test(value)) {
            type = keywordPattern.test(value) ? value : "ident";
        } else if (intPattern.test(value)) {
            type = "int";
        }
        toks.push({ type, value, line });
    }
    return toks;
}
|