new lexer
All checks were successful
Check / Explore-Gitea-Actions (push) Successful in 8s

This commit is contained in:
sfja 2026-03-16 21:35:11 +01:00
parent 1c92a3c077
commit c7741b8d31
3 changed files with 93 additions and 32 deletions

View File

@ -1,3 +1,18 @@
/**
 * Diagnostic reporter.
 *
 * Currently a stub: `report` takes no arguments and does nothing.
 */
export class Reporter {
    /** No-op placeholder; returns nothing. */
    report(): void {
        return;
    }
}
/**
 * A position inside a source text.
 *
 * As produced by the lexer in the parser module: `idx` starts at 0 and
 * advances by one per consumed character, while `line` and `col` both start
 * at 1 and `col` resets to 1 after each "\n".
 */
export type Loc = {
// Offset of the position from the start of the text.
idx: number;
// Line number of the position.
line: number;
// Column number of the position within its line.
col: number;
};
/**
 * Pairs a file's name with its text.
 *
 * NOTE(review): not referenced by any code visible here; presumably `text`
 * is the full contents of the file named by `filename` — confirm at the
 * call sites.
 */
export type FileInfo = {
filename: string;
text: string;
};
export function printDiagnostics(
filename: string,
line: number,

View File

@ -4,6 +4,12 @@ import { Ty } from "../ty.ts";
import { builtins } from "./builtins.ts";
import { ResolveMap } from "./resolve.ts";
// export class Tys {
// private nodeTys = new Map<number, Ty>();
//
// expr(expr: ast.Node): Ty {}
// }
export class Checker {
private nodeTys = new Map<number, Ty>();

View File

@ -1,5 +1,5 @@
import * as ast from "../ast.ts";
import { printDiagnostics } from "../diagnostics.ts";
import { Loc, printDiagnostics } from "../diagnostics.ts";
export function parse(
filename: string,
@ -375,40 +375,80 @@ export class Parser {
}
/**
 * Token as returned by `tokenize`.
 *
 * `type` is the token kind (the lexeme itself for operators and keywords,
 * otherwise "ident" or "int"), `value` is the matched text and `line` is the
 * line the token starts on.
 */
export type Tok = { type: string; value: string; line: number };
/**
 * Token variant that carries a full `Loc` (index, line and column) instead of
 * only a line number.
 */
export type Tok2 = { type: string; value: string; loc: Loc };
/**
 * Matches a lexeme that is exactly one of the reserved keywords.
 *
 * The alternation is wrapped in a single group so that `^` and `$` apply to
 * every alternative. Without the group, identifiers that merely contain or
 * start with a keyword ("order", "notify", "iffy") also matched.
 */
const keywordPattern =
    /^(?:fn|return|let|if|else|while|break|or|and|not|mut)$/;
// NOTE(review): despite its name this pattern matches a keyword *prefix* at
// the start of a string, not operators. It looks like residue of the commit
// diff — confirm whether it is still needed before relying on it.
const operatorPattern =
    /^(?:(?:fn)|(?:return)|(?:let)|(?:if)|(?:else)|(?:while)|(?:break)|(?:or)|(?:and)|(?:not)|(?:mut))/;
/**
 * Matches one operator or punctuation lexeme (capture group 1).
 *
 * Alternatives are tried left to right, so every multi-character operator
 * must precede its own prefixes: `..=` is listed before `..`, and all
 * two-character operators before the single-character class.
 */
const operatorPattern2 =
    /((?:\->)|(?:==)|(?:!=)|(?:<=)|(?:>=)|(?:<<)|(?:>>)|(?:\.\*)|(?:\.\.=)|(?:\.\.)|[\n\(\)\{\}\[\]\,\.\;\:\!\=\<\>\&\^\|\+\-\*\/\%])/g;
/**
 * Splits source text into tokens.
 *
 * Rules are tried in the order they are registered; the first rule that
 * matches at the current position wins:
 *   1. whitespace (including newlines) is skipped,
 *   2. `//` line comments are skipped (registered before operators so that
 *      `//` is not lexed as two `/` tokens),
 *   3. operators/punctuation become tokens whose `type` is the lexeme,
 *   4. identifier-shaped lexemes become keyword tokens (`type` is the
 *      lexeme) when `keywordPattern` accepts them, otherwise "ident",
 *   5. decimal integers become "int" tokens,
 *   6. any other single character is skipped without producing a token.
 *
 * @param text The source text to tokenize.
 * @returns The tokens in source order, each tagged with the line it starts on.
 */
export function tokenize(text: string): Tok[] {
    return new Lexer<Tok2>()
        .add(/[ \t\r\n]+/, () => null)
        .add(/\/\/[^\n]*/, () => null)
        .add(operatorPattern2, (loc, value) => ({ type: value, value, loc }))
        .add(/[a-zA-Z_][a-zA-Z0-9_]*/, (loc, value) => {
            const type = keywordPattern.test(value) ? value : "ident";
            return { type, value, loc };
        })
        .add(/0|(?:[1-9][0-9]*)/, (loc, value) => ({ type: "int", value, loc }))
        // NOTE(review): unrecognised characters are dropped silently here;
        // consider reporting them as diagnostics instead.
        .add(/./, () => null)
        .lex(text)
        .map<Tok>(({ type, value, loc: { line } }) => ({ type, value, line }));
}

/** A compiled lexer rule: a pattern and the action run on each match. */
type LexRule<TokT> = {
    pattern: RegExp;
    action: LexAction<TokT>;
};

/**
 * Callback invoked for each match of a rule.
 *
 * Receives the location of the first matched character and the matched text;
 * returns the token to emit, or `null` to emit nothing.
 */
type LexAction<TokT> = (loc: Loc, match: string) => TokT | null;

/**
 * Small rule-based lexer: rules are tried in registration order at each
 * position and the first one that matches a non-empty lexeme is applied.
 */
class Lexer<TokT> {
    private rules: LexRule<TokT>[] = [];

    /**
     * Registers a rule.
     *
     * Only the pattern's source is used; its flags are discarded. The rule is
     * compiled as a sticky (`y`) regex so it can be matched at an arbitrary
     * offset without copying the remaining input.
     *
     * @returns `this`, to allow chaining.
     */
    add(pattern: RegExp, action: LexAction<TokT>): this {
        this.rules.push({
            pattern: new RegExp(`(?:${pattern.source})`, "y"),
            action,
        });
        return this;
    }

    /**
     * Runs the registered rules over `text`.
     *
     * @param text The input to lex.
     * @returns The non-null results of the rule actions, in input order.
     * @throws Error if no rule matches a non-empty lexeme at some position.
     */
    lex(text: string): TokT[] {
        const toks: TokT[] = [];
        let idx = 0;
        let line = 1;
        let col = 1;
        while (idx < text.length) {
            const hit = this.matchAt(text, idx);
            if (hit === null) {
                throw new Error(`no rule for character '${text[idx]}'`);
            }
            // The location handed to the action is where the lexeme starts.
            const loc: Loc = { idx, line, col };
            for (const end = idx + hit.lexeme.length; idx < end; ++idx) {
                if (text[idx] === "\n") {
                    line += 1;
                    col = 1;
                } else {
                    col += 1;
                }
            }
            const tok = hit.rule.action(loc, hit.lexeme);
            if (tok !== null) {
                toks.push(tok);
            }
        }
        return toks;
    }

    /**
     * Finds the first rule that matches a non-empty lexeme starting exactly
     * at `idx`. Zero-length matches are ignored because they would never
     * advance the lexer.
     */
    private matchAt(
        text: string,
        idx: number,
    ): { rule: LexRule<TokT>; lexeme: string } | null {
        for (const rule of this.rules) {
            rule.pattern.lastIndex = idx;
            const match = rule.pattern.exec(text);
            if (match !== null && match[0].length > 0) {
                return { rule, lexeme: match[0] };
            }
        }
        return null;
    }
}