new lexer
All checks were successful
Check / Explore-Gitea-Actions (push) Successful in 8s

This commit is contained in:
sfja 2026-03-16 21:35:11 +01:00
parent 1c92a3c077
commit c7741b8d31
3 changed files with 93 additions and 32 deletions

View File

@ -1,3 +1,18 @@
/**
 * Diagnostics reporter.
 *
 * NOTE(review): currently a stub — `report` accepts no arguments and has an
 * empty body, so constructing and calling it has no observable effect.
 */
export class Reporter {
    /** Placeholder; does nothing and returns nothing. */
    report(): void {
        return;
    }
}
/**
 * A position within source text.
 *
 * NOTE(review): the lexer that constructs these starts `idx` at 0 and
 * `line`/`col` at 1 — confirm any other producers follow the same convention.
 */
export type Loc = {
// Offset into the text, counted in string indices.
idx: number;
// Line number of the position.
line: number;
// Column number within that line.
col: number;
};
/**
 * A file name bundled with a text.
 *
 * NOTE(review): no consumer of this type is visible in this change — confirm
 * the intended use before relying on the field descriptions below.
 */
export type FileInfo = {
// Presumably the name/path shown in diagnostics — TODO confirm.
filename: string;
// Presumably the full contents of that file — TODO confirm.
text: string;
};
export function printDiagnostics( export function printDiagnostics(
filename: string, filename: string,
line: number, line: number,

View File

@ -4,6 +4,12 @@ import { Ty } from "../ty.ts";
import { builtins } from "./builtins.ts"; import { builtins } from "./builtins.ts";
import { ResolveMap } from "./resolve.ts"; import { ResolveMap } from "./resolve.ts";
// export class Tys {
// private nodeTys = new Map<number, Ty>();
//
// expr(expr: ast.Node): Ty {}
// }
export class Checker { export class Checker {
private nodeTys = new Map<number, Ty>(); private nodeTys = new Map<number, Ty>();

View File

@ -1,5 +1,5 @@
import * as ast from "../ast.ts"; import * as ast from "../ast.ts";
import { printDiagnostics } from "../diagnostics.ts"; import { Loc, printDiagnostics } from "../diagnostics.ts";
export function parse( export function parse(
filename: string, filename: string,
@ -375,40 +375,80 @@ export class Parser {
} }
export type Tok = { type: string; value: string; line: number }; export type Tok = { type: string; value: string; line: number };
/**
 * Token shape produced by the `Lexer`-based tokenizer: like `Tok`, but it
 * carries a full `Loc` instead of only a line number. `tokenize` converts
 * these to `Tok` (keeping just `loc.line`) before returning.
 */
export type Tok2 = { type: string; value: string; loc: Loc };
/**
 * Matches a string that is exactly one of the language keywords.
 *
 * Anchored at BOTH ends: without the trailing `$`, identifiers that merely
 * start with a keyword (`letter`, `fnord`, `android`) would be accepted and
 * end up with their own text as the token type instead of "ident".
 */
const keywordPattern =
    /^(?:fn|return|let|if|else|while|break|or|and|not|mut)$/;

/**
 * Matches a single operator or punctuation token.
 *
 * Alternatives are tried left to right and the first one that matches wins,
 * so a longer operator must be listed before any operator that is a prefix of
 * it: `..=` comes before `..`, and every multi-character operator comes before
 * the single-character class at the end.
 */
const operatorPattern2 =
    /((?:\->)|(?:==)|(?:!=)|(?:<=)|(?:>=)|(?:<<)|(?:>>)|(?:\.\*)|(?:\.\.=)|(?:\.\.)|[\n\(\)\{\}\[\]\,\.\;\:\!\=\<\>\&\^\|\+\-\*\/\%])/g;
/**
 * Splits source text into tokens.
 *
 * Rules are registered on a `Lexer` in priority order (the first rule that
 * matches at the current position wins):
 *  1. whitespace, including newlines — no token
 *  2. `//` line comments — no token (must precede operators, since `/` is one)
 *  3. operators/punctuation — token type is the operator text
 *  4. words — type is the word itself when `keywordPattern` accepts it,
 *     otherwise "ident"
 *  5. integers (`0`, or digits without a leading zero) — type "int"
 *  6. any other single character — dropped without producing a token
 *
 * @param text Source text to tokenize.
 * @returns Tokens in source order, each carrying only the line of its start.
 */
export function tokenize(text: string): Tok[] {
    const lexer = new Lexer<Tok2>();

    lexer.add(/[ \t\r\n]+/, () => null);
    lexer.add(/\/\/[^\n]*/, () => null);
    lexer.add(operatorPattern2, (loc, value) => ({ type: value, value, loc }));
    lexer.add(/[a-zA-Z_][a-zA-Z0-9_]*/, (loc, value) => {
        if (keywordPattern.test(value)) {
            return { type: value, value, loc };
        }
        return { type: "ident", value, loc };
    });
    lexer.add(/0|(?:[1-9][0-9]*)/, (loc, value) => ({ type: "int", value, loc }));
    lexer.add(/./, () => null);

    // Narrow the lexer's location-carrying tokens to the line-only `Tok` shape.
    const result: Tok[] = [];
    for (const tok of lexer.lex(text)) {
        result.push({ type: tok.type, value: tok.value, line: tok.loc.line });
    }
    return result;
}
/**
 * One lexer rule: a pattern to try at the current position and the action to
 * run on the text it matched.
 */
type LexRule<TokT> = {
pattern: RegExp;
action: LexAction<TokT>;
};
/**
 * Callback run when a rule matches. Receives the location where the match
 * starts and the matched text; returns a token to emit, or `null` to emit
 * nothing for this match.
 */
type LexAction<TokT> = (loc: Loc, match: string) => TokT | null;
/**
 * A small rule-based lexer.
 *
 * Rules are added with `add` and tried in registration order at every
 * position of the input; the first rule whose pattern matches a non-empty
 * prefix of the remaining text consumes it.
 */
class Lexer<TokT> {
    private readonly rules: LexRule<TokT>[] = [];

    /**
     * Registers a rule.
     *
     * Only `pattern.source` is used: the pattern is rebuilt anchored to the
     * start of the remaining input, and its flags (e.g. `g`) are discarded.
     *
     * @param pattern Pattern to match at the current position.
     * @param action Called with the start location and matched text; returns
     *     a token, or `null` to emit nothing.
     * @returns This lexer, so calls can be chained.
     */
    add(pattern: RegExp, action: LexAction<TokT>): this {
        this.rules.push({
            pattern: new RegExp(`^(?:${pattern.source})`),
            action,
        });
        return this;
    }

    /**
     * Runs the registered rules over `text`.
     *
     * @param text Input to lex.
     * @returns The tokens produced by the rule actions, in input order.
     * @throws Error if no rule matches a non-empty prefix at some position.
     */
    lex(text: string): TokT[] {
        const toks: TokT[] = [];
        let idx = 0;
        let line = 1;
        let col = 1;
        outer_loop: while (idx < text.length) {
            // Same for every rule at this position, so slice once.
            const rest = text.slice(idx);
            for (const rule of this.rules) {
                const matched = rest.match(rule.pattern)?.[0];
                // Skip on no match AND on an empty match: an empty match would
                // consume nothing, leaving `idx` unchanged and looping forever.
                if (!matched) {
                    continue;
                }
                // The location reported to the action is where the match starts.
                const loc: Loc = { idx, line, col };
                for (let i = 0; i < matched.length; ++i) {
                    if (text[idx] === "\n") {
                        line += 1;
                        col = 1;
                    } else {
                        col += 1;
                    }
                    idx += 1;
                }
                const tok = rule.action(loc, matched);
                // Only `null` means "no token"; falsy tokens such as 0 or ""
                // are still valid values of TokT.
                if (tok !== null) {
                    toks.push(tok);
                }
                continue outer_loop;
            }
            throw new Error(`no rule for character '${text[idx]}'`);
        }
        return toks;
    }
}