From c7741b8d3166bf553f2cf32e303a56e5efa2e545 Mon Sep 17 00:00:00 2001
From: sfja
Date: Mon, 16 Mar 2026 21:35:11 +0100
Subject: [PATCH] new lexer

---
Review notes (not part of the commit message):
- keywordPattern is anchored at both ends, so identifiers that only start
  with a keyword (e.g. "iffy") are no longer classified as keywords.
- LexRule, LexAction and Lexer take the token type as a type parameter
  (TokT was referenced but never declared); tokenize uses Lexer<Tok2>.
- operatorPattern2 lists "..=" before "..", otherwise "..=" is always split
  into ".." and "=". This changes the token stream seen by the parser.
- Lexer matches with sticky regexes instead of slicing the remaining text for
  every rule, and ignores empty matches so lex() cannot loop forever.
- The blob ids on the index lines are those of the original patch.

 src/diagnostics.ts |  19 +++++
 src/front/check.ts |   6 ++
 src/front/parse.ts | 136 +++++++++++++++++++++++++++-------
 3 files changed, 128 insertions(+), 33 deletions(-)

diff --git a/src/diagnostics.ts b/src/diagnostics.ts
index aa1769c..4d7b245 100644
--- a/src/diagnostics.ts
+++ b/src/diagnostics.ts
@@ -1,3 +1,22 @@
+export class Reporter {
+    report() {}
+}
+
+/** A position in a source text, as produced by the lexer in front/parse.ts. */
+export type Loc = {
+    /** 0-based index into the text. */
+    idx: number;
+    /** 1-based line number. */
+    line: number;
+    /** 1-based column number. */
+    col: number;
+};
+
+export type FileInfo = {
+    filename: string;
+    text: string;
+};
+
 export function printDiagnostics(
     filename: string,
     line: number,
diff --git a/src/front/check.ts b/src/front/check.ts
index 3a4d00e..3f503f9 100644
--- a/src/front/check.ts
+++ b/src/front/check.ts
@@ -4,6 +4,12 @@ import { Ty } from "../ty.ts";
 import { builtins } from "./builtins.ts";
 import { ResolveMap } from "./resolve.ts";
 
+// export class Tys {
+//     private nodeTys = new Map();
+//
+//     expr(expr: ast.Node): Ty {}
+// }
+
 export class Checker {
     private nodeTys = new Map();
 
diff --git a/src/front/parse.ts b/src/front/parse.ts
index 6d47653..7d52acb 100644
--- a/src/front/parse.ts
+++ b/src/front/parse.ts
@@ -1,5 +1,5 @@
 import * as ast from "../ast.ts";
-import { printDiagnostics } from "../diagnostics.ts";
+import { Loc, printDiagnostics } from "../diagnostics.ts";
 
 export function parse(
     filename: string,
@@ -375,40 +375,110 @@ export class Parser {
 }
 
 export type Tok = { type: string; value: string; line: number };
+export type Tok2 = { type: string; value: string; loc: Loc };
 
+// Matches an identifier that is exactly one of the reserved words. Both ends
+// are anchored, so an identifier that merely starts with a keyword (such as
+// "iffy") is still classified as "ident".
 const keywordPattern =
-    /^(?:fn)|(?:return)|(?:let)|(?:if)|(?:else)|(?:while)|(?:break)|(?:or)|(?:and)|(?:not)|(?:mut)$/;
-const operatorPattern =
-    /((?:\->)|(?:==)|(?:!=)|(?:<=)|(?:>=)|(?:<<)|(?:>>)|(?:\.\*)|(?:\.\.)|(?:\.\.=)|[\n\(\)\{\}\[\]\,\.\;\:\!\=\<\>\&\^\|\+\-\*\/\%])/g;
+    /^(?:fn|return|let|if|else|while|break|or|and|not|mut)$/;
+
+// "..=" is listed before ".." because alternation takes the first alternative
+// that matches; in the other order "..=" could never be produced.
+const operatorPattern2 =
+    /((?:\->)|(?:==)|(?:!=)|(?:<=)|(?:>=)|(?:<<)|(?:>>)|(?:\.\*)|(?:\.\.=)|(?:\.\.)|[\n\(\)\{\}\[\]\,\.\;\:\!\=\<\>\&\^\|\+\-\*\/\%])/g;
 
+/**
+ * Splits source text into tokens.
+ *
+ * Whitespace and `//` line comments produce no tokens. Operators, punctuation
+ * and keywords use their own text as the token type, other identifiers have
+ * type "ident" and decimal integer literals have type "int". Every token
+ * records the 1-based line on which it starts.
+ */
 export function tokenize(text: string): Tok[] {
-    return text
-        .replace(/\/\/[^\n]*/g, "")
-        .replace(operatorPattern, " $1 ")
-        .split(/[ \t\r]/)
-        .filter((value) => value !== "")
-        .reduce<[[string, number][], number]>(
-            ([toks, line], value) => {
-                if (value === "\n") {
-                    return [toks, line + 1];
-                } else {
-                    return [[...toks, [value, line]], line];
-                }
-            },
-            [[], 1],
-        )[0]
-        .map(([value, line]) => ({ type: value, value, line }))
-        .map((tok) =>
-            /^[a-zA-Z_][a-zA-Z0-9_]*$/.test(tok.value)
-                ? {
-                    ...tok,
-                    type: keywordPattern.test(tok.value) ? tok.value : "ident",
-                }
-                : tok
-        )
-        .map((tok) =>
-            /^(?:0|(?:[1-9][0-9]*))$/.test(tok.value)
-                ? { ...tok, type: "int" }
-                : tok
-        );
+    return new Lexer<Tok2>()
+        .add(/[ \t\r\n]+/, (_) => null)
+        .add(/\/\/[^\n]*/, (_) => null)
+        .add(operatorPattern2, (loc, value) => ({ type: value, value, loc }))
+        .add(/[a-zA-Z_][a-zA-Z0-9_]*/, (loc, value) => {
+            const type = keywordPattern.test(value) ? value : "ident";
+            return { type, value, loc };
+        })
+        .add(/0|(?:[1-9][0-9]*)/, (loc, value) => {
+            return { type: "int", value, loc };
+        })
+        // NOTE(review): every other character matched by `.` is dropped without
+        // a diagnostic.
+        .add(/./, (_) => null)
+        .lex(text)
+        .map(({ type, value, loc: { line } }) => ({ type, value, line }));
+}
+
+type LexRule<TokT> = {
+    pattern: RegExp;
+    action: LexAction<TokT>;
+};
+
+/** Builds a token from the matched text, or returns null to emit nothing. */
+type LexAction<TokT> = (loc: Loc, match: string) => TokT | null;
+
+/**
+ * Rule-based lexer. At each position the rules are tried in the order in
+ * which they were added, and the first rule that matches is used.
+ */
+class Lexer<TokT> {
+    private rules: LexRule<TokT>[] = [];
+
+    /**
+     * Registers a rule. Only the source of `pattern` is used: its flags are
+     * dropped and it is recompiled as a sticky pattern, so that it can be
+     * matched in place at the current position of the text.
+     */
+    add(pattern: RegExp, action: LexAction<TokT>): this {
+        this.rules.push({
+            pattern: new RegExp(pattern.source, "y"),
+            action,
+        });
+        return this;
+    }
+
+    /**
+     * Lexes `text` and returns the tokens produced by the rule actions.
+     *
+     * @throws Error if no rule matches at some position.
+     */
+    lex(text: string): TokT[] {
+        const toks: TokT[] = [];
+        let idx = 0;
+        let line = 1;
+        let col = 1;
+        outer_loop: while (idx < text.length) {
+            for (const rule of this.rules) {
+                rule.pattern.lastIndex = idx;
+                const match = rule.pattern.exec(text);
+                // An empty match would never advance idx, so treat it as a miss.
+                if (!match || match[0].length === 0) {
+                    continue;
+                }
+                const loc: Loc = { idx, line, col };
+                for (let i = 0; i < match[0].length; ++i) {
+                    if (text[idx] === "\n") {
+                        line += 1;
+                        col = 1;
+                    } else {
+                        col += 1;
+                    }
+                    idx += 1;
+                }
+                const tok = rule.action(loc, match[0]);
+                if (tok !== null) {
+                    toks.push(tok);
+                }
+                continue outer_loop;
+            }
+            throw new Error(`no rule for character '${text[idx]}'`);
+        }
+        return toks;
+    }
 }