/*
 * src/tok.ts — lexer for the example language.
 *
 * This is the post-patch state of the file: the earlier implementation
 * (a chain of regex replace / split / reduce passes) was replaced by a
 * rule-table scanner that also records each token's source position.
 *
 * Companion changes made in the same patch:
 *   - example.lang4: `let s = "hello\ world";` became `let s = "hello world";`
 *     (the escaped-space workaround is no longer used by the sample program).
 *   - src/main.ts: `console.log({ toks });` was enabled and followed by an
 *     early `return;` placed before the parser is constructed.
 *     NOTE(review): that looks like temporary debugging — confirm before merge.
 */

/** A lexical token together with the source span it was read from. */
export type Tok = {
  /**
   * For keywords and punctuation this is the matched text itself (e.g. "let",
   * "->"); otherwise one of "ident", "int", "char" or "str".
   */
  type: string;
  /** Zero-based offset of the token's first character in the source text. */
  idx: number;
  /** One-based line of the token's first character. */
  line: number;
  /** One-based column of the token's first character (UTF-16 code units). */
  col: number;
  /** Number of UTF-16 code units covered by the token. */
  length: number;
  /** Raw matched text; present only for "ident", "int", "char" and "str". */
  value?: string;
};

const keywords = ["true", "false", "bool", "int", "char", "str", "fn", "let"];

/** One entry of the scanner's rule table. */
type Rule = {
  /** Rule name; used as the token type unless `literal` is set. */
  id: string;
  /** Sticky pattern; it is anchored at `lastIndex` instead of slicing the input. */
  match: RegExp;
  /** Matched text is consumed but produces no token. */
  ignore?: boolean;
  /** The matched text itself becomes the token type and no value is stored. */
  literal?: boolean;
};

/**
 * Rules in priority order: the first rule that matches at the current offset
 * wins. Keywords must precede "ident", and "->" must precede the single "-".
 */
const rules: readonly Rule[] = [
  { id: "whitespace", match: /[ \t\r]+/y, ignore: true },
  { id: "newline", match: /\n/y, ignore: true },
  { id: "linecomment", match: /\/\/[^\n]*/y, ignore: true },
  { id: "blockcomment", match: /\/\*.*?\*\//sy, ignore: true },
  {
    id: "_keywords",
    // The lookahead stops a keyword from matching the prefix of a longer
    // identifier (e.g. "letter" must be one ident, not "let" + "ter").
    match: new RegExp(`(?:${keywords.join("|")})(?![a-zA-Z0-9_])`, "y"),
    literal: true,
  },
  {
    id: "_identity",
    match: /->|[()\[\]{}+\-*\/,.;:!=<>&|?]/y,
    literal: true,
  },
  { id: "ident", match: /[a-zA-Z_][a-zA-Z0-9_]*/y },
  { id: "int", match: /[0-9_]+/y },
  { id: "char", match: /'(?:\\.|[^'\n])'/y },
  { id: "str", match: /"(?:\\.|[^"])*"/sy },
];

/**
 * Finds the first rule that matches `text` exactly at offset `idx`.
 *
 * @returns the winning rule and the matched text, or `undefined` when no rule
 *   matches at that offset.
 */
function matchAt(
  text: string,
  idx: number,
): { rule: Rule; lexeme: string } | undefined {
  for (const rule of rules) {
    rule.match.lastIndex = idx;
    const m = rule.match.exec(text);
    // A zero-length match would never advance the scanner, so treat it as a miss.
    if (m !== null && m[0].length > 0) {
      return { rule, lexeme: m[0] };
    }
  }
  return undefined;
}

/**
 * Splits source text into tokens.
 *
 * Whitespace, newlines and comments are skipped. Characters that no rule
 * accepts are reported through `printError` and skipped, so scanning always
 * continues to the end of the input.
 *
 * @param text Complete source text.
 * @returns Tokens in source order; each carries the position of its first
 *   character.
 */
export function tokenize(text: string): Tok[] {
  const toks: Tok[] = [];
  let idx = 0;
  let line = 1;
  let col = 1;

  while (idx < text.length) {
    const hit = matchAt(text, idx);

    if (hit === undefined) {
      // Report and skip one whole code point so that a character outside the
      // BMP yields a single diagnostic rather than two lone surrogates.
      const codePoint = text.codePointAt(idx);
      const bad = codePoint === undefined ? "" : String.fromCodePoint(codePoint);
      printError(line, `invalid character '${bad}'`);
      const width = Math.max(bad.length, 1);
      idx += width;
      col += width;
      continue;
    }

    const { rule, lexeme } = hit;

    if (!rule.ignore) {
      // Record the position BEFORE advancing: idx/line/col describe where the
      // token starts, and `length` gives its extent.
      const tok: Tok = { type: rule.id, idx, line, col, length: lexeme.length };
      if (rule.literal) {
        tok.type = lexeme;
      } else {
        tok.value = lexeme;
      }
      toks.push(tok);
    }

    idx += lexeme.length;
    const lastNewline = lexeme.lastIndexOf("\n");
    if (lastNewline === -1) {
      col += lexeme.length;
    } else {
      // Multi-line lexeme (newline, block comment, string): bump the line
      // counter and restart the column after the last line break.
      line += lexeme.split("\n").length - 1;
      col = lexeme.length - lastNewline;
    }
  }

  return toks;
}

/**
 * Writes a styled diagnostic to stderr using `%c` console formatting.
 *
 * @param line One-based source line the diagnostic refers to.
 * @param message Human-readable description of the problem.
 */
function printError(line: number, message: string) {
  console.error(
    `%cerror%c: ${message}\n    %c--> line ${line}%c`,
    "font-weight: bold; color: red",
    "font-weight: bold; color: white",
    "color: cyan",
    "",
  );
}