/** A lexical token produced by {@link tokenize}. */
export type Tok = {
  /**
   * Token category: "ident", "int", "char" or "str" for value-carrying
   * tokens; otherwise the token text itself (keywords, punctuation, operators).
   */
  type: string;
  /** 1-based line number, counted from the newlines that precede the token. */
  line: number;
  /** Token text; only present on "ident", "int", "char" and "str" tokens. */
  value?: string;
};

/** Words that match the identifier pattern but keep their own text as `type`. */
const keywords = new Set(["fn", "let", "true", "false"]);

/**
 * Prefix tree of multi-character operators. A key is the next single-character
 * token; its value is the subtree of further continuations, or `null` when the
 * operator cannot be extended any further.
 */
type OpTree = Map<string, OpTree | null>;

/** Root of the operator tree; currently it only describes `->`. */
const opTreeRoot: OpTree = new Map<string, OpTree | null>([
  ["-", new Map<string, OpTree | null>([[">", null]])],
]);

/** Builds the final token for one piece of source text found on `line`. */
function classifyTok(text: string, line: number): Tok {
  if (/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(text) && !keywords.has(text)) {
    return { type: "ident", line, value: text };
  }
  if (/^[0-9_]+$/.test(text)) {
    return { type: "int", line, value: text };
  }
  if (/^'.*?'$/.test(text)) {
    return { type: "char", line, value: text };
  }
  if (/^".*?"$/.test(text)) {
    return { type: "str", line, value: text };
  }
  // Keywords, punctuation and operators use their own text as the type.
  return { type: text, line };
}

/**
 * Splits source text into tokens.
 *
 * Comments (`// ...` and `/* ... *\/`) are removed first. Every character
 * other than letters, digits, `_`, quotes, backslash, space, tab and carriage
 * return is then isolated as its own token, and adjacent tokens are joined
 * again when they spell an operator listed in the operator tree (e.g. `->`).
 * A space written as backslash + space does not separate tokens and is
 * unescaped to a plain space, which is how a string literal can contain one.
 *
 * @param text Source text to tokenize.
 * @returns Tokens in source order; newlines only advance the line counter and
 *   are not returned.
 */
export function tokenize(text: string): Tok[] {
  const pieces = text
    .replace(/\/\/[^\n]*/g, "")
    // Keep the newlines of a block comment so later line numbers stay right.
    .replace(/\/\*.*?\*\//gs, (comment) => comment.replace(/[^\n]/g, ""))
    // Pad every special character (including "\n") so it splits off on its own.
    .replace(/([^a-zA-Z0-9_'"\\ \t\r])/g, " $1 ")
    // NOTE(review): this separator pattern was reconstructed from the
    // surrounding steps (unescaped blank = separator); confirm against upstream.
    .split(/(?<!\\)[ \t\r]/)
    .filter((tok) => tok !== "")
    .map((tok) => tok.replace(/\\ /g, " "));

  // Join single-character tokens into multi-character operators.
  // `continuation` holds the tokens that may still extend the last entry.
  const merged: string[] = [];
  let continuation: OpTree | null = null;
  for (const piece of pieces) {
    if (continuation !== null && continuation.has(piece)) {
      merged[merged.length - 1] += piece;
      continuation = continuation.get(piece) ?? null;
    } else {
      merged.push(piece);
      continuation = opTreeRoot.get(piece) ?? null;
    }
  }

  // Assign line numbers and classify; every token is kept, including the last
  // one when the input does not end with a newline.
  const toks: Tok[] = [];
  let line = 1;
  for (const piece of merged) {
    if (piece === "\n") {
      line += 1;
    } else {
      toks.push(classifyTok(piece, line));
    }
  }
  return toks;
}