lang10/src/tok.ts
2025-12-10 22:46:32 +01:00

78 lines
2.3 KiB
TypeScript

/** A single lexical token produced by `tokenize`. */
export type Tok = {
  /**
   * Token category: `"ident"`, `"int"`, `"char"` or `"str"` for classified
   * tokens; for everything else (keywords, operators, punctuation) the raw
   * token text itself.
   */
  type: string;
  /** Source line the token was found on, counted from 1. */
  line: number;
  /** Raw token text; only set on `ident`, `int`, `char` and `str` tokens. */
  value?: string;
};
/**
 * Reserved words. A token spelled like an identifier but listed here is not
 * classified as `ident`; it keeps its own text as its token type.
 */
const keywords = new Set<string>(["fn", "let", "true", "false"]);
/**
 * Trie node for multi-character operators. Each key is the next token text;
 * its value is the subtree of possible further continuations, or `null` when
 * nothing can follow.
 */
type OpTree = Map<string, OpTree | null>;

/**
 * Root of the operator trie. It currently encodes a single multi-character
 * operator: `-` followed by `>`, i.e. `->`.
 */
const opTreeRoot: OpTree = new Map<string, OpTree | null>([
  ["-", new Map<string, OpTree | null>([[">", null]])],
]);
/**
 * Splits source text into tokens.
 *
 * Pipeline:
 *  1. Comments are blanked out (newlines inside them are kept).
 *  2. Every character other than letters, digits, `_`, quotes, backslash,
 *     space, tab and carriage return is padded with spaces, so punctuation
 *     and newlines become stand-alone pieces.
 *  3. The text is split on unescaped spaces, tabs and carriage returns; a
 *     backslash-escaped space (`\ `) stays inside its piece and is then
 *     unescaped to a plain space.
 *  4. Adjacent pieces that spell an operator in `opTreeRoot` are joined.
 *  5. Newline pieces advance the line counter; every other piece becomes a
 *     `Tok` carrying the current line.
 *
 * Known limitations of this text-based approach:
 *  - Comment markers inside string or character literals are stripped too,
 *    because comments are removed before literals are recognised.
 *  - A literal containing an unescaped space, tab or punctuation character is
 *    split into several pieces.
 *  - Operator parts separated only by spaces or tabs are still joined
 *    (`- >` yields `->`).
 *
 * @param text Source text to tokenize.
 * @returns Tokens in source order. `ident`, `int`, `char` and `str` tokens
 *   carry their raw text in `value` (quotes included for `char` / `str`);
 *   any other token uses its raw text as `type`.
 */
export function tokenize(text: string): Tok[] {
  const pieces = stripComments(text)
    .replace(/([^a-zA-Z0-9_'"\\ \t\r])/g, " $1 ")
    .split(/(?<!\\)[ \t\r]/)
    .filter((piece) => piece !== "")
    .map((piece) => piece.replace(/\\ /g, " "));

  const toks: Tok[] = [];
  let line = 1;
  // Every piece is consumed, so input that does not end with a newline still
  // yields its final token.
  for (const piece of mergeOperators(pieces)) {
    if (piece === "\n") {
      line += 1;
    } else {
      toks.push(classifyToken(piece, line));
    }
  }
  return toks;
}

/**
 * Removes `//` line comments and slash-star block comments in one
 * left-to-right pass, so whichever comment starts first wins. Newlines inside
 * a block comment are preserved to keep later line numbers accurate.
 */
function stripComments(text: string): string {
  return text.replace(
    /\/\/[^\n]*|\/\*.*?\*\//gs,
    (comment) => comment.replace(/[^\n]/g, ""),
  );
}

/**
 * Joins runs of adjacent pieces that form an operator in `opTreeRoot`,
 * walking the trie one piece at a time so operators of any length can match.
 */
function mergeOperators(pieces: string[]): string[] {
  const merged: string[] = [];
  // Trie node listing the pieces that may extend the last merged token;
  // null when that token cannot be extended.
  let continuations: OpTree | null = null;
  for (const piece of pieces) {
    if (continuations !== null && continuations.has(piece)) {
      merged[merged.length - 1] += piece;
      continuations = continuations.get(piece) ?? null;
    } else {
      merged.push(piece);
      continuations = opTreeRoot.get(piece) ?? null;
    }
  }
  return merged;
}

/** Builds the `Tok` for one raw piece of text found on `line`. */
function classifyToken(raw: string, line: number): Tok {
  if (/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(raw) && !keywords.has(raw)) {
    return { type: "ident", line, value: raw };
  }
  if (/^[0-9_]+$/.test(raw)) {
    return { type: "int", line, value: raw };
  }
  if (/^'.*?'$/.test(raw)) {
    return { type: "char", line, value: raw };
  }
  if (/^".*?"$/.test(raw)) {
    return { type: "str", line, value: raw };
  }
  // Keywords, operators and punctuation use their own text as the type.
  return { type: raw, line };
}