// Listing metadata: 78 lines, 2.3 KiB, TypeScript
/** A single lexical token as returned by `tokenize`. */
export type Tok = {
  /**
   * Token category: "ident", "int", "char" or "str" for classified tokens;
   * otherwise the raw token text itself (keywords, punctuation, operators).
   */
  type: string;
  /** Source line the token was found on; counting starts at 1. */
  line: number;
  /** Raw token text; only set on "ident", "int", "char" and "str" tokens. */
  value?: string;
};

/**
 * Reserved words. A token that looks like an identifier but appears here is
 * not classified as "ident"; it keeps its own text as its token type.
 */
const keywords = new Set(["fn", "let", "true", "false"]);

/**
 * Prefix tree of multi-character operators. Each key is one operator
 * character; the value is the subtree of characters that may follow it, or
 * `null` when the operator ends there.
 */
type OpTree = Map<string, OpTree | null>;

/** Root of the operator tree. It currently encodes a single operator: `->`. */
const opTreeRoot: OpTree = new Map<string, OpTree | null>([
  ["-", new Map<string, OpTree | null>([[">", null]])],
]);

/**
 * Splits source text into tokens tagged with 1-based line numbers.
 *
 * Pipeline:
 *  1. Strip `//` line comments, then `/* ... *\/` block comments. Line
 *     comments are stripped first, so a `//` inside a block comment or a
 *     string literal also starts a comment.
 *  2. Pad every character that is not a word character, quote, backslash or
 *     blank with spaces, so punctuation and newlines become separate words.
 *  3. Split on blanks that are not escaped with a backslash, then turn each
 *     escaped blank (`\ `) back into a plain space.
 *  4. Merge adjacent words that spell an operator in `opTreeRoot` (`-` `>`
 *     becomes `->`).
 *  5. Count newline words to assign line numbers, and classify the rest.
 *
 * @param text Source text to tokenize.
 * @returns Tokens in source order. "ident", "int", "char" and "str" tokens
 *   carry the raw text in `value`; every other token (keywords, punctuation,
 *   operators) uses its own text as `type` and has no `value`.
 */
export function tokenize(text: string): Tok[] {
  const identPattern = /^[a-zA-Z_][a-zA-Z0-9_]*$/;
  const intPattern = /^[0-9_]+$/;
  const charPattern = /^'.*?'$/;
  const strPattern = /^".*?"$/;

  const words = text
    .replace(/\/\/[^\n]*/g, "")
    // Keep the newlines of a multi-line block comment so that tokens after
    // it still report the correct line number.
    .replace(/\/\*.*?\*\//gs, (comment) => comment.replace(/[^\n]/g, ""))
    .replace(/([^a-zA-Z0-9_'"\\ \t\r])/g, " $1 ")
    .split(/(?<!\\)[ \t\r]/)
    .filter((word) => word !== "")
    .map((word) => word.replace(/\\ /g, " "));

  // Walk the operator tree: `node` is the subtree in which the previously
  // emitted word has to be looked up to see whether `word` may extend it.
  const merged: string[] = [];
  let node: OpTree = opTreeRoot;
  for (const word of words) {
    const lastIndex = merged.length - 1;
    const prev = merged[lastIndex];
    const continuation = prev === undefined ? undefined : node.get(prev);
    if (prev !== undefined && continuation != null && continuation.has(word)) {
      merged[lastIndex] = prev + word;
      node = continuation;
    } else {
      merged.push(word);
      node = opTreeRoot;
    }
  }

  // No trailing word is discarded here: newline words never reach the output
  // anyway, and dropping the last word unconditionally would lose a real
  // token whenever the input does not end with a newline.
  const tokens: Tok[] = [];
  let line = 1;
  for (const word of merged) {
    if (word === "\n") {
      line += 1;
      continue;
    }

    let kind: string | undefined;
    if (identPattern.test(word) && !keywords.has(word)) {
      kind = "ident";
    } else if (intPattern.test(word)) {
      kind = "int";
    } else if (charPattern.test(word)) {
      kind = "char";
    } else if (strPattern.test(word)) {
      kind = "str";
    }

    tokens.push(
      kind === undefined
        ? { type: word, line }
        : { type: kind, line, value: word },
    );
  }
  return tokens;
}