lexer
This commit is contained in:
parent
2960f1c830
commit
3a8d833e6a
96
src/Lexer.ts
96
src/Lexer.ts
@ -1,6 +1,6 @@
|
||||
import { Pos, Token } from "./Token.ts";
|
||||
|
||||
class Lexer {
|
||||
export class Lexer {
|
||||
private index = 0;
|
||||
private line = 1;
|
||||
private col = 1;
|
||||
@ -8,7 +8,88 @@ class Lexer {
|
||||
|
||||
/**
 * Creates a lexer over `text`; the text is stored as a private
 * parameter property and read by the scanning methods.
 */
public constructor(
    private text: string,
) {}
|
||||
|
||||
public next(): Token | null { return null }
|
||||
/**
 * Scans and returns the next token, or `null` once the input is exhausted.
 *
 * Token types produced here:
 * - `"//"`     a line comment, spanning up to (not including) the newline
 * - `"/"`      a lone slash
 * - `"if"` / `"else"` keywords
 * - `"ident"`  with `identValue` holding the identifier text
 * - `"int"`    with `intValue` holding the parsed decimal value
 * - `"string"` with `stringValue` holding the unescaped contents
 * - `"error"`  for an unterminated string (also logged to stderr)
 * - `"=="` and the single characters `+ { } ; =`
 *
 * Whitespace is skipped silently. Any other character is reported via
 * `console.error` and skipped.
 */
public next(): Token | null {
    // Build the character classes once per call instead of once per loop test.
    const whitespace = /[ \t\r\n]/; // includes \r so CRLF input is not reported as illegal
    const identStart = /[a-zA-Z_]/;
    const identPart = /[a-zA-Z0-9_]/;
    const digit = /[0-9]/;
    const punctuation = /[+{};=]/;

    // Iterate instead of recursing after skipped input, so a long run of
    // illegal characters cannot exhaust the call stack.
    while (!this.done()) {
        // Start position of the candidate token; recomputed after every skip.
        const pos = this.pos();

        if (this.test(whitespace)) {
            while (!this.done() && this.test(whitespace))
                this.step();
            continue;
        }

        if (this.test("/")) {
            this.step();
            if (!this.done() && this.test("/")) {
                // Line comment: consume up to, but not including, the newline.
                while (!this.done() && !this.test("\n"))
                    this.step();
                return this.token("//", pos);
            }
            return this.token("/", pos);
        }

        if (this.test(identStart)) {
            let name = "";
            while (!this.done() && this.test(identPart)) {
                name += this.current();
                this.step();
            }
            if (name === "if" || name === "else")
                return this.token(name, pos);
            return { ...this.token("ident", pos), identValue: name };
        }

        if (this.test(digit)) {
            let digits = "";
            while (!this.done() && this.test(digit)) {
                digits += this.current();
                this.step();
            }
            return { ...this.token("int", pos), intValue: parseInt(digits, 10) };
        }

        if (this.test("\"")) {
            this.step(); // opening quote
            let value = "";
            while (!this.done() && !this.test("\"")) {
                if (this.test("\\")) {
                    this.step(); // the backslash itself
                    if (this.done())
                        break; // a trailing backslash leaves the string unterminated
                    const escaped = this.current();
                    switch (escaped) {
                        case "n":
                            value += "\n";
                            break;
                        case "t":
                            value += "\t";
                            break;
                        case "0":
                            value += "\0";
                            break;
                        default:
                            // Any other escaped character stands for itself (e.g. \" and \\).
                            value += escaped;
                    }
                } else {
                    value += this.current();
                }
                this.step();
            }
            // The loop above only ends on a closing quote or on end of input.
            if (this.done()) {
                console.error(
                    `Lexer: unclosed/malformed string at ${pos.line}:${pos.col}`,
                );
                return this.token("error", pos);
            }
            this.step(); // closing quote
            return { ...this.token("string", pos), stringValue: value };
        }

        if (this.test(punctuation)) {
            const first = this.current();
            this.step();
            // "=" immediately followed by "=" forms the two-character "==" token.
            if (first === "=" && !this.done() && this.test("=")) {
                this.step();
                return this.token("==", pos);
            }
            return this.token(first, pos);
        }

        console.error(`Lexer: illegal character '${this.current()}' at ${pos.line}:${pos.col}`);
        this.step();
    }

    return null;
}
|
||||
|
||||
/** Reports whether the read index has reached or passed the end of the text. */
private done(): boolean {
    const remaining = this.text.length - this.index;
    return remaining <= 0;
}
|
||||
/**
 * Returns the character at the current index, or the empty string when
 * the index is past the end of the text.
 */
private current(): string {
    // charAt yields "" when out of range; bracket indexing would yield
    // undefined there, contradicting the declared `string` return type.
    return this.text.charAt(this.index);
}
|
||||
@ -37,4 +118,15 @@ class Lexer {
|
||||
const length = this.index - pos.index;
|
||||
return { type, pos, length };
|
||||
}
|
||||
|
||||
/**
 * Reports whether the current character matches `pattern`.
 *
 * @param pattern A string compared for exact equality with the current
 *                character, or a regular expression tested against it.
 * @returns `true` on a match; always `false` once the input is exhausted.
 */
private test(pattern: RegExp | string): boolean {
    // With no character left there is nothing to match. Without this guard
    // a missing character handed to RegExp.prototype.test would be coerced
    // to the text "undefined" and could match letter classes.
    if (this.done())
        return false;

    const ch = this.current();
    return typeof pattern === "string" ? ch === pattern : pattern.test(ch);
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
16
src/main.ts
Normal file
16
src/main.ts
Normal file
@ -0,0 +1,16 @@
|
||||
import { Lexer } from "./Lexer.ts";
|
||||
|
||||
// Sample input exercising identifiers, integers, an operator, a line
// comment, a plain string, and a string containing escape sequences.
const text = `
a1 123 +
// comment
"hello"
"escaped\\"\\nnewline"
`;

// Drain the lexer, printing each token's type and payload (if it has one).
const lexer = new Lexer(text);
for (let token = lexer.next(); token !== null; token = lexer.next()) {
    // `??` keeps legitimate falsy payloads such as the integer 0.
    const value = token.identValue ?? token.intValue ?? token.stringValue ?? "";
    // Print the token's type: interpolating the token object itself would
    // only produce "[object Object]".
    console.log(`Lexed ${token.type}(${value})`);
}
|
Loading…
Reference in New Issue
Block a user