From 3a8d833e6ad5b1ee1302e5a5a6206a9afb7031f4 Mon Sep 17 00:00:00 2001
From: Mikkel Kongsted
Date: Mon, 4 Nov 2024 14:54:55 +0100
Subject: [PATCH] lexer

---
Review notes (text in this section is ignored by `git am`):

What the patch does
  * Exports the Lexer class and implements Lexer.next(), which returns the
    next token, or null once the input is exhausted.
  * next() skips runs of whitespace, then recognises: "/" and "//" (a line
    comment, consumed up to but not including the newline and returned as a
    "//" token), the keywords "if" and "else", identifiers (identValue),
    decimal integers (intValue), double-quoted strings (stringValue) and the
    punctuation characters plus, braces, semicolon, "=" and "==".
  * Inside strings a backslash escapes the next character: n, t and 0 map to
    newline, tab and NUL; any other escaped character is taken literally.
  * An unterminated string logs to console.error and yields an "error"
    token. Any other unrecognised character is logged to console.error,
    skipped, and lexing continues with the following character.
  * Adds the private helper test() and a small driver in src/main.ts.

Revisions made during review (line counts per hunk are unchanged)
  * main.ts interpolated the token object itself, which prints
    "[object Object]"; it now prints token.type.
  * test() returns false at end of input. RegExp.prototype.test() coerces
    its argument to a string, so an unguarded call past the end would have
    matched the pattern against the text "undefined".
  * "\r" is treated as whitespace so CRLF input is not reported as illegal
    characters.
  * parseInt() is given an explicit radix of 10.
  * Added the three missing statement-terminating semicolons in the "/"
    branch.
  * Replaced four stray blank lines before the closing brace of the class
    with a doc comment on test().

 src/Lexer.ts | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 src/main.ts  | 16 +++++++++
 2 files changed, 110 insertions(+), 2 deletions(-)
 create mode 100644 src/main.ts

diff --git a/src/Lexer.ts b/src/Lexer.ts
index e69e070..6deae17 100644
--- a/src/Lexer.ts
+++ b/src/Lexer.ts
@@ -1,6 +1,6 @@
 import { Pos, Token } from "./Token.ts";
 
-class Lexer {
+export class Lexer {
     private index = 0;
     private line = 1;
     private col = 1;
@@ -8,7 +8,88 @@ class Lexer {
 
     public constructor (private text: string) {}
 
-    public next(): Token | null { return null }
+    public next(): Token | null {
+        if (this.done())
+            return null;
+        const pos = this.pos();
+        if (this.test(/[ \t\r\n]/)) {
+            while (!this.done() && this.test(/[ \t\r\n]/))
+                this.step();
+            return this.next();
+        }
+        if (this.test("/")) {
+            this.step();
+            if (this.test("/")) {
+                while (!this.done() && !this.test("\n"))
+                    this.step();
+                return this.token("//", pos);
+            }
+            return this.token("/", pos);
+        }
+        if (this.test(/[a-zA-Z_]/)) {
+            let value = "";
+            while (!this.done() && this.test(/[a-zA-Z0-9_]/)) {
+                value += this.current();
+                this.step();
+            }
+            switch (value) {
+                case "if":
+                    return this.token("if", pos);
+                case "else":
+                    return this.token("else", pos);
+                default:
+                    return { ...this.token("ident", pos), identValue: value };
+            }
+        }
+        if (this.test(/[0-9]/)) {
+            let textValue = "";
+            while (!this.done() && this.test(/[0-9]/)) {
+                textValue += this.current();
+                this.step();
+            }
+            return { ...this.token("int", pos), intValue: parseInt(textValue, 10) };
+        }
+        if (this.test("\"")) {
+            this.step();
+            let value = "";
+            while (!this.done() && !this.test("\"")) {
+                if (this.test("\\")) {
+                    this.step();
+                    if (this.done())
+                        break;
+                    value += {
+                        "n": "\n",
+                        "t": "\t",
+                        "0": "\0",
+                    }[this.current()] ?? this.current();
+                } else {
+                    value += this.current();
+                }
+                this.step();
+            }
+            if (this.done() || !this.test("\"")) {
+                console.error(
+                    `Lexer: unclosed/malformed string`
+                    + ` at ${pos.line}:${pos.col}`,
+                );
+                return this.token("error", pos);
+            }
+            this.step();
+            return { ...this.token("string", pos), stringValue: value };
+        }
+        if (this.test(/[\+\{\};=]/)) {
+            const first = this.current();
+            this.step();
+            if (first === "=" && !this.done() && this.test("=")) {
+                this.step();
+                return this.token("==", pos);
+            }
+            return this.token(first, pos);
+        }
+        console.error(`Lexer: illegal character '${this.current()}' at ${pos.line}:${pos.col}`);
+        this.step();
+        return this.next();
+    }
 
     private done(): boolean { return this.index >= this.text.length; }
     private current(): string { return this.text[this.index]; }
@@ -37,4 +118,15 @@ class Lexer {
         const length = this.index - pos.index;
         return { type, pos, length };
     }
+
+    /**
+     * Checks the current character against `pattern`; always false at end of input.
+     */
+    private test(pattern: RegExp | string): boolean {
+        if (this.done())
+            return false;
+        if (typeof pattern === "string")
+            return this.current() === pattern;
+        return pattern.test(this.current());
+    }
 }
diff --git a/src/main.ts b/src/main.ts
new file mode 100644
index 0000000..31de7e5
--- /dev/null
+++ b/src/main.ts
@@ -0,0 +1,16 @@
+import { Lexer } from "./Lexer.ts";
+
+const text = `
+    a1 123 +
+    // comment
+    "hello"
+    "escaped\\"\\nnewline"
+`;
+
+const lexer = new Lexer(text);
+let token = lexer.next();
+while (token !== null) {
+    const value = token.identValue ?? token.intValue ?? token.stringValue ?? "";
+    console.log(`Lexed ${token.type}(${value})`);
+    token = lexer.next();
+}