From 8b38e5e8839552a46e23e664fdcd6bbacd0be7d5 Mon Sep 17 00:00:00 2001 From: sfja Date: Wed, 10 Sep 2025 02:04:13 +0200 Subject: [PATCH] compiler stuff --- compile.phi | 92 +++++++++++- package.json | 1 + phi.js | 343 ++++++++++++++++++++++++++++++++------------- vim/syntax/phi.vim | 16 +++ 4 files changed, 352 insertions(+), 100 deletions(-) diff --git a/compile.phi b/compile.phi index 1a62bc5..1a18172 100644 --- a/compile.phi +++ b/compile.phi @@ -1,7 +1,93 @@ -(let text (call read_text_file "program.phi")) -(fn a (b c) (do - +(let identChars "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890+-*/%&|=?!<>'_") + + +(fn tokenize (text) (do + (let text_len (call len text)) + + (let tokens ()) + (let i 0) + (let line 1) + + (loop (do + (if (>= i text_len) (break)) + + (let ch (call at text i)) + + (if (call contains " \t\r\n" ch) (do + (call println "line = %, ch = '%'" line ch) + (if (== ch "\n") (do + (+= line 1) + )) + (+= i 1) + ) (if (call slice_eq text i "//") (do + (loop (do + if (or (>= i text_len) (== (call at text i) "\n") (do + (break) + )) + (+= i 1) + )) + ) (if (call contains "()" ch) (do + (call push tokens (ch line)) + (+= i 1) + ) (if (== ch "\"") (do + + (+= i 1) + ) (if (call contains identChars ch) (do + (let value "") + (loop (do + (= ch (call at text i)) + (if (or (>= i text_len) (not (call contains identChars ch))) (do + (break) + )) + (call push value ch) + (+= i 1) + )) + (call push tokens ("ident" line value)) + ) (do + (call println "illegal char '%'" ch) + (+= i 1) + )))))) + + )) + (return tokens) )) +(fn contains (text ch) (do + (let text_len (call len text)) + (let i 0) + (loop (do + (if (>= i text_len) (break)) + (if (== (call at text i) ch) (do + (return true) + )) + (+= i 1) + )) + (return false) +)) + +(fn slice_eq (str slice_idx substr) (do + (let str_len (call len str)) + (let substr_len (call len substr)) + (let i slice_idx) + (loop (do + (if (or (>= (+ slice_idx i) str_len) (>= i substr_len)) + (return false)) + (if (!= (call at str (+ slice_idx i)) (call at substr i)) + (return false)) + (+= i 1) + )) + (return true) +)) + +(let text (call read_text_file "program.phi")) + +(let tokens (call tokenize text)) + +(call println "=== text ===") (call println text) +(call println "=== tokens ===") +(call println tokens) +(call println (+ 1 2)) + + diff --git a/package.json b/package.json index 56cda96..aaffb4a 100644 --- a/package.json +++ b/package.json @@ -1,4 +1,5 @@ { "name": "phi-lang", "version": "1.0.0", + "type": "module" } diff --git a/phi.js b/phi.js index dee087e..6798668 100644 --- a/phi.js +++ b/phi.js @@ -1,6 +1,7 @@ "use strict"; -import * as fs from "fs"; +import * as fs from "node:fs"; +import process from "node:process"; function main() { const text = fs.readFileSync(process.argv[2]).toString(); @@ -28,7 +29,7 @@ class Evaluator { } /** - * @param {Expr} expr + * @param {Expr} expr */ eval(expr) { if (expr.type === "list") { @@ -36,22 +37,22 @@ class Evaluator { } else if (expr.type === "int") { return { type: "value", value: { type: "int", value: expr.value } }; } else if (expr.type === "string") { - return { type: "value", value: { type: "string", value: expr.value } }; + return { + type: "value", + value: { type: "string", value: expr.value }, + }; } else if (expr.type === "ident") { - const findInTree = (syms, ident) => { - if (syms.map.has(ident)) - return syms.map.get(ident); - else if (syms.parent) - return findInTree(syms.parent, ident); - else - return undefined; + const sym = this.findSym(expr.value); + if (!sym) { + throw new Error( + `could not find symbol '${expr.value}' on line ${expr.line}`, + ); } - const sym = findInTree(this.syms, expr.value); - if (!sym) - throw new Error(`could not find symbol '${expr.value}' on line ${expr.line}`); return { type: "value", value: sym }; } else { - throw new Error(`unknown expr type '${expr.type}' on line ${expr.line}`); + throw new Error( + `unknown expr type '${expr.type}' on line ${expr.line}`, + ); } } @@ -65,19 +66,67 @@ class Evaluator { evalList(expr) { const s = expr.values; - const id = s[0]?.value ?? undefined; + const id = s[0]?.type === "ident" ? s[0].value : undefined; if (id === "fn") { const name = s[1].value; this.syms.map.set(name, { type: "fn", name, - params: s[2].values.map(ident => ident.value), + params: s[2].values.map((ident) => ident.value), body: s[3], syms: this.syms, }); return { type: "value", value: { type: "null" } }; + } else if (id === "return") { + return { + type: "return", + value: s[1] ? this.evalToValue(s[1]) : { type: "null" }, + }; + } else if (id === "let") { + const value = this.evalToValue(s[2]); + this.syms.map.set(s[1].value, value); + return { type: "value", value: { type: "null" } }; + } else if (id === "if") { + const cond = this.evalToValue(s[1]); + if (cond.type !== "bool") { + throw new Error( + `expected bool on line ${expr.line}`, + ); + } + if (cond.value) { + return this.eval(s[2]); + } else if (s[3]) { + return this.eval(s[3]); + } else { + return { type: "value", value: "null" }; + } + } else if (id === "loop") { + while (true) { + const result = this.eval(s[1]); + if (result.type === "break") { + return { type: "value", value: result.value }; + } else if (result.type !== "value") { + return result; + } + } + } else if (id === "break") { + return { + type: "break", + value: s[1] ? this.evalToValue(s[1]) : { type: "null" }, + }; + } else if (id === "do") { + let lastValue = { type: "null" }; + + for (const expr of s.slice(1)) { + const result = this.eval(expr); + if (result.type !== "value") { + return result; + } + lastValue = result.value; + } + return { type: "value", value: lastValue }; } else if (id === "call") { - const args = s.slice(2).map(arg => this.evalToValue(arg)); + const args = s.slice(2).map((arg) => this.evalToValue(arg)); const fnValue = this.evalToValue(s[1]); if (fnValue.type === "builtin") { @@ -91,7 +140,9 @@ class Evaluator { map: new Map(), }; if (fnValue.params.length !== args.length) { - throw new Error(`incorrect amount of arguments on line ${line}`); + throw new Error( + `incorrect amount of arguments on line ${line}`, + ); } for (let i = 0; i < fnValue.params.length; ++i) { this.syms.map.set(fnValue.params[i], args[i]); @@ -103,70 +154,159 @@ class Evaluator { if (result.type === "value" || result.type === "return") { returnValue = result.value; } else { - throw new Error(`illegal ${result.type} across boundry`) + throw new Error(`illegal ${result.type} across boundry`); } this.syms = callerSyms; return { type: "value", value: returnValue }; - } else if (id === "return") { - return { type: "return", value: s[1] ? this.evalToValue(s[1]) : { type: "null" } }; - } else if (id === "let") { - const value = this.evalToValue(s[2]); - this.syms.map.set(s[1].value, value); - return { type: "value", value: { type: "null" } }; - } else if (id === "do") { - let lastValue = { type: "null" }; - - for (const expr of s.slice(1)) { - const result = this.eval(expr); - if (result.type !== "value") { - break; - } - lastValue = result.value; - } - return { type: "value", value: lastValue }; - } else if (s[0] === "if") { - const cond = this.evalToValue(s[1]); - if (cond.type !== "bool") { - throw new Error(`expected bool on line ${line}`); - } - if (cond.value) { - return this.eval(s[2]); - } else if (s[3]) { - return this.eval(s[3]); + } else if (id === "not") { + const value = this.evalToValue(s[1]); + return { + type: "value", + value: { type: "bool", value: !value.value }, + }; + } else if (id === "or") { + const left = this.evalToValue(s[1]); + if (left.value) { + return { type: "value", value: left }; } else { - return { type: "value", value: "null" }; + const right = this.evalToValue(s[2]); + return { type: "value", value: right }; } - } else if (s[0] === "loop") { - while (true) { - const result = this.eval(s[1]); - if (result.type === "break") { - return { type: "value", value: result.value }; - } else if (result.type !== "value") { - return result; + } else if (id === "and") { + const left = this.evalToValue(s[1]); + if (left.value) { + const right = this.evalToValue(s[2]); + return { type: "value", value: right }; + } else { + return { type: "value", value: left }; + } + } else if (id in artithmeticOps) { + const left = this.evalToValue(s[1]); + const right = this.evalToValue(s[2]); + return { + type: "value", + value: { + type: "int", + value: artithmeticOps[id](left.value, right.value), + }, + }; + } else if (id in comparisonOps) { + const left = this.evalToValue(s[1]); + const right = this.evalToValue(s[2]); + return { + type: "value", + value: { + type: "bool", + value: comparisonOps[id](left.value, right.value), + }, + }; + } else if (id in assignOps) { + if (s[1].type === "ident") { + const sym = this.findSym(s[1].value); + if (!sym) { + throw new Error( + `could not find symbol '${expr.value}' on line ${expr.line}`, + ); } + const right = this.evalToValue(s[2]); + const newValue = assignOps[id](sym, right); + sym.type = newValue.type; + sym.value = newValue.value; + } else { + throw new Error( + `cannot assign to expression on line ${expr.line}`, + ); } - } else if (s[0] === "break") { - return { type: "break", value: s[1] ? this.evalToValue(s[1]) : { type: "null" } }; + return { type: "value", value: { type: "null" } }; } else { - return { type: "value", value: { type: "list", values: s.map(expr => this.evalToValue(expr)) } }; + return { + type: "value", + value: { + type: "list", + values: s.map((expr) => this.evalToValue(expr)), + }, + }; + } + } + + findSym(ident, syms = this.syms) { + if (syms.map.has(ident)) { + return syms.map.get(ident); + } else if (syms.parent) { + return this.findSym(ident, syms.parent); + } else { + return undefined; } } } +const artithmeticOps = { + "+": (left, right) => right + left, + "-": (left, right) => right - left, +}; +const comparisonOps = { + "==": (left, right) => left === right, + "!=": (left, right) => left !== right, + "<": (left, right) => left < right, + ">": (left, right) => left > right, + "<=": (left, right) => left <= right, + ">=": (left, right) => left >= right, +}; +const assignOps = { + "=": (_, right) => right, + "+=": (left, right) => ({ type: "int", value: left.value + right.value }), + "-=": (left, right) => ({ type: "int", value: left.value - right.value }), +}; + const builtinFns = { - println(msg) { - console.log(valueToPrint(msg)); + println(msg, ...args) { + let text = valueToPrint(msg); + + for (const arg of args) { + text = text.replace("%", valueToPrint(arg)); + } + + console.log(text); return { type: "null" }; }, read_text_file(path) { - const text = fs.readFileSync(path.value); + const text = fs.readFileSync(path.value).toString(); return { type: "string", value: text }; - } + }, + push(list, value) { + if (list.type === "string") { + list.value += value.value; + return list; + } + list.values.push(value); + return list; + }, + at(value, index) { + if (value.type === "string") { + return { type: "string", value: value.value[index.value] }; + } + return value.values[index.value]; + }, + len(value) { + if (value.type === "string") { + return { type: "int", value: value.value.length }; + } + return { type: "int", value: value.values.length }; + }, }; -const builtins = Object.entries(builtinFns) - .map(([key, fn]) => [key, { type: "builtin", fn }]); +const consts = { + "null": { type: "null" }, + "false": { type: "bool", value: false }, + "true": { type: "bool", value: true }, +}; + +const builtins = [ + ...Object.entries(builtinFns) + .map(([key, fn]) => [key, { type: "builtin", fn }]), + ...Object.entries(consts), +]; function valueToPrint(value) { if (value.type === "null") { @@ -178,7 +318,7 @@ function valueToPrint(value) { } else if (value.type === "string") { return `${value.value}`; } else if (value.type === "list") { - return `(${value.values.map(v => valueToString(v)).join(" ")})`; + return `(${value.values.map((v) => valueToString(v)).join(" ")})`; } else { throw new Error(`unknown value type ${value.type}`); } @@ -194,7 +334,7 @@ function valueToString(value) { } else if (value.type === "string") { return `"${value.value}"`; } else if (value.type === "list") { - return `(${value.values.map(v => valueToString(v)).join(" ")})`; + return `(${value.values.map((v) => valueToString(v)).join(" ")})`; } else { throw new Error(`unknown value type ${value.type}`); } @@ -210,14 +350,14 @@ function valueToJs(value) { } else if (value.type === "string") { return value.value; } else if (value.type === "list") { - return value.values.map(v => valueToJs(v)); + return value.values.map((v) => valueToJs(v)); } else { throw new Error(`unknown value type ${value.type}`); } } /** - * @param {Expr} expr + * @param {Expr} expr * @returns {string} */ function exprToString(expr) { @@ -228,7 +368,7 @@ function exprToString(expr) { } else if (expr.type === "string") { return `"${expr.value}"`; } else if (expr.type === "list") { - return `(${expr.values.map(v => exprToString(v)).join(" ")})`; + return `(${expr.values.map((v) => exprToString(v)).join(" ")})`; } else { throw new Error(`unknown value type ${expr.type}`); } @@ -248,18 +388,18 @@ class Parser { .replace(/\/\/.*?$/mg, "") .replace(/([\(\)\n])/g, " $1 ") .split(/[ \t\r]/) - .filter(tok => tok !== ""); + .filter((tok) => tok !== ""); this.idx = 0; this.line = 1; } /** - * * @returns {Expr[]} */ parse() { - if (this.curr === "\n") + if (this.curr === "\n") { this.step(); + } const exprs = []; while (!this.done) { @@ -276,62 +416,66 @@ class Parser { values.push(this.parseExpr()); } if (!this.test(")")) { - throw new Error(`expected ')'`) + throw new Error(`expected ')'`); } this.step(); return { type: "list", line, values }; - } - else if (this.test(/STRING_\d+/)) { + } else if (this.test(/STRING_\d+/)) { const id = Number(this.curr.match(/STRING_(\d+)/)[1]); this.step(); return { type: "string", line, value: this.strings[id] }; - } - else if (this.test(/0|(:?[1-9][0-9]*)/)) { + } else if (this.test(/0|(:?[1-9][0-9]*)/)) { const value = Number(this.curr); this.step(); return { type: "int", line, value }; - } - else if (this.test(/[a-zA-Z0-9\+\-\*/%&\|=\?\!<>'_]+/)) { + } else if (this.test(/[a-zA-Z0-9\+\-\*/%&\|=\?\!<>'_]+/)) { const value = this.curr; this.step(); return { type: "ident", line, value }; - } - else { - throw new Error(`expected expression, got ${this.curr}`) + } else { + throw new Error( + `expected expression, got ${this.curr} on line ${this.line}`, + ); } } eat(tok) { - if (!this.test(tok)) + if (!this.test(tok)) { return false; - this.step() + } + this.step(); return true; } test(tok) { - if (this.done) + if (this.done) { return false; - if (typeof tok === "string") + } + if (typeof tok === "string") { return this.curr === tok; - else if (tok instanceof RegExp) + } else if (tok instanceof RegExp) { return new RegExp(`^${tok.source}$`) .test(this.curr); - else - throw new Error() + } else { + throw new Error(); + } } step() { do { - this.idx += 1; if (!this.done && this.curr === "\n") { this.line += 1; } + this.idx += 1; } while (!this.done && this.curr === "\n"); } - get done() { return this.idx >= this.tokens.length; } - get curr() { return this.tokens[this.idx]; } + get done() { + return this.idx >= this.tokens.length; + } + get curr() { + return this.tokens[this.idx]; + } } - class StringExtractor { constructor(text) { this.text = text; @@ -342,8 +486,8 @@ class StringExtractor { extract() { while (this.idx < this.text.length) { - if (this.text[this.idx] == '\"') { - this.extractString() + if (this.text[this.idx] == '"') { + this.extractString(); } else { this.outputText += this.text[this.idx]; this.idx += 1; @@ -355,10 +499,11 @@ class StringExtractor { this.idx += 1; let value = ""; while (this.idx < this.text.length && this.text[this.idx] != '"') { - if (this.text[this.idx] == '\\') { + if (this.text[this.idx] == "\\") { this.idx += 1; - if (this.idx > this.text.length) + if (this.idx > this.text.length) { break; + } const ch = this.text[this.idx]; value += { "0": "\0", @@ -380,8 +525,12 @@ class StringExtractor { this.outputText += `STRING_${id}`; } - getStrings() { return this.strings; } - getOutputText() { return this.outputText; } + getStrings() { + return this.strings; + } + getOutputText() { + return this.outputText; + } } main(); diff --git a/vim/syntax/phi.vim b/vim/syntax/phi.vim index 6402b2c..3fb443e 100644 --- a/vim/syntax/phi.vim +++ b/vim/syntax/phi.vim @@ -9,9 +9,25 @@ endif syn keyword Keyword fn call return loop break if let do +syn keyword Operator and or not syn keyword Special null syn keyword Boolean true false +syn match Operator '+' +syn match Operator '-' +syn match Operator '\*' +syn match Operator '/' +syn match Operator '=' +syn match Operator '+=' +syn match Operator '-=' +syn match Operator '==' +syn match Operator '!=' +syn match Operator '<' +syn match Operator '>' +syn match Operator '<=' +syn match Operator '>=' + + syn match Number '0' syn match Number '[1-9][0-9]*'