phi-lang/compiler/parse.phi
2025-09-25 19:31:24 +02:00

169 lines
4.5 KiB
Plaintext

(import "../stdlib.phi" (slice slice_eq contains))
(import "./counter.phi" (Counter))
// Builds a recursive-descent parser over `tokens`.
//
// Each token is a list whose first element is the token type and whose second
// element is the line number; "string", "int" and "ident" tokens carry their
// text as a third element.
//
// Returns a one-element list holding the `parse` closure. Calling `parse`
// consumes every token and returns a list of expression nodes, each shaped as
// (id kind line payload), where kind is "list", "string", "int" or "ident".
(fn Parser (tokens) (do
    (let i 0)
    // Placeholder so an empty token list is never indexed; it is replaced by
    // the first real token when there is one.
    (let tok (list "eof" 0 ""))
    (if (>= (len tokens) 1) (do
        (= tok (at tokens 0))
    ))
    // Counter is used here as a (read current value, increment) pair.
    (let (id_count increment_id) (Counter))

    // Returns the current counter value as a node id, then advances the counter.
    (fn next_id () (do
        (let id (id_count))
        (increment_id)
        (return id)
    ))

    // Parses expressions until the token list is exhausted.
    (fn parse () (do
        (let exprs (list))
        (loop (do
            (if (done) (break))
            (push exprs (parse_expr))
        ))
        (return exprs)
    ))

    // Parses one expression starting at the current token.
    // Panics on an unexpected token or on a list that is never closed.
    (fn parse_expr () (do
        // Capture the current token's fields before `eat` advances past it.
        (let (ty line value) tok)
        (if (eat "(") (do
            (let values (list))
            (loop (do
                // Stop at end of input as well as at ")", so an unclosed list
                // reaches the "expected ')'" panic below instead of recursing
                // on a stale token.
                (if (or (done) (test ")")) (break))
                (push values (parse_expr))
            ))
            (if (not (eat ")")) (do
                // At end of input `tok` still holds the last token read.
                (panic "expected ')' on line %" (at tok 1))
            ))
            (return (list (next_id) "list" line values))
        ) (if (eat "string") (do
            (return (list (next_id) "string" line value))
        ) (if (eat "int") (do
            (return (list (next_id) "int" line (string_to_int value)))
        ) (if (eat "ident") (do
            (return (list (next_id) "ident" line value))
        ) (do
            (panic "expected expression, got '%' on line %" ty line)
        )))))
    ))

    // Consumes the current token if its type equals `pat`.
    // Returns true when a token was consumed, false otherwise.
    (fn eat (pat) (do
        (if (not (test pat)) (return false))
        (step)
        (return true)
    ))

    // Advances to the next token. At end of input `tok` keeps its last value.
    (fn step () (do
        (+= i 1)
        (if (not (done)) (do
            (let new_tok (at tokens i))
            (= tok new_tok)
        ))
    ))

    // Returns true when there is a current token and its type equals `pat`.
    (fn test (pat) (do
        (if (done) (return false))
        (let (ty) tok)
        (return (== pat ty))
    ))

    // Returns true once every token has been consumed.
    (fn done () (do
        (return (>= i (len tokens)))
    ))

    (return (list parse))
))
// Splits source `text` into a list of tokens.
//
// Token shapes:
//   ("(" line) and (")" line)   parentheses
//   ("string" line value)       string literal with escapes resolved
//   ("int" line value)          run of decimal digits, kept as text
//   ("ident" line value)        run of identifier characters
//
// Whitespace and `//` line comments are skipped. An unrecognised character is
// reported with println and skipped. Panics on an unterminated string literal.
(fn tokenize (text) (do
    (let text_len (len text))
    (let tokens (list))
    (let i 0)
    (let line 1)
    (let ident_chars (+ "abcdefghijklmnopqrstuvwxyz"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890+-*/%&|=?!<>'_"))
    (loop (do
        (if (>= i text_len) (break))
        (let ch (at text i))
        (if (contains " \t\r\n" ch) (do
            (if (== ch "\n") (do
                (+= line 1)
            ))
            (+= i 1)
        ) (if (slice_eq text i "//") (do
            // Skip to the end of the line; the newline itself is handled by
            // the whitespace branch so that `line` is still incremented.
            (loop (do
                (if (or (>= i text_len) (== (at text i) "\n")) (do
                    (break)
                ))
                (+= i 1)
            ))
        ) (if (contains "()" ch) (do
            (push tokens (list ch line))
            (+= i 1)
        ) (if (== ch "\"") (do
            (let value "")
            // The token and the unterminated-string error report the line on
            // which the literal starts.
            (let start_line line)
            (+= i 1)
            (loop (do
                // Check the bound before every read so text ending inside a
                // string literal is never indexed past its end.
                (if (>= i text_len) (break))
                (= ch (at text i))
                (if (== ch "\"") (break))
                (if (== ch "\\") (do
                    (+= i 1)
                    (if (>= i text_len) (do
                        (break)
                    ))
                    (= ch (at text i))
                    (if (== ch "t") (do
                        (+= value "\t")
                    ) (if (== ch "r") (do
                        (+= value "\r")
                    ) (if (== ch "n") (do
                        (+= value "\n")
                    ) (if (== ch "0") (do
                        // NUL escape.
                        (+= value "\0")
                    ) (do
                        // Any other escaped character stands for itself.
                        (+= value ch)
                    )))))
                ) (do
                    (+= value ch)
                ))
                // `ch` is the raw source character just consumed; count real
                // newlines so later tokens get the correct line number.
                (if (== ch "\n") (do
                    (+= line 1)
                ))
                (+= i 1)
            ))
            // The loop leaves `i` in range only when it stopped on the closing quote.
            (if (>= i text_len) (do
                (panic "expected '\"' on line %" start_line)
            ))
            (+= i 1)
            (push tokens (list "string" start_line value))
        ) (if (contains "0123456789" ch) (do
            (let value "")
            (loop (do
                (if (>= i text_len) (break))
                (= ch (at text i))
                (if (not (contains "0123456789" ch)) (break))
                (+= value ch)
                (+= i 1)
            ))
            (push tokens (list "int" line value))
        ) (if (contains ident_chars ch) (do
            (let value "")
            (loop (do
                (if (>= i text_len) (break))
                (= ch (at text i))
                (if (not (contains ident_chars ch)) (break))
                (+= value ch)
                (+= i 1)
            ))
            (push tokens (list "ident" line value))
        ) (do
            (println "illegal char '%'" ch)
            (+= i 1)
        )))))))
    ))
    (return tokens)
))