From a31668c560e211f64501e6b640c823824bd1b100 Mon Sep 17 00:00:00 2001 From: riton Date: Tue, 15 Jul 2025 23:45:18 +0300 Subject: added parser Squashed commit of the following: commit 744b32488bfd56193e22d5144c41b8fc0b08705e Author: riton Date: Tue Jul 15 23:27:11 2025 +0300 parse function literals and call expressions commit 46e04e75efeed277b9bd102348e3eebb2d42f762 Author: riton Date: Tue Jul 15 21:17:00 2025 +0300 parse if expressions commit 19bfdae150be2a3fa72f9d030f169355e8a9c035 Author: riton Date: Tue Jul 15 20:21:48 2025 +0300 don't need (emit bool) because generic handles it commit 1b6953a608c18a26244c52fcc455809982d0b616 Author: riton Date: Tue Jul 15 20:13:39 2025 +0300 parse grouped expr, return and let statements commit be480aed29ce5d2ad72677f6ae0abebf16d9ad0e Author: riton Date: Tue Jul 15 19:49:54 2025 +0300 parse infix expressions commit 70859a54c500ace8541f2e4d56c580b3b13824bc Author: riton Date: Tue Jul 15 18:27:38 2025 +0300 parse booleans and prefix expressions commit bb9e53dbfe39fd53e57837795b81e425f9028883 Author: riton Date: Tue Jul 15 17:15:23 2025 +0300 start parser --- monkey.asd | 4 +- src/ast.lisp | 207 ++++++++++++++++++++++++++++++++++++++++ src/package.lisp | 31 +++++- src/parser.lisp | 284 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 523 insertions(+), 3 deletions(-) create mode 100644 src/ast.lisp create mode 100644 src/parser.lisp diff --git a/monkey.asd b/monkey.asd index 80ff674..eed1411 100644 --- a/monkey.asd +++ b/monkey.asd @@ -7,8 +7,8 @@ (:file "util") (:file "token") (:file "lexer") - ;; (:file "parser") - ) + (:file "ast") + (:file "parser")) :in-order-to ((test-op (test-op "monkey/test")))) (defsystem "monkey/test" diff --git a/src/ast.lisp b/src/ast.lisp new file mode 100644 index 0000000..c33de88 --- /dev/null +++ b/src/ast.lisp @@ -0,0 +1,207 @@ +(in-package :ast) + +(defclass node () + ((token :accessor token + :initarg :token + :type token))) + +(defgeneric stringify (node) + (:method ((node node)) + (with-output-to-string (out) + (emit node out)))) + +(defparameter *indent* 0) +(defun emit-new-line (stream) + (write-char #\Newline stream) + (loop :for i :from 0 :below *indent* + :do (write-char #\Space stream))) + +(defgeneric emit (node stream) + (:method ((node node) stream) + (write-string (token-literal node) stream))) + +(defgeneric token-literal (node) + (:method ((node node)) + (token:literal (token node)))) + +(defclass statement (node) + ()) + +(defclass expression (node) + ()) + +(defclass program (node) + ((statements :accessor statements + :initform (make-array 10 :adjustable t :fill-pointer 0)))) + +(defmethod token-literal ((node program)) + (if (> (length (statements node)) 0) + (token-literal (aref (statements node) 0)) + "")) + +(defmethod emit ((node program) stream) + (loop :for stmt :across (statements node) + :do (emit stmt stream))) + +(defclass identifier (expression) + ()) + +(defclass let-statement (statement) + ((name :accessor name + :initarg :name + :type identifier) + (value :accessor value + :initarg :value + :type expression))) + +(defmethod emit ((node let-statement) stream) + (write-string (token-literal node) stream) + (write-char #\Space stream) + (emit (name node) stream) + (write-string " = " stream) + (emit (value node) stream) + (write-char #\; stream) + (write-char #\Newline stream)) + +(defclass return-statement (statement) + ((return-value :accessor return-value + :initarg :return-value + :type expression))) + +(defmethod emit ((node return-statement) stream) + (write-string (token-literal node) stream) + (write-char #\Space stream) + (emit (return-value node) stream) + (write-char #\; stream) + (write-char #\Newline stream)) + +(defclass integer-literal (expression) + ((value :accessor value + :initarg :value + :type integer))) + +(defclass prefix-expression (expression) + ((operator :accessor operator + :initarg :operator + :type token-type) + (right :accessor right + :initarg :right + :type expression))) + +(defmethod emit ((node prefix-expression) stream) + (write-char #\( stream) + (write-string (token-literal node) stream) + (emit (right node) stream) + (write-char #\) stream)) + + +(defclass expression-statement (statement) + ((expression :accessor expression + :initarg :expression + :type expression))) + +(defmethod emit ((node expression-statement) stream) + (emit (expression node) stream) + (write-char #\; stream) + (write-char #\Newline stream)) + +(defclass boolean-expression (expression) + ((value :accessor value + :initarg :value + :type boolean))) + +(defclass infix-expression (expression) + ((left :accessor left + :initarg :left + :type expression) + (operator :accessor operator + :initarg :operator + :type token-type) + (right :accessor right + :initarg :right + :type expression))) + +(defmethod emit ((node infix-expression) stream) + (write-char #\( stream) + (emit (left node) stream) + (let ((tok (token:as-token (operator node)))) + (format stream " ~a " (token:literal tok))) + (emit (right node) stream) + (write-char #\) stream)) + +(defclass if-expression (expression) + ((con :accessor con + :initarg :condition + :type expression) + (consequence :accessor consequence + :initarg :consequence + :type block-statement) + (alternative :accessor alternative + :initform nil + :initarg :alternative + :type (or block-statement null)))) + +(defmethod emit ((node if-expression) stream) + (write-string "if (" stream) + (emit (con node) stream) + (write-string ") " stream) + (emit (consequence node) stream) + (unless (null (alternative node)) + (write-string "else " stream) + (emit (alternative node) stream))) + +(defclass block-statement (statement) + ((statements :accessor statements + :initarg statements + :initform (make-array 0 :element-type 'statement + :adjustable t :fill-pointer 0) + :type (vector statement)))) + +(defmethod emit ((node block-statement) stream) + (write-char #\{ stream) + (let ((*indent* (+ 4 *indent*))) + (emit-new-line stream) + (loop :for stmt :across (statements node) + :do (emit stmt stream))) + (format stream "}")) + +(defclass function-literal (expression) + ((parameters :accessor parameters + :initarg :parameters + :initform (make-array 0 :element-type 'identifier + :adjustable t :fill-pointer 0) + :type (vector identifier)) + (body :accessor body + :initarg :body + :type block-statement))) + +(defmethod emit ((node function-literal) stream) + (write-string (token-literal node) stream) + (write-char #\( stream) + (loop :for param :across (parameters node) + :for i :from 1 + :do (write-string (token-literal param) stream) + (when (< i (length (parameters node))) + (write-string ", " stream))) + (write-char #\) stream) + (emit (body node) stream)) + +(defclass call-expression (expression) + ((fn-expr :accessor fn-expr + :initarg :function + :type expression) + (args :accessor args + :initarg :args + :initform (make-array 0 :element-type 'expression + :adjustable t :fill-pointer 0) + :type (vector expression)))) + +(defmethod emit ((node call-expression) stream) + (emit (fn-expr node) stream) + (write-char #\( stream) + (loop :for arg :across (args node) + :for i :from 1 + :do (emit arg stream) + (when (< i (length (args node))) + (write-string ", " stream))) + (write-char #\) stream)) diff --git a/src/package.lisp b/src/package.lisp index 6f05abe..cf8ce51 100644 --- a/src/package.lisp +++ b/src/package.lisp @@ -11,6 +11,8 @@ (:use #:common-lisp #:utilities) (:shadow #:type) (:export #:token + #:literal + #:type #:type-is #:token= #:make @@ -24,7 +26,34 @@ (defpackage #:lexer (:use #:common-lisp #:utilities) - (:export #:make #:lex)) + (:export #:make #:lex #:next-token)) + +(defpackage #:ast + (:use #:common-lisp #:utilities) + (:export #:node + #:stringify + #:token-literal + #:emit + #:statement + #:expression + #:program + #:statements + #:identifier + #:integer-literal + #:prefix-expression #:right + #:expression-statement + #:boolean-expression + #:infix-expression + #:let-statement #:name #:value + #:return-statement #:return-value + #:if-expression + #:block-statement + #:function-literal + #:call-expression #:args)) + +(defpackage #:parser + (:use #:common-lisp #:utilities) + (:export #:make #:parse-program)) (defpackage #:monkey/test (:use #:common-lisp #:utilities)) diff --git a/src/parser.lisp b/src/parser.lisp new file mode 100644 index 0000000..c3e09a9 --- /dev/null +++ b/src/parser.lisp @@ -0,0 +1,284 @@ +(in-package :parser) + +(defclass parser () + ((lexer :accessor lexer + :initarg :lexer + :type lexer) + (token :accessor token + :type token) + (peek-token :accessor peek-token + :type token + :initform nil))) + +(defmethod next-token ((parser parser)) + (setf (token parser) (peek-token parser)) + (setf (peek-token parser) (lexer:next-token (lexer parser)))) + +(defun make (lexer) + (let ((parser (make-instance 'parser :lexer lexer))) + (next-token parser) + (next-token parser) + parser)) + +(defun make-from-string (string &optional length) + (make (lexer:make string length))) + +(define-compiler-macro make (&whole form lexer? &optional length) + (if (stringp lexer?) + `(make-from-string ,lexer? ,length) + form)) + +(defmethod token-is ((parser parser) token-type) + (token:type-is (token parser) token-type)) + +(defmethod peek-is ((parser parser) token-type) + (token:type-is (peek-token parser) token-type)) + +(defmethod expect-peek ((parser parser) token-type) + (let ((expect? (peek-is parser token-type))) + (if (not expect?) + (error "(expect-peek) error: expect: ~a, got: ~a" + token-type + (peek-token parser)) + (next-token parser)) + expect?)) + + +(defmethod parse-program ((parser parser)) + (let ((program (make-instance 'ast:program))) + (loop :until (token-is parser :t/eof) + :for stmt := (parse-statement parser) + :unless (null stmt) + :do (vector-push-extend stmt (ast:statements program)) + :do (next-token parser)) + program)) + +(defmethod parse-statement ((parser parser)) + (case (token:type (token parser)) + (:t/let (parse-let-statement parser)) + (:t/return (parse-return-statement parser)) + (otherwise (parse-expression-statement parser)))) + +(defmethod parse-let-statement ((parser parser)) + (let ((stmt (make-instance 'ast:let-statement :token (token parser)))) + (expect-peek parser :t/ident) + (setf (ast:name stmt) (make-instance 'ast:identifier :token (token parser))) + (expect-peek parser :t/=) + (next-token parser) + (setf (ast:value stmt) (parse-expression parser +lowest+)) + (when (peek-is parser :t/semicolon) + (next-token parser)) + stmt)) + +(defmethod parse-return-statement ((parser parser)) + (let ((stmt (make-instance 'ast:return-statement + :token (token parser)))) + (next-token parser) + (setf (ast:return-value stmt) (parse-expression parser +lowest+)) + (when (peek-is parser :t/semicolon) + (next-token parser)) + stmt)) + + +;;;; Expression parsing and precedences +(defparameter +lowest+ 0) +(defparameter +equals+ 1) +(defparameter +lessgreater+ 2) +(defparameter +sum+ 3) +(defparameter +product+ 4) +(defparameter +prefix+ 5) +(defparameter +call+ 6) + +(let ((precedences (make-hash-table))) + (setf (gethash :t/= precedences) +equals+) + (setf (gethash :t/!= precedences) +equals+) + (setf (gethash :t/< precedences) +lessgreater+) + (setf (gethash :t/> precedences) +lessgreater+) + (setf (gethash :t/+ precedences) +sum+) + (setf (gethash :t/- precedences) +sum+) + (setf (gethash :t// precedences) +product+) + (setf (gethash :t/* precedences) +product+) + (setf (gethash :t/lparen precedences) +call+) + + (defmethod peek-precedence ((parser parser)) + (multiple-value-bind (prec ok) (gethash (token:type (peek-token parser)) precedences) + (if (not ok) + +lowest+ + prec))) + + (defmethod cur-precedence ((parser parser)) + (multiple-value-bind (prec ok) (gethash (token:type (token parser)) precedences) + (if (not ok) + +lowest+ + prec)))) + +(defun get-prefix-fn (token-type) + (case token-type + (:t/ident #'parse-identifier) + (:t/int #'parse-integer-literal) + ((:t/! :t/-) #'parse-prefix-expression) + ((:t/true :t/false) #'parse-boolean) + (:t/lparen #'parse-grouped-expression) + (:t/if #'parse-if-expression) + (:t/function #'parse-function-literal) + (otherwise nil))) + +(defun get-infix-fn (token-type) + (case token-type + ((:t/= :t/!= :t/== :t/< :t/> :t/+ :t/- :t// :t/*) #'parse-infix-expression) + (:t/lparen #'parse-call-expression) + (otherwise nil))) + +(defmethod parse-expression-statement ((parser parser)) + (let ((expr (parse-expression parser +lowest+))) + (when (peek-is parser :t/semicolon) + (next-token parser)) + (make-instance 'ast:expression-statement + :token (token parser) + :expression expr))) + +(defmethod parse-expression ((parser parser) precedence) + (let ((prefix (get-prefix-fn (token:type (token parser))))) + (when (null prefix) + (error "token ~a does not have a prefix parse" (token:literal (token parser)))) + (let ((left-expr (funcall prefix parser))) + (loop :while (and (not (peek-is parser :t/semicolon)) + (< precedence (peek-precedence parser))) + :do (let ((infix (get-infix-fn (token:type (peek-token parser))))) + (cond ((null infix) + (return-from parse-expression left-expr)) + (t + (next-token parser) + (setf left-expr (funcall infix parser left-expr)))))) + left-expr))) + +(defmethod parse-integer-literal ((parser parser)) + (let ((int (parse-integer (token:literal (token parser))))) + (make-instance 'ast:integer-literal + :token (token parser) + :value int))) + +(defmethod parse-identifier ((parser parser)) + (make-instance 'ast:identifier :token (token parser))) + +(defmethod parse-prefix-expression ((parser parser)) + (let* ((token (token parser)) + (expr (make-instance 'ast:prefix-expression + :token token + :operator (token:type token)))) + (next-token parser) + (setf (ast:right expr) (parse-expression parser +prefix+)) + expr)) + +(defmethod parse-boolean ((parser parser)) + (make-instance 'ast:boolean-expression + :token (token parser) + :value (token:type-is (token parser) :t/true))) + +(defmethod parse-infix-expression ((parser parser) left-expr) + (let ((expr (make-instance 'ast:infix-expression + :token (token parser) + :operator (token:type (token parser)) + :left left-expr)) + (prec (cur-precedence parser))) + (next-token parser) + (setf (ast:right expr) (parse-expression parser prec)) + expr)) + +(defmethod parse-grouped-expression ((parser parser)) + (next-token parser) + (let ((expr (parse-expression parser +lowest+))) + (if (not (expect-peek parser :t/rparen)) + (error "(parse-grouped-expression): expected a right parenthesis token") + expr))) + +(defmethod parse-block-statement ((parser parser)) + (let ((block-stmt (make-instance 'ast:block-statement :token (token parser)))) + (next-token parser) + (loop :until (or (token-is parser :t/rbrace) + (token-is parser :t/eof)) + :do (let ((stmt (parse-statement parser))) + (unless (null stmt) + (vector-push-extend stmt (ast:statements block-stmt))) + (next-token parser))) + block-stmt)) + +(defmethod parse-if-expression ((parser parser)) + (let ((tok (token parser)) con conseq alter) + (expect-peek parser :t/lparen) + (next-token parser) + (setf con (parse-expression parser +lowest+)) + (expect-peek parser :t/rparen) + (expect-peek parser :t/lbrace) + (setf conseq (parse-block-statement parser)) + (when (peek-is parser :t/else) + (next-token parser) + (expect-peek parser :t/lbrace) + (setf alter (parse-block-statement parser))) + (make-instance 'ast:if-expression + :token tok + :condition con + :consequence conseq + :alternative alter))) + +(defmethod parse-function-literal ((parser parser)) + (let ((tok (token parser)) params body) + (expect-peek parser :t/lparen) + (setf params (parse-function-parameters parser)) + (expect-peek parser :t/lbrace) + (setf body (parse-block-statement parser)) + (make-instance 'ast:function-literal + :token tok + :parameters params + :body body))) + +(defmethod parse-function-parameters ((parser parser)) + (block nil + (let ((idents (make-array 0 :element-type 'ast:identifier + :adjustable t :fill-pointer 0))) + (when (peek-is parser :t/rparen) + (next-token parser) + (return idents)) + (next-token parser) + (unless (token-is parser :t/ident) + (error "(parse-function-parameters): expected identifier, got ~a" + (token:literal (token parser)))) + (vector-push-extend (make-instance 'ast:identifier + :token (token parser)) + idents) + (loop :while (peek-is parser :t/comma) + :do (next-token parser) + (next-token parser) + (unless (token-is parser :t/ident) + (error "(parse-function-parameters): expected identifier, got ~a" + (token:literal (token parser)))) + (vector-push-extend (make-instance 'ast:identifier + :token (token parser)) + idents)) + (expect-peek parser :t/rparen) + idents))) + +(defmethod parse-call-expression ((parser parser) func) + (let ((expr (make-instance 'ast:call-expression :token (token parser) :function func))) + (parse-call-arguments parser expr) + expr)) + +(defmethod parse-call-arguments ((parser parser) expr) + (block nil + (when (peek-is parser :t/rparen) + (next-token parser) + (return)) + (next-token parser) + (vector-push-extend (parse-expression parser +lowest+) + (ast:args expr)) + (loop :while (peek-is parser :t/comma) + :do (next-token parser) + (next-token parser) + (vector-push-extend (parse-expression parser +lowest+) + (ast:args expr))) + (expect-peek parser :t/rparen) + expr)) + +(defun parse-and-stringify (str) + (let ((parser (make-from-string str))) + (ast:stringify (parse-program parser)))) -- cgit v1.2.3