From 8afcee0b1087dad6c821aa2ee1e917e3301f0e81 Mon Sep 17 00:00:00 2001 From: riton Date: Sat, 12 Jul 2025 04:27:10 +0300 Subject: refactor: token-type uses deftype instead of int enum --- src/lexer-test.lisp | 164 ++++++++++++++++++++++++++-------------------------- src/lexer.lisp | 40 ++++++------- src/token.lisp | 115 +++++++++++++++++++++++------------- src/util.lisp | 3 - 4 files changed, 176 insertions(+), 146 deletions(-) diff --git a/src/lexer-test.lisp b/src/lexer-test.lisp index cf3ba03..916af29 100644 --- a/src/lexer-test.lisp +++ b/src/lexer-test.lisp @@ -4,7 +4,7 @@ (loop :for token :in tokens :if (consp token) :collect (make-token (car token) (cadr token)) - :if (numberp token) + :if (typep token 'token-type) :collect (as-token token))) (defun test-lexer (str expect) @@ -14,15 +14,15 @@ (defun test-1 () (test-lexer "=+(){},;" - (make-tokens token-= - token-+ - token-lparen - token-rparen - token-lbrace - token-rbrace - token-comma - token-semicolon - token-eof))) + (make-tokens :t/= + :t/+ + :t/lparen + :t/rparen + :t/lbrace + :t/rbrace + :t/comma + :t/semicolon + :t/eof))) (defun test-2 () @@ -45,75 +45,75 @@ if (5 < 10) { 10 == 10; 10 != 9; " - (make-tokens token-let - (list token-ident "five") - token-= - (list token-int "5") - token-semicolon - token-let - (list token-ident "ten") - token-= - (list token-int "10") - token-semicolon - token-let - (list token-ident "add") - token-= - token-function - token-lparen - (list token-ident "x") - token-comma - (list token-ident "y") - token-rparen - token-lbrace - (list token-ident "x") - token-+ - (list token-ident "y") - token-rbrace - token-semicolon - token-let - (list token-ident "result") - token-= - (list token-ident "add") - token-lparen - (list token-ident "five") - token-comma - (list token-ident "ten") - token-rparen - token-semicolon - token-! - token-- - token-/ - token-* - token-semicolon - (list token-int "5") - token-< - (list token-int "10") - token-> - (list token-int "5") - token-semicolon - (list token-if "if") - token-lparen - (list token-int "5") - token-< - (list token-int "10") - token-rparen - token-lbrace - token-return - token-true - token-semicolon - token-rbrace - token-else - token-lbrace - token-return - token-false - token-semicolon - token-rbrace - (list token-int "10") - token-== - (list token-int "10") - token-semicolon - (list token-int "10") - token-!= - (list token-int "9") - token-semicolon - token-eof ""))) + (make-tokens :t/let + (list :t/ident "five") + :t/= + (list :t/int "5") + :t/semicolon + :t/let + (list :t/ident "ten") + :t/= + (list :t/int "10") + :t/semicolon + :t/let + (list :t/ident "add") + :t/= + :t/function + :t/lparen + (list :t/ident "x") + :t/comma + (list :t/ident "y") + :t/rparen + :t/lbrace + (list :t/ident "x") + :t/+ + (list :t/ident "y") + :t/rbrace + :t/semicolon + :t/let + (list :t/ident "result") + :t/= + (list :t/ident "add") + :t/lparen + (list :t/ident "five") + :t/comma + (list :t/ident "ten") + :t/rparen + :t/semicolon + :t/! + :t/- + :t// + :t/* + :t/semicolon + (list :t/int "5") + :t/< + (list :t/int "10") + :t/> + (list :t/int "5") + :t/semicolon + (list :t/if "if") + :t/lparen + (list :t/int "5") + :t/< + (list :t/int "10") + :t/rparen + :t/lbrace + :t/return + :t/true + :t/semicolon + :t/rbrace + :t/else + :t/lbrace + :t/return + :t/false + :t/semicolon + :t/rbrace + (list :t/int "10") + :t/== + (list :t/int "10") + :t/semicolon + (list :t/int "10") + :t/!= + (list :t/int "9") + :t/semicolon + :t/eof ""))) diff --git a/src/lexer.lisp b/src/lexer.lisp index 3865cd6..8694aa1 100644 --- a/src/lexer.lisp +++ b/src/lexer.lisp @@ -67,35 +67,35 @@ (prog1 (case (ch l) (#\= (cond ((char= #\= (lexer-peek l)) (lexer-read l) - (make-token token-== "==")) + (make-token :t/== "==")) (t ;;(whitespace? (lexer-peek l)) - (make-token token-= (ch l))))) - (#\+ (make-token token-+ (ch l))) - (#\- (make-token token-- (ch l))) + (make-token :t/= (ch l))))) + (#\+ (make-token :t/+ (ch l))) + (#\- (make-token :t/- (ch l))) (#\! (cond ((char= #\= (lexer-peek l)) (lexer-read l) - (make-token token-!= "!=")) + (make-token :t/!= "!=")) (t ;; (whitespace? (lexer-peek l)) - (make-token token-! (ch l))))) - (#\/ (make-token token-/ (ch l))) - (#\* (make-token token-* (ch l))) - (#\< (make-token token-< (ch l))) - (#\> (make-token token-> (ch l))) - (#\; (make-token token-semicolon (ch l))) - (#\, (make-token token-comma (ch l))) - (#\( (make-token token-lparen (ch l))) - (#\) (make-token token-rparen (ch l))) - (#\{ (make-token token-lbrace (ch l))) - (#\} (make-token token-rbrace (ch l))) + (make-token :t/! (ch l))))) + (#\/ (make-token :t// (ch l))) + (#\* (make-token :t/* (ch l))) + (#\< (make-token :t/< (ch l))) + (#\> (make-token :t/> (ch l))) + (#\; (make-token :t/semicolon (ch l))) + (#\, (make-token :t/comma (ch l))) + (#\( (make-token :t/lparen (ch l))) + (#\) (make-token :t/rparen (ch l))) + (#\{ (make-token :t/lbrace (ch l))) + (#\} (make-token :t/rbrace (ch l))) (otherwise (cond ((eof? (ch l)) - (make-token token-eof "")) + (make-token :t/eof "")) ((letter? (ch l)) (let ((literal (read-identifier l))) (return (make-token (lookup-identifier literal) literal)))) ((digit? (ch l)) - (return (make-token token-int (read-number l)))) - (t (make-token token-illegal "ILLEGAL"))))) + (return (make-token :t/int (read-number l)))) + (t (make-token :t/illegal "ILLEGAL"))))) (lexer-read l)))) (defmethod read-number ((l lexer)) @@ -116,5 +116,5 @@ (defmethod lexer-tokens ((l lexer)) (loop :for token := (next-token l) :collect token - :until (= token-eof (_type token)))) + :until (eq :t/eof (_type token)))) diff --git a/src/token.lisp b/src/token.lisp index 7b4f372..f6536ac 100644 --- a/src/token.lisp +++ b/src/token.lisp @@ -1,26 +1,63 @@ (in-package :monkey) (eval-always - (defmacro def-tokens (prefix &body tok-def) - (let ((i 0) - (hash-sym (gensym)) - (body (list))) - (loop :for def :in tok-def - :if (consp def) - :do (let ((sym (concat-symbols prefix (car def)))) - (push `(defparameter ,sym ,i) body) - (push `(setf (gethash ,sym ,hash-sym) ,(cadr def)) body)) - :else - :do (push `(defparameter ,(concat-symbols prefix def) ,i) body) - :do (incf i) - :finally (setf body (reverse body))) - `(progn (defparameter ,hash-sym (make-hash-table)) - ,@body - (defparameter ,(concat-symbols prefix 'count) ,i) - (defun as-token (token-type) - (make-token token-type (or (gethash token-type ,hash-sym) ""))))))) + (defun process-line (def prefix) + (labels ((make-token-symbol (prefix def) + (nth-value 0 (intern (format nil "~a~a" prefix def) "KEYWORD")))) + (cond ((symbolp def) + (make-token-symbol prefix def)) + ((consp def) + (let* ((token-sym (make-token-symbol prefix (car def))) + (hash (cadr def)) + (keyword (and (keywordp (caddr def)) + (equal :kw (caddr def))))) + (values token-sym + (list (cons token-sym hash)) + (if (null keyword) + nil + (list (cons hash token-sym))))))))) -(def-tokens token + (defun make-token-defs (defs prefix) + (labels ((helper (defs members hashes keywords) + (cond ((null defs) + (values members hashes keywords)) + ((consp defs) + (multiple-value-bind (token hash keyword) + (process-line (car defs) prefix) + (helper (cdr defs) + (cons token members) + (append hash hashes) + (append keyword keywords))))))) + (helper defs (list) (list) (list)))) + + (defun def-tokens* (name prefix literal-getter keyword-getter defs) + (let ((hash-sym (gensym "LITERALS")) + (token-type-sym (gensym)) + (keyword-sym (gensym "KEYWORDS")) + (str-sym (gensym))) + (multiple-value-bind (members hashes keywords) (make-token-defs defs prefix) + `(progn (deftype ,name () (quote (member ,@members))) + (defparameter ,hash-sym (make-hash-table)) + (defparameter ,keyword-sym (make-hash-table :test 'equal)) + (defun ,literal-getter (,token-type-sym) + (gethash ,token-type-sym ,hash-sym)) + (defun ,keyword-getter (,str-sym) + (gethash ,str-sym ,keyword-sym)) + ,@(mapcar (lambda (hash) + `(setf (gethash ,(car hash) ,hash-sym) ,(cdr hash))) + hashes) + ,@(mapcar (lambda (kw) + `(Setf (gethash ,(car kw) ,keyword-sym) ,(cdr kw))) + keywords))))) + + (defmacro def-tokens ((&key name prefix literal-getter keyword-getter) + &body defs) + (def-tokens* name prefix literal-getter keyword-getter defs))) + +(def-tokens (:name token-type + :prefix t/ + :literal-getter token->string + :keyword-getter string->token) illegal eof ident @@ -41,21 +78,24 @@ (rparen ")") (lbrace "{") (rbrace "}") - (function "fn") - (let "let") - (true "true") - (false "false") - (if "if") - (else "else") - (return "return")) + (function "fn" :kw) + (let "let" :kw) + (true "true" :kw) + (false "false" :kw) + (if "if" :kw) + (else "else" :kw) + (return "return" :kw)) (defclass token () - ((_type :reader _type :initarg :type) - (literal :reader literal :initarg :literal))) + ((_type :reader _type + :initarg :type + :type token-type) + (literal :reader literal + :initarg :literal))) (defmethod token= ((t1 token) (t2 token)) - (and (= (_type t1) (_type t2)) + (and (eq (_type t1) (_type t2)) (string= (literal t1) (literal t2)))) (defun make-token (type literal) @@ -65,19 +105,12 @@ (print-unreadable-object (token stream :type t :identity t) (princ (literal token) stream))) -(defparameter *keywords* - (let ((kw (make-hash-table))) - (setf (gethash "fn" kw) token-function) - (setf (gethash "let" kw) token-let) - (setf (gethash "true" kw) token-true) - (setf (gethash "false" kw) token-false) - (setf (gethash "if" kw) token-if) - (setf (gethash "else" kw) token-else) - (setf (gethash "return" kw) token-return) - kw)) +(defun as-token (token-type) + (multiple-value-bind (str ok) (token->string token-type) + (make-token token-type (if (null ok) "" str)))) (defun lookup-identifier (str) - (multiple-value-bind (val ok) (gethash str *keywords*) + (multiple-value-bind (val ok) (string->token str) (if (not (null ok)) val - token-ident))) + :t/ident))) diff --git a/src/util.lisp b/src/util.lisp index cf3bc76..5cf28fa 100644 --- a/src/util.lisp +++ b/src/util.lisp @@ -1,8 +1,5 @@ (in-package :monkey) -(defun concat-symbols (sym1 sym2) - (intern (format nil "~a-~a" (symbol-name sym1) (symbol-name sym2)))) - (eval-when (:compile-toplevel :load-toplevel :execute) (defmacro eval-always (&body body) `(eval-when (:compile-toplevel :load-toplevel :execute) -- cgit v1.2.3