summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorriton <riton@riton.home>2025-07-10 00:52:19 +0300
committerriton <riton@riton.home>2025-07-10 00:52:19 +0300
commit64eee7885954e794de60530b140efb152b2a0e8b (patch)
tree3c14e3a24de102e3fe6d6d86556ce10e2d8a9663
parenteeadc1a24395d1de45a09e8df47addf8d7a21a68 (diff)
lexer of monkey language
-rw-r--r--monkey.asd11
-rw-r--r--src/lexer-test.lisp119
-rw-r--r--src/lexer.lisp124
-rw-r--r--src/package.lisp2
-rw-r--r--src/token.lisp83
-rw-r--r--src/util.lisp22
6 files changed, 361 insertions, 0 deletions
diff --git a/monkey.asd b/monkey.asd
new file mode 100644
index 0000000..b885b76
--- /dev/null
+++ b/monkey.asd
@@ -0,0 +1,11 @@
+(defsystem monkey
+ :author "Emre Akan"
+ :description "A Monkey implementation"
+ ;; :depends-on ()
+ :pathname "src/"
+ :components ((:file "package")
+ (:file "util")
+ (:file "token")
+ (:file "lexer")
+ ;; (:file "parser")
+ ))
diff --git a/src/lexer-test.lisp b/src/lexer-test.lisp
new file mode 100644
index 0000000..cf3ba03
--- /dev/null
+++ b/src/lexer-test.lisp
@@ -0,0 +1,119 @@
+(in-package :monkey)
+
+(defun make-tokens (&rest tokens)
+ (loop :for token :in tokens
+ :if (consp token)
+ :collect (make-token (car token) (cadr token))
+ :if (numberp token)
+ :collect (as-token token)))
+
+(defun test-lexer (str expect)
+ (let ((lexer (make-lexer str)))
+ (not (find nil (mapcar #'token= (lexer-tokens lexer) expect)))))
+
+
+(defun test-1 ()
+ (test-lexer "=+(){},;"
+ (make-tokens token-=
+ token-+
+ token-lparen
+ token-rparen
+ token-lbrace
+ token-rbrace
+ token-comma
+ token-semicolon
+ token-eof)))
+
+
+(defun test-2 ()
+ (test-lexer "let five = 5;
+let ten = 10;
+
+let add = fn(x, y) {
+ x + y;
+};
+
+let result = add(five, ten);
+!-/*;
+5 < 10 > 5;
+
+if (5 < 10) {
+ return true;
+} else {
+ return false;
+}
+10 == 10;
+10 != 9;
+"
+ (make-tokens token-let
+ (list token-ident "five")
+ token-=
+ (list token-int "5")
+ token-semicolon
+ token-let
+ (list token-ident "ten")
+ token-=
+ (list token-int "10")
+ token-semicolon
+ token-let
+ (list token-ident "add")
+ token-=
+ token-function
+ token-lparen
+ (list token-ident "x")
+ token-comma
+ (list token-ident "y")
+ token-rparen
+ token-lbrace
+ (list token-ident "x")
+ token-+
+ (list token-ident "y")
+ token-rbrace
+ token-semicolon
+ token-let
+ (list token-ident "result")
+ token-=
+ (list token-ident "add")
+ token-lparen
+ (list token-ident "five")
+ token-comma
+ (list token-ident "ten")
+ token-rparen
+ token-semicolon
+ token-!
+ token--
+ token-/
+ token-*
+ token-semicolon
+ (list token-int "5")
+ token-<
+ (list token-int "10")
+ token->
+ (list token-int "5")
+ token-semicolon
+ (list token-if "if")
+ token-lparen
+ (list token-int "5")
+ token-<
+ (list token-int "10")
+ token-rparen
+ token-lbrace
+ token-return
+ token-true
+ token-semicolon
+ token-rbrace
+ token-else
+ token-lbrace
+ token-return
+ token-false
+ token-semicolon
+ token-rbrace
+ (list token-int "10")
+ token-==
+ (list token-int "10")
+ token-semicolon
+ (list token-int "10")
+ token-!=
+ (list token-int "9")
+ token-semicolon
+ token-eof "")))
diff --git a/src/lexer.lisp b/src/lexer.lisp
new file mode 100644
index 0000000..cd47881
--- /dev/null
+++ b/src/lexer.lisp
@@ -0,0 +1,124 @@
+(in-package :monkey)
+
+(defparameter *eof* (code-char 0))
+
+(defun eof? (char)
+ (char= char *eof*))
+
+(defun digit? (char)
+ (char<= #\0 char #\9))
+
+(defun letter? (char)
+ (or (char<= #\a char #\z)
+ (char<= #\A char #\Z)))
+
+(defun whitespace? (char)
+ (or (char= char #\Space)
+ (char= char #\Tab)
+ (char= char #\Newline)
+ (char= char #\Linefeed)
+ (char= char #\_)))
+
+
+(defparameter *lexer-start-pos* -1)
+
+(defclass lexer ()
+ ((input :reader input :initarg :input)
+ (len :reader len :initarg :length)
+ (pos :accessor pos :initform *lexer-start-pos*)
+ (ch :accessor ch)
+ (peek-ch :accessor peek-ch)))
+
+(defun make-lexer (input-string &optional len)
+ (let ((l (make-instance 'lexer :input input-string
+ :length (if (not (null len))
+ len
+ (length input-string)))))
+ (lexer-peek* l)
+ (lexer-read l)
+ l))
+
+(defmethod lexer-reset ((l lexer))
+ (setf (pos l) *lexer-start-pos*)
+ (lexer-peek* l)
+ (lexer-read l))
+
+(defmethod lexer-read ((l lexer))
+ (prog1 (setf (ch l) (peek-ch l))
+ (incf (pos l))
+ (lexer-peek* l)))
+
+(defmethod lexer-peek* ((l lexer))
+ (setf (peek-ch l)
+ (if (< (+ 1 (pos l)) (len l))
+ (char (input l) (+ 1 (pos l)))
+ *eof*)))
+
+(defmethod lexer-peek ((l lexer))
+ (peek-ch l))
+
+(defmethod skip-ws ((l lexer))
+ (loop :while (whitespace? (ch l))
+ :do (lexer-read l)))
+
+(defmethod next-token ((l lexer))
+ (skip-ws l)
+ (block nil
+ (prog1 (case (ch l)
+ (#\= (cond ((char= #\= (lexer-peek l))
+ (lexer-read l)
+ (make-token token-== "=="))
+ (t ;;(whitespace? (lexer-peek l))
+ (make-token token-= (ch l)))))
+ (#\+ (make-token token-+ (ch l)))
+ (#\- (make-token token-- (ch l)))
+ (#\! (cond ((char= #\= (lexer-peek l))
+ (lexer-read l)
+ (make-token token-!= "!="))
+ (t ;; (whitespace? (lexer-peek l))
+ (make-token token-! (ch l)))))
+ (#\/ (make-token token-/ (ch l)))
+ (#\* (make-token token-* (ch l)))
+ (#\< (make-token token-< (ch l)))
+ (#\> (make-token token-> (ch l)))
+ (#\; (make-token token-semicolon (ch l)))
+ (#\, (make-token token-comma (ch l)))
+ (#\( (make-token token-lparen (ch l)))
+ (#\) (make-token token-rparen (ch l)))
+ (#\{ (make-token token-lbrace (ch l)))
+ (#\} (make-token token-rbrace (ch l)))
+ (otherwise (cond ((eof? (ch l))
+ (make-token token-eof ""))
+ ((letter? (ch l))
+ (let ((literal (read-identifier l)))
+ (return (make-token (lookup-identifier literal)
+ literal))))
+ ((digit? (ch l))
+ (return (make-token token-int (read-number l))))
+ (t (make-token token-illegal "ILLEGAL")))))
+ (lexer-read l))))
+
+(defmethod read-number ((l lexer))
+ (let ((start (pos l)))
+ (lexer-read l)
+ (loop :while (digit? (ch l))
+ :do (lexer-read l))
+ (make-array (- (pos l) start) :element-type 'character
+ :displaced-to (input l)
+ :displaced-index-offset start)))
+
+(defmethod read-identifier ((l lexer))
+ (let ((start (pos l)))
+ (lexer-read l)
+ (loop :while (letter? (ch l))
+ :do (lexer-read l))
+ (make-array (- (pos l) start) :element-type 'character
+ :displaced-to (input l)
+ :displaced-index-offset start)))
+
+
+(defmethod lexer-tokens ((l lexer))
+ (loop :for token := (next-token l)
+ :collect token
+ :until (= token-eof (_type token))))
+
diff --git a/src/package.lisp b/src/package.lisp
new file mode 100644
index 0000000..aa399d9
--- /dev/null
+++ b/src/package.lisp
@@ -0,0 +1,2 @@
+(defpackage :monkey
+ (:use :common-lisp))
diff --git a/src/token.lisp b/src/token.lisp
new file mode 100644
index 0000000..7b4f372
--- /dev/null
+++ b/src/token.lisp
@@ -0,0 +1,83 @@
+(in-package :monkey)
+
+(eval-always
+ (defmacro def-tokens (prefix &body tok-def)
+ (let ((i 0)
+ (hash-sym (gensym))
+ (body (list)))
+ (loop :for def :in tok-def
+ :if (consp def)
+ :do (let ((sym (concat-symbols prefix (car def))))
+ (push `(defparameter ,sym ,i) body)
+ (push `(setf (gethash ,sym ,hash-sym) ,(cadr def)) body))
+ :else
+ :do (push `(defparameter ,(concat-symbols prefix def) ,i) body)
+ :do (incf i)
+ :finally (setf body (reverse body)))
+ `(progn (defparameter ,hash-sym (make-hash-table))
+ ,@body
+ (defparameter ,(concat-symbols prefix 'count) ,i)
+ (defun as-token (token-type)
+ (make-token token-type (or (gethash token-type ,hash-sym) "")))))))
+
+(def-tokens token
+ illegal
+ eof
+ ident
+ int
+ (= "=")
+ (+ "+")
+ (- "-")
+ (! "!")
+ (* "*")
+ (/ "/")
+ (== "==")
+ (!= "!=")
+ (< "<")
+ (> ">")
+ (comma ",")
+ (semicolon ";")
+ (lparen "(")
+ (rparen ")")
+ (lbrace "{")
+ (rbrace "}")
+ (function "fn")
+ (let "let")
+ (true "true")
+ (false "false")
+ (if "if")
+ (else "else")
+ (return "return"))
+
+
+(defclass token ()
+ ((_type :reader _type :initarg :type)
+ (literal :reader literal :initarg :literal)))
+
+(defmethod token= ((t1 token) (t2 token))
+ (and (= (_type t1) (_type t2))
+ (string= (literal t1) (literal t2))))
+
+(defun make-token (type literal)
+ (make-instance 'token :type type :literal (string literal)))
+
+(defmethod print-object ((token token) stream)
+ (print-unreadable-object (token stream :type t :identity t)
+ (princ (literal token) stream)))
+
+(defparameter *keywords*
+ (let ((kw (make-hash-table)))
+ (setf (gethash "fn" kw) token-function)
+ (setf (gethash "let" kw) token-let)
+ (setf (gethash "true" kw) token-true)
+ (setf (gethash "false" kw) token-false)
+ (setf (gethash "if" kw) token-if)
+ (setf (gethash "else" kw) token-else)
+ (setf (gethash "return" kw) token-return)
+ kw))
+
+(defun lookup-identifier (str)
+ (multiple-value-bind (val ok) (gethash str *keywords*)
+ (if (not (null ok))
+ val
+ token-ident)))
diff --git a/src/util.lisp b/src/util.lisp
new file mode 100644
index 0000000..d58cde8
--- /dev/null
+++ b/src/util.lisp
@@ -0,0 +1,22 @@
+(in-package :monkey)
+
+(defun concat-symbols (sym1 sym2)
+ (intern (format nil "~a-~a" (symbol-name sym1) (symbol-name sym2))))
+
+(eval-when (:compile-toplevel :load-toplevel :execute)
+ (defmacro eval-always (&body body)
+ `(eval-when (:compile-toplevel :load-toplevel :execute)
+ ,@body)))
+
+(defun read-file-contents (file-name)
+ (let* ((len 0)
+ (contents
+ (with-output-to-string (out)
+ (with-open-file (in file-name :external-format :utf-8)
+ (loop :with buffer := (make-array 8192 :element-type 'character)
+ :for n := (read-sequence buffer in)
+ :while (< 0 n)
+ :do (incf len n)
+ (write-sequence buffer out :start 0 :end n))))))
+ (values contents len)))
+