summaryrefslogtreecommitdiff
path: root/codegen.lisp
diff options
context:
space:
mode:
authormRnea <akannemre@gmail.com>2024-08-16 10:09:45 +0300
committermRnea <akannemre@gmail.com>2024-08-16 10:09:45 +0300
commite90d1248920b50e5f8c25ab406a9095e3f6a2358 (patch)
tree298141c8cfabc515c37d2dd7996d8590b711698b /codegen.lisp
parent68947d00aa6666d6e4daed6a0f75009c9bf3048d (diff)
changed project name from cl-forth to kurt
Diffstat (limited to 'codegen.lisp')
-rw-r--r--codegen.lisp482
1 files changed, 482 insertions, 0 deletions
diff --git a/codegen.lisp b/codegen.lisp
new file mode 100644
index 0000000..1e8c364
--- /dev/null
+++ b/codegen.lisp
@@ -0,0 +1,482 @@
+(in-package :kurt)
+
+(defparameter *psuedo-identifiers*
+ '(syscall-1 syscall-2 syscall-3 syscall-4 syscall-5 syscall-6 makro son kütüphane)
+ "These do not map to operations that generate code directly, but are valid to lexer and parser")
+
+(defparameter *identifiers* ())
+;; '(+ - dump = ise yoksa yap eş push değiş üst rot düş döngü iken < >
+;; bel oku yaz >> << & "|")
+
+(defun is-identifier (sym)
+ (or (find sym *identifiers* :test #'string=)
+ (find sym *psuedo-identifiers* :test #'string=)))
+
+(eval-always
+ (defparameter *targets* '(:nasm :c))
+
+ (defun normalize-op-list (asm-list)
+ (cons 'list
+ (mapcar (lambda (el) (cond ((stringp el) el)
+ ((listp el) `(format nil ,@el))))
+ asm-list)))
+
+ (defun defop-format (str space-num asm-list)
+ (format str
+ (format nil "~~{~a~~a~~%~~}"
+ (make-string space-num :initial-element #\Space))
+ asm-list))
+
+ (defun replace-write (out-stream indent forms)
+ (if (consp forms)
+ (if (eq :write (car forms))
+ `(defop-format ,out-stream ,indent
+ ,(normalize-op-list (cdr forms)))
+ (cons (replace-write out-stream indent (car forms))
+ (replace-write out-stream indent (cdr forms))))
+ forms))
+
+ (defun add-indent (indent fmt-string)
+ (format nil "~a~a"
+ (make-string indent :initial-element #\Space)
+ fmt-string))
+
+ (defun split-stack (stack)
+ (let ((split-num (position '-- stack)))
+ (values (butlast stack (- (length stack) split-num))
+ (nthcdr (+ 1 split-num) stack))))
+
+ (defun op->string (asm-instruction &key (push? t))
+ "asm-instruction is something like (:add rax rbx)"
+ (destructuring-bind (op arg1 arg2) asm-instruction
+ (let ((*print-case* :downcase))
+ (if (null push?)
+ (format nil (format nil "~a ~a, ~a" op arg1 arg2))
+ (list (format nil "~a ~a, ~a" op arg1 arg2)
+ (format nil "push ~a" arg1))))))
+
+ (defun stack->string (stack)
+ (multiple-value-bind (prev next)
+ (split-stack stack)
+ (let ((*print-case* :downcase))
+ (append (iter (for sym in (reverse prev))
+ (collect (format nil "pop ~a" sym)))
+ (iter (for form in next)
+ (cond ((symbolp form)
+ (appending (list (format nil "push ~a" form))))
+ ((listp form)
+ (appending (op->string form)))))))))
+
+ (defparameter *stack-fn-assoc* '((:= :cmove)
+ (:> :cmovg)
+ (:< :cmovl)))
+
+ (defun stack-unextend (stack)
+ "Turns an extended stack to body of a defop, second part of an extended stack is in the form of (:op arg1 arg2 :then val1 :else val2) which is asserted."
+ (multiple-value-bind (fst snd) (split-stack stack)
+ (assert (= 1 (length snd)))
+ (append (iter (for x in (reverse fst))
+ (let ((*print-case* :downcase))
+ (collect (format nil "pop ~a" x))))
+ (let* ((push-part (car snd))
+ (ifs (nthcdr 3 push-part)))
+ (list `(:mov rdx ,(getf ifs :ise))
+ `(:mov rcx ,(getf ifs :değilse))
+ `(:cmp ,(second push-part) ,(third push-part))
+ `(,(cadr (assoc (car push-part) *stack-fn-assoc*))
+ rcx rdx)
+ (format nil "push rcx"))))))
+
+ (defun syntax-of (form)
+ (cond ((or (stringp form)
+ (and (consp form) (stringp (car form))))
+ :string)
+ ((and (listp form) (find '-- form))
+ (if (multiple-value-bind (fst snd) (split-stack form)
+ (declare (ignore fst))
+ (and (consp snd)
+ (consp (car snd))
+ (find (caar snd) *stack-fn-assoc*
+ :key #'car)))
+ :stack-extended
+ :stack))
+ ((and (listp form) (keywordp (car form)))
+ :op)
+ (t :general)))
+
+ (defun group-by-syntax (forms &optional (syntax nil) (cur ()) (acc ()))
+ (when (null forms)
+ (return-from group-by-syntax
+ (cdr (reverse (append (list (cons (syntax-of (car cur))
+ (reverse cur)))
+ acc)))))
+ (let* ((form (car forms))
+ (form-syntax (syntax-of form)))
+ (cond ((eq syntax form-syntax)
+ (group-by-syntax (cdr forms) syntax
+ (cons form cur) acc))
+ (t (group-by-syntax (cdr forms) form-syntax
+ (list form) (append (list (cons syntax
+ (reverse cur)))
+ acc))))))
+
+ (defun expand-nasm-group (group out-stream &key (indent 4))
+ (destructuring-bind (syntax-type . forms) group
+ (case syntax-type
+ (:stack `((defop-format ,out-stream ,indent
+ ,(cons 'list (mapcan (lambda (form) (stack->string form))
+ forms)))))
+ (:stack-extended (expand-nasm out-stream indent
+ (stack-unextend (car forms))))
+ (:op `((defop-format ,out-stream ,indent
+ ,(cons 'list (mapcar (lambda (form) (op->string form :push? nil))
+ forms)))))
+ (:string `((defop-format ,out-stream ,indent
+ ,(normalize-op-list forms))))
+ (:general `((progn ,@(mapcar (lambda (form) (replace-write out-stream indent
+ form))
+ forms)))))))
+
+ (defun expand-nasm (out-stream indent body)
+ (mapcan #'(lambda (group) (expand-nasm-group group out-stream
+ :indent indent))
+ (group-by-syntax body)))
+
+ (defun expand-c-group (group out-stream &key (indent 4))
+ (destructuring-bind (syntax-type . forms) group
+ (case syntax-type
+ ((:stack :stack-extended) `((defop-format ,out-stream ,indent
+ ,(cons 'list
+ (c-stack->string (car forms))))))
+ (:string `((defop-format ,out-stream ,indent
+ ,(normalize-op-list forms))))
+ (:general `((progn ,@(mapcar (lambda (form) (replace-write out-stream indent
+ form))
+ forms)))))))
+
+ (defun expand-c (out-stream indent body)
+ (mapcan #'(lambda (group) (expand-c-group group out-stream :indent indent))
+ (group-by-syntax body)))
+
+ (defun expand-for-target (target out-stream body &optional (indent 4))
+ (case target
+ (:nasm (expand-nasm out-stream indent body))
+ (:c (expand-c out-stream indent body))))
+
+ (defun expand-method (target out-stream indent op-name args body)
+ (with-gensyms (_op _args _target)
+ (declare (ignorable _args))
+ `(defmethod write-op
+ ((,_target (eql ,target)) ,out-stream
+ (,_op (eql ,(intern (string op-name) "KEYWORD")))
+ ,_args)
+ ,@(if (null args)
+ (expand-for-target target out-stream body indent)
+ `((destructuring-bind ,args ,_args
+ ,@(expand-for-target target out-stream body indent)))))))
+
+ ;;hack, because there is no cl in C only nasm
+ (defun not-cl (sym)
+ (if (eq 'cl sym)
+ 'rcx
+ sym))
+
+ (defparameter *c-ops* '((:add "+")
+ (:sub "-")
+ (:shl "<<")
+ (:shr ">>")
+ (:or "|")
+ (:and "&")
+ (:< "<")
+ (:> ">")
+ (:= "==")))
+
+ (defun c-stack->string (stack)
+ (multiple-value-bind (prev next) (split-stack stack)
+ (let ((*print-case* :downcase))
+ (append (iter (for x in (reverse prev))
+ (collect (format nil "~a = pop();" x)))
+ (iter (for x in next)
+ (if (and (consp x) (assoc (car x) *c-ops*))
+ (destructuring-bind (op arg1 arg2 . conds) x
+ (let ((opstr (cadr (assoc op *c-ops*))))
+ (if (null conds)
+ (collect (format nil "push(~a ~a ~a);"
+ arg1 opstr (not-cl arg2)))
+ (collect (format nil
+ "push((~a ~a ~a) ? ~a : ~a);"
+ arg1 opstr (not-cl arg2)
+ (getf conds :ise)
+ (getf conds :değilse))))))
+ (collect (format nil "push(~a);" x)))))))))
+
+(defun comment-safe-str (str)
+ "Handle newlines for comment"
+ (with-output-to-string (new-str)
+ (iter (for ch in-string str with-index i)
+ (cond ((> i 10)
+ (princ "..." new-str)
+ (finish))
+ ((char= #\Newline ch)
+ (princ "\\n" new-str))
+ (t (write-char ch new-str))))))
+
+(defgeneric write-op (target stream op args)
+ (:documentation "Generate code for OP and ARGS and write it to STREAM according to the TARGET. WRITE-OP methods are defined by the DEFOP macro.")
+ (:method :before ((target (eql :nasm)) stream op args)
+ (format stream " ;; -- ~s --~%"
+ (mapcar (lambda (x) (if (stringp x) (comment-safe-str x) x))
+ (cons op args))))
+ (:method :before ((target (eql :c)) stream op args)
+ (format stream "~% /* ~s */~%"
+ ;; comment-safe is probably not necessary with multiline comments
+ (mapcar (lambda (x) (if (stringp x) (comment-safe-str x) x))
+ (cons op args)))))
+
+(defmacro defop (op-name+args (&key (indent 4) (lex t) (targets *targets*)
+ (as nil))
+ &body body)
+ (declare (ignorable indent))
+ (with-gensyms (out-stream)
+ (destructuring-bind (op-name . args) (mklist op-name+args)
+ `(progn
+ ,@(unless (null lex)
+ `((push ',op-name *identifiers*)))
+ ,@(iter (for target in (mklist targets))
+ (collect (expand-method target out-stream indent
+ (if (null as) op-name as)
+ args body)))))))
+
+;;; TODO: Turn stack operation comments to DEFOP option,
+;;; which then can be used by the user as a documentation
+;;; DONE: Better yet, generate the asm code directly from
+;;; the stack op documentation (this seems easily doable)
+;;; Hopefully these two are done, need testing...
+
+(defop + ()
+ (rbx rax -- (:add rax rbx)))
+
+(defop - ()
+ (rbx rax -- (:sub rbx rax)))
+
+(defop = ()
+ (rbx rax -- (:= rbx rax :ise 1 :değilse 0)))
+
+(defop eş ()
+ (rax -- rax rax))
+
+(defop düş ()
+ (rax -- ))
+
+(defop < ()
+ (rax rbx -- (:< rax rbx :ise 1 :değilse 0)))
+
+(defop > ()
+ (rax rbx -- (:> rax rbx :ise 1 :değilse 0)))
+
+(defop üst ()
+ (rbx rax -- rbx rax rbx))
+
+(defop rot ()
+ (rcx rbx rax -- rbx rax rcx))
+
+(defop değiş ()
+ (rbx rax -- rax rbx))
+
+(defop << ()
+ (rbx rcx -- (:shl rbx cl)))
+
+(defop >> ()
+ (rbx rcx -- (:shr rbx cl)))
+
+(defop "|" (:as pipe)
+ (rbx rax -- (:or rbx rax)))
+
+(defop & ()
+ (rbx rax -- (:and rbx rax)))
+
+
+
+;;; NASM operations
+(defop bel (:targets :nasm)
+ ( -- bel))
+
+(defop oku (:targets :nasm)
+ (rax -- )
+ (:xor rbx rbx)
+ (:mov bl [rax])
+ ( -- rbx))
+
+(defop yaz (:targets :nasm)
+ (rax rbx -- )
+ (:mov [rax] bl))
+
+;; ( -- a)
+(defop (push-int a) (:lex nil :targets :nasm)
+ ("push ~d" a))
+
+(defop (push-str len addr str) (:lex nil :targets :nasm)
+ (progn (:write ("push ~d" len)
+ ("push str_~d" addr))
+ (list :string addr str)))
+
+(defop dump (:targets :nasm)
+ "pop rdi"
+ "call dump")
+
+(defop (exit code) (:lex nil :targets :nasm)
+ "mov rax, 60"
+ ("mov rdi, ~a" code)
+ "syscall")
+
+(defop (ise label-num) (:targets :nasm)
+ "pop rax"
+ "test rax, rax"
+ ("jz et_~a" label-num))
+
+(defop (yoksa yap-num ise-num) (:indent 0 :targets :nasm)
+ (" jmp et_~a" yap-num)
+ ("et_~a:" ise-num))
+
+(defop (yap label-num &optional döngü-num) (:indent 0 :targets :nasm)
+ (if (null döngü-num)
+ (:write ("et_~a:" label-num))
+ (:write (" jmp et_~a" döngü-num)
+ ("et_~a:" label-num))))
+
+(defop (iken label-num) (:targets :nasm)
+ "pop rax"
+ "test rax, rax"
+ ("jz et_~a" label-num))
+
+(defop (döngü label-num) (:indent 0 :targets :nasm)
+ ("et_~a:" label-num))
+
+(defop (syscall num) (:lex nil :targets :nasm)
+ (iter (with call-regs = #("rdi" "rsi" "rdx" "r10" "r8" "r9"))
+ (initially (:write "pop rax"))
+ (for i from (- num 1) downto 0)
+ (:write ("pop ~a" (aref call-regs i)))
+ (finally (:write "syscall"))))
+
+(defun gen-dump (str)
+ (format str "~{~a~%~}"
+ '("dump:"
+ " mov r9, -3689348814741910323"
+ " sub rsp, 40"
+ " mov BYTE [rsp+31], 10"
+ " lea rcx, [rsp+30]"
+ ".L2:"
+ " mov rax, rdi"
+ " lea r8, [rsp+32]"
+ " mul r9"
+ " mov rax, rdi"
+ " sub r8, rcx"
+ " shr rdx, 3"
+ " lea rsi, [rdx+rdx*4]"
+ " add rsi, rsi"
+ " sub rax, rsi"
+ " add eax, 48"
+ " mov BYTE [rcx], al"
+ " mov rax, rdi"
+ " mov rdi, rdx"
+ " mov rdx, rcx"
+ " sub rcx, 1"
+ " cmp rax, 9"
+ " ja .L2"
+ " lea rax, [rsp+32]"
+ " mov edi, 1"
+ " sub rdx, rax"
+ " xor eax, eax"
+ " lea rsi, [rsp+32+rdx]"
+ " mov rdx, r8"
+ " mov rax, 1"
+ " syscall"
+ " add rsp, 40"
+ " ret")))
+
+
+;;; C operations
+(defop (push-int a) (:lex nil :targets :c)
+ ("push(~d);" a))
+
+;; (defop (push-str a) (:lex nil :targets :c)
+;; ("push(&stack, ~d);" a))
+
+(defop dump (:targets :c)
+ ("printf(\"%d\\n\", pop());"))
+
+(defop (ise label-num) (:targets :c)
+ "rax = pop();"
+ ("if(!rax){ goto et_~a; }" label-num))
+
+(defop (yoksa yap-num ise-num) (:indent 0 :targets :c)
+ (" goto et_~a;" yap-num)
+ ("et_~a:" ise-num))
+
+(defop (yap label-num &optional döngü-num) (:indent 0 :targets :c)
+ (if (null döngü-num)
+ (:write ("et_~a:" label-num))
+ (:write (" goto et_~a;" döngü-num)
+ ("et_~a:" label-num))))
+
+(defop (iken label-num) (:targets :c)
+ "rax = pop();"
+ ("if(!rax){ goto et_~a; }" label-num))
+
+(defop (döngü label-num) (:indent 0 :targets :c)
+ ("et_~a:" label-num))
+
+(defop bel (:targets :c)
+ "push((uintptr_t) bel);")
+
+(defop oku (:targets :c)
+ "push(*((char*) pop()));")
+
+(defop yaz (:targets :c)
+ "rax = pop();"
+ "*((char*) pop()) = rax;")
+
+(defop (syscall num) (:lex nil :targets :c)
+ (iter (with call-regs = #("rdi" "rsi" "rdx" "r10" "r8" "r9"))
+ (initially (:write "rax = pop();"))
+ (for i from (- num 1) downto 0)
+ (:write ("~a = pop();" (aref call-regs i)))
+ (collect (aref call-regs i) into used-regs)
+ (finally (:write ("syscall(rax~{, ~a~});" (reverse used-regs))))))
+
+(defun gen-c-stack (stream)
+ (format stream "~{~a~%~}"
+ '("#include <stdio.h>"
+ "#include <stdint.h>"
+ ""
+ "struct Stack {"
+ " uintptr_t content[1000000];"
+ " int i;"
+ "};"
+ ""
+ "typedef struct Stack Stack;"
+ ""
+ "Stack stack = { .i = 0 };"
+ ""
+ "void push(uintptr_t val){"
+ " stack.content[stack.i] = val;"
+ " stack.i += 1;"
+ "}"
+ ""
+ "uintptr_t pop(){"
+ " stack.i -= 1;"
+ " return stack.content[stack.i];"
+ "}"
+ ""
+ "uintptr_t rax, rbx, rcx, rdi, rsi, rdx, r10, r8, r9;"
+ "char bel[640000];"
+ "")))
+
+(defmacro with-c-fn ((ret name) args out &body body)
+ `(let ((*print-case* :downcase))
+ (format ,out "~a ~a(~{~a ~a~^, ~}){~%" ',ret ',name ',args)
+ ,@body
+ (format ,out "~&}~%")))
+
+