From e90d1248920b50e5f8c25ab406a9095e3f6a2358 Mon Sep 17 00:00:00 2001 From: mRnea Date: Fri, 16 Aug 2024 10:09:45 +0300 Subject: changed project name from cl-forth to kurt --- codegen.lisp | 482 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 482 insertions(+) create mode 100644 codegen.lisp (limited to 'codegen.lisp') diff --git a/codegen.lisp b/codegen.lisp new file mode 100644 index 0000000..1e8c364 --- /dev/null +++ b/codegen.lisp @@ -0,0 +1,482 @@ +(in-package :kurt) + +(defparameter *psuedo-identifiers* + '(syscall-1 syscall-2 syscall-3 syscall-4 syscall-5 syscall-6 makro son kütüphane) + "These do not map to operations that generate code directly, but are valid to lexer and parser") + +(defparameter *identifiers* ()) +;; '(+ - dump = ise yoksa yap eş push değiş üst rot düş döngü iken < > +;; bel oku yaz >> << & "|") + +(defun is-identifier (sym) + (or (find sym *identifiers* :test #'string=) + (find sym *psuedo-identifiers* :test #'string=))) + +(eval-always + (defparameter *targets* '(:nasm :c)) + + (defun normalize-op-list (asm-list) + (cons 'list + (mapcar (lambda (el) (cond ((stringp el) el) + ((listp el) `(format nil ,@el)))) + asm-list))) + + (defun defop-format (str space-num asm-list) + (format str + (format nil "~~{~a~~a~~%~~}" + (make-string space-num :initial-element #\Space)) + asm-list)) + + (defun replace-write (out-stream indent forms) + (if (consp forms) + (if (eq :write (car forms)) + `(defop-format ,out-stream ,indent + ,(normalize-op-list (cdr forms))) + (cons (replace-write out-stream indent (car forms)) + (replace-write out-stream indent (cdr forms)))) + forms)) + + (defun add-indent (indent fmt-string) + (format nil "~a~a" + (make-string indent :initial-element #\Space) + fmt-string)) + + (defun split-stack (stack) + (let ((split-num (position '-- stack))) + (values (butlast stack (- (length stack) split-num)) + (nthcdr (+ 1 split-num) stack)))) + + (defun op->string (asm-instruction &key (push? t)) + "asm-instruction is something like (:add rax rbx)" + (destructuring-bind (op arg1 arg2) asm-instruction + (let ((*print-case* :downcase)) + (if (null push?) + (format nil (format nil "~a ~a, ~a" op arg1 arg2)) + (list (format nil "~a ~a, ~a" op arg1 arg2) + (format nil "push ~a" arg1)))))) + + (defun stack->string (stack) + (multiple-value-bind (prev next) + (split-stack stack) + (let ((*print-case* :downcase)) + (append (iter (for sym in (reverse prev)) + (collect (format nil "pop ~a" sym))) + (iter (for form in next) + (cond ((symbolp form) + (appending (list (format nil "push ~a" form)))) + ((listp form) + (appending (op->string form))))))))) + + (defparameter *stack-fn-assoc* '((:= :cmove) + (:> :cmovg) + (:< :cmovl))) + + (defun stack-unextend (stack) + "Turns an extended stack to body of a defop, second part of an extended stack is in the form of (:op arg1 arg2 :then val1 :else val2) which is asserted." + (multiple-value-bind (fst snd) (split-stack stack) + (assert (= 1 (length snd))) + (append (iter (for x in (reverse fst)) + (let ((*print-case* :downcase)) + (collect (format nil "pop ~a" x)))) + (let* ((push-part (car snd)) + (ifs (nthcdr 3 push-part))) + (list `(:mov rdx ,(getf ifs :ise)) + `(:mov rcx ,(getf ifs :değilse)) + `(:cmp ,(second push-part) ,(third push-part)) + `(,(cadr (assoc (car push-part) *stack-fn-assoc*)) + rcx rdx) + (format nil "push rcx")))))) + + (defun syntax-of (form) + (cond ((or (stringp form) + (and (consp form) (stringp (car form)))) + :string) + ((and (listp form) (find '-- form)) + (if (multiple-value-bind (fst snd) (split-stack form) + (declare (ignore fst)) + (and (consp snd) + (consp (car snd)) + (find (caar snd) *stack-fn-assoc* + :key #'car))) + :stack-extended + :stack)) + ((and (listp form) (keywordp (car form))) + :op) + (t :general))) + + (defun group-by-syntax (forms &optional (syntax nil) (cur ()) (acc ())) + (when (null forms) + (return-from group-by-syntax + (cdr (reverse (append (list (cons (syntax-of (car cur)) + (reverse cur))) + acc))))) + (let* ((form (car forms)) + (form-syntax (syntax-of form))) + (cond ((eq syntax form-syntax) + (group-by-syntax (cdr forms) syntax + (cons form cur) acc)) + (t (group-by-syntax (cdr forms) form-syntax + (list form) (append (list (cons syntax + (reverse cur))) + acc)))))) + + (defun expand-nasm-group (group out-stream &key (indent 4)) + (destructuring-bind (syntax-type . forms) group + (case syntax-type + (:stack `((defop-format ,out-stream ,indent + ,(cons 'list (mapcan (lambda (form) (stack->string form)) + forms))))) + (:stack-extended (expand-nasm out-stream indent + (stack-unextend (car forms)))) + (:op `((defop-format ,out-stream ,indent + ,(cons 'list (mapcar (lambda (form) (op->string form :push? nil)) + forms))))) + (:string `((defop-format ,out-stream ,indent + ,(normalize-op-list forms)))) + (:general `((progn ,@(mapcar (lambda (form) (replace-write out-stream indent + form)) + forms))))))) + + (defun expand-nasm (out-stream indent body) + (mapcan #'(lambda (group) (expand-nasm-group group out-stream + :indent indent)) + (group-by-syntax body))) + + (defun expand-c-group (group out-stream &key (indent 4)) + (destructuring-bind (syntax-type . forms) group + (case syntax-type + ((:stack :stack-extended) `((defop-format ,out-stream ,indent + ,(cons 'list + (c-stack->string (car forms)))))) + (:string `((defop-format ,out-stream ,indent + ,(normalize-op-list forms)))) + (:general `((progn ,@(mapcar (lambda (form) (replace-write out-stream indent + form)) + forms))))))) + + (defun expand-c (out-stream indent body) + (mapcan #'(lambda (group) (expand-c-group group out-stream :indent indent)) + (group-by-syntax body))) + + (defun expand-for-target (target out-stream body &optional (indent 4)) + (case target + (:nasm (expand-nasm out-stream indent body)) + (:c (expand-c out-stream indent body)))) + + (defun expand-method (target out-stream indent op-name args body) + (with-gensyms (_op _args _target) + (declare (ignorable _args)) + `(defmethod write-op + ((,_target (eql ,target)) ,out-stream + (,_op (eql ,(intern (string op-name) "KEYWORD"))) + ,_args) + ,@(if (null args) + (expand-for-target target out-stream body indent) + `((destructuring-bind ,args ,_args + ,@(expand-for-target target out-stream body indent))))))) + + ;;hack, because there is no cl in C only nasm + (defun not-cl (sym) + (if (eq 'cl sym) + 'rcx + sym)) + + (defparameter *c-ops* '((:add "+") + (:sub "-") + (:shl "<<") + (:shr ">>") + (:or "|") + (:and "&") + (:< "<") + (:> ">") + (:= "=="))) + + (defun c-stack->string (stack) + (multiple-value-bind (prev next) (split-stack stack) + (let ((*print-case* :downcase)) + (append (iter (for x in (reverse prev)) + (collect (format nil "~a = pop();" x))) + (iter (for x in next) + (if (and (consp x) (assoc (car x) *c-ops*)) + (destructuring-bind (op arg1 arg2 . conds) x + (let ((opstr (cadr (assoc op *c-ops*)))) + (if (null conds) + (collect (format nil "push(~a ~a ~a);" + arg1 opstr (not-cl arg2))) + (collect (format nil + "push((~a ~a ~a) ? ~a : ~a);" + arg1 opstr (not-cl arg2) + (getf conds :ise) + (getf conds :değilse)))))) + (collect (format nil "push(~a);" x))))))))) + +(defun comment-safe-str (str) + "Handle newlines for comment" + (with-output-to-string (new-str) + (iter (for ch in-string str with-index i) + (cond ((> i 10) + (princ "..." new-str) + (finish)) + ((char= #\Newline ch) + (princ "\\n" new-str)) + (t (write-char ch new-str)))))) + +(defgeneric write-op (target stream op args) + (:documentation "Generate code for OP and ARGS and write it to STREAM according to the TARGET. WRITE-OP methods are defined by the DEFOP macro.") + (:method :before ((target (eql :nasm)) stream op args) + (format stream " ;; -- ~s --~%" + (mapcar (lambda (x) (if (stringp x) (comment-safe-str x) x)) + (cons op args)))) + (:method :before ((target (eql :c)) stream op args) + (format stream "~% /* ~s */~%" + ;; comment-safe is probably not necessary with multiline comments + (mapcar (lambda (x) (if (stringp x) (comment-safe-str x) x)) + (cons op args))))) + +(defmacro defop (op-name+args (&key (indent 4) (lex t) (targets *targets*) + (as nil)) + &body body) + (declare (ignorable indent)) + (with-gensyms (out-stream) + (destructuring-bind (op-name . args) (mklist op-name+args) + `(progn + ,@(unless (null lex) + `((push ',op-name *identifiers*))) + ,@(iter (for target in (mklist targets)) + (collect (expand-method target out-stream indent + (if (null as) op-name as) + args body))))))) + +;;; TODO: Turn stack operation comments to DEFOP option, +;;; which then can be used by the user as a documentation +;;; DONE: Better yet, generate the asm code directly from +;;; the stack op documentation (this seems easily doable) +;;; Hopefully these two are done, need testing... + +(defop + () + (rbx rax -- (:add rax rbx))) + +(defop - () + (rbx rax -- (:sub rbx rax))) + +(defop = () + (rbx rax -- (:= rbx rax :ise 1 :değilse 0))) + +(defop eş () + (rax -- rax rax)) + +(defop düş () + (rax -- )) + +(defop < () + (rax rbx -- (:< rax rbx :ise 1 :değilse 0))) + +(defop > () + (rax rbx -- (:> rax rbx :ise 1 :değilse 0))) + +(defop üst () + (rbx rax -- rbx rax rbx)) + +(defop rot () + (rcx rbx rax -- rbx rax rcx)) + +(defop değiş () + (rbx rax -- rax rbx)) + +(defop << () + (rbx rcx -- (:shl rbx cl))) + +(defop >> () + (rbx rcx -- (:shr rbx cl))) + +(defop "|" (:as pipe) + (rbx rax -- (:or rbx rax))) + +(defop & () + (rbx rax -- (:and rbx rax))) + + + +;;; NASM operations +(defop bel (:targets :nasm) + ( -- bel)) + +(defop oku (:targets :nasm) + (rax -- ) + (:xor rbx rbx) + (:mov bl [rax]) + ( -- rbx)) + +(defop yaz (:targets :nasm) + (rax rbx -- ) + (:mov [rax] bl)) + +;; ( -- a) +(defop (push-int a) (:lex nil :targets :nasm) + ("push ~d" a)) + +(defop (push-str len addr str) (:lex nil :targets :nasm) + (progn (:write ("push ~d" len) + ("push str_~d" addr)) + (list :string addr str))) + +(defop dump (:targets :nasm) + "pop rdi" + "call dump") + +(defop (exit code) (:lex nil :targets :nasm) + "mov rax, 60" + ("mov rdi, ~a" code) + "syscall") + +(defop (ise label-num) (:targets :nasm) + "pop rax" + "test rax, rax" + ("jz et_~a" label-num)) + +(defop (yoksa yap-num ise-num) (:indent 0 :targets :nasm) + (" jmp et_~a" yap-num) + ("et_~a:" ise-num)) + +(defop (yap label-num &optional döngü-num) (:indent 0 :targets :nasm) + (if (null döngü-num) + (:write ("et_~a:" label-num)) + (:write (" jmp et_~a" döngü-num) + ("et_~a:" label-num)))) + +(defop (iken label-num) (:targets :nasm) + "pop rax" + "test rax, rax" + ("jz et_~a" label-num)) + +(defop (döngü label-num) (:indent 0 :targets :nasm) + ("et_~a:" label-num)) + +(defop (syscall num) (:lex nil :targets :nasm) + (iter (with call-regs = #("rdi" "rsi" "rdx" "r10" "r8" "r9")) + (initially (:write "pop rax")) + (for i from (- num 1) downto 0) + (:write ("pop ~a" (aref call-regs i))) + (finally (:write "syscall")))) + +(defun gen-dump (str) + (format str "~{~a~%~}" + '("dump:" + " mov r9, -3689348814741910323" + " sub rsp, 40" + " mov BYTE [rsp+31], 10" + " lea rcx, [rsp+30]" + ".L2:" + " mov rax, rdi" + " lea r8, [rsp+32]" + " mul r9" + " mov rax, rdi" + " sub r8, rcx" + " shr rdx, 3" + " lea rsi, [rdx+rdx*4]" + " add rsi, rsi" + " sub rax, rsi" + " add eax, 48" + " mov BYTE [rcx], al" + " mov rax, rdi" + " mov rdi, rdx" + " mov rdx, rcx" + " sub rcx, 1" + " cmp rax, 9" + " ja .L2" + " lea rax, [rsp+32]" + " mov edi, 1" + " sub rdx, rax" + " xor eax, eax" + " lea rsi, [rsp+32+rdx]" + " mov rdx, r8" + " mov rax, 1" + " syscall" + " add rsp, 40" + " ret"))) + + +;;; C operations +(defop (push-int a) (:lex nil :targets :c) + ("push(~d);" a)) + +;; (defop (push-str a) (:lex nil :targets :c) +;; ("push(&stack, ~d);" a)) + +(defop dump (:targets :c) + ("printf(\"%d\\n\", pop());")) + +(defop (ise label-num) (:targets :c) + "rax = pop();" + ("if(!rax){ goto et_~a; }" label-num)) + +(defop (yoksa yap-num ise-num) (:indent 0 :targets :c) + (" goto et_~a;" yap-num) + ("et_~a:" ise-num)) + +(defop (yap label-num &optional döngü-num) (:indent 0 :targets :c) + (if (null döngü-num) + (:write ("et_~a:" label-num)) + (:write (" goto et_~a;" döngü-num) + ("et_~a:" label-num)))) + +(defop (iken label-num) (:targets :c) + "rax = pop();" + ("if(!rax){ goto et_~a; }" label-num)) + +(defop (döngü label-num) (:indent 0 :targets :c) + ("et_~a:" label-num)) + +(defop bel (:targets :c) + "push((uintptr_t) bel);") + +(defop oku (:targets :c) + "push(*((char*) pop()));") + +(defop yaz (:targets :c) + "rax = pop();" + "*((char*) pop()) = rax;") + +(defop (syscall num) (:lex nil :targets :c) + (iter (with call-regs = #("rdi" "rsi" "rdx" "r10" "r8" "r9")) + (initially (:write "rax = pop();")) + (for i from (- num 1) downto 0) + (:write ("~a = pop();" (aref call-regs i))) + (collect (aref call-regs i) into used-regs) + (finally (:write ("syscall(rax~{, ~a~});" (reverse used-regs)))))) + +(defun gen-c-stack (stream) + (format stream "~{~a~%~}" + '("#include " + "#include " + "" + "struct Stack {" + " uintptr_t content[1000000];" + " int i;" + "};" + "" + "typedef struct Stack Stack;" + "" + "Stack stack = { .i = 0 };" + "" + "void push(uintptr_t val){" + " stack.content[stack.i] = val;" + " stack.i += 1;" + "}" + "" + "uintptr_t pop(){" + " stack.i -= 1;" + " return stack.content[stack.i];" + "}" + "" + "uintptr_t rax, rbx, rcx, rdi, rsi, rdx, r10, r8, r9;" + "char bel[640000];" + ""))) + +(defmacro with-c-fn ((ret name) args out &body body) + `(let ((*print-case* :downcase)) + (format ,out "~a ~a(~{~a ~a~^, ~}){~%" ',ret ',name ',args) + ,@body + (format ,out "~&}~%"))) + + -- cgit v1.2.3