Files
kurt/codegen.lisp

495 lines
17 KiB
Common Lisp

(in-package :kurt)
(defparameter *psuedo-identifiers*
'(syscall-1 syscall-2 syscall-3 syscall-4 syscall-5 syscall-6 makro son kütüphane)
"These do not map to operations that generate code directly, but are valid to lexer and parser")
(defparameter *identifiers* ())
;; '(+ - dump = ise yoksa yap eş push değiş üst rot düş döngü iken < >
;; bel oku yaz >> << & "|")
(defun is-identifier (sym)
(or (find sym *identifiers* :test #'string=)
(find sym *psuedo-identifiers* :test #'string=)))
(eval-always
(defparameter *targets* '(:nasm :c))
(defun normalize-op-list (asm-list)
(cons 'list
(mapcar (lambda (el) (cond ((stringp el) el)
((listp el) `(format nil ,@el))))
asm-list)))
(defun defop-format (str space-num asm-list)
(format str
(format nil "~~{~a~~a~~%~~}"
(make-string space-num :initial-element #\Space))
asm-list))
(defun replace-write (out-stream indent forms)
(if (consp forms)
(if (eq :write (car forms))
`(defop-format ,out-stream ,indent
,(normalize-op-list (cdr forms)))
(cons (replace-write out-stream indent (car forms))
(replace-write out-stream indent (cdr forms))))
forms))
(defun add-indent (indent fmt-string)
(format nil "~a~a"
(make-string indent :initial-element #\Space)
fmt-string))
(defun split-stack (stack)
(let ((split-num (position '-- stack)))
(values (butlast stack (- (length stack) split-num))
(nthcdr (+ 1 split-num) stack))))
(defun op->string (asm-instruction &key (push? t))
"asm-instruction is something like (:add rax rbx)"
(destructuring-bind (op arg1 arg2) asm-instruction
(let ((*print-case* :downcase))
(if (null push?)
(format nil (format nil "~a ~a, ~a" op arg1 arg2))
(list (format nil "~a ~a, ~a" op arg1 arg2)
(format nil "push ~a" arg1))))))
(defun stack->string (stack)
(multiple-value-bind (prev next)
(split-stack stack)
(let ((*print-case* :downcase))
(append (iter (for sym in (reverse prev))
(collect (format nil "pop ~a" sym)))
(iter (for form in next)
(cond ((symbolp form)
(appending (list (format nil "push ~a" form))))
((listp form)
(appending (op->string form)))))))))
(defparameter *stack-fn-assoc* '((:= :cmove)
(:> :cmovg)
(:< :cmovl)))
(defun stack-unextend (stack)
"Turns an extended stack to body of a defop, second part of an extended stack is in the form of (:op arg1 arg2 :then val1 :else val2) which is asserted."
(multiple-value-bind (fst snd) (split-stack stack)
(assert (= 1 (length snd)))
(append (iter (for x in (reverse fst))
(let ((*print-case* :downcase))
(collect (format nil "pop ~a" x))))
(let* ((push-part (car snd))
(ifs (nthcdr 3 push-part)))
(list `(:mov rdx ,(getf ifs :ise))
`(:mov rcx ,(getf ifs :değilse))
`(:cmp ,(second push-part) ,(third push-part))
`(,(cadr (assoc (car push-part) *stack-fn-assoc*))
rcx rdx)
(format nil "push rcx"))))))
(defun syntax-of (form)
(cond ((or (stringp form)
(and (consp form) (stringp (car form))))
:string)
((and (listp form) (find '-- form))
(if (multiple-value-bind (fst snd) (split-stack form)
(declare (ignore fst))
(and (consp snd)
(consp (car snd))
(find (caar snd) *stack-fn-assoc*
:key #'car)))
:stack-extended
:stack))
((and (listp form) (keywordp (car form)))
:op)
(t :general)))
(defun group-by-syntax (forms &optional (syntax nil) (cur ()) (acc ()))
(when (null forms)
(return-from group-by-syntax
(cdr (reverse (append (list (cons (syntax-of (car cur))
(reverse cur)))
acc)))))
(let* ((form (car forms))
(form-syntax (syntax-of form)))
(cond ((eq syntax form-syntax)
(group-by-syntax (cdr forms) syntax
(cons form cur) acc))
(t (group-by-syntax (cdr forms) form-syntax
(list form) (append (list (cons syntax
(reverse cur)))
acc))))))
(defun expand-nasm-group (group out-stream &key (indent 4))
(destructuring-bind (syntax-type . forms) group
(case syntax-type
(:stack `((defop-format ,out-stream ,indent
,(cons 'list (mapcan (lambda (form) (stack->string form))
forms)))))
(:stack-extended (expand-nasm out-stream indent
(stack-unextend (car forms))))
(:op `((defop-format ,out-stream ,indent
,(cons 'list (mapcar (lambda (form) (op->string form :push? nil))
forms)))))
(:string `((defop-format ,out-stream ,indent
,(normalize-op-list forms))))
(:general `((progn ,@(mapcar (lambda (form) (replace-write out-stream indent
form))
forms)))))))
(defun expand-nasm (out-stream indent body)
(mapcan #'(lambda (group) (expand-nasm-group group out-stream
:indent indent))
(group-by-syntax body)))
(defun expand-c-group (group out-stream &key (indent 4))
(destructuring-bind (syntax-type . forms) group
(case syntax-type
((:stack :stack-extended) `((defop-format ,out-stream ,indent
,(cons 'list
(c-stack->string (car forms))))))
(:string `((defop-format ,out-stream ,indent
,(normalize-op-list forms))))
(:general `((progn ,@(mapcar (lambda (form) (replace-write out-stream indent
form))
forms)))))))
(defun expand-c (out-stream indent body)
(mapcan #'(lambda (group) (expand-c-group group out-stream :indent indent))
(group-by-syntax body)))
(defun expand-for-target (target out-stream body &optional (indent 4))
(case target
(:nasm (expand-nasm out-stream indent body))
(:c (expand-c out-stream indent body))))
(defun expand-method (target out-stream indent op-name args body)
(with-gensyms (_op _args _target)
(declare (ignorable _args))
`(defmethod write-op
((,_target (eql ,target)) ,out-stream
(,_op (eql ,(intern (string op-name) "KEYWORD")))
,_args)
,@(if (null args)
(expand-for-target target out-stream body indent)
`((destructuring-bind ,args ,_args
,@(expand-for-target target out-stream body indent)))))))
;;hack, because there is no cl in C only nasm
(defun not-cl (sym)
(if (eq 'cl sym)
'rcx
sym))
(defparameter *c-ops* '((:add "+")
(:sub "-")
(:shl "<<")
(:shr ">>")
(:or "|")
(:and "&")
(:< "<")
(:> ">")
(:= "==")))
(defun c-stack->string (stack)
(multiple-value-bind (prev next) (split-stack stack)
(let ((*print-case* :downcase))
(append (iter (for x in (reverse prev))
(collect (format nil "~a = pop();" x)))
(iter (for x in next)
(if (and (consp x) (assoc (car x) *c-ops*))
(destructuring-bind (op arg1 arg2 . conds) x
(let ((opstr (cadr (assoc op *c-ops*))))
(if (null conds)
(collect (format nil "push(~a ~a ~a);"
arg1 opstr (not-cl arg2)))
(collect (format nil
"push((~a ~a ~a) ? ~a : ~a);"
arg1 opstr (not-cl arg2)
(getf conds :ise)
(getf conds :değilse))))))
(collect (format nil "push(~a);" x)))))))))
(defun comment-safe-str (str)
"Handle newlines for comment"
(with-output-to-string (new-str)
(iter (for ch in-string str with-index i)
(cond ((> i 10)
(princ "..." new-str)
(finish))
((char= #\Newline ch)
(princ "\\n" new-str))
(t (write-char ch new-str))))))
(defgeneric write-op (target stream op args)
(:documentation "Generate code for OP and ARGS and write it to STREAM according to the TARGET. WRITE-OP methods are defined by the DEFOP macro.")
(:method :before ((target (eql :nasm)) stream op args)
(format stream " ;; -- ~s --~%"
(mapcar (lambda (x) (if (stringp x) (comment-safe-str x) x))
(cons op args))))
(:method :before ((target (eql :c)) stream op args)
(format stream "~% /* ~s */~%"
;; comment-safe is probably not necessary with multiline comments
(mapcar (lambda (x) (if (stringp x) (comment-safe-str x) x))
(cons op args)))))
(defmacro defop (op-name+args (&key (indent 4) (lex t) (targets *targets*)
(as nil))
&body body)
(declare (ignorable indent))
(with-gensyms (out-stream)
(destructuring-bind (op-name . args) (mklist op-name+args)
`(progn
,@(unless (null lex)
`((push ',op-name *identifiers*)))
,@(iter (for target in (mklist targets))
(collect (expand-method target out-stream indent
(if (null as) op-name as)
args body)))))))
;;; TODO: Turn stack operation comments to DEFOP option,
;;; which then can be used by the user as a documentation
;;; DONE: Better yet, generate the asm code directly from
;;; the stack op documentation (this seems easily doable)
;;; Hopefully these two are done, need testing...
(defop + ()
(rbx rax -- (:add rax rbx)))
(defop - ()
(rbx rax -- (:sub rbx rax)))
(defop = ()
(rbx rax -- (:= rbx rax :ise 1 :değilse 0)))
(defop ()
(rax -- rax rax))
(defop düş ()
(rax -- ))
(defop < ()
(rax rbx -- (:< rax rbx :ise 1 :değilse 0)))
(defop > ()
(rax rbx -- (:> rax rbx :ise 1 :değilse 0)))
(defop üst ()
(rbx rax -- rbx rax rbx))
(defop rot ()
(rcx rbx rax -- rbx rax rcx))
(defop değiş ()
(rbx rax -- rax rbx))
(defop << ()
(rbx rcx -- (:shl rbx cl)))
(defop >> ()
(rbx rcx -- (:shr rbx cl)))
(defop "|" (:as pipe)
(rbx rax -- (:or rbx rax)))
(defop & ()
(rbx rax -- (:and rbx rax)))
;;; NASM operations
(defop bel (:targets :nasm)
( -- bel))
(defop oku (:targets :nasm)
(rax -- )
(:xor rbx rbx)
(:mov bl [rax])
( -- rbx))
(defop yaz (:targets :nasm)
(rax rbx -- )
(:mov [rax] bl))
;; ( -- a)
(defop (push-int a) (:lex nil :targets :nasm)
("mov rax, ~a" a)
("push rax"))
(defop (push-str len addr str) (:lex nil :targets :nasm)
(progn (:write ("push ~d" len)
("push str_~d" addr))
(list :string addr str)))
(defop dump (:targets :nasm)
"pop rdi"
"call dump")
(defop (exit code) (:lex nil :targets :nasm)
"mov rax, 60"
("mov rdi, ~a" code)
"syscall")
(defop (ise label-num) (:targets :nasm)
"pop rax"
"test rax, rax"
("jz et_~a" label-num))
(defop (yoksa yap-num ise-num) (:indent 0 :targets :nasm)
(" jmp et_~a" yap-num)
("et_~a:" ise-num))
(defop (yap label-num &optional döngü-num) (:indent 0 :targets :nasm)
(if (null döngü-num)
(:write ("et_~a:" label-num))
(:write (" jmp et_~a" döngü-num)
("et_~a:" label-num))))
(defop (iken label-num) (:targets :nasm)
"pop rax"
"test rax, rax"
("jz et_~a" label-num))
(defop (döngü label-num) (:indent 0 :targets :nasm)
("et_~a:" label-num))
(defop (syscall num) (:lex nil :targets :nasm)
(iter (with call-regs = #("rdi" "rsi" "rdx" "r10" "r8" "r9"))
(initially (:write "pop rax"))
(for i from (- num 1) downto 0)
(:write ("pop ~a" (aref call-regs i)))
(finally (:write "syscall"))))
(defop divmod (:targets :nasm)
"mov rdx, 0"
(rax rcx -- )
"div rcx"
(-- rax rdx))
(defop * (:targets :nasm)
(rbx rax -- )
"mul rbx"
( -- rax))
(defun gen-dump (str)
(format str "~{~a~%~}"
'("dump:"
" mov r9, -3689348814741910323"
" sub rsp, 40"
" mov BYTE [rsp+31], 10"
" lea rcx, [rsp+30]"
".L2:"
" mov rax, rdi"
" lea r8, [rsp+32]"
" mul r9"
" mov rax, rdi"
" sub r8, rcx"
" shr rdx, 3"
" lea rsi, [rdx+rdx*4]"
" add rsi, rsi"
" sub rax, rsi"
" add eax, 48"
" mov BYTE [rcx], al"
" mov rax, rdi"
" mov rdi, rdx"
" mov rdx, rcx"
" sub rcx, 1"
" cmp rax, 9"
" ja .L2"
" lea rax, [rsp+32]"
" mov edi, 1"
" sub rdx, rax"
" xor eax, eax"
" lea rsi, [rsp+32+rdx]"
" mov rdx, r8"
" mov rax, 1"
" syscall"
" add rsp, 40"
" ret")))
;;; C operations
(defop (push-int a) (:lex nil :targets :c)
("push(~d);" a))
;; (defop (push-str a) (:lex nil :targets :c)
;; ("push(&stack, ~d);" a))
(defop dump (:targets :c)
("printf(\"%d\\n\", pop());"))
(defop (ise label-num) (:targets :c)
"rax = pop();"
("if(!rax){ goto et_~a; }" label-num))
(defop (yoksa yap-num ise-num) (:indent 0 :targets :c)
(" goto et_~a;" yap-num)
("et_~a:" ise-num))
(defop (yap label-num &optional döngü-num) (:indent 0 :targets :c)
(if (null döngü-num)
(:write ("et_~a:" label-num))
(:write (" goto et_~a;" döngü-num)
("et_~a:" label-num))))
(defop (iken label-num) (:targets :c)
"rax = pop();"
("if(!rax){ goto et_~a; }" label-num))
(defop (döngü label-num) (:indent 0 :targets :c)
("et_~a:" label-num))
(defop bel (:targets :c)
"push((uintptr_t) bel);")
(defop oku (:targets :c)
"push(*((char*) pop()));")
(defop yaz (:targets :c)
"rax = pop();"
"*((char*) pop()) = rax;")
(defop (syscall num) (:lex nil :targets :c)
(iter (with call-regs = #("rdi" "rsi" "rdx" "r10" "r8" "r9"))
(initially (:write "rax = pop();"))
(for i from (- num 1) downto 0)
(:write ("~a = pop();" (aref call-regs i)))
(collect (aref call-regs i) into used-regs)
(finally (:write ("syscall(rax~{, ~a~});" (reverse used-regs))))))
(defun gen-c-stack (stream)
(format stream "~{~a~%~}"
'("#include <stdio.h>"
"#include <stdint.h>"
""
"struct Stack {"
" uintptr_t content[1000000];"
" int i;"
"};"
""
"typedef struct Stack Stack;"
""
"Stack stack = { .i = 0 };"
""
"void push(uintptr_t val){"
" stack.content[stack.i] = val;"
" stack.i += 1;"
"}"
""
"uintptr_t pop(){"
" stack.i -= 1;"
" return stack.content[stack.i];"
"}"
""
"uintptr_t rax, rbx, rcx, rdi, rsi, rdx, r10, r8, r9;"
"char bel[640000];"
"")))
(defmacro with-c-fn ((ret name) args out &body body)
`(let ((*print-case* :downcase))
(format ,out "~a ~a(~{~a ~a~^, ~}){~%" ',ret ',name ',args)
,@body
(format ,out "~&}~%")))