Add exported "interface" to user-side compiler

This commit is contained in:
John Lorentzson 2025-07-09 15:18:04 +02:00
parent fb8f010e0d
commit 7940298753
3 changed files with 210 additions and 1 deletions

View file

@ -0,0 +1,204 @@
(in-package #:user-side-compiler)
(defun usc-init ()
(setf *asm-functions* (make-hash-table :test #'equalp))
(populate-asm-functions))
;;; CI - Compiler Interface
(defun ci-tokenize (string)
"Tokenizes from STRING. Returns a list of tokens."
(with-input-from-string (input string)
(tokenize input string)))
(defun ci-tokenize-info (tokens)
(flet
((count-token-type (type)
(count-if (lambda (token)
(typep token type))
tokens)))
(format *debug-io* "Token count: ~D~%" (length tokens))
(loop :for (text type)
:in '(("End-of-statement tokens" token-end-of-statement)
("Atomic (simple character) tokens, excluding EOS"
(and token-atomic (not token-end-of-statement)))
("Name tokens" token-name)
("Number tokens" token-number)
("Keyword tokens" token-keyword))
:do (format *debug-io* "~A: ~D~%" text (count-token-type type))))
(terpri *debug-io*))
(defun ci-parse (tokens)
(let ((*token-stream* (make-token-stream tokens)))
(match-syntax program)))
(defun ci-parse-info (syntax-tree)
;; TODO
(terpri *debug-io*))
(defun ci-compile-to-ir (syntax-tree)
(let ((iblock (make-instance 'iblock
:name "toplevel"
:program (make-instance 'ir-program)))
(builder (make-instance 'builder)))
(build-begin builder iblock)
(compile-node syntax-tree builder)
(fix-iblock-flow iblock)
(unless (null (next iblock))
(make-iblock-names-unique (next iblock)))
iblock))
(defun ci-ir-info (iblock)
(let ((ir-count 0)
(unique-data-count 0))
(do-iblocks (ib iblock)
(do-instructions (inst ib)
(incf ir-count)
(incf unique-data-count
(+ (length (inputs inst))
(if (typep inst 'ir-no-output)
0
1)))))
(format *debug-io* "IR instructions: ~D~%" ir-count)
(format *debug-io* "Data count: ~D~%" unique-data-count)
(terpri *debug-io*)))
(defun ci-optimize-ir (start-iblock)
(do-iblocks (ib start-iblock)
(optim-reorder-arguments ib)
(optim-call-duplicate-args ib)
(optim-remove-unused ib)))
(defun ci-allocation-info (allocs)
(format *debug-io* "Values: ~D~%" (length allocs))
(format *debug-io* "Allocated VARVEC slots: ~D~%"
(loop :with anyp := nil
:for alloc :in allocs
:when (member (strategy alloc)
'(:named-variable :temporary-variable))
:maximizing (1+ (varvec-index alloc))
:and :do (setf anyp t)))
(loop :for (text strategy)
:in '(("Named variables" :named-variable)
("Temporary variables" :temporary-variable)
("Discarded values" :not-saved)
("Immediately used (via register)" :accumulator)
("Immediately used (function arguments)" :direct-to-argvec)
("Immediately used (branches)" :branch)
("Constants" :constant))
:do (format *debug-io* "~A: ~D~%" text
(count strategy allocs :key #'strategy)))
(let ((temporaries (remove-if-not
(lambda (a) (eql (strategy a) :temporary-variable))
allocs)))
(format *debug-io* "Reused temporary variable slots: ~D~%"
(- (length temporaries)
(length (remove-duplicates temporaries :key #'varvec-index)))))
(terpri *debug-io*))
(defun ci-compile-to-asm (iblock allocs)
(with-variable-allocations allocs
(compile-iblocks iblock)))
(defun ci-asm-info (asm-obj)
(format *debug-io* "Final program length: ~D bytes.~%"
(length (compiled-bytes asm-obj)))
(terpri *debug-io*))
(defun compile-string-to-bytes (string &key print-ir-p
return-tokens-p return-syntax-tree-p
return-iblock-p return-allocs-p
return-asm-p)
(let (tokens syntax-tree iblock allocs asm-obj bytes)
(format *debug-io* "Tokenizing...~%")
(setf tokens (ci-tokenize string))
(format *debug-io* "Done tokenizing.~%")
(ci-tokenize-info tokens)
(format *debug-io* "Parsing...~%")
(setf syntax-tree (ci-parse tokens))
(format *debug-io* "Done parsing.~%")
(ci-parse-info syntax-tree)
(format *debug-io* "Compiling to IR...~%")
(setf iblock (ci-compile-to-ir syntax-tree))
(format *debug-io* "Done compiling to IR.~%")
(when (member print-ir-p '(:original :all))
(print-iblocks iblock))
(ci-ir-info iblock)
(format *debug-io* "Optimizing IR...~%")
(ci-optimize-ir iblock)
(format *debug-io* "Done optimizing IR.~%")
(when (member print-ir-p '(:optimized :all))
(print-iblocks iblock))
(ci-ir-info iblock)
(format *debug-io* "Pre-assembly preparations...~%")
(setf allocs (pre-assembly iblock))
(format *debug-io* "Done with pre-assembly preparations.~%")
(when (member print-ir-p '(:final :all t))
(print-iblocks iblock))
(ci-ir-info iblock)
(ci-allocation-info allocs)
(format *debug-io* "Compiling to machine language...~%")
(setf asm-obj (ci-compile-to-asm iblock allocs))
(setf bytes (compiled-bytes asm-obj))
(format *debug-io* "Done compiling to machine language.~%")
(ci-asm-info asm-obj)
(values
bytes
(append (when return-tokens-p (list :tokens tokens))
(when return-syntax-tree-p (list :syntax-tree syntax-tree))
(when return-iblock-p (list :iblock iblock))
(when return-allocs-p (list :allocs allocs))
(when return-asm-p (list :asm asm-obj))))))
(defun batch-compile (input-filepath &optional output-filepath)
(usc-init)
(setf input-filepath (probe-file input-filepath))
(assert (pathnamep input-filepath))
(when (null output-filepath)
(setf output-filepath (make-pathname :name (pathname-name input-filepath)
:type "bin")))
(unless (string= "bin" (pathname-type output-filepath))
(format t "To avoid overwriting files incorrectly, output files must end in \".bin\".")
#-swank
(sb-ext:exit :code -1)
#+swank
(abort))
(assert (pathnamep output-filepath))
(handler-bind ((missing-function-error
(lambda (c)
(format t "Encountered unknown function \"~A\". Dummy created.~%"
(first (format-arguments c)))
(force-output)
(continue c)))
(usc-error
(lambda (c)
(format t "~A~%~%Error in batch mode, exiting.~%"
c)
#-swank
(sb-ext:exit :code 65)
#+swank
(abort c)))
(error
(lambda (c)
(format t "---- COMPILER BUG ----~%")
(format t "An UNHANDLED error was encountered during compilation.~%~%")
(trivial-backtrace:print-backtrace c)
(format t "Please contact Duuqnd (duuqnd@stacken.kth.se).~%")
#-swank
(sb-ext:exit :code 70)
#+swank
(abort c))))
(let ((bytes (compile-string-to-bytes
(uiop:read-file-string input-filepath))))
(with-open-file (stream output-filepath :direction :output
:element-type '(unsigned-byte 8)
:if-exists :supersede)
(loop :for byte :in bytes
:do (write-byte byte stream)))
(format t "Finished, wrote compiled program to \"~A\".~%"
output-filepath))))

View file

@ -7,4 +7,8 @@
(defpackage #:user-side-compiler (defpackage #:user-side-compiler
(:nicknames #:usc) (:nicknames #:usc)
(:local-nicknames (#:tlk #:user-side-compiler/toolkit)) (:local-nicknames (#:tlk #:user-side-compiler/toolkit))
(:export #:usc-init #:compile-string-to-bytes
;; Errors
#:usc-error #:tokenizer-error #:missing-function-error
#:parser-error))
(:use #:cl)) (:use #:cl))

View file

@ -36,4 +36,5 @@
:serial t :serial t
:components ((:file "value-allocator") :components ((:file "value-allocator")
(:file "pre-assembly") (:file "pre-assembly")
(:file "code-generator"))))) (:file "code-generator")))
(:file "interface")))