From 7940298753a9ff6ae7a976a01ec7c8b31b6384ad Mon Sep 17 00:00:00 2001 From: John Lorentzson Date: Wed, 9 Jul 2025 15:18:04 +0200 Subject: [PATCH] Add exported "interface" to user-side compiler --- wip-duuqnd/user-side-compiler/interface.lisp | 204 ++++++++++++++++++ wip-duuqnd/user-side-compiler/package.lisp | 4 + .../user-side-compiler/user-side-compiler.asd | 3 +- 3 files changed, 210 insertions(+), 1 deletion(-) create mode 100644 wip-duuqnd/user-side-compiler/interface.lisp diff --git a/wip-duuqnd/user-side-compiler/interface.lisp b/wip-duuqnd/user-side-compiler/interface.lisp new file mode 100644 index 0000000..91b711e --- /dev/null +++ b/wip-duuqnd/user-side-compiler/interface.lisp @@ -0,0 +1,204 @@ +(in-package #:user-side-compiler) + +(defun usc-init () + (setf *asm-functions* (make-hash-table :test #'equalp)) + (populate-asm-functions)) + +;;; CI - Compiler Interface + +(defun ci-tokenize (string) + "Tokenizes from STRING. Returns a list of tokens." + (with-input-from-string (input string) + (tokenize input string))) + +(defun ci-tokenize-info (tokens) + (flet + ((count-token-type (type) + (count-if (lambda (token) + (typep token type)) + tokens))) + (format *debug-io* "Token count: ~D~%" (length tokens)) + (loop :for (text type) + :in '(("End-of-statement tokens" token-end-of-statement) + ("Atomic (simple character) tokens, excluding EOS" + (and token-atomic (not token-end-of-statement))) + ("Name tokens" token-name) + ("Number tokens" token-number) + ("Keyword tokens" token-keyword)) + :do (format *debug-io* "~A: ~D~%" text (count-token-type type)))) + (terpri *debug-io*)) + +(defun ci-parse (tokens) + (let ((*token-stream* (make-token-stream tokens))) + (match-syntax program))) + +(defun ci-parse-info (syntax-tree) + ;; TODO + (terpri *debug-io*)) + +(defun ci-compile-to-ir (syntax-tree) + (let ((iblock (make-instance 'iblock + :name "toplevel" + :program (make-instance 'ir-program))) + (builder (make-instance 'builder))) + (build-begin builder iblock) + (compile-node syntax-tree builder) + (fix-iblock-flow iblock) + (unless (null (next iblock)) + (make-iblock-names-unique (next iblock))) + iblock)) + +(defun ci-ir-info (iblock) + (let ((ir-count 0) + (unique-data-count 0)) + (do-iblocks (ib iblock) + (do-instructions (inst ib) + (incf ir-count) + (incf unique-data-count + (+ (length (inputs inst)) + (if (typep inst 'ir-no-output) + 0 + 1))))) + (format *debug-io* "IR instructions: ~D~%" ir-count) + (format *debug-io* "Data count: ~D~%" unique-data-count) + (terpri *debug-io*))) + +(defun ci-optimize-ir (start-iblock) + (do-iblocks (ib start-iblock) + (optim-reorder-arguments ib) + (optim-call-duplicate-args ib) + (optim-remove-unused ib))) + +(defun ci-allocation-info (allocs) + (format *debug-io* "Values: ~D~%" (length allocs)) + (format *debug-io* "Allocated VARVEC slots: ~D~%" + (loop :with anyp := nil + :for alloc :in allocs + :when (member (strategy alloc) + '(:named-variable :temporary-variable)) + :maximizing (1+ (varvec-index alloc)) + :and :do (setf anyp t))) + (loop :for (text strategy) + :in '(("Named variables" :named-variable) + ("Temporary variables" :temporary-variable) + ("Discarded values" :not-saved) + ("Immediately used (via register)" :accumulator) + ("Immediately used (function arguments)" :direct-to-argvec) + ("Immediately used (branches)" :branch) + ("Constants" :constant)) + :do (format *debug-io* "~A: ~D~%" text + (count strategy allocs :key #'strategy))) + (let ((temporaries (remove-if-not + (lambda (a) (eql (strategy a) :temporary-variable)) + allocs))) + (format *debug-io* "Reused temporary variable slots: ~D~%" + (- (length temporaries) + (length (remove-duplicates temporaries :key #'varvec-index))))) + (terpri *debug-io*)) + +(defun ci-compile-to-asm (iblock allocs) + (with-variable-allocations allocs + (compile-iblocks iblock))) + +(defun ci-asm-info (asm-obj) + (format *debug-io* "Final program length: ~D bytes.~%" + (length (compiled-bytes asm-obj))) + (terpri *debug-io*)) + +(defun compile-string-to-bytes (string &key print-ir-p + return-tokens-p return-syntax-tree-p + return-iblock-p return-allocs-p + return-asm-p) + (let (tokens syntax-tree iblock allocs asm-obj bytes) + (format *debug-io* "Tokenizing...~%") + (setf tokens (ci-tokenize string)) + (format *debug-io* "Done tokenizing.~%") + (ci-tokenize-info tokens) + + (format *debug-io* "Parsing...~%") + (setf syntax-tree (ci-parse tokens)) + (format *debug-io* "Done parsing.~%") + (ci-parse-info syntax-tree) + + (format *debug-io* "Compiling to IR...~%") + (setf iblock (ci-compile-to-ir syntax-tree)) + (format *debug-io* "Done compiling to IR.~%") + (when (member print-ir-p '(:original :all)) + (print-iblocks iblock)) + (ci-ir-info iblock) + + (format *debug-io* "Optimizing IR...~%") + (ci-optimize-ir iblock) + (format *debug-io* "Done optimizing IR.~%") + (when (member print-ir-p '(:optimized :all)) + (print-iblocks iblock)) + (ci-ir-info iblock) + + (format *debug-io* "Pre-assembly preparations...~%") + (setf allocs (pre-assembly iblock)) + (format *debug-io* "Done with pre-assembly preparations.~%") + (when (member print-ir-p '(:final :all t)) + (print-iblocks iblock)) + (ci-ir-info iblock) + (ci-allocation-info allocs) + + (format *debug-io* "Compiling to machine language...~%") + (setf asm-obj (ci-compile-to-asm iblock allocs)) + (setf bytes (compiled-bytes asm-obj)) + (format *debug-io* "Done compiling to machine language.~%") + (ci-asm-info asm-obj) + (values + bytes + (append (when return-tokens-p (list :tokens tokens)) + (when return-syntax-tree-p (list :syntax-tree syntax-tree)) + (when return-iblock-p (list :iblock iblock)) + (when return-allocs-p (list :allocs allocs)) + (when return-asm-p (list :asm asm-obj)))))) + +(defun batch-compile (input-filepath &optional output-filepath) + (usc-init) + (setf input-filepath (probe-file input-filepath)) + (assert (pathnamep input-filepath)) + (when (null output-filepath) + (setf output-filepath (make-pathname :name (pathname-name input-filepath) + :type "bin"))) + (unless (string= "bin" (pathname-type output-filepath)) + (format t "To avoid overwriting files incorrectly, output files must end in \".bin\".") + #-swank + (sb-ext:exit :code -1) + #+swank + (abort)) + (assert (pathnamep output-filepath)) + (handler-bind ((missing-function-error + (lambda (c) + (format t "Encountered unknown function \"~A\". Dummy created.~%" + (first (format-arguments c))) + (force-output) + (continue c))) + (usc-error + (lambda (c) + (format t "~A~%~%Error in batch mode, exiting.~%" + c) + #-swank + (sb-ext:exit :code 65) + #+swank + (abort c))) + (error + (lambda (c) + (format t "---- COMPILER BUG ----~%") + (format t "An UNHANDLED error was encountered during compilation.~%~%") + (trivial-backtrace:print-backtrace c) + (format t "Please contact Duuqnd (duuqnd@stacken.kth.se).~%") + #-swank + (sb-ext:exit :code 70) + #+swank + (abort c)))) + (let ((bytes (compile-string-to-bytes + (uiop:read-file-string input-filepath)))) + (with-open-file (stream output-filepath :direction :output + :element-type '(unsigned-byte 8) + :if-exists :supersede) + (loop :for byte :in bytes + :do (write-byte byte stream))) + (format t "Finished, wrote compiled program to \"~A\".~%" + output-filepath)))) diff --git a/wip-duuqnd/user-side-compiler/package.lisp b/wip-duuqnd/user-side-compiler/package.lisp index 57b6aa4..de9d877 100644 --- a/wip-duuqnd/user-side-compiler/package.lisp +++ b/wip-duuqnd/user-side-compiler/package.lisp @@ -7,4 +7,8 @@ (defpackage #:user-side-compiler (:nicknames #:usc) (:local-nicknames (#:tlk #:user-side-compiler/toolkit)) + (:export #:usc-init #:compile-string-to-bytes + ;; Errors + #:usc-error #:tokenizer-error #:missing-function-error + #:parser-error)) (:use #:cl)) diff --git a/wip-duuqnd/user-side-compiler/user-side-compiler.asd b/wip-duuqnd/user-side-compiler/user-side-compiler.asd index 0abb93d..3868969 100644 --- a/wip-duuqnd/user-side-compiler/user-side-compiler.asd +++ b/wip-duuqnd/user-side-compiler/user-side-compiler.asd @@ -36,4 +36,5 @@ :serial t :components ((:file "value-allocator") (:file "pre-assembly") - (:file "code-generator"))))) + (:file "code-generator"))) + (:file "interface")))