From 156edc2f09896799265e8cba132862e9175ebdd3 Mon Sep 17 00:00:00 2001 From: John Lorentzson Date: Sun, 6 Jul 2025 19:04:36 +0200 Subject: [PATCH] Rethink how data works at the assembly code generation level --- .../backend/code-generator.lisp | 135 +++++++++++------- .../backend/value-allocator.lisp | 64 ++++++--- .../user-side-compiler/middle/data.lisp | 2 +- .../middle/instructions.lisp | 27 ++-- .../middle/optimizations.lisp | 27 +--- 5 files changed, 152 insertions(+), 103 deletions(-) diff --git a/wip-duuqnd/user-side-compiler/backend/code-generator.lisp b/wip-duuqnd/user-side-compiler/backend/code-generator.lisp index 4f7eaf5..a3ba1a5 100644 --- a/wip-duuqnd/user-side-compiler/backend/code-generator.lisp +++ b/wip-duuqnd/user-side-compiler/backend/code-generator.lisp @@ -40,7 +40,7 @@ (emit-asm-object label))) (defvar *variable-allocations* (make-hash-table)) -(defvar *last-instruction* '(:nop)) +(defvar *last-instruction* :useless) (defun varvec-index-in-zeropage-p (index) ;; TODO: Handle case of too many variables @@ -65,29 +65,42 @@ (defun data-reference (data) (ecase (strategy (allocation-details data)) ((:named-variable :temporary-variable) - (+ (varvec-index (allocation-details data)) - +varvec-offset+)) + (cons :address (+ (varvec-index (allocation-details data)) + +varvec-offset+))) (:direct-to-argvec - (+ (position data (inputs (user data))) - +argvec-offset+)))) + (cons :address (+ (position data (inputs (first (users data)))) + +argvec-offset+))) + (:constant + (cons :immediate (ir-constant-value data))) + (:accumulator + (error "Accumulator-allocated data ~A being dereferenced. Ensuring this does not happen +is the responsibility of the pre-assembly compilation step." + data)) + (:branch + (error "Branch result ~A being dereferenced. This doesn't make much sense." data)) + (:not-saved + (assert (null (users data))) + (cons :not-saved nil)))) (defmacro define-normal-emitter (name immediate-opcode zeropage-opcode absolute-opcode) - `(defun ,name (mode value) - (cond ((eql mode :immediate) - (emit-asm-instruction :opcode ,immediate-opcode - :operand (the (unsigned-byte 8) value) - :byte-length 2)) - ((and (eql mode :address) - (< value #x100)) - (emit-asm-instruction :opcode ,zeropage-opcode - :operand (the (unsigned-byte 8) value) - :byte-length 2)) - ((eql mode :address) - (emit-asm-instruction :opcode ,absolute-opcode - :operand (the (unsigned-byte 16) value) - :byte-length 3)) - (t - (error "Invalid usage of ~A with arguments (~A ~A)" ',name mode value))))) + `(defun ,name (data-reference) + (destructuring-bind (mode . value) + data-reference + (cond ((eql mode :immediate) + (emit-asm-instruction :opcode ,immediate-opcode + :operand (the (unsigned-byte 8) value) + :byte-length 2)) + ((and (eql mode :address) + (< value #x100)) + (emit-asm-instruction :opcode ,zeropage-opcode + :operand (the (unsigned-byte 8) value) + :byte-length 2)) + ((eql mode :address) + (emit-asm-instruction :opcode ,absolute-opcode + :operand (the (unsigned-byte 16) value) + :byte-length 3)) + (t + (error "Invalid usage of ~A with arguments (~A ~A)" ',name mode value)))))) (define-normal-emitter emit-lda #xa9 #xa5 #xad) (define-normal-emitter emit-sta (error "STA has no immediate mode.") #x85 #x8d) @@ -96,20 +109,20 @@ (define-normal-emitter emit-cmp #xc9 #xc5 #xcd) (defun emit-store-data (data) - (if (or (null (allocation-details data)) + (if (or (eql (strategy (allocation-details data)) :not-saved) (eql (strategy (allocation-details data)) :constant)) - (setf *last-instruction* '(:useless)) + (setf *last-instruction* :useless) (progn - (emit-sta :address (data-reference data)) + (unless (eql (strategy (allocation-details data)) :accumulator) + (emit-sta (data-reference data))) (setf *last-instruction* (list :store data))))) (defun emit-store-bool (data) "Stores the inverse of the zeroflag to DATA. Inverse so that non-0 is TRUE." ;; The "DATA is stored"-case - (if (or (null (allocation-details data)) - (member (strategy (allocation-details data)) - '(:constant :flags))) - (setf *last-instruction* '(:useless)) + (if (or (member (strategy (allocation-details data)) + '(:constant :branch :not-saved))) + (setf *last-instruction* :useless) (progn (emit-asm-instruction :opcode :php :byte-length 1) (emit-asm-instruction :opcode :pla :byte-length 1) @@ -117,24 +130,29 @@ (emit-asm-instruction :opcode :lsr-a :byte-length 1) (emit-asm-instruction :opcode :not :operand #b00000001 :byte-length 2) (emit-sta :address (data-reference data)) - (setf *last-instruction* '(:store-zero-flag data))))) + (setf *last-instruction* (list :store-zero-flag data))))) (defun emit-load-data (data) - (if (or (eql (strategy (allocation-details data)) :direct-to-argvec) - (equal *last-instruction* (list :store data)) - (equal *last-instruction* (list :load data))) - (setf *last-instruction* '(:useless)) - (progn - (if (eql (strategy (allocation-details data)) :constant) - (emit-lda :immediate (ir-constant-value data)) - (emit-lda :address (data-reference data))) - (setf *last-instruction* (list :load data))))) + (cond ((eql (strategy (allocation-details data)) :not-saved) + (error "Tried to load unallocated data ~A." data)) + ((eql (strategy (allocation-details data)) :accumulator) + (assert (eql (next (definition data)) (last-use data))) + (setf *last-instruction* :useless)) + ((eql (strategy (allocation-details data)) :direct-to-argvec) + ;; TODO: Assert that it actually has been stored + (setf *last-instruction* :useless)) + ((or (equal *last-instruction* (list :store data)) + (equal *last-instruction* (list :load data))) + (setf *last-instruction* :useless)) + (t + (emit-lda (data-reference data)) + (setf *last-instruction* (list :load data))))) (defun emit-load-bool (data) (if (or (eql (strategy (allocation-details data)) :flags) (equal *last-instruction* (list :store-zero-flag data)) (equal *last-instruction* (list :load-zero-flag data))) - (setf *last-instruction* '(:useless)) + (setf *last-instruction* :useless) (progn (if (eql (strategy (allocation-details data)) :constant) (progn @@ -158,10 +176,7 @@ (error "During the final code generation step, IR-PLUS must have exactly 2 operands.")) (emit-load-data (first (inputs inst))) (emit-asm-instruction :opcode #x18 :byte-length 1) ; Clear Carry - (if (eql (strategy (allocation-details (second (inputs inst)))) - :constant) - (emit-adc :immediate (ir-constant-value (second (inputs inst)))) - (emit-adc :address (data-reference (second (inputs inst))))) + (emit-adc (data-reference (second (inputs inst)))) (emit-store-data (output inst))) (defmethod compile-ir ((inst ir-minus)) @@ -169,26 +184,30 @@ (error "During the final code generation step, IR-MINUS must have exactly 2 operands.")) (emit-load-data (first (inputs inst))) (emit-asm-instruction :opcode #x38 :byte-length 1) ; Set Carry - (if (eql (strategy (allocation-details (second (inputs inst)))) - :constant) - (emit-sbc :immediate (ir-constant-value (second (inputs inst)))) - (emit-sbc :address (data-reference (second (inputs inst))))) + (emit-sbc (data-reference (second (inputs inst)))) (emit-store-data (output inst))) (defmethod compile-ir ((inst ir-assign)) (emit-load-data (input inst)) - (emit-store-data (output inst))) + (emit-store-data (output inst)) + (setf *last-instruction* (list :store (output inst)))) (defmethod compile-ir ((inst ir-fetchvar)) (emit-load-data (input inst)) - (emit-store-data (output inst))) + (emit-store-data (output inst)) + (setf *last-instruction* (list :store (output inst)))) + +(defmethod compile-ir ((inst ir-getconst)) + (emit-lda (cons :immediate (input inst))) + (emit-store-data (output inst)) + (setf *last-instruction* (list :store (output inst)))) (defmethod compile-ir ((inst ir-call)) (loop :for arg :in (inputs inst) :for arg-index :from 0 :do (emit-load-data arg) :unless (eql (strategy (allocation-details arg)) :direct-to-argvec) - :do (emit-sta :address (+ arg-index +argvec-offset+))) + :do (emit-sta (cons :address (+ arg-index +argvec-offset+)))) (emit-asm-instruction :opcode #x20 :operand (callee inst) :byte-length 3) (emit-store-data (output inst))) @@ -198,7 +217,7 @@ (emit-asm-instruction :opcode #x4C :operand (first (destinations inst)) :byte-length 3)) - (setf *last-instruction* '(:jump))) + (setf *last-instruction* (list :jump (first (destinations inst))))) (defmethod compile-ir ((inst ir-if)) (let ((next-iblock (next (iblock inst))) @@ -213,7 +232,7 @@ (emit-asm-instruction :opcode #xD0 :operand else-iblock :byte-length 2) - (setf *last-instruction* '(:conditional)))) + (setf *last-instruction* :branch))) (defmethod compile-ir ((inst ir-test-equal)) (emit-load-data (first (inputs inst))) @@ -290,6 +309,7 @@ (defun compiled-bytes (start-instruction) (declare (optimize (debug 3))) + (check-type start-instruction asm-object) (loop :for asm-obj := start-instruction :then (next asm-obj) :do (loop :until (or (null asm-obj) (typep asm-obj 'asm-instruction)) :do (setf asm-obj (next asm-obj))) @@ -308,12 +328,15 @@ (let ((*token-stream* (make-token-stream (tokenize source-stream)))) (let ((rb (with-compilation-setup (root-block builder) (compile-node (match-syntax program) builder) - root-block))) + root-block)) + (return-values '())) (do-iblocks (ib rb) + (optim-prepare-direct-instructions ib) + ;;(optim-commutative-constant-folding ib) (optim-reorder-arguments ib) - (optim-direct-variable-use ib) (optim-call-duplicate-args ib) (optim-remove-unused ib)) + (push rb return-values) (let ((allocations (allocate-values rb))) (optim-reuse-temporary-slots rb allocations) (when print-ir-p @@ -327,6 +350,8 @@ (unless (null (varvec-index allocation)) (list (varvec-index allocation))))) (terpri)) + (push allocations return-values) (when make-asm-p (with-variable-allocations allocations - (compile-iblocks rb)))))))) + (push (compile-iblocks rb) return-values)))) + (values-list (nreverse return-values)))))) diff --git a/wip-duuqnd/user-side-compiler/backend/value-allocator.lisp b/wip-duuqnd/user-side-compiler/backend/value-allocator.lisp index 820fc6b..916b860 100644 --- a/wip-duuqnd/user-side-compiler/backend/value-allocator.lisp +++ b/wip-duuqnd/user-side-compiler/backend/value-allocator.lisp @@ -5,38 +5,65 @@ ;;; 6502-related nonsense going on as well, it feels better to call it a value ;;; allocator instead. +(defparameter +accumulator-users+ + '(or ir-operation ir-assign)) + (defun calls-exist-between-p (start end) - (cond ((eql start end) - nil) - ((typep start 'ir-call) - start) - ((null start) - (error "Calls check crossed iblock boundary. This should not happen.")) - (t - (calls-exist-between-p (next start) end)))) + (labels + ((iter (now end) + (cond + ((eql now end) + nil) + ((typep now 'ir-call) + now) + ((null now) + (error "Calls check crossed iblock boundary. This should not happen.")) + (t + (iter (next now) end))))) + (iter (next start) end))) (defclass value-allocation () ((%data :accessor data :initarg :data) (%strategy :accessor strategy :initform :temporary-variable) (%varvec-index :accessor varvec-index :initform nil))) +(defun optim-prepare-direct-instructions (iblock) + (do-instructions (inst iblock) + (when (typep inst 'ir-operation) + (assert (= (length (inputs inst)) 2)) + (destructuring-bind (accumulator operand) + (inputs inst) + (when (typep (definition accumulator) '(or ir-fetchvar ir-getconst)) + (move-instruction-above (definition accumulator) inst)) + (when (typep (definition operand) '(or ir-fetchvar ir-getconst)) + (when (typep (definition operand) 'ir-fetchvar) + (setf (inputs inst) + (substitute (input (definition operand)) operand + (inputs inst)))) + (delete-instruction (definition operand))))))) + (defmethod choose-allocation-strategy ((alc value-allocation)) (setf (strategy alc) (let ((data (data alc))) (cond ((null (users data)) - :do-not-save) - ((typep data 'ir-constant) - :constant) + :not-saved) ((typep data 'ir-variable) :named-variable) ((and (eql (next (definition data)) (last-use data)) - (typep (last-use data) 'ir-if)) - :flags) - ((and (not (typep data 'ir-reusable)) + (typep (last-use data) +accumulator-users+) + (eql data (first (inputs (last-use data))))) + :accumulator) + ((and (eql (next (definition data)) (last-use data)) + (typep (last-use data) 'ir-if) + (typep (definition data) 'ir-comparison)) + :branch) + ((and (= (length (users data)) 1) (typep (last-use data) 'ir-call) (not (calls-exist-between-p (definition data) (last-use data)))) :direct-to-argvec) + ((typep data 'ir-constant) + :constant) (t :temporary-variable))))) @@ -49,7 +76,12 @@ (find (output inst) allocations :key #'data)) (let ((allocation (make-instance 'value-allocation :data (output inst)))) (choose-allocation-strategy allocation) - (push allocation allocations))))) + (push allocation allocations))) + (loop :for input :in (inputs inst) + :unless (or (not (typep input 'ir-data)) (find input allocations :key #'data)) + :do (let ((allocation (make-instance 'value-allocation :data input))) + (choose-allocation-strategy allocation) + (push allocation allocations))))) (setf allocations (nreverse allocations)) (let ((counter -1) (named (remove :named-variable allocations @@ -58,7 +90,7 @@ :test-not #'eql :key #'strategy))) (loop :for allocation :in (append named temporary) :do (setf (varvec-index allocation) (incf counter)))) - (remove-if #'null allocations :key (lambda (a) (users (data a)))))) + allocations)) (defun optim-reuse-temporary-slots (start-iblock allocations) (let ((free '())) diff --git a/wip-duuqnd/user-side-compiler/middle/data.lisp b/wip-duuqnd/user-side-compiler/middle/data.lisp index a9e6841..fe094cf 100644 --- a/wip-duuqnd/user-side-compiler/middle/data.lisp +++ b/wip-duuqnd/user-side-compiler/middle/data.lisp @@ -2,7 +2,7 @@ (defclass ir-data () ((%definition :accessor definition :initarg :definition) - (%last-use :accessor last-use))) + (%last-use :accessor last-use :initform nil))) (defclass ir-result (ir-data) ((%user :accessor user :initarg :user :initform nil))) diff --git a/wip-duuqnd/user-side-compiler/middle/instructions.lisp b/wip-duuqnd/user-side-compiler/middle/instructions.lisp index df0f5b7..29d1a39 100644 --- a/wip-duuqnd/user-side-compiler/middle/instructions.lisp +++ b/wip-duuqnd/user-side-compiler/middle/instructions.lisp @@ -80,8 +80,9 @@ solely due to its output being unused.")) (defclass ir-assign (ir-one-input ir-inst) ()) (defclass ir-operation (ir-inst) ()) +(defclass ir-comparison (ir-operation) ()) -(defclass ir-call (ir-operation) +(defclass ir-call (ir-inst) ((%callee :accessor callee :initarg :callee))) (defmethod effect-used-p ((inst ir-call)) @@ -89,6 +90,13 @@ solely due to its output being unused.")) t) ;;; A messy but quick way to define all the very similar arithmetic operations +(defvar *expr-inst-mappings* '()) + +(defun get-ir-expr-inst-type (node) + (loop :for (node-class . ir-class) :in *expr-inst-mappings* + :when (typep node node-class) + :return ir-class)) + (macrolet ((ops ((&rest superclasses) &rest classes) `(progn ,@(loop :for (class-name ignore symbol) :in classes @@ -97,14 +105,17 @@ solely due to its output being unused.")) (defmethod print-ir-inst ((inst ,class-name)) (format t " (~A~{ ~A~}) -> ~A~%" ,symbol (inputs inst) (output inst))))) - (defun get-ir-expr-inst-type (node) - (typecase node - ,@(loop :for (ir-class node-class ignore) :in classes - :unless (null node-class) - :collect `(,node-class ',ir-class))))))) + (setf *expr-inst-mappings* + (append + *expr-inst-mappings* + ',(loop :for (ir-class node-class ignore) :in classes + :unless (null node-class) + :collect `(,node-class . ,ir-class))))))) + (setf *expr-inst-mappings* '()) (ops (ir-operation) ; NILs indicate TODOs here for now + (ir-plus node-expr-plus "+") (ir-minus node-expr-minus "-") (ir-mult node-expr-multiply "*") (ir-div node-expr-divide "/")) + (ops (ir-comparison) (ir-test-equal node-expr-test-equal "==") (ir-test-not-equal node-expr-test-not-equal "!=") (ir-test-less nil "<") (ir-test-greater nil ">") - (ir-test-less-or-equal nil "<=") (ir-test-greater-or-equal nil ">=") - (ir-plus node-expr-plus "+") (ir-minus node-expr-minus "-") (ir-mult node-expr-multiply "*") (ir-div node-expr-divide "/"))) + (ir-test-less-or-equal nil "<=") (ir-test-greater-or-equal nil ">="))) diff --git a/wip-duuqnd/user-side-compiler/middle/optimizations.lisp b/wip-duuqnd/user-side-compiler/middle/optimizations.lisp index 3724b03..53c1db6 100644 --- a/wip-duuqnd/user-side-compiler/middle/optimizations.lisp +++ b/wip-duuqnd/user-side-compiler/middle/optimizations.lisp @@ -118,12 +118,12 @@ No guarantees of success are made, I just hope it's not incorrect." (substitute new old (inputs user))))))))) (defun optim-reorder-arguments (iblock) - "Puts the simpler non-operation arguments right above the operation that -uses them to assist in generating more direct 6502 code." + "Puts the simpler non-operation arguments right above the call or operation +that uses them to assist in generating more direct 6502 code." (do-instructions (inst iblock) - (when (typep inst 'ir-operation) + (when (typep inst '(or ir-call ir-operation)) (loop :for input :in (inputs inst) - :when (and (not (typep (definition input) 'ir-operation)) + :when (and (not (typep (definition input) '(or ir-operation ir-call))) (not (typep input 'ir-reusable))) :do (move-instruction-above (definition input) inst))))) @@ -169,25 +169,6 @@ though I'm pretty sure it can't anyway.") (loop :for (new . old) :in replacements :do (setf (inputs call) (substitute new old (inputs call))))))))) -(defun optim-direct-variable-use (iblock) - "Removes unnecessary uses of IR-FETCHVAR. -Some operations do not require their operands to be copied before use. CPU -instructions, for example. This optimization pass goes through and removes -IR-FETCHVAR instructions that would serve no purpose in compiled code." - ;; TODO: Add more instructions to the candidates - (let ((candidates-type '(or ir-test-equal ir-plus ir-minus)) - (to-remove '())) - (do-instructions (inst iblock) - (when (typep inst 'ir-fetchvar) - (let ((result (output inst)) - (src (input inst))) - (when (and (not (typep (output inst) 'ir-reusable)) - (typep (user result) candidates-type)) - (let ((user (user (output inst)))) - (setf (inputs user) (substitute src result (inputs user))) - (push inst to-remove)))))) - (mapc #'delete-instruction to-remove))) - (defun compute-lifetime-knowledge (start-iblock) (do-iblocks (iblock start-iblock) (do-instructions (inst iblock)