Rethink how data works at the assembly code generation level

This commit is contained in:
John Lorentzson 2025-07-06 19:04:36 +02:00
parent 99d1156e7d
commit 156edc2f09
5 changed files with 152 additions and 103 deletions

View file

@ -40,7 +40,7 @@
(emit-asm-object label)))
(defvar *variable-allocations* (make-hash-table))
(defvar *last-instruction* '(:nop))
(defvar *last-instruction* :useless)
(defun varvec-index-in-zeropage-p (index)
;; TODO: Handle case of too many variables
@ -65,29 +65,42 @@
(defun data-reference (data)
(ecase (strategy (allocation-details data))
((:named-variable :temporary-variable)
(+ (varvec-index (allocation-details data))
+varvec-offset+))
(cons :address (+ (varvec-index (allocation-details data))
+varvec-offset+)))
(:direct-to-argvec
(+ (position data (inputs (user data)))
+argvec-offset+))))
(cons :address (+ (position data (inputs (first (users data))))
+argvec-offset+)))
(:constant
(cons :immediate (ir-constant-value data)))
(:accumulator
(error "Accumulator-allocated data ~A being dereferenced. Ensuring this does not happen
is the responsibility of the pre-assembly compilation step."
data))
(:branch
(error "Branch result ~A being dereferenced. This doesn't make much sense." data))
(:not-saved
(assert (null (users data)))
(cons :not-saved nil))))
(defmacro define-normal-emitter (name immediate-opcode zeropage-opcode absolute-opcode)
`(defun ,name (mode value)
(cond ((eql mode :immediate)
(emit-asm-instruction :opcode ,immediate-opcode
:operand (the (unsigned-byte 8) value)
:byte-length 2))
((and (eql mode :address)
(< value #x100))
(emit-asm-instruction :opcode ,zeropage-opcode
:operand (the (unsigned-byte 8) value)
:byte-length 2))
((eql mode :address)
(emit-asm-instruction :opcode ,absolute-opcode
:operand (the (unsigned-byte 16) value)
:byte-length 3))
(t
(error "Invalid usage of ~A with arguments (~A ~A)" ',name mode value)))))
`(defun ,name (data-reference)
(destructuring-bind (mode . value)
data-reference
(cond ((eql mode :immediate)
(emit-asm-instruction :opcode ,immediate-opcode
:operand (the (unsigned-byte 8) value)
:byte-length 2))
((and (eql mode :address)
(< value #x100))
(emit-asm-instruction :opcode ,zeropage-opcode
:operand (the (unsigned-byte 8) value)
:byte-length 2))
((eql mode :address)
(emit-asm-instruction :opcode ,absolute-opcode
:operand (the (unsigned-byte 16) value)
:byte-length 3))
(t
(error "Invalid usage of ~A with arguments (~A ~A)" ',name mode value))))))
(define-normal-emitter emit-lda #xa9 #xa5 #xad)
(define-normal-emitter emit-sta (error "STA has no immediate mode.") #x85 #x8d)
@ -96,20 +109,20 @@
(define-normal-emitter emit-cmp #xc9 #xc5 #xcd)
(defun emit-store-data (data)
(if (or (null (allocation-details data))
(if (or (eql (strategy (allocation-details data)) :not-saved)
(eql (strategy (allocation-details data)) :constant))
(setf *last-instruction* '(:useless))
(setf *last-instruction* :useless)
(progn
(emit-sta :address (data-reference data))
(unless (eql (strategy (allocation-details data)) :accumulator)
(emit-sta (data-reference data)))
(setf *last-instruction* (list :store data)))))
(defun emit-store-bool (data)
"Stores the inverse of the zeroflag to DATA. Inverse so that non-0 is TRUE."
;; The "DATA is stored"-case
(if (or (null (allocation-details data))
(member (strategy (allocation-details data))
'(:constant :flags)))
(setf *last-instruction* '(:useless))
(if (or (member (strategy (allocation-details data))
'(:constant :branch :not-saved)))
(setf *last-instruction* :useless)
(progn
(emit-asm-instruction :opcode :php :byte-length 1)
(emit-asm-instruction :opcode :pla :byte-length 1)
@ -117,24 +130,29 @@
(emit-asm-instruction :opcode :lsr-a :byte-length 1)
(emit-asm-instruction :opcode :not :operand #b00000001 :byte-length 2)
(emit-sta :address (data-reference data))
(setf *last-instruction* '(:store-zero-flag data)))))
(setf *last-instruction* (list :store-zero-flag data)))))
(defun emit-load-data (data)
(if (or (eql (strategy (allocation-details data)) :direct-to-argvec)
(equal *last-instruction* (list :store data))
(equal *last-instruction* (list :load data)))
(setf *last-instruction* '(:useless))
(progn
(if (eql (strategy (allocation-details data)) :constant)
(emit-lda :immediate (ir-constant-value data))
(emit-lda :address (data-reference data)))
(setf *last-instruction* (list :load data)))))
(cond ((eql (strategy (allocation-details data)) :not-saved)
(error "Tried to load unallocated data ~A." data))
((eql (strategy (allocation-details data)) :accumulator)
(assert (eql (next (definition data)) (last-use data)))
(setf *last-instruction* :useless))
((eql (strategy (allocation-details data)) :direct-to-argvec)
;; TODO: Assert that it actually has been stored
(setf *last-instruction* :useless))
((or (equal *last-instruction* (list :store data))
(equal *last-instruction* (list :load data)))
(setf *last-instruction* :useless))
(t
(emit-lda (data-reference data))
(setf *last-instruction* (list :load data)))))
(defun emit-load-bool (data)
(if (or (eql (strategy (allocation-details data)) :flags)
(equal *last-instruction* (list :store-zero-flag data))
(equal *last-instruction* (list :load-zero-flag data)))
(setf *last-instruction* '(:useless))
(setf *last-instruction* :useless)
(progn
(if (eql (strategy (allocation-details data)) :constant)
(progn
@ -158,10 +176,7 @@
(error "During the final code generation step, IR-PLUS must have exactly 2 operands."))
(emit-load-data (first (inputs inst)))
(emit-asm-instruction :opcode #x18 :byte-length 1) ; Clear Carry
(if (eql (strategy (allocation-details (second (inputs inst))))
:constant)
(emit-adc :immediate (ir-constant-value (second (inputs inst))))
(emit-adc :address (data-reference (second (inputs inst)))))
(emit-adc (data-reference (second (inputs inst))))
(emit-store-data (output inst)))
(defmethod compile-ir ((inst ir-minus))
@ -169,26 +184,30 @@
(error "During the final code generation step, IR-MINUS must have exactly 2 operands."))
(emit-load-data (first (inputs inst)))
(emit-asm-instruction :opcode #x38 :byte-length 1) ; Set Carry
(if (eql (strategy (allocation-details (second (inputs inst))))
:constant)
(emit-sbc :immediate (ir-constant-value (second (inputs inst))))
(emit-sbc :address (data-reference (second (inputs inst)))))
(emit-sbc (data-reference (second (inputs inst))))
(emit-store-data (output inst)))
(defmethod compile-ir ((inst ir-assign))
(emit-load-data (input inst))
(emit-store-data (output inst)))
(emit-store-data (output inst))
(setf *last-instruction* (list :store (output inst))))
(defmethod compile-ir ((inst ir-fetchvar))
(emit-load-data (input inst))
(emit-store-data (output inst)))
(emit-store-data (output inst))
(setf *last-instruction* (list :store (output inst))))
(defmethod compile-ir ((inst ir-getconst))
(emit-lda (cons :immediate (input inst)))
(emit-store-data (output inst))
(setf *last-instruction* (list :store (output inst))))
(defmethod compile-ir ((inst ir-call))
(loop :for arg :in (inputs inst)
:for arg-index :from 0
:do (emit-load-data arg)
:unless (eql (strategy (allocation-details arg)) :direct-to-argvec)
:do (emit-sta :address (+ arg-index +argvec-offset+)))
:do (emit-sta (cons :address (+ arg-index +argvec-offset+))))
(emit-asm-instruction :opcode #x20 :operand (callee inst) :byte-length 3)
(emit-store-data (output inst)))
@ -198,7 +217,7 @@
(emit-asm-instruction :opcode #x4C
:operand (first (destinations inst))
:byte-length 3))
(setf *last-instruction* '(:jump)))
(setf *last-instruction* (list :jump (first (destinations inst)))))
(defmethod compile-ir ((inst ir-if))
(let ((next-iblock (next (iblock inst)))
@ -213,7 +232,7 @@
(emit-asm-instruction :opcode #xD0
:operand else-iblock
:byte-length 2)
(setf *last-instruction* '(:conditional))))
(setf *last-instruction* :branch)))
(defmethod compile-ir ((inst ir-test-equal))
(emit-load-data (first (inputs inst)))
@ -290,6 +309,7 @@
(defun compiled-bytes (start-instruction)
(declare (optimize (debug 3)))
(check-type start-instruction asm-object)
(loop :for asm-obj := start-instruction :then (next asm-obj)
:do (loop :until (or (null asm-obj) (typep asm-obj 'asm-instruction))
:do (setf asm-obj (next asm-obj)))
@ -308,12 +328,15 @@
(let ((*token-stream* (make-token-stream (tokenize source-stream))))
(let ((rb (with-compilation-setup (root-block builder)
(compile-node (match-syntax program) builder)
root-block)))
root-block))
(return-values '()))
(do-iblocks (ib rb)
(optim-prepare-direct-instructions ib)
;;(optim-commutative-constant-folding ib)
(optim-reorder-arguments ib)
(optim-direct-variable-use ib)
(optim-call-duplicate-args ib)
(optim-remove-unused ib))
(push rb return-values)
(let ((allocations (allocate-values rb)))
(optim-reuse-temporary-slots rb allocations)
(when print-ir-p
@ -327,6 +350,8 @@
(unless (null (varvec-index allocation))
(list (varvec-index allocation)))))
(terpri))
(push allocations return-values)
(when make-asm-p
(with-variable-allocations allocations
(compile-iblocks rb))))))))
(push (compile-iblocks rb) return-values))))
(values-list (nreverse return-values))))))

View file

@ -5,38 +5,65 @@
;;; 6502-related nonsense going on as well, it feels better to call it a value
;;; allocator instead.
(defparameter +accumulator-users+
'(or ir-operation ir-assign))
(defun calls-exist-between-p (start end)
(cond ((eql start end)
nil)
((typep start 'ir-call)
start)
((null start)
(error "Calls check crossed iblock boundary. This should not happen."))
(t
(calls-exist-between-p (next start) end))))
(labels
((iter (now end)
(cond
((eql now end)
nil)
((typep now 'ir-call)
now)
((null now)
(error "Calls check crossed iblock boundary. This should not happen."))
(t
(iter (next now) end)))))
(iter (next start) end)))
(defclass value-allocation ()
((%data :accessor data :initarg :data)
(%strategy :accessor strategy :initform :temporary-variable)
(%varvec-index :accessor varvec-index :initform nil)))
(defun optim-prepare-direct-instructions (iblock)
(do-instructions (inst iblock)
(when (typep inst 'ir-operation)
(assert (= (length (inputs inst)) 2))
(destructuring-bind (accumulator operand)
(inputs inst)
(when (typep (definition accumulator) '(or ir-fetchvar ir-getconst))
(move-instruction-above (definition accumulator) inst))
(when (typep (definition operand) '(or ir-fetchvar ir-getconst))
(when (typep (definition operand) 'ir-fetchvar)
(setf (inputs inst)
(substitute (input (definition operand)) operand
(inputs inst))))
(delete-instruction (definition operand)))))))
(defmethod choose-allocation-strategy ((alc value-allocation))
(setf
(strategy alc)
(let ((data (data alc)))
(cond ((null (users data))
:do-not-save)
((typep data 'ir-constant)
:constant)
:not-saved)
((typep data 'ir-variable)
:named-variable)
((and (eql (next (definition data)) (last-use data))
(typep (last-use data) 'ir-if))
:flags)
((and (not (typep data 'ir-reusable))
(typep (last-use data) +accumulator-users+)
(eql data (first (inputs (last-use data)))))
:accumulator)
((and (eql (next (definition data)) (last-use data))
(typep (last-use data) 'ir-if)
(typep (definition data) 'ir-comparison))
:branch)
((and (= (length (users data)) 1)
(typep (last-use data) 'ir-call)
(not (calls-exist-between-p (definition data) (last-use data))))
:direct-to-argvec)
((typep data 'ir-constant)
:constant)
(t
:temporary-variable)))))
@ -49,7 +76,12 @@
(find (output inst) allocations :key #'data))
(let ((allocation (make-instance 'value-allocation :data (output inst))))
(choose-allocation-strategy allocation)
(push allocation allocations)))))
(push allocation allocations)))
(loop :for input :in (inputs inst)
:unless (or (not (typep input 'ir-data)) (find input allocations :key #'data))
:do (let ((allocation (make-instance 'value-allocation :data input)))
(choose-allocation-strategy allocation)
(push allocation allocations)))))
(setf allocations (nreverse allocations))
(let ((counter -1)
(named (remove :named-variable allocations
@ -58,7 +90,7 @@
:test-not #'eql :key #'strategy)))
(loop :for allocation :in (append named temporary)
:do (setf (varvec-index allocation) (incf counter))))
(remove-if #'null allocations :key (lambda (a) (users (data a))))))
allocations))
(defun optim-reuse-temporary-slots (start-iblock allocations)
(let ((free '()))

View file

@ -2,7 +2,7 @@
(defclass ir-data ()
((%definition :accessor definition :initarg :definition)
(%last-use :accessor last-use)))
(%last-use :accessor last-use :initform nil)))
(defclass ir-result (ir-data)
((%user :accessor user :initarg :user :initform nil)))

View file

@ -80,8 +80,9 @@ solely due to its output being unused."))
(defclass ir-assign (ir-one-input ir-inst) ())
(defclass ir-operation (ir-inst) ())
(defclass ir-comparison (ir-operation) ())
(defclass ir-call (ir-operation)
(defclass ir-call (ir-inst)
((%callee :accessor callee :initarg :callee)))
(defmethod effect-used-p ((inst ir-call))
@ -89,6 +90,13 @@ solely due to its output being unused."))
t)
;;; A messy but quick way to define all the very similar arithmetic operations
(defvar *expr-inst-mappings* '())
(defun get-ir-expr-inst-type (node)
(loop :for (node-class . ir-class) :in *expr-inst-mappings*
:when (typep node node-class)
:return ir-class))
(macrolet ((ops ((&rest superclasses) &rest classes)
`(progn
,@(loop :for (class-name ignore symbol) :in classes
@ -97,14 +105,17 @@ solely due to its output being unused."))
(defmethod print-ir-inst ((inst ,class-name))
(format t " (~A~{ ~A~}) -> ~A~%"
,symbol (inputs inst) (output inst)))))
(defun get-ir-expr-inst-type (node)
(typecase node
,@(loop :for (ir-class node-class ignore) :in classes
:unless (null node-class)
:collect `(,node-class ',ir-class)))))))
(setf *expr-inst-mappings*
(append
*expr-inst-mappings*
',(loop :for (ir-class node-class ignore) :in classes
:unless (null node-class)
:collect `(,node-class . ,ir-class)))))))
(setf *expr-inst-mappings* '())
(ops (ir-operation) ; NILs indicate TODOs here for now
(ir-plus node-expr-plus "+") (ir-minus node-expr-minus "-") (ir-mult node-expr-multiply "*") (ir-div node-expr-divide "/"))
(ops (ir-comparison)
(ir-test-equal node-expr-test-equal "==")
(ir-test-not-equal node-expr-test-not-equal "!=")
(ir-test-less nil "<") (ir-test-greater nil ">")
(ir-test-less-or-equal nil "<=") (ir-test-greater-or-equal nil ">=")
(ir-plus node-expr-plus "+") (ir-minus node-expr-minus "-") (ir-mult node-expr-multiply "*") (ir-div node-expr-divide "/")))
(ir-test-less-or-equal nil "<=") (ir-test-greater-or-equal nil ">=")))

View file

@ -118,12 +118,12 @@ No guarantees of success are made, I just hope it's not incorrect."
(substitute new old (inputs user)))))))))
(defun optim-reorder-arguments (iblock)
"Puts the simpler non-operation arguments right above the operation that
uses them to assist in generating more direct 6502 code."
"Puts the simpler non-operation arguments right above the call or operation
that uses them to assist in generating more direct 6502 code."
(do-instructions (inst iblock)
(when (typep inst 'ir-operation)
(when (typep inst '(or ir-call ir-operation))
(loop :for input :in (inputs inst)
:when (and (not (typep (definition input) 'ir-operation))
:when (and (not (typep (definition input) '(or ir-operation ir-call)))
(not (typep input 'ir-reusable)))
:do (move-instruction-above (definition input) inst)))))
@ -169,25 +169,6 @@ though I'm pretty sure it can't anyway.")
(loop :for (new . old) :in replacements
:do (setf (inputs call) (substitute new old (inputs call)))))))))
(defun optim-direct-variable-use (iblock)
"Removes unnecessary uses of IR-FETCHVAR.
Some operations do not require their operands to be copied before use. CPU
instructions, for example. This optimization pass goes through and removes
IR-FETCHVAR instructions that would serve no purpose in compiled code."
;; TODO: Add more instructions to the candidates
(let ((candidates-type '(or ir-test-equal ir-plus ir-minus))
(to-remove '()))
(do-instructions (inst iblock)
(when (typep inst 'ir-fetchvar)
(let ((result (output inst))
(src (input inst)))
(when (and (not (typep (output inst) 'ir-reusable))
(typep (user result) candidates-type))
(let ((user (user (output inst))))
(setf (inputs user) (substitute src result (inputs user)))
(push inst to-remove))))))
(mapc #'delete-instruction to-remove)))
(defun compute-lifetime-knowledge (start-iblock)
(do-iblocks (iblock start-iblock)
(do-instructions (inst iblock)