Improved performance of line_down.s before loop.

This commit is contained in:
hugova 2025-03-26 22:00:09 +01:00
parent 89c9bc4129
commit 801cd55541
2 changed files with 51 additions and 43 deletions

View file

@ -78,8 +78,13 @@
ROL low_
ROL hi_
.endif
.endmacro
;;Mov_16: copies a two-byte value from (b_low, b_hi) to (a_low, a_hi).
;;The byte named *_low is moved first, then the byte named *_hi.
;;Uses the A register: A is left holding the value loaded from b_hi,
;;and the N/Z flags reflect that last LDA.
.macro Mov_16 a_low, a_hi, b_low, b_hi
LDA b_low ; first byte: b_low -> a_low
STA a_low
LDA b_hi ; second byte: b_hi -> a_hi
STA a_hi
.endmacro
;;Larger-than operation, uses the A register

View file

@ -9,7 +9,26 @@
;;NOTE THAT X_pos <= X_end, Y_pos <= Y_end. Max 45deg!
.proc line_down
.include "line.inc"; Defines memory positions, ex X_pos
;; TEMPORARY
;; Hack: changing dx and dy makes other line draws buggy, and I don't know why.
;; This is of course temporary.
;.include "line.inc"; Defines memory positions, ex X_pos
;; Memory positions used by line_down, defined here instead of through line.inc.
;; The four-hex-digit values below pack TWO one-byte addresses into one constant;
;; the code picks them apart with the byte operators:
;;   >NAME = high byte of the constant, <NAME = low byte of the constant.
;; Example: V = $0809  -->  >V is address $08, <V is address $09.
X_end = $04
Y_end = $05
X_pos = $FC
Y_pos = $FB
dx = $0c
dy = $06
dy_2 = $0607 ; >dy_2 = $06 (same address as dy), <dy_2 = $07
dx_2 = dy_2 ; alias: dx_2 uses the same two addresses as dy_2
V = $0809 ; >V = $08, <V = $09
D = $0a0b ; >D = $0a, <D = $0b
;;These are also used in pixel_draw. Look there to find out more
byte_to_paint = $FE ;Byte with one 1 that corresponds to a pixel.
btp_mem_pos =$494A; byte to paint memory position ;Position of byte on screen
;;END TEMPORARY
;;We need to clear this memory
LDA #$00
STA <V
@ -18,18 +37,13 @@
;; V = 2*(dx -dy)
SEC
LDA Y_end
SBC Y_pos
STA >V
STA >dy_2; >dy_2 = dy. Needed for dy_2 (not for V)
LDA dx
SEC
SBC >V
STA >V; <V = dx - dy
mult_16 >V, <V; V = 2*(dx -dy)
SBC dy
STA >V
mult_16 >V, <V
;dy_2 = dy*2
mult_16 >dy_2, <dy_2, !
mult_16 >dy_2, <dy_2 ;>dy_2 = dy (same address)
;; This is Bresenham's line algorithm, see Wikipedia below.
;;https://en.wikipedia.org/wiki/Bresenham%27s_line_algorithm
@ -42,44 +56,33 @@
;; and to its branch logic later in the loop.
;;D = 2*dy - dx + 2*255
LDA >dy_2
STA >D
LDA <dy_2
STA <D
Mov_16 >D, <D, >dy_2, <dy_2
Add_16 >D, <D, #$ff, #$01, !
Sub_16 >D, <D, dx, #$00
jsr pixel_draw ;;only used first pixel. after this relative position is abused
LDX X_pos
;; Self modifying code. Makes LDA instructions take 1 cycle less.
;; Code will run without this but slower!
;; Modifies LDA instructions for dy_2 and SBC for V
selfmod:
;; Self modifying code. Makes LDA and SBC instructions each take 1 cycle less.
;; You can remove this if you run the loop without # at dy_2 and V.
;;Note: The offsets like +2 etc. are there because there are instructions between the label and the
;address that needs to be modified
;;; dy_2
;;; Modifies LDA >dy_2
;LDA #$A9 ; LDA (immediate)
;STA case_2
;LDA >dy_2
;STA case_2 +1
;;; Modifies LDA <dy_2
;LDA #$A9 ; LDA (immediate)
;STA case_2 +6 ; ADC is +2 bytes, STA is + 2 bytes, Offset from before is +2 bytes.
;LDA <dy_2
;STA case_2 +7
;;; V
;;;Modifies SBC >V
;LDA #$E9 ;SBC (immediate)
;STA case_1 +2;LDA is +2
;LDA >V
;STA case_1 +3
;;; Modifies SBC <V
;LDA #$E9 ;SBC (immediate)
;STA case_1 +8; Offset before +4 bytes, STA +2, LDA +2.
;LDA <V
;STA case_1 +9
;; dy_2
;; Modifies LDA >dy_2
LDA >dy_2
STA case_2 +1
;; Modifies LDA <dy_2
LDA <dy_2
STA case_2 +7
;; V
;;Modifies SBC >V
LDA >V
STA case_1 +3
;; Modifies SBC <V
LDA <V
STA case_1 +9
end_selfmod:
for_x:
;; Paints A to address in |btp_mem_pos* + Y|
@ -104,7 +107,7 @@ increment_pixel_x_end:
;;else case 1.
Lag_16 >D, <D, #$00, #$02, case_2
case_1:; C =1 so we can use !
Sub_16 >D, <D, >V, <V, ! ; D = D - V
Sub_16 >D, <D, #>V, #<V, ! ; D = D - V
increment_y_pos:
INY ; Increment Y pos inside the buffer
CPY #$08
@ -116,7 +119,7 @@ move_8px_down: ; Z=1 --> C=1
JMP for_x
increment_y_pos_end:
case_2: ;; C =0 because LAG_16 so we can use !
Add_16 >D, <D, >dy_2, <dy_2, ! ;D = D + 2*dy
Add_16 >D, <D, #>dy_2, #<dy_2, ! ;D = D + 2*dy
JMP for_x
end:
RTS