Improved performance of line_down.s before loop.

This commit is contained in:
hugova 2025-03-26 22:00:09 +01:00
parent 89c9bc4129
commit 801cd55541
2 changed files with 51 additions and 43 deletions

View file

@ -78,8 +78,13 @@
ROL low_ ROL low_
ROL hi_ ROL hi_
.endif .endif
.endmacro
;; Mov_16: copies a two-byte value from (b_low, b_hi) to (a_low, a_hi).
;;   a_low, a_hi - destination operands, used as the operands of STA
;;   b_low, b_hi - source operands, used as the operands of LDA
;; The operands are pasted into LDA/STA as written by the caller, so the
;; addressing mode is whatever the caller supplies.
;; Uses the A register: on exit A holds the value loaded from b_hi.
.macro Mov_16 a_low, a_hi, b_low, b_hi
LDA b_low ; first byte: b_low -> a_low
STA a_low
LDA b_hi ; second byte: b_hi -> a_hi
STA a_hi
.endmacro .endmacro
;;Larger-than operation, uses the A register ;;Larger-than operation, uses the A register

View file

@ -9,7 +9,26 @@
;;NOTE THAT X_pos <= X_end, Y_pos <= Y_end. Max 45deg! ;;NOTE THAT X_pos <= X_end, Y_pos <= Y_end. Max 45deg!
.proc line_down .proc line_down
.include "line.inc"; Defines memory positions, ex X_pos
;; TEMPORARY
;; Hack: changing dx and dy makes the other line-draw routines misbehave, and the cause is not yet known.
;; This is of course temporary.
;.include "line.inc"; Defines memory positions, ex X_pos
X_end = $04
Y_end = $05
X_pos = $FC
Y_pos = $FB
dx = $0c
dy = $06
dy_2 = $0607
dx_2 = dy_2
V = $0809
D = $0a0b
;;These are also used in pixel_draw. Look there to find out more
byte_to_paint = $FE ;Byte with one 1 that corasponds to a pixel.
btp_mem_pos =$494A; byte to paint memory position ;Position of byte on screen
;;END TEMPORARY
;;We need to clear this memory ;;We need to clear this memory
LDA #$00 LDA #$00
STA <V STA <V
@ -18,18 +37,13 @@
;; V = 2*(dx -dy) ;; V = 2*(dx -dy)
SEC SEC
LDA Y_end
SBC Y_pos
STA >V
STA >dy_2; >dy_2 = dy. Needed for dy_2 (not for V)
LDA dx LDA dx
SEC SBC dy
SBC >V STA >V
STA >V; <V = dx - dy mult_16 >V, <V
mult_16 >V, <V; V = 2*(dx -dy)
;dy_2 = dy*2 ;dy_2 = dy*2
mult_16 >dy_2, <dy_2, ! mult_16 >dy_2, <dy_2 ;>dy_2 = dy (same address)
;; This is Bresenham's line algorithm, see Wikipedia below. ;; This is Bresenham's line algorithm, see Wikipedia below.
;;https://en.wikipedia.org/wiki/Bresenham%27s_line_algorithm ;;https://en.wikipedia.org/wiki/Bresenham%27s_line_algorithm
@ -42,44 +56,33 @@
;; and to its branch logic later in the loop. ;; and to its branch logic later in the loop.
;;D = 2*dy - dx + 2*255 ;;D = 2*dy - dx + 2*255
LDA >dy_2 Mov_16 >D, <D, >dy_2, <dy_2
STA >D
LDA <dy_2
STA <D
Add_16 >D, <D, #$ff, #$01, ! Add_16 >D, <D, #$ff, #$01, !
Sub_16 >D, <D, dx, #$00 Sub_16 >D, <D, dx, #$00
jsr pixel_draw ;;only used first pixel. after this relative position is abused jsr pixel_draw ;;only used first pixel. after this relative position is abused
LDX X_pos LDX X_pos
;; Self modifying code. Makes LDA instructions take 1 cycle less. selfmod:
;; Code will run without this but slower! ;; Self modifying code. Makes LDA and SBC instructions each take 1 cycle less.
;; Modifies LDA instructions for dy_2 and SBC for V ;; You can remove this if you run the loop without # at dy_2 and V.
;;Note: The offsets like +2 etc. are there because there are instructions between the label and the ;;Note: The offsets like +2 etc. are there because there are instructions between the label and the
;address that needs to be modified ;address that needs to be modified
;; dy_2
;;; dy_2 ;; Modifies LDA >dy_2
;;; Modifies LDA >dy_2 LDA >dy_2
;LDA #$A9 ; LDA (immediate) STA case_2 +1
;STA case_2 ;; Modifies LDA <dy_2
;LDA >dy_2 LDA <dy_2
;STA case_2 +1 STA case_2 +7
;;; Modifies LDA <dy_2 ;; V
;LDA #$A9 ; LDA (immediate) ;;Modidies SBC >V
;STA case_2 +6 ; ADC is +2 bytes, STA is + 2 bytes, Offset from before is +2 bytes. LDA >V
;LDA <dy_2 STA case_1 +3
;STA case_2 +7 ;; Modifies SBC <V
;;; V LDA <V
;;;Modidies SBC >V STA case_1 +9
;LDA #$E9 ;SBC (immediate) end_selfmod:
;STA case_1 +2;LDA is +2
;LDA >V
;STA case_1 +3
;;; Modifies SBC <V
;LDA #$E9 ;SBC (immediate)
;STA case_1 +8; Offset before +4 bytes, STA +2, LDA +2.
;LDA <V
;STA case_1 +9
for_x: for_x:
;; Paints A to address in |btp_mem_pos* + Y| ;; Paints A to address in |btp_mem_pos* + Y|
@ -104,7 +107,7 @@ increment_pixel_x_end:
;;else case 1. ;;else case 1.
Lag_16 >D, <D, #$00, #$02, case_2 Lag_16 >D, <D, #$00, #$02, case_2
case_1:; C =1 so we can use ! case_1:; C =1 so we can use !
Sub_16 >D, <D, >V, <V, ! ; D = D - V Sub_16 >D, <D, #>V, #<V, ! ; D = D - V
increment_y_pos: increment_y_pos:
INY ; Increment Y pos inside the buffer INY ; Increment Y pos inside the buffer
CPY #$08 CPY #$08
@ -116,7 +119,7 @@ move_8px_down: ; Z=1 --> C=1
JMP for_x JMP for_x
increment_y_pos_end: increment_y_pos_end:
case_2: ;; C =0 because LAG_16 so we can use ! case_2: ;; C =0 because LAG_16 so we can use !
Add_16 >D, <D, >dy_2, <dy_2, ! ;D = D + 2*dy Add_16 >D, <D, #>dy_2, #<dy_2, ! ;D = D + 2*dy
JMP for_x JMP for_x
end: end:
RTS RTS