Performance improvement in test by 0.1s. Made by better arithmetic for *40 in draw_pixel and a fast_unsafe parameter for most macros

This commit is contained in:
hugova 2025-03-05 21:02:54 +01:00
parent 982b6c7ea6
commit 76f29f6b81
5 changed files with 102 additions and 61 deletions

View file

@ -1,16 +1,46 @@
;; Add_16 a_low, a_hi, b_low, b_hi [, fast_unsafe]      ; a = a + b (16 bit)
;;   a_low, a_hi : memory operands holding the 16-bit destination; updated in place.
;;   b_low       : any operand LDA accepts (address or #immediate), or the
;;                 register name A, X or Y when the value is already in it.
;;   b_hi        : any operand LDA accepts, or the register name X or Y.
;;   fast_unsafe : pass any non-blank token (e.g. "!") to skip the leading CLC.
;;                 The caller must then guarantee C = 0 on entry, otherwise the
;;                 result is one too large.
;; Clobbers A and the flags. X and Y are left unchanged.
;; On exit C holds the carry out of the high-byte addition.
.macro Add_16 a_low, a_hi, b_low, b_hi, fast_unsafe
    .ifblank fast_unsafe
        CLC
    .endif
    ;; Low byte: bring b_low into A unless it is already there.
    ;; TXA/TYA only touch N and Z, so the carry set up above is preserved.
    .if .match(.mid (0, 1, {b_low}), A)
        ;; b_low is already in A, nothing to load
    .elseif .match(.mid (0, 1, {b_low}), X)
        TXA
    .elseif .match(.mid (0, 1, {b_low}), Y)
        TYA
    .else
        LDA b_low
    .endif
    ADC a_low
    STA a_low
    ;; High byte: select on b_hi (not b_low); the carry from the low byte
    ;; flows straight into this ADC.
    .if .match(.mid (0, 1, {b_hi}), X)
        TXA
    .elseif .match(.mid (0, 1, {b_hi}), Y)
        TYA
    .else
        LDA b_hi
    .endif
    ADC a_hi
    STA a_hi
.endmacro
.macro Sub_16 a_low, a_hi, b_low, b_hi ; a = a - b
.macro Sub_16 a_low, a_hi, b_low, b_hi, fast_unsafe ; a = a - b
.ifblank fast_unsafe
SEC
.endif
LDA a_low
SBC b_low
STA a_low
@ -19,9 +49,11 @@
STA a_hi
.endmacro
;; mult_16 low_, hi_ [, fast_unsafe]      ; [low_, hi_] = [low_, hi_] * 2
;;   low_, hi_   : memory operands holding the 16-bit value; shifted in place.
;;   fast_unsafe : accepted for backward compatibility and ignored. ASL always
;;                 shifts a 0 into bit 0, so no CLC is needed and the result
;;                 no longer depends on the carry flag at entry.
;; Clobbers the flags only. On exit C holds the bit shifted out of hi_.
.macro mult_16 low_, hi_, fast_unsafe
    ASL low_        ; bit 0 <- 0, C <- old bit 7 of low_
    ROL hi_         ; bit 0 <- C, C <- old bit 7 of hi_
.endmacro

View file

@ -1,4 +1,6 @@
;; Max 1.5s
;; Max 1.5s or 0.9s, not sure which
;; write time_start .. code .. time_stop
;; and read the value in f1
.macro time_start
PHA
LDA $DC08 ; Bit 0..3: Tenth seconds in BCD-format, others may be 0 or 1

View file

@ -31,7 +31,7 @@
mult_16 >V, <V; V = 2*(dx -dy)
;dy_2 = dy*2
mult_16 >dy_2, <dy_2
mult_16 >dy_2, <dy_2, !
;; D = 2*dy - dx
;; In loop we have that D = D -V
@ -46,11 +46,8 @@
STA >D
LDA <dy_2
STA <D
Add_16 >D, <D, #$ff, #$01
Add_16 >D, <D, #$ff, #$01, !
Sub_16 >D, <D, dx, #$00
;hihi:
;jmp hihi
for_x:
jsr pixel_draw
@ -59,16 +56,15 @@ for_x:
LDX X_pos
CPX X_end
BEQ end
;;If D < %00000010 00000000: case_2
;;else case 1.
Lag_16 >D, <D, #$00, #$02, case_2
case_1:
case_1:; C =1 so we can use !
INC Y_pos
Sub_16 >D, <D, >V, <V; D = D - V
Sub_16 >D, <D, >V, <V, !; D = D - V
JMP for_x
case_2:
Add_16 >D, <D, >dy_2, <dy_2;D = D + 2*dy
case_2: ;; C =0 because LAG_16 so we can use !
Add_16 >D, <D, >dy_2, <dy_2, !;D = D + 2*dy
JMP for_x
end:
RTS

View file

@ -7,13 +7,13 @@
X_pos_ = $0E
Y_end_ = $10
X_end_ = $11
LDA #$d0
LDA #$a0
STA X_pos_
LDA #$60
LDA #$30
STA Y_pos_
LDA #$ff
STA X_end
LDA #$0
LDA #$30
STA Y_end
;; Short test for timing
@ -28,7 +28,7 @@ time_start
jsr line
INC Y_end
LDA Y_end
CMP #$90
CMP #$40
BEQ end__
jmp @loop
end__:

View file

@ -1,23 +1,29 @@
;;Screen print. Draws a pixel at a specified position.
;; Destroys A X Y
.proc pixel_draw; Draws a pixel at [Y = FB , X = FC, FD]. Y = 0 - 320, X= 0 - 200
;;write_byte = 00010000,
LDA $FC ; X (mod 8)
Y_pos = $FB
X_pos = $FCFD
byte_to_paint = $FE
btp_mem_pos =$494A; byte to paint memory position
C = $7071
B =$7273
;; X = X_pos (mod 8)
LDA >X_pos ; X (mod 8)
AND #%00000111
TAX
;;Store pixel in byte
;;Store pixel in byte_to_paint
TAX
LDA #%10000000
INX
tt:
@shift_btp:
DEX
BEQ end__;Y=0 end this
BEQ end__;X=0 end this
CLC
ROR A
jmp tt
jmp @shift_btp
end__:
STA $FE
STA byte_to_paint
;;FIND THE POSITION IN MEMORY TO WRITE PIXEL
;; + + + + + > X
@ -26,58 +32,63 @@ end__:
;;\/
;; Y
;;
;; Let this position in memory be stored in [$49, $4A] temporarily
;;pos = x_offset
LDA #%11111000
AND $FC
STA $49
AND >X_pos
STA >btp_mem_pos
LDA $FD
STA $4A
LDA <X_pos
STA <btp_mem_pos
;;y_offset because chunks aka y_offset_bc
;;The y_pos adds offset because chunk offsets + inside chunk offset.
;; Adding inside chunk offset
LDA #%00000111 ; A = y (mod 8)
AND $FB
AND Y_pos ;; offset to add
;;pos += y_offset_bc
CLC
ADC $49
STA $49
;;adding offset, same as the pseudocode below
;;Add_16 >btp_mem_pos, <btp_mem_pos, A, #$00
CLC ; Y = b_low
ADC >btp_mem_pos
STA >btp_mem_pos
LDA #$00
ADC $4A
STA $4A
ADC <btp_mem_pos ; C =0
STA <btp_mem_pos
LDY $FB
LDA #$00
STA $4B
;;y =8 translates to 320 bytes.
LDA #%11111000 ; A = y - [y (mod 8)]
AND $FB
STA $FB
AND Y_pos
STA >C
STA >B
LDA #$00
STA <C
STA <B
;;We need to A = A*40 =A * 2^3 * 5
;;A = A*2^3
mult_16 $FB, $4B
mult_16 $FB, $4B
mult_16 $FB, $4B
;;We need to calculate C*40. 40 = 2*2*2*(2^2 +1)
;; _*2^2
mult_16 >C, <C, !
mult_16 >C, <C, !
;;*5
Add_16 $49, $4A, $FB, $4B
Add_16 $49, $4A, $FB, $4B
Add_16 $49, $4A, $FB, $4B
Add_16 $49, $4A, $FB, $4B
Add_16 $49, $4A, $FB, $4B
;; + _*1
Add_16 >C, <C, >B, <B, !
STY $FB
;; *2*2*2
mult_16 >C, <C, !
mult_16 >C, <C, !
mult_16 >C, <C, !
Add_16 >btp_mem_pos, <btp_mem_pos, >C, <C, !
;;add offset for where bitmap is
Add_16 $49, $4A, #<Bitmap, #>Bitmap
Add_16 >btp_mem_pos, <btp_mem_pos, #<Bitmap, #>Bitmap, !
;;Let draw some stuff
LDX #$00
LDA $FE
ORA ($49, X)
STA ($49, X)
LDA byte_to_paint
ORA (>btp_mem_pos, X)
STA (>btp_mem_pos, X)
RTS
.endproc