Performance improvement in test by 0.1s. Made by better arithmetic for *40 in pixel_draw and a fast_unsafe parameter for most macros

This commit is contained in:
hugova 2025-03-05 21:02:54 +01:00
parent 982b6c7ea6
commit 76f29f6b81
5 changed files with 102 additions and 61 deletions

View file

@ -1,16 +1,46 @@
.macro Add_16 a_low, a_hi, b_low, b_hi ; a = a + b
CLC ;; Can use A as b_low!
;; And X or Y is b_hi
;; Can add ", !" to the end for it to run faster, but C=0 is not guaranteed!
.macro Add_16 a_low, a_hi, b_low, b_hi, fast_unsafe ; a = a + b
;; IF to run it fast
.ifblank fast_unsafe
CLC
.endif
;; If b_low != A
.if .match(.mid (0, 1, {b_low}), A )
.elseif .match(.mid (0, 1, {b_low}), X)
LDX
kssk
.match(.mid (0, 1, {b_low}), Y)
LDY
kkdkd
.else
LDA b_low
.endif
LDA b_low LDA b_low
ADC a_low ADC a_low
STA a_low STA a_low
LDA b_hi LDA b_hi
.if .match(.mid (0, 1, {b_low}), X )
LDX
kdkdkd
.elseif .match(.mid (0, 1, {b_low}), Y )
LDY
dkkdk
.else
LDA b_hi
.endif
ADC a_hi ADC a_hi
STA a_hi STA a_hi
.endmacro .endmacro
.macro Sub_16 a_low, a_hi, b_low, b_hi ; a = a - b
.macro Sub_16 a_low, a_hi, b_low, b_hi, fast_unsafe ; a = a - b
.ifblank fast_unsafe
SEC SEC
.endif
LDA a_low LDA a_low
SBC b_low SBC b_low
STA a_low STA a_low
@ -19,9 +49,11 @@
STA a_hi STA a_hi
.endmacro .endmacro
.macro mult_16 low_, hi_, fast_unsafe ; [low, hi] = [low, hi]*2
.macro mult_16 low_, hi_ ; [low, hi] = [low, hi]*2 ;; IF to run it fast
CLC .ifblank fast_unsafe
CLC
.endif
ROL low_ ROL low_
ROL hi_ ROL hi_
.endmacro .endmacro

View file

@ -1,4 +1,6 @@
;; Max 1.5s ;; Max 1.5s eller 0.9s vet ej villken
;; skriv time_start .. kod .. time_stop
;; och läs värdet i f1
.macro time_start .macro time_start
PHA PHA
LDA $DC08 ; Bit 0..3: Tenth seconds in BCD-format, others may be 0 or 1 LDA $DC08 ; Bit 0..3: Tenth seconds in BCD-format, others may be 0 or 1

View file

@ -31,7 +31,7 @@
mult_16 >V, <V; V = 2*(dx -dy) mult_16 >V, <V; V = 2*(dx -dy)
;dy_2 = dy*2 ;dy_2 = dy*2
mult_16 >dy_2, <dy_2 mult_16 >dy_2, <dy_2, !
;; D = 2*dy - dx ;; D = 2*dy - dx
;; In loop we have that D = D -V ;; In loop we have that D = D -V
@ -46,11 +46,8 @@
STA >D STA >D
LDA <dy_2 LDA <dy_2
STA <D STA <D
Add_16 >D, <D, #$ff, #$01 Add_16 >D, <D, #$ff, #$01, !
Sub_16 >D, <D, dx, #$00 Sub_16 >D, <D, dx, #$00
;hihi:
;jmp hihi
for_x: for_x:
jsr pixel_draw jsr pixel_draw
@ -59,16 +56,15 @@ for_x:
LDX X_pos LDX X_pos
CPX X_end CPX X_end
BEQ end BEQ end
;;If D < %00000010 00000000: case_2 ;;If D < %00000010 00000000: case_2
;;else case 1. ;;else case 1.
Lag_16 >D, <D, #$00, #$02, case_2 Lag_16 >D, <D, #$00, #$02, case_2
case_1: case_1:; C =1 so we can use !
INC Y_pos INC Y_pos
Sub_16 >D, <D, >V, <V; D = D - V Sub_16 >D, <D, >V, <V, !; D = D - V
JMP for_x JMP for_x
case_2: case_2: ;; C =0 because LAG_16 so we can use !
Add_16 >D, <D, >dy_2, <dy_2;D = D + 2*dy Add_16 >D, <D, >dy_2, <dy_2, !;D = D + 2*dy
JMP for_x JMP for_x
end: end:
RTS RTS

View file

@ -7,13 +7,13 @@
X_pos_ = $0E X_pos_ = $0E
Y_end_ = $10 Y_end_ = $10
X_end_ = $11 X_end_ = $11
LDA #$d0 LDA #$a0
STA X_pos_ STA X_pos_
LDA #$60 LDA #$30
STA Y_pos_ STA Y_pos_
LDA #$ff LDA #$ff
STA X_end STA X_end
LDA #$0 LDA #$30
STA Y_end STA Y_end
;; Short test for timing ;; Short test for timing
@ -28,7 +28,7 @@ time_start
jsr line jsr line
INC Y_end INC Y_end
LDA Y_end LDA Y_end
CMP #$90 CMP #$40
BEQ end__ BEQ end__
jmp @loop jmp @loop
end__: end__:

View file

@ -1,23 +1,29 @@
;;Screen print. Draws a pixel at a specified position. ;;Screen print. Draws a pixel at a specified position.
;; Destroys A X Y ;; Destroys A X Y
.proc pixel_draw; Draws a pixel at [Y = FB , X = FC, FD]. Y = 0 - 320, X= 0 - 200 .proc pixel_draw; Draws a pixel at [Y = FB , X = FC, FD]. Y = 0 - 320, X= 0 - 200
;;write_byte = 00010000, Y_pos = $FB
LDA $FC ; X (mod 8) X_pos = $FCFD
byte_to_paint = $FE
btp_mem_pos =$494A; byte to paint memory position
C = $7071
B =$7273
;; X = X_pos (mod 8)
LDA >X_pos ; X (mod 8)
AND #%00000111 AND #%00000111
TAX
;;Store pixel in byte ;;Store pixel in byte_to_paint
TAX TAX
LDA #%10000000 LDA #%10000000
INX INX
tt: @shift_btp:
DEX DEX
BEQ end__;Y=0 end this BEQ end__;X=0 end this
CLC CLC
ROR A ROR A
jmp tt jmp @shift_btp
end__: end__:
STA byte_to_paint
STA $FE
;;FIND THE POSITION IN MEMORY TO WRITE PIXEL ;;FIND THE POSITION IN MEMORY TO WRITE PIXEL
;; + + + + + > X ;; + + + + + > X
@ -26,58 +32,63 @@ end__:
;;\/ ;;\/
;; Y ;; Y
;; ;;
;; Let this position in memory be stored in [$49, $4A] temporarily
;;pos = x_offset ;;pos = x_offset
LDA #%11111000 LDA #%11111000
AND $FC AND >X_pos
STA $49 STA >btp_mem_pos
LDA $FD LDA <X_pos
STA $4A STA <btp_mem_pos
;;y_offset because chuncks aka y_offset_bc ;;The y_pos adds offset because chunk offsets + inside chunk offset.
;; Adding inside chunk offset
LDA #%00000111 ; A = y (mod 8) LDA #%00000111 ; A = y (mod 8)
AND $FB AND Y_pos ;; offset to add
;;pos += y_offset_bc ;;adding offset, same as pseudocode below
CLC ;;Add_16 >btp_mem_pos, <btp_mem_pos, A, #$00
ADC $49 CLC ; Y = b_low
STA $49 ADC >btp_mem_pos
STA >btp_mem_pos
LDA #$00 LDA #$00
ADC $4A ADC <btp_mem_pos ; C =0
STA $4A STA <btp_mem_pos
LDY $FB
LDA #$00 LDA #$00
STA $4B STA $4B
;;y =8 translates to 320 bytes. ;;y =8 translates to 320 bytes.
LDA #%11111000 ; A = y - [y (mod 8)] LDA #%11111000 ; A = y - [y (mod 8)]
AND $FB AND Y_pos
STA $FB STA >C
STA >B
LDA #$00
STA <C
STA <B
;;We need to A = A*40 =A * 2^3 * 5 ;;We need to calculate C*40. 40 = 2*2*2*(2^2 +1)
;;A = A*2^3 ;; _*2^2
mult_16 $FB, $4B mult_16 >C, <C, !
mult_16 $FB, $4B mult_16 >C, <C, !
mult_16 $FB, $4B
;;*5 ;; + _*1
Add_16 $49, $4A, $FB, $4B Add_16 >C, <C, >B, <B, !
Add_16 $49, $4A, $FB, $4B
Add_16 $49, $4A, $FB, $4B
Add_16 $49, $4A, $FB, $4B
Add_16 $49, $4A, $FB, $4B
STY $FB ;; *2*2*2
mult_16 >C, <C, !
mult_16 >C, <C, !
mult_16 >C, <C, !
Add_16 >btp_mem_pos, <btp_mem_pos, >C, <C, !
;;add offset for where bitmap is ;;add offset for where bitmap is
Add_16 $49, $4A, #<Bitmap, #>Bitmap Add_16 >btp_mem_pos, <btp_mem_pos, #<Bitmap, #>Bitmap, !
;;Let draw some stuff ;;Let draw some stuff
LDX #$00 LDX #$00
LDA $FE LDA byte_to_paint
ORA ($49, X) ORA (>btp_mem_pos, X)
STA ($49, X) STA (>btp_mem_pos, X)
RTS RTS
.endproc .endproc