Optimise line_* by using two's complement (see the previous commit)

This commit is contained in:
hugova 2025-07-23 18:03:06 +02:00
parent b23a63ddf5
commit 3fe7cdb8ac
8 changed files with 209 additions and 229 deletions

View file

@ -1,5 +1,5 @@
al 001444 .__BSS_LOAD__ al 001319 .__BSS_LOAD__
al 001444 .__BSS_RUN__ al 001319 .__BSS_RUN__
al 000000 .__BSS_SIZE__ al 000000 .__BSS_SIZE__
al 000001 .__EXEHDR__ al 000001 .__EXEHDR__
al 000001 .__LOADADDR__ al 000001 .__LOADADDR__
@ -7,162 +7,153 @@ al 000000 .__ZP_FILEOFFS__
al 000002 .__ZP_LAST__ al 000002 .__ZP_LAST__
al 0000FE .__ZP_SIZE__ al 0000FE .__ZP_SIZE__
al 000002 .__ZP_START__ al 000002 .__ZP_START__
al 001404 .big_y_offset al 0012D9 .big_y_offset
al 0013CE .binary_factor al 0012A3 .binary_factor
al 0013C3 .log al 001298 .log
al 0013BA .inverse_factor_value al 00128F .inverse_factor_value
al 0013B9 .for_i_end al 00128E .for_i_end
al 0013B1 .R_pos al 001286 .R_pos
al 0013B5 .R_neg al 00128A .R_neg
al 0013A4 .for_i al 001279 .for_i
al 00139A .div al 00126F .div
al 001399 .endloop al 00126E .endloop
al 00138C .loop al 001261 .loop
al 00138E .start al 001263 .start
al 001387 .mult al 00125C .mult
al 001377 .change_length al 00124C .change_length
al 001380 .y_overflow al 001255 .y_overflow
al 001370 .loop al 001245 .loop
al 00136E .memcpy al 001243 .memcpy
al 00132F .big_set_end al 001204 .big_set_end
al 001337 .small_set al 00120C .small_set
al 001030 .big_set al 000F05 .big_set
al 001029 .memset al 000EFE .memset
al 000FFF .move_data al 000ED4 .move_data
al 000FD6 .calculate_screen_position al 000EAB .calculate_screen_position
al 000FBC .calculate_petski_position al 000E91 .calculate_petski_position
al 000FBC .char_draw al 000E91 .char_draw
al 000FB3 .calc_byte_to_paint al 000E88 .calc_byte_to_paint
al 000F95 .pixel_calc al 000E6A .pixel_calc
al 000F90 .draw al 000E65 .draw
al 000F88 .calc_byte_to_paint al 000E5D .calc_byte_to_paint
al 000F6A .pixel_draw al 000E3F .pixel_draw
al 000F68 .draw_lower_triangle al 000E3D .draw_lower_triangle
al 000F5C .draw_upper_triangle al 000E31 .draw_upper_triangle
al 000F45 .BC_overflow al 000E1A .BC_overflow
al 000F5C .BC_overflow_end al 000E31 .BC_overflow_end
al 000F28 .AB_overflow al 000DFD .AB_overflow
al 000F3C .AB_overflow_end al 000E11 .AB_overflow_end
al 000F1F .triangle al 000DF4 .triangle
al 000F1E .end al 000DF3 .end
al 000EF9 .qbb_y_overflow al 000DCE .qbb_y_overflow
al 000F15 .qbb_y_end al 000DEA .qbb_y_end
al 000EF3 .qbb_y al 000DC8 .qbb_y
al 000ED9 .qdb_overflow al 000DAE .qdb_overflow
al 000EF3 .qdb_y_end al 000DC8 .qdb_y_end
al 000ED5 .qdb_y al 000DAA .qdb_y
al 000EBB .qca_x_overflow al 000D90 .qca_x_overflow
al 000ED5 .qca_x_end al 000DAA .qca_x_end
al 000EB7 .qca_x al 000D8C .qca_x
al 000E9D .qaa_x_overflow al 000D72 .qaa_x_overflow
al 000EB7 .qaa_x_end al 000D8C .qaa_x_end
al 000E99 .qaa_x al 000D6E .qaa_x
al 000E95 .change_x al 000D6A .change_x
al 000F15 .endif al 000DEA .endif
al 000E95 .if al 000D6A .if
al 000E6C .qcb_x_overflow al 000D41 .qcb_x_overflow
al 000E87 .qcb_x_end al 000D5C .qcb_x_end
al 000E68 .qcb_x al 000D3D .qcb_x
al 000E4E .qdb_x_overflow al 000D23 .qdb_x_overflow
al 000E68 .qdb_x_end al 000D3D .qdb_x_end
al 000E4A .qdb_x al 000D1F .qdb_x
al 000E2E .qda_y_overflow al 000D03 .qda_y_overflow
al 000E4A .qda_y_end al 000D1F .qda_y_end
al 000E28 .qda_y al 000CFD .qda_y
al 000E0E .qaa_y_underflow al 000CE3 .qaa_y_underflow
al 000E28 .qaa_y_end al 000CFD .qaa_y_end
al 000E0A .qaa_y al 000CDF .qaa_y
al 000E08 .change_Y al 000CDD .change_Y
al 000E00 .draw_qab al 000CD5 .draw_qab
al 000DF8 .draw_qbb al 000CCD .draw_qbb
al 000DF2 .draw_qdb al 000CC7 .draw_qdb
al 000DEA .draw_qcb al 000CBF .draw_qcb
al 000DE4 .draw_qca al 000CB9 .draw_qca
al 000DDC .draw_qda al 000CB1 .draw_qda
al 000DD6 .draw_qba al 000CAB .draw_qba
al 000DCE .draw_qaa al 000CA3 .draw_qaa
al 000DCE .draw_pixels al 000CA3 .draw_pixels
al 000DCE .while_x_bigger_then_y al 000CA3 .while_x_bigger_then_y
al 000DB4 .draw_right_px_in_circle al 000C89 .draw_right_px_in_circle
al 000D93 .draw_lower_px_in_circle al 000C68 .draw_lower_px_in_circle
al 000D75 .draw_left_px_in_circle al 000C4A .draw_left_px_in_circle
al 000D57 .draw_upper_px_in_circle al 000C2C .draw_upper_px_in_circle
al 000D4B .circle al 000C20 .circle
al 000D2A .move_8px_left al 000BFD .move_8px_left
al 000D26 .increment_pixel_x al 000BF9 .increment_pixel_x
al 000D1C .LOCAL-MACRO_SYMBOL-002F al 000C1F .end
al 000D4A .end al 000BD8 .move_8px_up
al 000CFD .move_8px_up al 000BE6 .decrement_y_pos_end
al 000D0B .decrement_y_pos_end al 000BD5 .decrement_y_pos
al 000CFA .decrement_y_pos al 000BCF .for_y
al 000CF4 .for_y al 000BBA .end_selfmod
al 000CDF .end_selfmod al 000BED .case_1
al 000D1C .case_1 al 000C0F .case_2
al 000D3B .case_2 al 000BA6 .selfmod
al 000CCB .selfmod al 000B66 .decrement_y_pos_end
al 000C90 .decrement_y_pos_end al 000B54 .move_8px_up
al 000C7F .move_8px_up al 000B51 .decrement_y_pos
al 000C7C .decrement_y_pos al 000B75 .end
al 000C72 .LOCAL-MACRO_SYMBOL-0021 al 000B30 .move_8px_left
al 000C9F .end al 000B3E .increment_pixel_x_end
al 000C53 .move_8px_left al 000B2C .increment_pixel_x
al 000C61 .increment_pixel_x_end al 000B26 .for_x
al 000C4F .increment_pixel_x al 000B12 .end_selfmod
al 000C49 .for_x al 000B45 .case_1
al 000C35 .end_selfmod al 000B66 .case_2
al 000C72 .case_1 al 000AFE .selfmod
al 000C90 .case_2 al 000AB9 .move_8px_left
al 000C21 .selfmod al 000AB5 .increment_pixel_x
al 000BD4 .move_8px_left al 000ADA .end
al 000BD0 .increment_pixel_x al 000A94 .move_8px_down
al 000BC6 .LOCAL-MACRO_SYMBOL-0017 al 000AA2 .increment_y_pos_end
al 000BF5 .end al 000A91 .increment_y_pos
al 000BA7 .move_8px_down al 000A8B .for_y
al 000BB5 .increment_y_pos_end al 000A6A .end_selfmod
al 000BA4 .increment_y_pos al 000AA9 .case_1
al 000B9E .for_y al 000ACA .case_2
al 000B7D .end_selfmod al 000A56 .selfmod
al 000BC6 .case_1 al 000A16 .decrement_y_pos_end
al 000BE6 .case_2 al 000A04 .move_8px_up
al 000B69 .selfmod al 000A01 .decrement_y_pos
al 000B2E .decrement_y_pos_end al 000A25 .end
al 000B1D .move_8px_up al 0009E0 .move_8px_right
al 000B1A .decrement_y_pos al 0009EE .decrement_pixel_x_end
al 000B10 .LOCAL-MACRO_SYMBOL-0009 al 0009DC .decrement_pixel_x
al 000B3D .end al 0009D6 .for_x
al 000AF1 .move_8px_right al 0009B6 .end_selfmod
al 000AFF .decrement_pixel_x_end al 0009F5 .case_1
al 000AED .decrement_pixel_x al 000A16 .case_2
al 000AE7 .for_x al 0009A2 .selfmod
al 000AC7 .end_selfmod al 000974 .line_down
al 000B10 .case_1 al 000A26 .line_down_inv
al 000B2E .case_2 al 00096C .steep_
al 000AB3 .selfmod al 000970 .shallow_
al 000A88 .line_down al 000ADB .line_up
al 000B3E .line_down_inv al 000B76 .line_up_inv
al 000A80 .steep_ al 00095A .steep
al 000A84 .shallow_ al 00095E .shallow
al 000BF6 .line_up al 000954 .up
al 000CA0 .line_up_inv al 000964 .down
al 000A6E .steep al 00094B .dx_no_underflow
al 000A72 .shallow al 000940 .line
al 000A68 .up al 00093D .exit
al 000A78 .down al 000928 .@loop
al 000A5F .dx_no_underflow al 000916 .long_line_test_b
al 000A54 .line al 000901 .clear_screen_
al 000A51 .exit al 0008EA .@loop
al 000A21 .end_test_y al 0008D8 .long_line_test_a
al 000A15 .test_y al 0008C3 .clear_screen
al 000A0B .end_test_x al 0008C0 .hihi
al 0009FF .test_x al 0008A9 .@loop
al 0009D0 .@loop
al 0009BE .long_line_test_b
al 0009A9 .clear_screen_
al 000992 .@loop
al 000980 .long_line_test_a
al 00096B .clear_screen
al 000954 .@loop
al 0008FE .loop
al 0008C5 .loop
al 000897 .NMI_routine_end al 000897 .NMI_routine_end
al 000896 .NMI_routine al 000896 .NMI_routine

View file

@ -16,7 +16,6 @@
LDA #$00 LDA #$00
STA V +1 STA V +1
STA dy_2 +1 STA dy_2 +1
STA $FD ; for pixel_draw
;; V = 2*(dx -dy) ;; V = 2*(dx -dy)
SEC SEC
@ -27,7 +26,7 @@
;dy_2 = dy*2 ;dy_2 = dy*2
Mult_16 dy_2, dy_2 +1 ;dy_2 = dy (same address) Mult_16 dy_2, dy_2 +1 ;dy_2 = dy (same address)
;; D = dy_2 - x. (signed 16-bit) ;; D = dy_2 - dx. (signed 16-bit)
SEC SEC
LDA dy_2 LDA dy_2
SBC dx SBC dx
@ -39,7 +38,6 @@
;; because the C flag has the wrong value, we let dy_2 be 1 too small ;; because the C flag has the wrong value, we let dy_2 be 1 too small
Sub_16 dy_2, dy_2 +1, #$01,#$00 Sub_16 dy_2, dy_2 +1, #$01,#$00
selfmod: selfmod:
;; Self modifying code. Makes LDA and SBC instructions each take 1 cycle less. ;; Self modifying code. Makes LDA and SBC instructions each take 1 cycle less.
;; You can remove this if you run the loop without # at dy_2 and V. ;; You can remove this if you run the loop without # at dy_2 and V.

View file

@ -14,7 +14,6 @@
LDA #$00 LDA #$00
STA V + 1 STA V + 1
STA dx_2 + 1 STA dx_2 + 1
STA $FD
SEC SEC
LDA dy LDA dy
@ -24,30 +23,27 @@
Mult_16 dx_2, dx_2 + 1 Mult_16 dx_2, dx_2 + 1
LDA dx_2 ;; D = dx_2 - dy. (signed 16-bit)
ADC #$ff
TAX
LDA dx + 1
ADC #$01
TAY
SEC SEC
TXA LDA dx_2
SBC dy SBC dy
STA D STA D
TYA LDA dx_2 + 1
SBC #$00 SBC #$00
STA D + 1 STA D + 1
;; because the C flag has the wrong value, we let dx_2 be 1 too small
Sub_16 dx_2, dx_2 +1, #$01,#$00
selfmod: selfmod:
LDA dx_2 LDA dx_2
STA case_2 + 1 STA case_2 + 1
LDA dx_2 + 1 LDA dx_2 + 1
STA case_2 + 7 STA case_2 + 7
LDA V LDA V
STA case_1 + 1 STA case_1 + 3
LDA V + 1 LDA V + 1
STA case_1 + 7 STA case_1 + 9
end_selfmod: end_selfmod:
LDA X_end LDA X_end
STA X_pos STA X_pos
@ -60,7 +56,7 @@ end_selfmod:
Sub_16 btp_mem_pos, btp_mem_pos + 1, #$00, #$00, ! Sub_16 btp_mem_pos, btp_mem_pos + 1, #$00, #$00, !
LDX dy LDX dy
CLC CLC
for_y: for_y: ; C =0
LDA byte_to_paint LDA byte_to_paint
ORA (btp_mem_pos), Y ORA (btp_mem_pos), Y
STA (btp_mem_pos), Y STA (btp_mem_pos), Y
@ -72,20 +68,22 @@ move_8px_down:
Sub_16 btp_mem_pos, btp_mem_pos + 1, #$3f ,#$01, ! Sub_16 btp_mem_pos, btp_mem_pos + 1, #$3f ,#$01, !
increment_y_pos_end: increment_y_pos_end:
DEX DEX
;CPX Y_end
BEQ end BEQ end
Lag_16 D, D + 1, #$00, #$02, case_2 ;Lag_16 D, D + 1, #$00, #$02, case_2
LDA D + 1
BMI case_2
case_1: case_1:
Sub_16_A D, D + 1, #V, #V + 1, ! Sub_16 D, D + 1, #V, #V + 1, !
increment_pixel_x: increment_pixel_x:
ASL byte_to_paint ASL byte_to_paint
BCC for_y BCC for_y
move_8px_left: move_8px_left:
Sub_16 btp_mem_pos, btp_mem_pos + 1, #$08, #$00 Sub_16 btp_mem_pos, btp_mem_pos + 1, #$08, #$00,!
ROL byte_to_paint ROL byte_to_paint
JMP for_y JMP for_y
case_2: case_2:
Add_16 D, D + 1, #dx_2, #dx_2 + 1, ! ;D = D + 2*dx Add_16 D, D + 1, #dx_2, #dx_2 + 1,! ;D = D + 2*dx
CLC ; for_y must always be entered with C = 0
JMP for_y JMP for_y
end: end:
RTS RTS

View file

@ -30,6 +30,9 @@
CMP #$bb CMP #$bb
bne @loop bne @loop
hihi:
jmp hihi
clear_screen: clear_screen:
;;Lets clear bitmap ;;Lets clear bitmap
VIC_bank = $4000 VIC_bank = $4000

View file

@ -15,8 +15,6 @@
LDA #$00 LDA #$00
STA V + 1 STA V + 1
STA dy_2 + 1
STA $FD
SEC SEC
LDA dx LDA dx
@ -26,18 +24,12 @@
Mult_16 dy_2, dy_2 + 1 Mult_16 dy_2, dy_2 + 1
LDA dy_2 ;; D = dy_2 - dx. (signed 16-bit)
ADC #$ff
TAX
LDA dy + 1
ADC #$01
TAY
SEC SEC
TXA LDA dy_2
SBC dx SBC dx
STA D STA D
TYA LDA dy_2 + 1
SBC #$00 SBC #$00
STA D + 1 STA D + 1
@ -47,9 +39,9 @@ selfmod:
LDA dy_2 + 1 LDA dy_2 + 1
STA case_2 + 7 STA case_2 + 7
LDA V LDA V
STA case_1 + 1 STA case_1 + 3
LDA V + 1 LDA V + 1
STA case_1 + 7 STA case_1 + 9
end_selfmod: end_selfmod:
JSR pixel_calc ;;only used first pixel. after this relative position is abused JSR pixel_calc ;;only used first pixel. after this relative position is abused
STA byte_to_paint STA byte_to_paint
@ -71,19 +63,23 @@ increment_pixel_x_end:
DEX DEX
;CPX X_end ;CPX X_end
BEQ end BEQ end
Lag_16 D, D + 1, #$00, #$02, case_2 ;;If D < 0 goto case_2
LDA D + 1
BMI case_2
case_1: case_1:
Sub_16_A D, D + 1, #V, #V + 1,! Sub_16 D, D + 1, #V, #V + 1,!
decrement_y_pos: decrement_y_pos:
DEY DEY
BNE for_x BNE for_x
move_8px_up: move_8px_up:
LDY #$08 LDY #$08
Sub_16 btp_mem_pos, btp_mem_pos + 1, #$40, #$01, ! ; C = 0 or 1
Sub_16 btp_mem_pos, btp_mem_pos + 1, #$40, #$01
jmp for_x jmp for_x
decrement_y_pos_end: decrement_y_pos_end:
case_2: case_2:
Add_16 D, D + 1, #>dy_2, #<dy_2,! ; C =0
Add_16 D, D + 1, #dy_2, #dy_2 + 1,!
JMP for_x JMP for_x
end: end:
RTS RTS

View file

@ -15,7 +15,6 @@
LDA #$00 LDA #$00
STA V + 1 STA V + 1
STA dx_2 + 1 STA dx_2 + 1
STA $FD
SEC SEC
LDA dy LDA dy
@ -25,30 +24,27 @@
Mult_16 dx_2, dx_2 + 1 Mult_16 dx_2, dx_2 + 1
LDA dx_2 ;; D = dx_2 - dy. (signed 16-bit)
ADC #$ff
TAX
LDA dx + 1
ADC #$01
TAY
SEC SEC
TXA LDA dx_2
SBC dy SBC dy
STA D STA D
TYA LDA dx_2 + 1
SBC #$00 SBC #$00
STA D + 1 STA D + 1
;; because the C flag has the wrong value, we let dy_2 be 1 too small
Sub_16 dy_2, dy_2 +1, #$01,#$00
selfmod: selfmod:
LDA dx_2 LDA dx_2
STA case_2 + 1 STA case_2 + 1
LDA dx_2 + 1 LDA dx_2 + 1
STA case_2 + 7 STA case_2 + 7
LDA V LDA V
STA case_1 + 1 STA case_1 + 3
LDA V + 1 LDA V + 1
STA case_1 + 7 STA case_1 + 9
end_selfmod: end_selfmod:
JSR pixel_calc ;;only used first pixel. after this relative position is abused JSR pixel_calc ;;only used first pixel. after this relative position is abused
STA byte_to_paint STA byte_to_paint
@ -65,23 +61,25 @@ decrement_y_pos:
BNE decrement_y_pos_end BNE decrement_y_pos_end
move_8px_up: move_8px_up:
LDY #$08 LDY #$08
; C=0 Sub_16 btp_mem_pos, btp_mem_pos + 1,#$3F , #$01,!
Sub_16 btp_mem_pos, btp_mem_pos + 1,#$3F , #$01, !
decrement_y_pos_end: decrement_y_pos_end:
DEX DEX
BEQ end BEQ end
Lag_16 D, D + 1, #$00, #$02, case_2 LDA D + 1
BMI case_2
case_1: case_1:
Sub_16_A D, D + 1, #V, #V + 1, ! Sub_16 D, D + 1, #V, #V + 1,!
increment_pixel_x: increment_pixel_x:
LSR byte_to_paint LSR byte_to_paint
BCC for_y BCC for_y
move_8px_left: move_8px_left:
ROR byte_to_paint ROR byte_to_paint
Add_16 btp_mem_pos, btp_mem_pos + 1, #$08, #$00,! ;; C = 1 or 0
Add_16 btp_mem_pos, btp_mem_pos + 1, #$08, #$00
JMP for_y JMP for_y
case_2: case_2:
Add_16 D, D + 1, #dx_2, #dx_2 + 1, ! Add_16 D, D + 1, #dx_2, #dx_2 + 1,!
CLC
JMP for_y JMP for_y
end: end:
RTS RTS

View file

@ -3,7 +3,3 @@ killall x64sc
./build.sh \ ./build.sh \
&& nohup flatpak run net.sf.VICE -windowypos 0 -windowxpos 960 -windowwidth 945 -windowheight 720 -moncommands program.lbl build/file.prg </dev/null &>/dev/null & && nohup flatpak run net.sf.VICE -windowypos 0 -windowxpos 960 -windowwidth 945 -windowheight 720 -moncommands program.lbl build/file.prg </dev/null &>/dev/null &
sleep 2
rm source.o
rm file.prg

View file

@ -11,17 +11,17 @@
.include "STARTUP.s" .include "STARTUP.s"
;.include "dubbel_buffer/raster_irqs.s" ;.include "dubbel_buffer/raster_irqs.s"
.include "routines/arithmatic/mult_test.s" ;.include "routines/arithmatic/mult_test.s"
.include "routines/arithmatic/div_test.s" ;.include "routines/arithmatic/div_test.s"
.include "routines/circle/circle_test.s" ;.include "routines/circle/circle_test.s"
.include "routines/circle/circle_test_size.s" ;.include "routines/circle/circle_test_size.s"
.include "routines/circle/circle_test_position.s" ;.include "routines/circle/circle_test_position.s"
.include "routines/line/line_test.s" ;.include "routines/line/line_test.s"
.include "routines/line/line_test_extensive.s" .include "routines/line/line_test_extensive.s"
.include "routines/text/char_draw_test.s" ;.include "routines/text/char_draw_test.s"
.include "routines/pixel/pixel_test.s" ;.include "routines/pixel/pixel_test.s"
.include "routines/memory/memcpy_test.s" ;.include "routines/memory/memcpy_test.s"
.include "routines/memory/memset_test.s" ;.include "routines/memory/memset_test.s"
;.include "routines/triangle/triangle_test.s" ;.include "routines/triangle/triangle_test.s"
exit: exit: