From 3fe7cdb8acc1660c7c6160b9a63d33e29cdf82e3 Mon Sep 17 00:00:00 2001 From: hugova Date: Wed, 23 Jul 2025 18:03:06 +0200 Subject: [PATCH] optimise line_* by using two's complement (see previous commit) --- wip-hugo/build/program.lbl | 309 +++++++++---------- wip-hugo/routines/line/line_down.s | 4 +- wip-hugo/routines/line/line_down_inv.s | 34 +- wip-hugo/routines/line/line_test_extensive.s | 3 + wip-hugo/routines/line/line_up.s | 30 +- wip-hugo/routines/line/line_up_inv.s | 34 +- wip-hugo/run.sh | 4 - wip-hugo/source.s | 20 +- 8 files changed, 209 insertions(+), 229 deletions(-) diff --git a/wip-hugo/build/program.lbl b/wip-hugo/build/program.lbl index dbbd375..15f8a08 100644 --- a/wip-hugo/build/program.lbl +++ b/wip-hugo/build/program.lbl @@ -1,5 +1,5 @@ -al 001444 .__BSS_LOAD__ -al 001444 .__BSS_RUN__ +al 001319 .__BSS_LOAD__ +al 001319 .__BSS_RUN__ al 000000 .__BSS_SIZE__ al 000001 .__EXEHDR__ al 000001 .__LOADADDR__ @@ -7,162 +7,153 @@ al 000000 .__ZP_FILEOFFS__ al 000002 .__ZP_LAST__ al 0000FE .__ZP_SIZE__ al 000002 .__ZP_START__ -al 001404 .big_y_offset -al 0013CE .binary_factor -al 0013C3 .log -al 0013BA .inverse_factor_value -al 0013B9 .for_i_end -al 0013B1 .R_pos -al 0013B5 .R_neg -al 0013A4 .for_i -al 00139A .div -al 001399 .endloop -al 00138C .loop -al 00138E .start -al 001387 .mult -al 001377 .change_length -al 001380 .y_overflow -al 001370 .loop -al 00136E .memcpy -al 00132F .big_set_end -al 001337 .small_set -al 001030 .big_set -al 001029 .memset -al 000FFF .move_data -al 000FD6 .calculate_screen_position -al 000FBC .calculate_petski_position -al 000FBC .char_draw -al 000FB3 .calc_byte_to_paint -al 000F95 .pixel_calc -al 000F90 .draw -al 000F88 .calc_byte_to_paint -al 000F6A .pixel_draw -al 000F68 .draw_lower_triangle -al 000F5C .draw_upper_triangle -al 000F45 .BC_overflow -al 000F5C .BC_overflow_end -al 000F28 .AB_overflow -al 000F3C .AB_overflow_end -al 000F1F .triangle -al 000F1E .end -al 000EF9 .qbb_y_overflow -al 000F15 .qbb_y_end -al 000EF3 
.qbb_y -al 000ED9 .qdb_overflow -al 000EF3 .qdb_y_end -al 000ED5 .qdb_y -al 000EBB .qca_x_overflow -al 000ED5 .qca_x_end -al 000EB7 .qca_x -al 000E9D .qaa_x_overflow -al 000EB7 .qaa_x_end -al 000E99 .qaa_x -al 000E95 .change_x -al 000F15 .endif -al 000E95 .if -al 000E6C .qcb_x_overflow -al 000E87 .qcb_x_end -al 000E68 .qcb_x -al 000E4E .qdb_x_overflow -al 000E68 .qdb_x_end -al 000E4A .qdb_x -al 000E2E .qda_y_overflow -al 000E4A .qda_y_end -al 000E28 .qda_y -al 000E0E .qaa_y_underflow -al 000E28 .qaa_y_end -al 000E0A .qaa_y -al 000E08 .change_Y -al 000E00 .draw_qab -al 000DF8 .draw_qbb -al 000DF2 .draw_qdb -al 000DEA .draw_qcb -al 000DE4 .draw_qca -al 000DDC .draw_qda -al 000DD6 .draw_qba -al 000DCE .draw_qaa -al 000DCE .draw_pixels -al 000DCE .while_x_bigger_then_y -al 000DB4 .draw_right_px_in_circle -al 000D93 .draw_lower_px_in_circle -al 000D75 .draw_left_px_in_circle -al 000D57 .draw_upper_px_in_circle -al 000D4B .circle -al 000D2A .move_8px_left -al 000D26 .increment_pixel_x -al 000D1C .LOCAL-MACRO_SYMBOL-002F -al 000D4A .end -al 000CFD .move_8px_up -al 000D0B .decrement_y_pos_end -al 000CFA .decrement_y_pos -al 000CF4 .for_y -al 000CDF .end_selfmod -al 000D1C .case_1 -al 000D3B .case_2 -al 000CCB .selfmod -al 000C90 .decrement_y_pos_end -al 000C7F .move_8px_up -al 000C7C .decrement_y_pos -al 000C72 .LOCAL-MACRO_SYMBOL-0021 -al 000C9F .end -al 000C53 .move_8px_left -al 000C61 .increment_pixel_x_end -al 000C4F .increment_pixel_x -al 000C49 .for_x -al 000C35 .end_selfmod -al 000C72 .case_1 -al 000C90 .case_2 -al 000C21 .selfmod -al 000BD4 .move_8px_left -al 000BD0 .increment_pixel_x -al 000BC6 .LOCAL-MACRO_SYMBOL-0017 -al 000BF5 .end -al 000BA7 .move_8px_down -al 000BB5 .increment_y_pos_end -al 000BA4 .increment_y_pos -al 000B9E .for_y -al 000B7D .end_selfmod -al 000BC6 .case_1 -al 000BE6 .case_2 -al 000B69 .selfmod -al 000B2E .decrement_y_pos_end -al 000B1D .move_8px_up -al 000B1A .decrement_y_pos -al 000B10 .LOCAL-MACRO_SYMBOL-0009 -al 000B3D .end -al 000AF1 
.move_8px_right -al 000AFF .decrement_pixel_x_end -al 000AED .decrement_pixel_x -al 000AE7 .for_x -al 000AC7 .end_selfmod -al 000B10 .case_1 -al 000B2E .case_2 -al 000AB3 .selfmod -al 000A88 .line_down -al 000B3E .line_down_inv -al 000A80 .steep_ -al 000A84 .shallow_ -al 000BF6 .line_up -al 000CA0 .line_up_inv -al 000A6E .steep -al 000A72 .shallow -al 000A68 .up -al 000A78 .down -al 000A5F .dx_no_underflow -al 000A54 .line -al 000A51 .exit -al 000A21 .end_test_y -al 000A15 .test_y -al 000A0B .end_test_x -al 0009FF .test_x -al 0009D0 .@loop -al 0009BE .long_line_test_b -al 0009A9 .clear_screen_ -al 000992 .@loop -al 000980 .long_line_test_a -al 00096B .clear_screen -al 000954 .@loop -al 0008FE .loop -al 0008C5 .loop +al 0012D9 .big_y_offset +al 0012A3 .binary_factor +al 001298 .log +al 00128F .inverse_factor_value +al 00128E .for_i_end +al 001286 .R_pos +al 00128A .R_neg +al 001279 .for_i +al 00126F .div +al 00126E .endloop +al 001261 .loop +al 001263 .start +al 00125C .mult +al 00124C .change_length +al 001255 .y_overflow +al 001245 .loop +al 001243 .memcpy +al 001204 .big_set_end +al 00120C .small_set +al 000F05 .big_set +al 000EFE .memset +al 000ED4 .move_data +al 000EAB .calculate_screen_position +al 000E91 .calculate_petski_position +al 000E91 .char_draw +al 000E88 .calc_byte_to_paint +al 000E6A .pixel_calc +al 000E65 .draw +al 000E5D .calc_byte_to_paint +al 000E3F .pixel_draw +al 000E3D .draw_lower_triangle +al 000E31 .draw_upper_triangle +al 000E1A .BC_overflow +al 000E31 .BC_overflow_end +al 000DFD .AB_overflow +al 000E11 .AB_overflow_end +al 000DF4 .triangle +al 000DF3 .end +al 000DCE .qbb_y_overflow +al 000DEA .qbb_y_end +al 000DC8 .qbb_y +al 000DAE .qdb_overflow +al 000DC8 .qdb_y_end +al 000DAA .qdb_y +al 000D90 .qca_x_overflow +al 000DAA .qca_x_end +al 000D8C .qca_x +al 000D72 .qaa_x_overflow +al 000D8C .qaa_x_end +al 000D6E .qaa_x +al 000D6A .change_x +al 000DEA .endif +al 000D6A .if +al 000D41 .qcb_x_overflow +al 000D5C .qcb_x_end +al 000D3D .qcb_x +al 
000D23 .qdb_x_overflow +al 000D3D .qdb_x_end +al 000D1F .qdb_x +al 000D03 .qda_y_overflow +al 000D1F .qda_y_end +al 000CFD .qda_y +al 000CE3 .qaa_y_underflow +al 000CFD .qaa_y_end +al 000CDF .qaa_y +al 000CDD .change_Y +al 000CD5 .draw_qab +al 000CCD .draw_qbb +al 000CC7 .draw_qdb +al 000CBF .draw_qcb +al 000CB9 .draw_qca +al 000CB1 .draw_qda +al 000CAB .draw_qba +al 000CA3 .draw_qaa +al 000CA3 .draw_pixels +al 000CA3 .while_x_bigger_then_y +al 000C89 .draw_right_px_in_circle +al 000C68 .draw_lower_px_in_circle +al 000C4A .draw_left_px_in_circle +al 000C2C .draw_upper_px_in_circle +al 000C20 .circle +al 000BFD .move_8px_left +al 000BF9 .increment_pixel_x +al 000C1F .end +al 000BD8 .move_8px_up +al 000BE6 .decrement_y_pos_end +al 000BD5 .decrement_y_pos +al 000BCF .for_y +al 000BBA .end_selfmod +al 000BED .case_1 +al 000C0F .case_2 +al 000BA6 .selfmod +al 000B66 .decrement_y_pos_end +al 000B54 .move_8px_up +al 000B51 .decrement_y_pos +al 000B75 .end +al 000B30 .move_8px_left +al 000B3E .increment_pixel_x_end +al 000B2C .increment_pixel_x +al 000B26 .for_x +al 000B12 .end_selfmod +al 000B45 .case_1 +al 000B66 .case_2 +al 000AFE .selfmod +al 000AB9 .move_8px_left +al 000AB5 .increment_pixel_x +al 000ADA .end +al 000A94 .move_8px_down +al 000AA2 .increment_y_pos_end +al 000A91 .increment_y_pos +al 000A8B .for_y +al 000A6A .end_selfmod +al 000AA9 .case_1 +al 000ACA .case_2 +al 000A56 .selfmod +al 000A16 .decrement_y_pos_end +al 000A04 .move_8px_up +al 000A01 .decrement_y_pos +al 000A25 .end +al 0009E0 .move_8px_right +al 0009EE .decrement_pixel_x_end +al 0009DC .decrement_pixel_x +al 0009D6 .for_x +al 0009B6 .end_selfmod +al 0009F5 .case_1 +al 000A16 .case_2 +al 0009A2 .selfmod +al 000974 .line_down +al 000A26 .line_down_inv +al 00096C .steep_ +al 000970 .shallow_ +al 000ADB .line_up +al 000B76 .line_up_inv +al 00095A .steep +al 00095E .shallow +al 000954 .up +al 000964 .down +al 00094B .dx_no_underflow +al 000940 .line +al 00093D .exit +al 000928 .@loop +al 000916 
.long_line_test_b +al 000901 .clear_screen_ +al 0008EA .@loop +al 0008D8 .long_line_test_a +al 0008C3 .clear_screen +al 0008C0 .hihi +al 0008A9 .@loop al 000897 .NMI_routine_end al 000896 .NMI_routine diff --git a/wip-hugo/routines/line/line_down.s b/wip-hugo/routines/line/line_down.s index 74ce4e4..b81a095 100644 --- a/wip-hugo/routines/line/line_down.s +++ b/wip-hugo/routines/line/line_down.s @@ -16,7 +16,6 @@ LDA #$00 STA V +1 STA dy_2 +1 - STA $FD ; for pixel_draw ;; V = 2*(dx -dy) SEC @@ -27,7 +26,7 @@ ;dy_2 = dy*2 Mult_16 dy_2, dy_2 +1 ;dy_2 = dy (same address) - ;; D = dy_2 - x. (signed 16-bit) + ;; D = dy_2 - dx. (signed 16-bit) SEC LDA dy_2 SBC dx @@ -39,7 +38,6 @@ ;; because C flag is wrong value we let dy_2 be 1 to small Sub_16 dy_2, dy_2 +1, #$01,#$00 - selfmod: ;; Self modifying code. Makes LDA and SBC instructions each take 1 cycle less. ;; You can remove this if you run the loop without # at dy_2 and V. diff --git a/wip-hugo/routines/line/line_down_inv.s b/wip-hugo/routines/line/line_down_inv.s index 7d18f21..465ffdf 100644 --- a/wip-hugo/routines/line/line_down_inv.s +++ b/wip-hugo/routines/line/line_down_inv.s @@ -14,7 +14,6 @@ LDA #$00 STA V + 1 STA dx_2 + 1 - STA $FD SEC LDA dy @@ -24,30 +23,27 @@ Mult_16 dx_2, dx_2 + 1 - LDA dx_2 - ADC #$ff - TAX - LDA dx + 1 - ADC #$01 - TAY - + ;; D = dx_2 - dy. (signed 16-bit) SEC - TXA + LDA dx_2 SBC dy STA D - TYA + LDA dx_2 + 1 SBC #$00 STA D + 1 + ;; because the C flag has the wrong value we let dx_2 be 1 too small + Sub_16 dx_2, dx_2 +1, #$01,#$00 + selfmod: LDA dx_2 STA case_2 + 1 LDA dx_2 + 1 STA case_2 + 7 LDA V - STA case_1 + 1 + STA case_1 + 3 LDA V + 1 - STA case_1 + 7 + STA case_1 + 9 end_selfmod: LDA X_end STA X_pos @@ -60,7 +56,7 @@ end_selfmod: Sub_16 btp_mem_pos, btp_mem_pos + 1, #$00, #$00, ! LDX dy CLC -for_y: +for_y: ; C =0 LDA byte_to_paint ORA (btp_mem_pos), Y STA (btp_mem_pos), Y @@ -72,20 +68,22 @@ move_8px_down: Sub_16 btp_mem_pos, btp_mem_pos + 1, #$3f ,#$01, ! 
increment_y_pos_end: DEX - ;CPX Y_end BEQ end - Lag_16 D, D + 1, #$00, #$02, case_2 + ;Lag_16 D, D + 1, #$00, #$02, case_2 + LDA D + 1 + BMI case_2 case_1: - Sub_16_A D, D + 1, #V, #V + 1, ! + Sub_16 D, D + 1, #V, #V + 1, ! increment_pixel_x: ASL byte_to_paint BCC for_y move_8px_left: - Sub_16 btp_mem_pos, btp_mem_pos + 1, #$08, #$00 + Sub_16 btp_mem_pos, btp_mem_pos + 1, #$08, #$00,! ROL byte_to_paint JMP for_y case_2: - Add_16 D, D + 1, #dx_2, #dx_2 + 1, ! ;D = D + 2*dx + Add_16 D, D + 1, #dx_2, #dx_2 + 1,! ;D = D + 2*dx + CLC ; for_y shall always have C = 0 JMP for_y end: RTS diff --git a/wip-hugo/routines/line/line_test_extensive.s b/wip-hugo/routines/line/line_test_extensive.s index 7788aaf..6fa69ed 100644 --- a/wip-hugo/routines/line/line_test_extensive.s +++ b/wip-hugo/routines/line/line_test_extensive.s @@ -30,6 +30,9 @@ CMP #$bb bne @loop + + hihi: + jmp hihi clear_screen: ;;Lets clear bitmap VIC_bank = $4000 diff --git a/wip-hugo/routines/line/line_up.s b/wip-hugo/routines/line/line_up.s index ab56950..4ea14ad 100644 --- a/wip-hugo/routines/line/line_up.s +++ b/wip-hugo/routines/line/line_up.s @@ -15,8 +15,6 @@ LDA #$00 STA V + 1 - STA dy_2 + 1 - STA $FD SEC LDA dx @@ -26,18 +24,12 @@ Mult_16 dy_2, dy_2 + 1 - LDA dy_2 - ADC #$ff - TAX - LDA dy + 1 - ADC #$01 - TAY - + ;; D = dy_2 - dx. (signed 16-bit) SEC - TXA + LDA dy_2 SBC dx STA D - TYA + LDA dy_2 + 1 SBC #$00 STA D + 1 @@ -47,9 +39,9 @@ selfmod: LDA dy_2 + 1 STA case_2 + 7 LDA V - STA case_1 + 1 + STA case_1 + 3 LDA V + 1 - STA case_1 + 7 + STA case_1 + 9 end_selfmod: JSR pixel_calc ;;only used first pixel. after this relative position is abused STA byte_to_paint @@ -71,19 +63,23 @@ increment_pixel_x_end: DEX ;CPX X_end BEQ end - Lag_16 D, D + 1, #$00, #$02, case_2 + ;;If D < 0 goto case_2 + LDA D + 1 + BMI case_2 case_1: - Sub_16_A D, D + 1, #V, #V + 1,! + Sub_16 D, D + 1, #V, #V + 1,! decrement_y_pos: DEY BNE for_x move_8px_up: LDY #$08 - Sub_16 btp_mem_pos, btp_mem_pos + 1, #$40, #$01, ! 
+ ; C = 0 or 1 + Sub_16 btp_mem_pos, btp_mem_pos + 1, #$40, #$01 jmp for_x decrement_y_pos_end: case_2: - Add_16 D, D + 1, #>dy_2, #/dev/null & - -sleep 2 -rm source.o -rm file.prg diff --git a/wip-hugo/source.s b/wip-hugo/source.s index 804981c..9c0bfd3 100644 --- a/wip-hugo/source.s +++ b/wip-hugo/source.s @@ -11,17 +11,17 @@ .include "STARTUP.s" ;.include "dubbel_buffer/raster_irqs.s" -.include "routines/arithmatic/mult_test.s" -.include "routines/arithmatic/div_test.s" -.include "routines/circle/circle_test.s" -.include "routines/circle/circle_test_size.s" -.include "routines/circle/circle_test_position.s" -.include "routines/line/line_test.s" +;.include "routines/arithmatic/mult_test.s" +;.include "routines/arithmatic/div_test.s" +;.include "routines/circle/circle_test.s" +;.include "routines/circle/circle_test_size.s" +;.include "routines/circle/circle_test_position.s" +;.include "routines/line/line_test.s" .include "routines/line/line_test_extensive.s" -.include "routines/text/char_draw_test.s" -.include "routines/pixel/pixel_test.s" -.include "routines/memory/memcpy_test.s" -.include "routines/memory/memset_test.s" +;.include "routines/text/char_draw_test.s" +;.include "routines/pixel/pixel_test.s" +;.include "routines/memory/memcpy_test.s" +;.include "routines/memory/memset_test.s" ;.include "routines/triangle/triangle_test.s" exit: