From b23a63ddf5d71f956e5495a80bc79bcb640fa887 Mon Sep 17 00:00:00 2001 From: hugova Date: Tue, 22 Jul 2025 17:49:09 +0200 Subject: [PATCH] optimise line_down by using signed 16-bit --- wip-hugo/routines/line/line_down.s | 56 +++++++++++------------------- 1 file changed, 21 insertions(+), 35 deletions(-) diff --git a/wip-hugo/routines/line/line_down.s b/wip-hugo/routines/line/line_down.s index 17d9706..74ce4e4 100644 --- a/wip-hugo/routines/line/line_down.s +++ b/wip-hugo/routines/line/line_down.s @@ -25,39 +25,26 @@ Mult_16 A, V +1 STA V ;dy_2 = dy*2 - Mult_16 dy_2, dy_2 +1 ;>dy_2 = dy (same address) - - ;; This is an Bresenham's line algorithm, se wikipedia bellow. - ;;https://en.wikipedia.org/wiki/Bresenham%27s_line_algorithm - ;; We need to compute the Value D = 2*dy - dx, - ;; but it may be or get negative. - ;; IN the loop we may set D = D -V - ;; Because math D needs to be at least >=V. - ;; V_max = %00000001 11111111 - ;; We therefor need to add this offset to V 00000001 11111111 - ;; and to its branch logic later in the loop. - - ;;D = 2*dy - dx + 2*255 - LDA dy_2 - ADC #$ff - TAX - LDA dy + 1 - ADC #$01 - TAY + Mult_16 dy_2, dy_2 +1 ;dy_2 = dy (same address) + ;; D = dy_2 - x. (signed 16-bit) SEC - TXA + LDA dy_2 SBC dx STA D - TYA + LDA dy_2 + 1 SBC #$00 STA D + 1 + ;; because C flag is wrong value we let dy_2 be 1 to small + Sub_16 dy_2, dy_2 +1, #$01,#$00 + + selfmod: ;; Self modifying code. Makes LDA and SBC instructions each take 1 cycle less. ;; You can remove this if you run the loop without # at dy_2 and V. ;;Note: The offsets like +2 etc is because there are instructions betwean the label and the - ;address that needs to be modified + ;; address that needs to be modified. ;; dy_2 ;; Modifies LDA >dy_2 LDA dy_2 @@ -68,10 +55,10 @@ selfmod: ;; V ;;Modidies SBC >V LDA V - STA case_1 + 1 + STA case_1 + 3 ;; Modifies SBC D - Sub_16_A D, D + 1, #V, #V + 1, ! + ;; Because Sub_16 C = 1 so we can use ! + Sub_16 D, D + 1, #V, #V + 1, ! decrement_y_pos: DEY ; Increment Y pos inside the buffer BNE for_x -move_8px_up: ; Z=1 --> C=1 +move_8px_up: LDY #$08 ;; Switch to chunk bellow - ; C = 1 + ; C = 1 or 0 ; So we subtract #$40, #$01 - Sub_16 btp_mem_pos, btp_mem_pos + 1, #$40, #$01, !; +320 + Sub_16 btp_mem_pos, btp_mem_pos + 1, #$40, #$01; JMP for_x decrement_y_pos_end: case_2: - Add_16 D, D + 1, #dy_2, #dy_2 + 1, ! ;D = D + 2*dy + ; dy_2 is one off but C= 1 witch is off as well + Add_16 D, D + 1, #dy_2, #dy_2 + 1, ! ;D = D + 2*dy JMP for_x end: RTS