From b23a63ddf5d71f956e5495a80bc79bcb640fa887 Mon Sep 17 00:00:00 2001
From: hugova <hugova@kth.se>
Date: Tue, 22 Jul 2025 17:49:09 +0200
Subject: [PATCH] optimise line_down by using signed 16-bit

---
 wip-hugo/routines/line/line_down.s | 56 +++++++++++-------------------
 1 file changed, 21 insertions(+), 35 deletions(-)

diff --git a/wip-hugo/routines/line/line_down.s b/wip-hugo/routines/line/line_down.s
index 17d9706..74ce4e4 100644
--- a/wip-hugo/routines/line/line_down.s
+++ b/wip-hugo/routines/line/line_down.s
@@ -25,39 +25,26 @@
         Mult_16 A, V +1
         STA V
         ;dy_2 = dy*2
-        Mult_16 dy_2, dy_2 +1 ;>dy_2 = dy (same address)
-
-        ;; This is an Bresenham's line algorithm, se wikipedia bellow.
-        ;;https://en.wikipedia.org/wiki/Bresenham%27s_line_algorithm
-        ;; We need to compute the Value D = 2*dy - dx,
-        ;; but it may be or get negative.
-        ;; IN the loop we may set D = D -V
-        ;; Because math D needs to be at least >=V.
-        ;; V_max = %00000001 11111111
-        ;; We therefor need to add this offset to V 00000001 11111111
-        ;; and to its branch logic later in the loop.
-
-        ;;D = 2*dy - dx + 2*255
-        LDA dy_2
-        ADC #$ff
-        TAX
-        LDA dy + 1
-        ADC #$01
-        TAY
+        Mult_16 dy_2, dy_2 +1 ;dy_2 = dy (same address)
 
+        ;; D = dy_2 - dx (signed 16-bit).
         SEC
-        TXA
+        LDA dy_2
         SBC dx
         STA D
-        TYA
+        LDA dy_2 + 1
         SBC #$00
         STA D + 1
 
+        ;; case_2 adds dy_2 with the carry flag set (C = 1), so we store dy_2 one too small to compensate.
+        Sub_16 dy_2, dy_2 +1, #$01,#$00
+
+
 selfmod:
         ;; Self modifying code. Makes LDA and SBC instructions each take 1 cycle less.
         ;; You can remove this if you run the loop without # at dy_2 and V.
         ;;Note: The offsets like +2 etc is because there are instructions betwean the label and the
-        ;address that needs to be modified
+        ;; address that needs to be modified.
         ;; dy_2
         ;; Modifies LDA >dy_2
         LDA dy_2
@@ -68,10 +55,10 @@ selfmod:
         ;; V
         ;;Modidies SBC >V
         LDA V
-        STA case_1 + 1
+        STA case_1 + 3
         ;; Modifies SBC <V
         LDA V + 1
-        STA case_1 + 7
+        STA case_1 + 9
 end_selfmod:
         LDA X_end
         STA X_pos
@@ -103,28 +90,27 @@ move_8px_right:
 decrement_pixel_x_end:
         DEX
         BEQ end ;We keep track on when to stop line draw with the X registry.
-        ;;If D <  %00000010 00000000:  case_2
-        ;;else case 1.
-        Lag_16 D, D + 1, #$00, #$02, case_2
+        ;;If D < 0 goto case_2
+        LDA D + 1
+        BMI case_2
 case_1:
         ;; D = D - V
-        ;; Because Lag_16:
-        ;;      C =1 so we can use !
-        ;;      A = >D
-        Sub_16_A D, D + 1, #V, #V + 1, !
+        ;; C = 1 is assumed here, so Sub_16 is called with ! (NOTE(review): confirm what sets C now).
+        Sub_16 D, D + 1, #V, #V + 1, !
 decrement_y_pos:
         DEY ; Increment Y pos inside the buffer
         BNE for_x
-move_8px_up: ; Z=1 --> C=1
+move_8px_up:
         LDY #$08
         ;; Switch to chunk bellow
-        ; C = 1
+        ; C = 1 or 0
         ; So we subtract  #$40, #$01
-        Sub_16 btp_mem_pos, btp_mem_pos + 1, #$40, #$01, !; +320
+        Sub_16 btp_mem_pos, btp_mem_pos + 1, #$40, #$01;
         JMP for_x
 decrement_y_pos_end:
 case_2:
-        Add_16 D, D + 1, #dy_2, #dy_2 + 1, !  ;D = D + 2*dy
+        ; dy_2 is stored one too small, but C = 1 here, which adds the missing 1.
+        Add_16 D, D + 1, #dy_2, #dy_2 + 1, ! ;D = D + 2*dy
         JMP for_x
 end:
         RTS