Use faster memset algorithm.

This commit is contained in:
hugova 2025-07-26 14:50:23 +02:00
parent 7f136b9ed2
commit ef22e5a2ff
2 changed files with 80 additions and 41 deletions

118
wip-hugo/routines/memory/memset.s Executable file → Normal file
View file

@ -1,40 +1,78 @@
;;; -*- Mode: asm; indent-tabs-mode: t; tab-width: 8 -*- ;;; -*- Mode: asm; indent-tabs-mode: t; tab-width: 8 -*-
;; Sets memory in 'A'-registry to all addresses from 'A_start' until 'A_start' + 'length' ;; Sets memory in 'A'-registry to all addresses from 'A_start' until 'A_start' + 'length'
;; Modifies A, X and A_start ;; Modifies A, X and A_start
;; NOTE(review): Y is also written (TAY / LDY / DEY below), and small_set
;; overwrites length + 1 as well as the scratch variables data_to_write,
;; length_copy, Y_copy and RTS_pointer.
;; NOTE(review): small_set stores an RTS opcode into the body of big_set at
;; run time and restores the original byte afterwards, so this routine has to
;; execute from writable memory.
.proc memset .proc memset
.include "mem.inc" .include "mem.inc"
;; big_set sets the memory in $ff chunks. ;; big_set sets the memory in $ff chunks.
;; skip if length >= $ff ;; skip if length >= $ff
;; NOTE(review): as written, big_set is entered only when length + 1 (used
;; here as the number of full passes) is non-zero; otherwise control jumps
;; straight to small_set.
LDX length +1 LDX length +1
BNE big_set BNE big_set
JMP small_set JMP small_set
big_set: ;sets $ff of memory big_set: ;sets $ff of memory
;; Y value does not matter, will go through all anyway! ;; Y value does not matter, will go through all anyway!
;; The $ff STA/DEY pairs plus the final STA after .endrepeat make $100 stores,
;; so Y steps through every 8-bit value and each pass writes 256 bytes.
.repeat $ff .repeat $ff
STA (A_start), Y STA (A_start), Y
DEY DEY
.endrepeat .endrepeat
STA (A_start), Y ; dont forget Y =0 STA (A_start), Y ; dont forget Y =0
big_set_end: big_set_end:
;;set all whole $ff memory chunks! ;;set all whole $ff memory chunks!
;; Advance the pointer's second byte and count down the remaining passes in X.
INC A_start + 1 INC A_start + 1
DEX ;; length +1 -- DEX ;; length +1 --
BEQ small_set BEQ small_set
JMP big_set JMP big_set
;; Note that cpu cycles total: cy_tot = 66 to 69
;; But we skip a BNE (cy = 2*) * [length (mod 255)]
;;sets the rest of the memory ;; The BNE case has an average of 2*255/2 = 255 so this is faster (on average.)
;; note that this can use code above (smc) or the same method. may implement later.
small_set: small_set:
;; New version: instead of a DEY/BNE loop, reuse the unrolled code in big_set.
;; The byte at big_set + length*3 is temporarily replaced by RTS ($60) so that
;; a JSR big_set executes only the first `length` STA/DEY pairs.
LDY length STA data_to_write ; cy = 3
small_set_loop: LDA length ; cy = 3
STA (A_start), Y STA length_copy ; cy = 3
DEY
BNE small_set_loop ;; calculate rts-position
STA (A_start), Y LDX #$00 ; cy = 2
RTS STX length + 1 ; cy = 3
.endproc ;; 3 bytes = STA DEY NOP = setting 1 byte of memory.
;; So we need to calculate: length*3
;; NOTE(review): Mult_16 is defined outside this file; presumably it doubles
;; the 16-bit value held in A (low) and length + 1 (high) - confirm.
;; NOTE(review): there is no CLC before the ADC instructions below; they rely
;; on the carry left by the preceding operation - confirm it is always clear.
Mult_16 A, length + 1 ; cy = 7
; A= length
ADC length_copy ; cy = 3
TAY
LDA length + 1 ; cy = 3
ADC #$00 ; cy = 2
STA length + 1 ; cy = 3
;; Now RTS_pointer + Y = length*3 + big_set_label
LDA #<big_set ; cy = 2
STA RTS_pointer ; cy = 3
LDA #>big_set ; cy = 2
ADC length + 1 ; cy = 3
STA RTS_pointer + 1 ; cy = 3
;; read data we will change to RTS
;; The original byte is kept in X and its offset in Y_copy for the restore.
STY Y_copy ; cy = 3
LDA (RTS_pointer), Y ; cy = 5*
TAX ; cy = 2
;; set RTS in big_set
LDA #$60 ; cy = 2
STA (RTS_pointer), Y ; cy = 5*
;; JSR to modified big_set
;; Y starts at length - 1 and A is reloaded with the fill byte before the call.
LDY length_copy ; cy = 3
DEY ; because we want to count to Y=0 :)
LDA data_to_write ; cy = 3
JSR big_set ; cy = 6
;; revert changes
;; Write the saved byte (still in X) back over the temporary RTS.
LDY Y_copy ; cy = 3
TXA ; cy = 2
STA (RTS_pointer), Y ; cy = 5*
RTS
.endproc

View file

@ -33,7 +33,8 @@ JMP exit
.include "routines/pixel/pixel_draw.s" .include "routines/pixel/pixel_draw.s"
.include "routines/pixel/pixel_calc.s" .include "routines/pixel/pixel_calc.s"
.include "routines/text/char_draw.s" .include "routines/text/char_draw.s"
.include "routines/memory/memset_alt.s" .include "routines/memory/memset.s"
;.include "routines/memory/clear_screen.s"
.include "routines/memory/memcpy.s" .include "routines/memory/memcpy.s"
.include "routines/arithmatic/mult.s" .include "routines/arithmatic/mult.s"
.include "routines/arithmatic/div.s" .include "routines/arithmatic/div.s"