From ef22e5a2fff1cd2a96fb2cf880570fb2a1a517b1 Mon Sep 17 00:00:00 2001
From: hugova
Date: Sat, 26 Jul 2025 14:50:23 +0200
Subject: [PATCH] use faster memset algoritm.

---
 wip-hugo/routines/memory/memset.s | 118 ++++++++++++++++++++----------
 wip-hugo/source.s                 |   3 +-
 2 files changed, 80 insertions(+), 41 deletions(-)
 mode change 100755 => 100644 wip-hugo/routines/memory/memset.s

diff --git a/wip-hugo/routines/memory/memset.s b/wip-hugo/routines/memory/memset.s
old mode 100755
new mode 100644
index 55700e9..37371ad
--- a/wip-hugo/routines/memory/memset.s
+++ b/wip-hugo/routines/memory/memset.s
@@ -1,40 +1,78 @@
-;;; -*- Mode: asm; indent-tabs-mode: t; tab-width: 8 -*-
-
-;; Sets memory in 'A'-registry to all addresses from 'A_start' until 'A_start' + 'length'
-;; Modifies A, X and A_start
-.proc memset
-	.include "mem.inc"
-
-;; big_set sets the memory in $ff chunks.
-;; skipp if length >= $ff
-LDX length +1
-BNE big_set
-JMP small_set
-
-big_set: ;sets $ff of memory
-	;; Y value do not matter, will go through all anyway!
-	.repeat $ff
-	STA (A_start), Y
-	DEY
-	.endrepeat
-	STA (A_start), Y ; dont forget Y =0
-big_set_end:
-	;;set all hole $ff memory chunks!
-	INC A_start + 1
-	DEX ;; length +1 --
-	BEQ small_set
-	JMP big_set
-
-
-
-;;sets the rest of the memory
-;; note that this can use code above (smc) or the same method. may implement later.
-small_set:
-	LDY length
-small_set_loop:
-	STA (A_start), Y
-	DEY
-	BNE small_set_loop
-	STA (A_start), Y
-	RTS
-.endproc
+;;; -*- Mode: asm; indent-tabs-mode: t; tab-width: 8 -*-
+
+;; Fills 'length' bytes of memory, starting at address 'A_start', with the value in the A register.
+;; Modifies A, X, Y, A_start, length, data_to_write, length_copy, Y_copy and RTS_pointer.
+.proc memset
+	.include "mem.inc"
+
+;; big_set fills memory in whole $100-byte (256-byte) chunks.
+;; It is skipped when length < $100, i.e. when the high byte of length is zero.
+LDX length +1
+BNE big_set
+JMP small_set
+
+big_set: ;sets $100 bytes of memory
+	;; The initial Y value does not matter: the 256 stores below visit every Y offset once.
+	.repeat $ff
+	STA (A_start), Y
+	DEY
+	.endrepeat
+	STA (A_start), Y ; final (256th) store of the chunk
+big_set_end:
+	;; advance to the next page; X counts the whole $100-byte chunks that are left
+	INC A_start + 1
+	DEX ;; one chunk less (X started as length + 1)
+	BEQ small_set
+	JMP big_set
+
+;; Note: the setup below costs roughly 100 cycles (sum of the cy notes plus TAY, DEY and the two RTS).
+;; In return the unrolled code needs no BNE (cy = 2*) for each of the [length (mod 256)] bytes.
+;; A BNE loop would cost on average about 2*255/2 = 255 cycles, so this is faster (on average).
+
+small_set:
+	STA data_to_write ; cy = 3
+	LDA length ; cy = 3
+	STA length_copy ; cy = 3
+
+	;; calculate rts-position
+	LDX #$00 ; cy = 2
+	STX length + 1 ; cy = 3
+	;; Each unrolled STA (A_start), Y / DEY pair is 3 bytes and sets 1 byte of memory.
+	;; So we need to calculate: length*3
+	Mult_16 A, length + 1 ; cy = 7
+	; A = low byte of length*2 (assuming Mult_16 doubles A:length+1 - TODO confirm); add length for *3
+	ADC length_copy ; cy = 3
+	TAY
+	LDA length + 1 ; cy = 3
+	ADC #$00 ; cy = 2
+	STA length + 1 ; cy = 3
+
+	;; Now RTS_pointer + Y = length*3 + big_set_label
+	LDA #<big_set ; cy = 2
+	STA RTS_pointer ; cy = 3
+	LDA #>big_set ; cy = 2
+	ADC length + 1 ; cy = 3
+	STA RTS_pointer + 1 ; cy = 3
+
+	;; read the opcode byte we will overwrite with RTS, and keep it in X
+	STY Y_copy ; cy = 3
+	LDA (RTS_pointer), Y ; cy = 5*
+	TAX ; cy = 2
+
+	;; patch an RTS ($60) into big_set (self-modifying: this routine must run from RAM)
+	LDA #$60 ; cy = 2
+	STA (RTS_pointer), Y ; cy = 5*
+
+	;; JSR to modified big_set
+	LDY length_copy ; cy = 3
+	DEY ; Y = length - 1, so the stores count down to Y = 0
+	LDA data_to_write ; cy = 3
+	JSR big_set ; cy = 6
+
+	;; restore the original opcode byte over the patched RTS
+	LDY Y_copy ; cy = 3
+	TXA ; cy = 2
+	STA (RTS_pointer), Y ; cy = 5*
+
+	RTS
+.endproc
diff --git a/wip-hugo/source.s b/wip-hugo/source.s
index 9c0bfd3..59c0c30 100644
--- a/wip-hugo/source.s
+++ b/wip-hugo/source.s
@@ -33,7 +33,8 @@ JMP exit
 .include "routines/pixel/pixel_draw.s"
 .include "routines/pixel/pixel_calc.s"
 .include "routines/text/char_draw.s"
-.include "routines/memory/memset_alt.s"
+.include "routines/memory/memset.s"
+;.include "routines/memory/clear_screen.s"
 .include "routines/memory/memcpy.s"
 .include "routines/arithmatic/mult.s"
 .include "routines/arithmatic/div.s"