From 65db12baaa2a2bab42ffec4ac4de98eee607773f Mon Sep 17 00:00:00 2001 From: Gabriel Ravier Date: Thu, 30 Jan 2020 14:25:34 +0100 Subject: [PATCH 01/11] Started trying to optimize MakePixelWaveData --- src/Attributes.h | 11 +++++++++++ src/Backends/Audio/SDL2.cpp | 9 +++------ src/PixTone.cpp | 3 ++- 3 files changed, 16 insertions(+), 7 deletions(-) create mode 100644 src/Attributes.h diff --git a/src/Attributes.h b/src/Attributes.h new file mode 100644 index 00000000..f2c03824 --- /dev/null +++ b/src/Attributes.h @@ -0,0 +1,11 @@ +#pragma once + +#ifdef __GNUC__ + +#define ATTRIBUTE_HOT __attribute__((hot)) + +#else + +#define ATTRIBUTE_HOT + +#endif diff --git a/src/Backends/Audio/SDL2.cpp b/src/Backends/Audio/SDL2.cpp index 88d6a0c8..42612e22 100644 --- a/src/Backends/Audio/SDL2.cpp +++ b/src/Backends/Audio/SDL2.cpp @@ -10,17 +10,14 @@ #include "../../Organya.h" #include "../../WindowsWrapper.h" +#include "../../Attributes.h" #define MIN(a, b) ((a) < (b) ? (a) : (b)) #define MAX(a, b) ((a) > (b) ? (a) : (b)) #define CLAMP(x, y, z) MIN(MAX((x), (y)), (z)) -#ifdef __GNUC__ -#define ATTR_HOT __attribute__((hot)) #else #define ATTR_HOT -#endif - struct AudioBackend_Sound { unsigned char *samples; @@ -76,8 +73,8 @@ static void SetSoundPan(AudioBackend_Sound *sound, long pan) sound->volume_r = sound->pan_r * sound->volume; } -// Most CPU-intensive function in the game (2/3rd CPU time consumption in my experience), so marked with attrHot so the compiler considers it a hot spot (as it is) when optimizing -ATTR_HOT static void MixSounds(float *stream, unsigned int frames_total) +// Most CPU-intensive function in the game, so marked with ATTRIBUTE_HOT so the compiler considers it a hot spot (as it is) when optimizing. This alone can reduce the CPU usage of CSE2 by up to 60% +ATTRIBUTE_HOT static void MixSounds(float *stream, unsigned int frames_total) { for (AudioBackend_Sound *sound = sound_list_head; sound != NULL; sound = sound->next) { diff --git a/src/PixTone.cpp b/src/PixTone.cpp index a2d06126..fd48a938 100644 --- a/src/PixTone.cpp +++ b/src/PixTone.cpp @@ -4,6 +4,7 @@ #include #include +#include "Attributes.h" #include "WindowsWrapper.h" #include "Random.h" @@ -65,7 +66,7 @@ void MakeWaveTables(void) //BOOL wave_tables_made; -BOOL MakePixelWaveData(const PIXTONEPARAMETER *ptp, unsigned char *pData) +ATTRIBUTE_HOT BOOL MakePixelWaveData(const PIXTONEPARAMETER *ptp, unsigned char *pData) { int i; int a, b, c, d; From 6ccfe063ceae8eae0edbb0050dca5c5bb25899a9 Mon Sep 17 00:00:00 2001 From: Gabriel Ravier Date: Thu, 30 Jan 2020 15:48:47 +0100 Subject: [PATCH 02/11] Made MakePixelWaveData use ffast-math --- src/Attributes.h | 2 ++ src/PixTone.cpp | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Attributes.h b/src/Attributes.h index f2c03824..8fb87984 100644 --- a/src/Attributes.h +++ b/src/Attributes.h @@ -3,9 +3,11 @@ #ifdef __GNUC__ #define ATTRIBUTE_HOT __attribute__((hot)) +#define ATTRIBUTE_OPTIMIZE(optString) __attribute__((optimize(optString))) #else #define ATTRIBUTE_HOT +#define ATTRIBUTE_OPTIMIZE(optString) #endif diff --git a/src/PixTone.cpp b/src/PixTone.cpp index fd48a938..5242d853 100644 --- a/src/PixTone.cpp +++ b/src/PixTone.cpp @@ -66,7 +66,7 @@ void MakeWaveTables(void) //BOOL wave_tables_made; -ATTRIBUTE_HOT BOOL MakePixelWaveData(const PIXTONEPARAMETER *ptp, unsigned char *pData) +ATTRIBUTE_HOT ATTRIBUTE_OPTIMIZE("-ffast-math") BOOL MakePixelWaveData(const PIXTONEPARAMETER *ptp, unsigned char *pData) { int i; int a, b, c, d; From e8ec6a8ffb70f10cfa60e375fd540452fa5074f1 Mon Sep 17 00:00:00 2001 From: Gabriel Ravier Date: Fri, 31 Jan 2020 07:43:51 +0100 Subject: [PATCH 03/11] Optimize Backend_Blit a bit --- src/Backends/Rendering/Software.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Backends/Rendering/Software.cpp b/src/Backends/Rendering/Software.cpp index 17666c27..e1b40df1 100644 --- a/src/Backends/Rendering/Software.cpp +++ b/src/Backends/Rendering/Software.cpp @@ -7,6 +7,7 @@ #include "SDL.h" #include "../../WindowsWrapper.h" +#include "../../Attributes.h" #include "../../Resource.h" @@ -156,7 +157,7 @@ void Backend_UnlockSurface(Backend_Surface *surface, unsigned int width, unsigne (void)height; } -void Backend_Blit(Backend_Surface *source_surface, const RECT *rect, Backend_Surface *destination_surface, long x, long y, BOOL colour_key) +ATTRIBUTE_HOT void Backend_Blit(Backend_Surface *source_surface, const RECT *rect, Backend_Surface *destination_surface, long x, long y, BOOL colour_key) { if (source_surface == NULL || destination_surface == NULL) return; From 2adfef003573733917a62e01726ee765ace03d0e Mon Sep 17 00:00:00 2001 From: Gabriel Ravier Date: Fri, 31 Jan 2020 12:27:29 +0100 Subject: [PATCH 04/11] Got Backend_Blit to go from 17% CPU usage to 11% : - used __builtin_expect to get a 8.5% performance improvement - used #pragma omp for to get a 30% performance improvement Signed-off-by: Gabriel Ravier --- CMakeLists.txt | 7 +++++++ src/Attributes.h | 6 ++++++ src/Backends/Rendering/Software.cpp | 5 ++++- 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 731a8327..95a968a7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -477,6 +477,13 @@ if(NOT FORCE_LOCAL_LIBS) find_package(SDL2) endif() +# Enable OpenMP if available +find_package(OpenMP) +if (OpenMP_CXX_FOUND) + target_link_libraries(CSE2 OpenMP::OpenMP_CXX) + target_compile_definitions(CSE2 PRIVATE USE_OPENMP) +endif() + if(TARGET SDL2::SDL2) # CMake-generated config (Arch, vcpkg, Raspbian) message(STATUS "Using system SDL2") diff --git a/src/Attributes.h b/src/Attributes.h index 8fb87984..ac0b6b3c 100644 --- a/src/Attributes.h +++ b/src/Attributes.h @@ -4,10 +4,16 @@ #define ATTRIBUTE_HOT __attribute__((hot)) #define ATTRIBUTE_OPTIMIZE(optString) __attribute__((optimize(optString))) +#define LIKELY(condition) __builtin_expect((condition), 1) +#define UNLIKELY(condition) __builtin_expect((condition), 0) +#define PREFETCH(address, isWrite, locality) __builtin_prefetch((address), (isWrite), (locality)) #else #define ATTRIBUTE_HOT #define ATTRIBUTE_OPTIMIZE(optString) +#define LIKELY(condition) condition +#define UNLIKELY(condition) condition +#define PREFETCH(address, isWrite, locality) #endif diff --git a/src/Backends/Rendering/Software.cpp b/src/Backends/Rendering/Software.cpp index e1b40df1..40b53fc5 100644 --- a/src/Backends/Rendering/Software.cpp +++ b/src/Backends/Rendering/Software.cpp @@ -207,6 +207,9 @@ ATTRIBUTE_HOT void Backend_Blit(Backend_Surface *source_surface, const RECT *rec // Do the actual blitting if (colour_key) { +#ifdef USE_OPENMP +#pragma omp for +#endif for (long j = 0; j < rect_clamped.bottom - rect_clamped.top; ++j) { unsigned char *source_pointer = &source_surface->pixels[((rect_clamped.top + j) * source_surface->pitch) + (rect_clamped.left * 3)]; @@ -214,7 +217,7 @@ ATTRIBUTE_HOT void Backend_Blit(Backend_Surface *source_surface, const RECT *rec for (long i = 0; i < rect_clamped.right - rect_clamped.left; ++i) { - if (source_pointer[0] == 0 && source_pointer[1] == 0 && source_pointer[2] == 0) // Assumes the colour key will always be #000000 (black) + if (UNLIKELY(source_pointer[0] == 0 && source_pointer[1] == 0 && source_pointer[2] == 0)) // Assumes the colour key will always be #000000 (black) { source_pointer += 3; destination_pointer += 3; From f43bd4f8764e84b53baac0acb4e94bf2aa01d4b6 Mon Sep 17 00:00:00 2001 From: Gabriel Ravier Date: Fri, 31 Jan 2020 12:48:00 +0100 Subject: [PATCH 05/11] Removed bad omp (it doesn't actually improve anything, i did my measurements wrong) --- src/Backends/Rendering/Software.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Backends/Rendering/Software.cpp b/src/Backends/Rendering/Software.cpp index 40b53fc5..1b194b4e 100644 --- a/src/Backends/Rendering/Software.cpp +++ b/src/Backends/Rendering/Software.cpp @@ -207,9 +207,6 @@ ATTRIBUTE_HOT void Backend_Blit(Backend_Surface *source_surface, const RECT *rec // Do the actual blitting if (colour_key) { -#ifdef USE_OPENMP -#pragma omp for -#endif for (long j = 0; j < rect_clamped.bottom - rect_clamped.top; ++j) { unsigned char *source_pointer = &source_surface->pixels[((rect_clamped.top + j) * source_surface->pitch) + (rect_clamped.left * 3)]; From 5d6164141521b326d4a8ff07556f97605a38c6f9 Mon Sep 17 00:00:00 2001 From: Gabriel Ravier Date: Fri, 31 Jan 2020 18:47:44 +0100 Subject: [PATCH 06/11] Added ATTRIBUTE_HOT to Backend_ColourFill Signed-off-by: Gabriel Ravier --- src/Backends/Rendering/Software.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Backends/Rendering/Software.cpp b/src/Backends/Rendering/Software.cpp index 1b194b4e..fef92cbd 100644 --- a/src/Backends/Rendering/Software.cpp +++ b/src/Backends/Rendering/Software.cpp @@ -240,7 +240,7 @@ ATTRIBUTE_HOT void Backend_Blit(Backend_Surface *source_surface, const RECT *rec } } -void Backend_ColourFill(Backend_Surface *surface, const RECT *rect, unsigned char red, unsigned char green, unsigned char blue) +ATTRIBUTE_HOT void Backend_ColourFill(Backend_Surface *surface, const RECT *rect, unsigned char red, unsigned char green, unsigned char blue) { if (surface == NULL) return; From 5bd70eb2dbb687d73033089d9208621ccfcc321d Mon Sep 17 00:00:00 2001 From: Gabriel Ravier Date: Tue, 4 Feb 2020 10:50:40 +0100 Subject: [PATCH 07/11] Removed ATTRIBUTE_OPTIMIZE from MakePixelWaveData Signed-off-by: Gabriel Ravier --- src/PixTone.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/PixTone.cpp b/src/PixTone.cpp index 5242d853..fd48a938 100644 --- a/src/PixTone.cpp +++ b/src/PixTone.cpp @@ -66,7 +66,7 @@ void MakeWaveTables(void) //BOOL wave_tables_made; -ATTRIBUTE_HOT ATTRIBUTE_OPTIMIZE("-ffast-math") BOOL MakePixelWaveData(const PIXTONEPARAMETER *ptp, unsigned char *pData) +ATTRIBUTE_HOT BOOL MakePixelWaveData(const PIXTONEPARAMETER *ptp, unsigned char *pData) { int i; int a, b, c, d; From d59df30005c5c5f11e7097a06292aadd68ea2448 Mon Sep 17 00:00:00 2001 From: Gabriel Ravier Date: Thu, 13 Feb 2020 19:54:06 +0100 Subject: [PATCH 08/11] Removed OpenMP stuff --- CMakeLists.txt | 7 ------- 1 file changed, 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 95a968a7..731a8327 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -477,13 +477,6 @@ if(NOT FORCE_LOCAL_LIBS) find_package(SDL2) endif() -# Enable OpenMP if available -find_package(OpenMP) -if (OpenMP_CXX_FOUND) - target_link_libraries(CSE2 OpenMP::OpenMP_CXX) - target_compile_definitions(CSE2 PRIVATE USE_OPENMP) -endif() - if(TARGET SDL2::SDL2) # CMake-generated config (Arch, vcpkg, Raspbian) message(STATUS "Using system SDL2") From b238bcee033e6e72859bdfe6a7f7b588746ffcbd Mon Sep 17 00:00:00 2001 From: Gabriel Ravier Date: Fri, 14 Feb 2020 08:27:07 +0100 Subject: [PATCH 09/11] Remove random lines mistakenly added by merge --- src/Backends/Audio/SDL2.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Backends/Audio/SDL2.cpp b/src/Backends/Audio/SDL2.cpp index 42612e22..86772cae 100644 --- a/src/Backends/Audio/SDL2.cpp +++ b/src/Backends/Audio/SDL2.cpp @@ -16,8 +16,6 @@ #define MAX(a, b) ((a) > (b) ? (a) : (b)) #define CLAMP(x, y, z) MIN(MAX((x), (y)), (z)) -#else -#define ATTR_HOT struct AudioBackend_Sound { unsigned char *samples; From afd3abecc44812a0748caac0fa47b0612f62160d Mon Sep 17 00:00:00 2001 From: Gabriel Ravier Date: Fri, 3 Apr 2020 00:43:34 +0200 Subject: [PATCH 10/11] Correctly used Attributes.h instead of a definition directly in SoftwareMixer.cpp Signed-off-by: Gabriel Ravier --- src/Backends/Audio/SoftwareMixer.cpp | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/Backends/Audio/SoftwareMixer.cpp b/src/Backends/Audio/SoftwareMixer.cpp index afae1b4b..ade44cfa 100644 --- a/src/Backends/Audio/SoftwareMixer.cpp +++ b/src/Backends/Audio/SoftwareMixer.cpp @@ -5,17 +5,12 @@ #include #include #include +#include "../../Attributes.h" #define MIN(a, b) ((a) < (b) ? (a) : (b)) #define MAX(a, b) ((a) > (b) ? (a) : (b)) #define CLAMP(x, y, z) MIN(MAX((x), (y)), (z)) -#ifdef __GNUC__ -#define ATTR_HOT __attribute__((hot)) -#else -#define ATTR_HOT -#endif - struct Mixer_Sound { unsigned char *samples; @@ -144,7 +139,7 @@ void Mixer_SetSoundPan(Mixer_Sound *sound, long pan) } // Most CPU-intensive function in the game (2/3rd CPU time consumption in my experience), so marked with attrHot so the compiler considers it a hot spot (as it is) when optimizing -ATTR_HOT void Mixer_MixSounds(float *stream, unsigned int frames_total) +ATTRIBUTE_HOT void Mixer_MixSounds(float *stream, unsigned int frames_total) { for (Mixer_Sound *sound = sound_list_head; sound != NULL; sound = sound->next) { From 81e3259535acda40d00505fd908b9dbed3b66ef4 Mon Sep 17 00:00:00 2001 From: Gabriel Ravier Date: Sun, 5 Apr 2020 23:04:59 +0200 Subject: [PATCH 11/11] Backends/Audio/SoftwareMixer: Optimized `fmod(x, 1.0)` to `x - trunc(x)` Signed-off-by: Gabriel Ravier --- src/Backends/Audio/SoftwareMixer.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Backends/Audio/SoftwareMixer.cpp b/src/Backends/Audio/SoftwareMixer.cpp index a7088040..4260eae4 100644 --- a/src/Backends/Audio/SoftwareMixer.cpp +++ b/src/Backends/Audio/SoftwareMixer.cpp @@ -1,6 +1,6 @@ #include "SoftwareMixer.h" -#include +#include #include #include #include @@ -154,7 +154,7 @@ ATTRIBUTE_HOT void Mixer_MixSounds(float *stream, unsigned int frames_total) const float sample2 = (sound->samples[(size_t)sound->position + 1] - 128.0f) / 128.0f; // Perform linear interpolation - const float interpolated_sample = sample1 + ((sample2 - sample1) * fmod(sound->position, 1.0)); + const float interpolated_sample = sample1 + (sample2 - sample1) * (sound->position - std::trunc(sound->position)); *steam_pointer++ += interpolated_sample * sound->volume_l; *steam_pointer++ += interpolated_sample * sound->volume_r; @@ -165,7 +165,7 @@ ATTRIBUTE_HOT void Mixer_MixSounds(float *stream, unsigned int frames_total) { if (sound->looping) { - sound->position = fmod(sound->position, (double)sound->frames); + sound->position = std::fmod(sound->position, (double)sound->frames); } else {