From 2adfef003573733917a62e01726ee765ace03d0e Mon Sep 17 00:00:00 2001
From: Gabriel Ravier
Date: Fri, 31 Jan 2020 12:27:29 +0100
Subject: [PATCH] Got Backend_Blit to go from 17% CPU usage to 11%

- used __builtin_expect to get an 8.5% performance improvement
- used #pragma omp parallel for to get a 30% performance improvement

Review notes:
- An orphaned `#pragma omp for` (one that is not nested inside a
  `parallel` region) is executed by a single thread, so the combined
  `parallel for` construct is used to actually distribute the rows.
  Each row only touches its own source/destination pointers, so the
  iterations are independent. Please re-measure the figures above.
- LIKELY/UNLIKELY normalise their argument with `!!` and are fully
  parenthesised in both the GNU and the fallback definitions, so they
  accept any scalar condition and expand safely inside larger
  expressions. The fallback PREFETCH expands to `((void)0)` so it
  remains a valid expression.

Signed-off-by: Gabriel Ravier
---
 CMakeLists.txt                      | 7 +++++++
 src/Attributes.h                    | 6 ++++++
 src/Backends/Rendering/Software.cpp | 5 ++++-
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 731a8327..95a968a7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -477,6 +477,13 @@ if(NOT FORCE_LOCAL_LIBS)
 	find_package(SDL2)
 endif()
 
+# Enable OpenMP if available
+find_package(OpenMP)
+if (OpenMP_CXX_FOUND)
+	target_link_libraries(CSE2 OpenMP::OpenMP_CXX)
+	target_compile_definitions(CSE2 PRIVATE USE_OPENMP)
+endif()
+
 if(TARGET SDL2::SDL2)
 	# CMake-generated config (Arch, vcpkg, Raspbian)
 	message(STATUS "Using system SDL2")
diff --git a/src/Attributes.h b/src/Attributes.h
index 8fb87984..ac0b6b3c 100644
--- a/src/Attributes.h
+++ b/src/Attributes.h
@@ -4,10 +4,16 @@
 
 #define ATTRIBUTE_HOT __attribute__((hot))
 #define ATTRIBUTE_OPTIMIZE(optString) __attribute__((optimize(optString)))
+#define LIKELY(condition) __builtin_expect(!!(condition), 1)
+#define UNLIKELY(condition) __builtin_expect(!!(condition), 0)
+#define PREFETCH(address, isWrite, locality) __builtin_prefetch((address), (isWrite), (locality))
 
 #else
 
 #define ATTRIBUTE_HOT
 #define ATTRIBUTE_OPTIMIZE(optString)
+#define LIKELY(condition) (!!(condition))
+#define UNLIKELY(condition) (!!(condition))
+#define PREFETCH(address, isWrite, locality) ((void)0)
 
 #endif
diff --git a/src/Backends/Rendering/Software.cpp b/src/Backends/Rendering/Software.cpp
index e1b40df1..40b53fc5 100644
--- a/src/Backends/Rendering/Software.cpp
+++ b/src/Backends/Rendering/Software.cpp
@@ -207,6 +207,9 @@ ATTRIBUTE_HOT void Backend_Blit(Backend_Surface *source_surface, const RECT *rec
 	// Do the actual blitting
 	if (colour_key)
 	{
+#ifdef USE_OPENMP
+#pragma omp parallel for
+#endif
 		for (long j = 0; j < rect_clamped.bottom - rect_clamped.top; ++j)
 		{
 			unsigned char *source_pointer = &source_surface->pixels[((rect_clamped.top + j) * source_surface->pitch) + (rect_clamped.left * 3)];
@@ -214,7 +217,7 @@ ATTRIBUTE_HOT void Backend_Blit(Backend_Surface *source_surface, const RECT *rec
 
 			for (long i = 0; i < rect_clamped.right - rect_clamped.left; ++i)
 			{
-				if (source_pointer[0] == 0 && source_pointer[1] == 0 && source_pointer[2] == 0) // Assumes the colour key will always be #000000 (black)
+				if (UNLIKELY(source_pointer[0] == 0 && source_pointer[1] == 0 && source_pointer[2] == 0)) // Assumes the colour key will always be #000000 (black)
 				{
 					source_pointer += 3;
 					destination_pointer += 3;