Got Backend_Blit to go from 17% CPU usage to 11% :

- used __builtin_expect to get a 8.5% performance improvement
- used #pragma omp for to get a 30% performance improvement

Signed-off-by: Gabriel Ravier <gabravier@gmail.com>
This commit is contained in:
Gabriel Ravier 2020-01-31 12:27:29 +01:00
parent e8ec6a8ffb
commit 2adfef0035
3 changed files with 17 additions and 1 deletions

View file

@ -477,6 +477,13 @@ if(NOT FORCE_LOCAL_LIBS)
find_package(SDL2)
endif()
# Enable OpenMP if available
find_package(OpenMP)
if (OpenMP_CXX_FOUND)
target_link_libraries(CSE2 OpenMP::OpenMP_CXX)
target_compile_definitions(CSE2 PRIVATE USE_OPENMP)
endif()
if(TARGET SDL2::SDL2)
# CMake-generated config (Arch, vcpkg, Raspbian)
message(STATUS "Using system SDL2")

View file

@ -4,10 +4,16 @@
#define ATTRIBUTE_HOT __attribute__((hot))
#define ATTRIBUTE_OPTIMIZE(optString) __attribute__((optimize(optString)))
#define LIKELY(condition) __builtin_expect((condition), 1)
#define UNLIKELY(condition) __builtin_expect((condition), 0)
#define PREFETCH(address, isWrite, locality) __builtin_prefetch((address), (isWrite), (locality))
#else
#define ATTRIBUTE_HOT
#define ATTRIBUTE_OPTIMIZE(optString)
#define LIKELY(condition) condition
#define UNLIKELY(condition) condition
#define PREFETCH(address, isWrite, locality)
#endif

View file

@ -207,6 +207,9 @@ ATTRIBUTE_HOT void Backend_Blit(Backend_Surface *source_surface, const RECT *rec
// Do the actual blitting
if (colour_key)
{
#ifdef USE_OPENMP
#pragma omp for
#endif
for (long j = 0; j < rect_clamped.bottom - rect_clamped.top; ++j)
{
unsigned char *source_pointer = &source_surface->pixels[((rect_clamped.top + j) * source_surface->pitch) + (rect_clamped.left * 3)];
@ -214,7 +217,7 @@ ATTRIBUTE_HOT void Backend_Blit(Backend_Surface *source_surface, const RECT *rec
for (long i = 0; i < rect_clamped.right - rect_clamped.left; ++i)
{
if (source_pointer[0] == 0 && source_pointer[1] == 0 && source_pointer[2] == 0) // Assumes the colour key will always be #000000 (black)
if (UNLIKELY(source_pointer[0] == 0 && source_pointer[1] == 0 && source_pointer[2] == 0)) // Assumes the colour key will always be #000000 (black)
{
source_pointer += 3;
destination_pointer += 3;