diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0ab79edc..00f26f95 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -16,6 +16,8 @@ option(DEBUG_SAVE "Re-enable the ability to drag-and-drop save files onto the wi
 set(RENDERER "SDLTexture" CACHE STRING "Which renderer the game should use: 'OpenGL3' for an OpenGL 3.2 renderer, 'OpenGLES2' for an OpenGL ES 2.0 renderer, 'SDLTexture' for SDL2's hardware-accelerated Texture API, 'SDLSurface' for SDL2's software-rendered Surface API, or 'Software' for a handwritten software renderer")
 option(LTO "Enable link-time optimisation" OFF)
 
+option(NATIVE_OPTIMIZATIONS "Enable processor-specific optimisations (executable might not work on other architectures) (GCC-compatible and Intel compilers only)" OFF)
+
 option(WARNINGS "Enable common compiler warnings (for GCC-compatible compilers and MSVC only)" OFF)
 option(WARNINGS_ALL "Enable ALL compiler warnings (for Clang and MSVC only)" OFF)
 option(WARNINGS_FATAL "Stop compilation on any compiler warning (for GCC-compatible compilers and MSVC only)" OFF)
@@ -450,6 +452,25 @@ if(LTO)
 	endif()
 endif()
 
+# Enable processor-specific optimisations with whichever native-tuning flag the compiler accepts
+if(NATIVE_OPTIMIZATIONS)
+	include(CheckCXXCompilerFlag)
+	CHECK_CXX_COMPILER_FLAG("-march=native" COMPILER_SUPPORTS_MARCH_NATIVE) # GCC flag
+	if(COMPILER_SUPPORTS_MARCH_NATIVE)
+		target_compile_options(CSE2 PRIVATE -march=native)
+	else()
+		CHECK_CXX_COMPILER_FLAG("-xHost" COMPILER_SUPPORTS_XHOST) # ICC (Linux) flag
+		CHECK_CXX_COMPILER_FLAG("/QxHost" COMPILER_SUPPORTS_QXHOST) # ICC (Windows) flag
+		if(COMPILER_SUPPORTS_XHOST)
+			target_compile_options(CSE2 PRIVATE -xHost)
+		elseif(COMPILER_SUPPORTS_QXHOST)
+			target_compile_options(CSE2 PRIVATE /QxHost)
+		else()
+			message(WARNING "Couldn't activate native optimizations ! (Unsupported compiler)")
+		endif()
+	endif()
+endif()
+
 # Find dependencies
 
 if(NOT FORCE_LOCAL_LIBS)
diff --git a/README.md b/README.md
index 4de9cb95..21613d15 100644
--- a/README.md
+++ b/README.md
@@ -61,6 +61,7 @@ You can also add the following flags:
 Name | Function
 --------|--------
 `-DLTO=ON` | Enable link-time optimisation
+`-DNATIVE_OPTIMIZATIONS=ON` | Enable processor-specific optimisations (executable might not work on other architectures) (GCC-compatible and Intel compilers only)
 `-DJAPANESE=ON` | Enable the Japanese-language build (instead of the unofficial Aeon Genesis English translation)
 `-DFIX_BUGS=ON` | Fix various bugs in the game
 `-DDEBUG_SAVE=ON` | Re-enable the ability to drag-and-drop save files onto the window
diff --git a/src/Backends/Audio/SDL2.cpp b/src/Backends/Audio/SDL2.cpp
index e511d5e3..ae087d3d 100644
--- a/src/Backends/Audio/SDL2.cpp
+++ b/src/Backends/Audio/SDL2.cpp
@@ -12,6 +12,7 @@
 
 #include "../../Organya.h"
 #include "../../WindowsWrapper.h"
+#include "../../CommonDefines.h"
 
 #define MIN(a, b) ((a) < (b) ? (a) : (b))
 #define MAX(a, b) ((a) > (b) ? (a) : (b))
@@ -70,7 +71,8 @@ static void SetSoundPan(AudioBackend_Sound *sound, long pan)
 	sound->volume_r = sound->pan_r * sound->volume;
 }
 
-static void MixSounds(float *stream, unsigned int frames_total)
+// Most CPU-intensive function in the game (about two thirds of CPU time in the patch author's experience), so it is marked with attrHot to make the compiler treat it as a hot spot when optimising
+attrHot static void MixSounds(float *stream, unsigned int frames_total)
 {
 	for (AudioBackend_Sound *sound = sound_list_head; sound != NULL; sound = sound->next)
 	{
@@ -85,7 +87,8 @@ static void MixSounds(float *stream, unsigned int frames_total)
 				const float sample2 = (sound->samples[(size_t)sound->position + 1] - 128.0f) / 128.0f;
 
 				// Perform linear interpolation
-				const float interpolated_sample = sample1 + ((sample2 - sample1) * (float)fmod(sound->position, 1.0));
+				// The fractional part is taken before narrowing to float: casting the whole position to float first would coarsen the interpolation weight as the position grows
+				const float interpolated_sample = sample1 + ((sample2 - sample1) * (float)(sound->position - (size_t)sound->position));
 
 				*steam_pointer++ += interpolated_sample * sound->volume_l;
 				*steam_pointer++ += interpolated_sample * sound->volume_r;
diff --git a/src/CommonDefines.h b/src/CommonDefines.h
index 5b616ffb..9e458bdc 100644
--- a/src/CommonDefines.h
+++ b/src/CommonDefines.h
@@ -10,6 +10,13 @@
 #define TILES_TO_UNITS(x) ((int)((x) * (0x200 * 0x10)))
 #define UNITS_TO_TILES(x) ((int)((x) / (0x200 * 0x10)))
 
+// Marks a function as a hot spot for the optimiser; expands to nothing on compilers without GCC attribute syntax so the code still builds there
+#ifdef __GNUC__
+#define attrHot __attribute__((hot))
+#else
+#define attrHot
+#endif
+
 enum Collisions
 {
 	COLL_LEFT_WALL = 1, // Touching a left wall