Add vertex-batching to Wii U port

Should remove the last bit of lag that port has (like the Labyrinth
fight where you team-up with Curly)
This commit is contained in:
Clownacy 2020-06-29 21:37:05 +01:00
parent 9925c16b29
commit f6fe0537ed

View file

@ -25,6 +25,15 @@
#include "WiiUShaders/texture.gsh.h"
#include "WiiUShaders/texture_colour_key.gsh.h"
typedef enum RenderMode
{
MODE_BLANK,
MODE_DRAW_SURFACE,
MODE_DRAW_SURFACE_WITH_TRANSPARENCY,
MODE_COLOUR_FILL,
MODE_DRAW_GLYPH
} RenderMode;
typedef struct RenderBackend_Surface
{
GX2Texture texture;
@ -62,6 +71,11 @@ typedef struct Vertex
Coordinate2D texture;
} Vertex;
typedef struct VertexBufferSlot
{
Vertex vertices[4];
} VertexBufferSlot;
static WHBGfxShaderGroup texture_shader;
static WHBGfxShaderGroup texture_colour_key_shader;
static WHBGfxShaderGroup colour_fill_shader;
@ -80,6 +94,90 @@ static RenderBackend_Surface *glyph_destination_surface;
static Viewport tv_viewport;
static Viewport drc_viewport;
static VertexBufferSlot *local_vertex_buffer;
static unsigned long local_vertex_buffer_size;
static unsigned long current_vertex_buffer_slot;
static RenderMode last_render_mode;
static RenderBackend_Surface *last_source_surface;
static RenderBackend_Surface *last_destination_surface;
static VertexBufferSlot* GetVertexBufferSlot(unsigned int slots_needed)
{
// Check if buffer needs expanding
if (current_vertex_buffer_slot + slots_needed > local_vertex_buffer_size)
{
local_vertex_buffer_size = 1;
while (current_vertex_buffer_slot + slots_needed > local_vertex_buffer_size)
local_vertex_buffer_size <<= 1;
VertexBufferSlot *realloc_result = (VertexBufferSlot*)realloc(local_vertex_buffer, local_vertex_buffer_size * sizeof(VertexBufferSlot));
if (realloc_result != NULL)
{
local_vertex_buffer = realloc_result;
}
else
{
Backend_PrintError("Couldn't expand vertex buffer");
return NULL;
}
}
current_vertex_buffer_slot += slots_needed;
return &local_vertex_buffer[current_vertex_buffer_slot - slots_needed];
}
static void FlushVertexBuffer(void)
{
static unsigned long vertex_buffer_size;
static unsigned int current_vertex_buffer = 0;
if (current_vertex_buffer_slot == 0)
return;
// Make sure the buffers aren't currently being used before we modify them
GX2DrawDone();
// Upload vertex buffer to VBO, growing it if necessary
if (local_vertex_buffer_size > vertex_buffer_size)
{
vertex_buffer_size = local_vertex_buffer_size;
GX2RDestroyBufferEx(&vertex_buffer, (GX2RResourceFlags)0);
vertex_buffer.flags = (GX2RResourceFlags)(GX2R_RESOURCE_BIND_VERTEX_BUFFER |
GX2R_RESOURCE_USAGE_CPU_READ |
GX2R_RESOURCE_USAGE_CPU_WRITE |
GX2R_RESOURCE_USAGE_GPU_READ);
vertex_buffer.elemSize = sizeof(VertexBufferSlot);
vertex_buffer.elemCount = vertex_buffer_size;
if (GX2RCreateBuffer(&vertex_buffer))
{
void *vertex_pointer = GX2RLockBufferEx(&vertex_buffer, (GX2RResourceFlags)0);
memcpy(vertex_pointer, local_vertex_buffer, vertex_buffer_size * sizeof(VertexBufferSlot));
GX2RUnlockBufferEx(&vertex_buffer, (GX2RResourceFlags)0);
}
}
else
{
void *vertex_pointer = GX2RLockBufferEx(&vertex_buffer, (GX2RResourceFlags)0);
memcpy(vertex_pointer, local_vertex_buffer, current_vertex_buffer_slot * sizeof(VertexBufferSlot));
GX2RUnlockBufferEx(&vertex_buffer, (GX2RResourceFlags)0);
}
GX2DrawEx(GX2_PRIMITIVE_MODE_QUADS, 4 * current_vertex_buffer_slot, 0, 1);
current_vertex_buffer_slot = 0;
}
static void CalculateViewport(unsigned int actual_screen_width, unsigned int actual_screen_height, Viewport *viewport)
{
if ((float)actual_screen_width / (float)actual_screen_height > (float)framebuffer_surface->width / (float)framebuffer_surface->height)
@ -136,80 +234,63 @@ RenderBackend_Surface* RenderBackend_Init(const char *window_title, int screen_w
WHBGfxInitShaderAttribute(&glyph_shader, "input_texture_coordinates", 1, 0, GX2_ATTRIB_FORMAT_FLOAT_32_32);
WHBGfxInitFetchShader(&glyph_shader);
// Initialise vertex position buffer
vertex_buffer.flags = (GX2RResourceFlags)(GX2R_RESOURCE_BIND_VERTEX_BUFFER |
GX2R_RESOURCE_USAGE_CPU_READ |
GX2R_RESOURCE_USAGE_CPU_WRITE |
GX2R_RESOURCE_USAGE_GPU_READ);
vertex_buffer.elemSize = sizeof(Vertex);
vertex_buffer.elemCount = 4;
// Initialise sampler
GX2InitSampler(&sampler, GX2_TEX_CLAMP_MODE_CLAMP, GX2_TEX_XY_FILTER_MODE_LINEAR);
if (GX2RCreateBuffer(&vertex_buffer))
// Create framebuffer surface
framebuffer_surface = RenderBackend_CreateSurface(screen_width, screen_height, true);
if (framebuffer_surface != NULL)
{
// Initialise sampler
GX2InitSampler(&sampler, GX2_TEX_CLAMP_MODE_CLAMP, GX2_TEX_XY_FILTER_MODE_LINEAR);
// Create a 'context' (this voodoo magic can be used to undo `GX2SetColorBuffer`,
// allowing us to draw to the screen once again)
gx2_context = (GX2ContextState*)aligned_alloc(GX2_CONTEXT_STATE_ALIGNMENT, sizeof(GX2ContextState));
// Create framebuffer surface
framebuffer_surface = RenderBackend_CreateSurface(screen_width, screen_height, true);
if (framebuffer_surface != NULL)
if (gx2_context != NULL)
{
// Create a 'context' (this voodoo magic can be used to undo `GX2SetColorBuffer`,
// allowing us to draw to the screen once again)
gx2_context = (GX2ContextState*)aligned_alloc(GX2_CONTEXT_STATE_ALIGNMENT, sizeof(GX2ContextState));
memset(gx2_context, 0, sizeof(GX2ContextState));
GX2SetupContextStateEx(gx2_context, TRUE);
GX2SetContextState(gx2_context);
if (gx2_context != NULL)
// Disable depth-test (enabled by default for some reason)
GX2SetDepthOnlyControl(FALSE, FALSE, GX2_COMPARE_FUNC_ALWAYS);
// Calculate centred viewports
switch (GX2GetSystemTVScanMode())
{
memset(gx2_context, 0, sizeof(GX2ContextState));
GX2SetupContextStateEx(gx2_context, TRUE);
GX2SetContextState(gx2_context);
// For now, we have to match WUT's broken behaviour (its `GX2TVScanMode`
// enum is missing values, and the rest are off-by-one)
//case GX2_TV_SCAN_MODE_576I:
case GX2_TV_SCAN_MODE_480I: // Actually 576i
case GX2_TV_SCAN_MODE_480P: // Actually 480i
CalculateViewport(854, 480, &tv_viewport);
break;
// Disable depth-test (enabled by default for some reason)
GX2SetDepthOnlyControl(FALSE, FALSE, GX2_COMPARE_FUNC_ALWAYS);
case GX2_TV_SCAN_MODE_720P: // Actually 480p
default: // Funnel the *real* 1080p into this
CalculateViewport(1280, 720, &tv_viewport);
break;
// Calculate centred viewports
switch (GX2GetSystemTVScanMode())
{
// For now, we have to match WUT's broken behaviour (its `GX2TVScanMode`
// enum is missing values, and the rest are off-by-one)
//case GX2_TV_SCAN_MODE_576I:
case GX2_TV_SCAN_MODE_480I: // Actually 576i
case GX2_TV_SCAN_MODE_480P: // Actually 480i
CalculateViewport(854, 480, &tv_viewport);
break;
case GX2_TV_SCAN_MODE_720P: // Actually 480p
default: // Funnel the *real* 1080p into this
CalculateViewport(1280, 720, &tv_viewport);
break;
case GX2_TV_SCAN_MODE_1080I: // Actually invalid
case GX2_TV_SCAN_MODE_1080P: // Actually 1080i
CalculateViewport(1920, 1080, &tv_viewport);
break;
}
CalculateViewport(854, 480, &drc_viewport);
return framebuffer_surface;
}
else
{
Backend_PrintError("Couldn't allocate memory for the GX2 context");
case GX2_TV_SCAN_MODE_1080I: // Actually invalid
case GX2_TV_SCAN_MODE_1080P: // Actually 1080i
CalculateViewport(1920, 1080, &tv_viewport);
break;
}
RenderBackend_FreeSurface(framebuffer_surface);
CalculateViewport(854, 480, &drc_viewport);
return framebuffer_surface;
}
else
{
Backend_PrintError("Couldn't create the framebuffer surface");
Backend_PrintError("Couldn't allocate memory for the GX2 context");
}
GX2RDestroyBufferEx(&vertex_buffer, (GX2RResourceFlags)0);
RenderBackend_FreeSurface(framebuffer_surface);
}
else
{
Backend_PrintError("Couldn't create the vertex buffer");
Backend_PrintError("Couldn't create the framebuffer surface");
}
WHBGfxFreeShaderGroup(&glyph_shader);
@ -268,6 +349,11 @@ void RenderBackend_Deinit(void)
void RenderBackend_DrawScreen(void)
{
FlushVertexBuffer();
last_render_mode = MODE_BLANK;
last_source_surface = NULL;
last_destination_surface = NULL;
// Make sure the buffers aren't currently being used before we modify them
GX2DrawDone();
@ -503,124 +589,143 @@ void RenderBackend_Blit(RenderBackend_Surface *source_surface, const RenderBacke
if (source_surface == NULL || destination_surface == NULL)
return;
// Make sure the buffers aren't currently being used before we modify them
GX2DrawDone();
const RenderMode render_mode = (colour_key ? MODE_DRAW_SURFACE_WITH_TRANSPARENCY : MODE_DRAW_SURFACE);
Vertex *vertex_pointer = (Vertex*)GX2RLockBufferEx(&vertex_buffer, (GX2RResourceFlags)0);
// Set vertex position buffer
const float destination_left = x;
const float destination_top = y;
const float destination_right = x + (rect->right - rect->left);
const float destination_bottom = y + (rect->bottom - rect->top);
vertex_pointer[0].position.x = destination_left;
vertex_pointer[0].position.y = destination_top;
vertex_pointer[1].position.x = destination_right;
vertex_pointer[1].position.y = destination_top;
vertex_pointer[2].position.x = destination_right;
vertex_pointer[2].position.y = destination_bottom;
vertex_pointer[3].position.x = destination_left;
vertex_pointer[3].position.y = destination_bottom;
for (unsigned int i = 0; i < 4; ++i)
// Flush vertex data if a context-change is needed
if (last_render_mode != render_mode || last_source_surface != source_surface || last_destination_surface != destination_surface)
{
vertex_pointer[i].position.x /= destination_surface->width;
vertex_pointer[i].position.x *= 2.0f;
vertex_pointer[i].position.x -= 1.0f;
FlushVertexBuffer();
vertex_pointer[i].position.y /= destination_surface->height;
vertex_pointer[i].position.y *= -2.0f;
vertex_pointer[i].position.y += 1.0f;
last_render_mode = render_mode;
last_source_surface = source_surface;
last_destination_surface = destination_surface;
// Draw to the selected texture, instead of the screen
GX2SetColorBuffer(&destination_surface->colour_buffer, GX2_RENDER_TARGET_0);
GX2SetViewport(0.0f, 0.0f, (float)destination_surface->colour_buffer.surface.width, (float)destination_surface->colour_buffer.surface.height, 0.0f, 1.0f);
GX2SetScissor(0, 0, destination_surface->colour_buffer.surface.width, destination_surface->colour_buffer.surface.height);
// Select shader
WHBGfxShaderGroup *shader = colour_key ? &texture_colour_key_shader : &texture_shader;
// Bind it
GX2SetFetchShader(&shader->fetchShader);
GX2SetVertexShader(shader->vertexShader);
GX2SetPixelShader(shader->pixelShader);
// Bind misc. data
GX2SetPixelSampler(&sampler, shader->pixelShader->samplerVars[0].location);
GX2SetPixelTexture(&source_surface->texture, shader->pixelShader->samplerVars[0].location);
GX2RSetAttributeBuffer(&vertex_buffer, 0, sizeof(Vertex), offsetof(Vertex, position));
GX2RSetAttributeBuffer(&vertex_buffer, 1, sizeof(Vertex), offsetof(Vertex, texture));
}
// Set texture coordinate buffer
vertex_pointer[0].texture.x = rect->left / (float)source_surface->width;
vertex_pointer[0].texture.y = rect->top / (float)source_surface->height;
vertex_pointer[1].texture.x = rect->right / (float)source_surface->width;
vertex_pointer[1].texture.y = rect->top / (float)source_surface->height;
vertex_pointer[2].texture.x = rect->right / (float)source_surface->width;
vertex_pointer[2].texture.y = rect->bottom / (float)source_surface->height;
vertex_pointer[3].texture.x = rect->left / (float)source_surface->width;
vertex_pointer[3].texture.y = rect->bottom / (float)source_surface->height;
VertexBufferSlot *vertex_buffer_slot = GetVertexBufferSlot(1);
GX2RUnlockBufferEx(&vertex_buffer, (GX2RResourceFlags)0);
if (vertex_buffer_slot != NULL)
{
// Set vertex position buffer
const float destination_left = x;
const float destination_top = y;
const float destination_right = x + (rect->right - rect->left);
const float destination_bottom = y + (rect->bottom - rect->top);
// Draw to the selected texture, instead of the screen
GX2SetColorBuffer(&destination_surface->colour_buffer, GX2_RENDER_TARGET_0);
GX2SetViewport(0.0f, 0.0f, (float)destination_surface->colour_buffer.surface.width, (float)destination_surface->colour_buffer.surface.height, 0.0f, 1.0f);
GX2SetScissor(0, 0, destination_surface->colour_buffer.surface.width, destination_surface->colour_buffer.surface.height);
vertex_buffer_slot->vertices[0].position.x = destination_left;
vertex_buffer_slot->vertices[0].position.y = destination_top;
vertex_buffer_slot->vertices[1].position.x = destination_right;
vertex_buffer_slot->vertices[1].position.y = destination_top;
vertex_buffer_slot->vertices[2].position.x = destination_right;
vertex_buffer_slot->vertices[2].position.y = destination_bottom;
vertex_buffer_slot->vertices[3].position.x = destination_left;
vertex_buffer_slot->vertices[3].position.y = destination_bottom;
// Select shader
WHBGfxShaderGroup *shader = colour_key ? &texture_colour_key_shader : &texture_shader;
for (unsigned int i = 0; i < 4; ++i)
{
vertex_buffer_slot->vertices[i].position.x /= destination_surface->width;
vertex_buffer_slot->vertices[i].position.x *= 2.0f;
vertex_buffer_slot->vertices[i].position.x -= 1.0f;
// Bind it
GX2SetFetchShader(&shader->fetchShader);
GX2SetVertexShader(shader->vertexShader);
GX2SetPixelShader(shader->pixelShader);
vertex_buffer_slot->vertices[i].position.y /= destination_surface->height;
vertex_buffer_slot->vertices[i].position.y *= -2.0f;
vertex_buffer_slot->vertices[i].position.y += 1.0f;
}
// Bind misc. data
GX2SetPixelSampler(&sampler, shader->pixelShader->samplerVars[0].location);
GX2SetPixelTexture(&source_surface->texture, shader->pixelShader->samplerVars[0].location);
GX2RSetAttributeBuffer(&vertex_buffer, 0, sizeof(Vertex), offsetof(Vertex, position));
GX2RSetAttributeBuffer(&vertex_buffer, 1, sizeof(Vertex), offsetof(Vertex, texture));
// Draw
GX2DrawEx(GX2_PRIMITIVE_MODE_QUADS, 4, 0, 1);
// Set texture coordinate buffer
vertex_buffer_slot->vertices[0].texture.x = rect->left / (float)source_surface->width;
vertex_buffer_slot->vertices[0].texture.y = rect->top / (float)source_surface->height;
vertex_buffer_slot->vertices[1].texture.x = rect->right / (float)source_surface->width;
vertex_buffer_slot->vertices[1].texture.y = rect->top / (float)source_surface->height;
vertex_buffer_slot->vertices[2].texture.x = rect->right / (float)source_surface->width;
vertex_buffer_slot->vertices[2].texture.y = rect->bottom / (float)source_surface->height;
vertex_buffer_slot->vertices[3].texture.x = rect->left / (float)source_surface->width;
vertex_buffer_slot->vertices[3].texture.y = rect->bottom / (float)source_surface->height;
}
}
void RenderBackend_ColourFill(RenderBackend_Surface *surface, const RenderBackend_Rect *rect, unsigned char red, unsigned char green, unsigned char blue)
{
static unsigned char last_red;
static unsigned char last_green;
static unsigned char last_blue;
if (surface == NULL)
return;
// Make sure the buffers aren't currently being used before we modify them
GX2DrawDone();
Vertex *vertex_pointer = (Vertex*)GX2RLockBufferEx(&vertex_buffer, (GX2RResourceFlags)0);
// Set vertex position buffer
vertex_pointer[0].position.x = rect->left;
vertex_pointer[0].position.y = rect->top;
vertex_pointer[1].position.x = rect->right;
vertex_pointer[1].position.y = rect->top;
vertex_pointer[2].position.x = rect->right;
vertex_pointer[2].position.y = rect->bottom;
vertex_pointer[3].position.x = rect->left;
vertex_pointer[3].position.y = rect->bottom;
for (unsigned int i = 0; i < 4; ++i)
// Flush vertex data if a context-change is needed
if (last_render_mode != MODE_COLOUR_FILL || last_destination_surface != surface || last_red != red || last_green != green || last_blue != blue)
{
vertex_pointer[i].position.x /= surface->width;
vertex_pointer[i].position.x *= 2.0f;
vertex_pointer[i].position.x -= 1.0f;
FlushVertexBuffer();
vertex_pointer[i].position.y /= surface->height;
vertex_pointer[i].position.y *= -2.0f;
vertex_pointer[i].position.y += 1.0f;
last_render_mode = MODE_COLOUR_FILL;
last_source_surface = NULL;
last_destination_surface = surface;
last_red = red;
last_green = green;
last_blue = blue;
// Draw to the selected texture, instead of the screen
GX2SetColorBuffer(&surface->colour_buffer, GX2_RENDER_TARGET_0);
GX2SetViewport(0, 0, (float)surface->colour_buffer.surface.width, (float)surface->colour_buffer.surface.height, 0.0f, 1.0f);
GX2SetScissor(0, 0, (float)surface->colour_buffer.surface.width, (float)surface->colour_buffer.surface.height);
// Set the colour-fill... colour
const float uniform_colours[4] = {red / 255.0f, green / 255.0f, blue / 255.0f, 1.0f};
GX2SetPixelUniformReg(colour_fill_shader.pixelShader->uniformVars[0].offset, 4, (uint32_t*)&uniform_colours);
// Bind the colour-fill shader
GX2SetFetchShader(&colour_fill_shader.fetchShader);
GX2SetVertexShader(colour_fill_shader.vertexShader);
GX2SetPixelShader(colour_fill_shader.pixelShader);
// Bind misc. data
GX2RSetAttributeBuffer(&vertex_buffer, 0, sizeof(Vertex), offsetof(Vertex, position));
}
GX2RUnlockBufferEx(&vertex_buffer, (GX2RResourceFlags)0);
VertexBufferSlot *vertex_buffer_slot = GetVertexBufferSlot(1);
// Draw to the selected texture, instead of the screen
GX2SetColorBuffer(&surface->colour_buffer, GX2_RENDER_TARGET_0);
GX2SetViewport(0, 0, (float)surface->colour_buffer.surface.width, (float)surface->colour_buffer.surface.height, 0.0f, 1.0f);
GX2SetScissor(0, 0, (float)surface->colour_buffer.surface.width, (float)surface->colour_buffer.surface.height);
if (vertex_buffer_slot != NULL)
{
// Set vertex position buffer
vertex_buffer_slot->vertices[0].position.x = rect->left;
vertex_buffer_slot->vertices[0].position.y = rect->top;
vertex_buffer_slot->vertices[1].position.x = rect->right;
vertex_buffer_slot->vertices[1].position.y = rect->top;
vertex_buffer_slot->vertices[2].position.x = rect->right;
vertex_buffer_slot->vertices[2].position.y = rect->bottom;
vertex_buffer_slot->vertices[3].position.x = rect->left;
vertex_buffer_slot->vertices[3].position.y = rect->bottom;
// Set the colour-fill... colour
const float uniform_colours[4] = {red / 255.0f, green / 255.0f, blue / 255.0f, 1.0f};
GX2SetPixelUniformReg(colour_fill_shader.pixelShader->uniformVars[0].offset, 4, (uint32_t*)&uniform_colours);
for (unsigned int i = 0; i < 4; ++i)
{
vertex_buffer_slot->vertices[i].position.x /= surface->width;
vertex_buffer_slot->vertices[i].position.x *= 2.0f;
vertex_buffer_slot->vertices[i].position.x -= 1.0f;
// Bind the colour-fill shader
GX2SetFetchShader(&colour_fill_shader.fetchShader);
GX2SetVertexShader(colour_fill_shader.vertexShader);
GX2SetPixelShader(colour_fill_shader.pixelShader);
// Bind misc. data
GX2RSetAttributeBuffer(&vertex_buffer, 0, sizeof(Vertex), offsetof(Vertex, position));
// Draw
GX2DrawEx(GX2_PRIMITIVE_MODE_QUADS, 4, 0, 1);
vertex_buffer_slot->vertices[i].position.y /= surface->height;
vertex_buffer_slot->vertices[i].position.y *= -2.0f;
vertex_buffer_slot->vertices[i].position.y += 1.0f;
}
}
}
RenderBackend_Glyph* RenderBackend_LoadGlyph(const unsigned char *pixels, unsigned int width, unsigned int height, int pitch)
@ -692,6 +797,11 @@ void RenderBackend_UnloadGlyph(RenderBackend_Glyph *glyph)
void RenderBackend_PrepareToDrawGlyphs(RenderBackend_Surface *destination_surface, const unsigned char *colour_channels)
{
FlushVertexBuffer();
last_render_mode = MODE_BLANK;
last_source_surface = NULL;
last_destination_surface = NULL;
if (destination_surface == NULL)
return;