[video_core/engines/maxwell3d] memory inline DrawState to reduce indirection on hot paths (#3758)
Some checks are pending
tx-src / sources (push) Waiting to run
Check Strings / check-strings (push) Waiting to run

usual indirection removal
helps codegen very slightly

the idea is basically to reduce the amount of pointer dereferencing overall in the code, and use idiomatic std::variant<>-isms to not rely on vtables/unique_ptr overhead
this should allow the compiler to emit better code
of course it's a tiny optimisation and only CPU-side, but allows us to reduce indirection which is almost always a good thing

"but you're passing more parameters to the function!!!" it's literally memoized into a register my friend

Signed-off-by: lizzie <lizzie@eden-emu.dev>
Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3758
Reviewed-by: crueter <crueter@eden-emu.dev>
Reviewed-by: MaranBr <maranbr@eden-emu.dev>
This commit is contained in:
lizzie 2026-04-28 17:44:51 +02:00 committed by crueter
parent ed225f8a8b
commit 77decca678
No known key found for this signature in database
GPG key ID: 425ACD2D4830EBC6
15 changed files with 231 additions and 351 deletions

View file

@ -245,7 +245,7 @@ void RasterizerOpenGL::PrepareDraw(bool is_indexed, Func&& draw_func) {
SyncState();
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
const auto& draw_state = maxwell3d->draw_manager.draw_state;
const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(draw_state.topology);
BeginTransformFeedback(pipeline, primitive_mode);
@ -260,12 +260,12 @@ void RasterizerOpenGL::PrepareDraw(bool is_indexed, Func&& draw_func) {
void RasterizerOpenGL::Draw(bool is_indexed, u32 instance_count) {
PrepareDraw(is_indexed, [this, is_indexed, instance_count](GLenum primitive_mode) {
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
const GLuint base_instance = static_cast<GLuint>(draw_state.base_instance);
const GLsizei num_instances = static_cast<GLsizei>(instance_count);
const auto& draw_state = maxwell3d->draw_manager.draw_state;
const GLuint base_instance = GLuint(draw_state.base_instance);
const GLsizei num_instances = GLsizei(instance_count);
if (is_indexed) {
const GLint base_vertex = static_cast<GLint>(draw_state.base_index);
const GLsizei num_vertices = static_cast<GLsizei>(draw_state.index_buffer.count);
const GLint base_vertex = GLint(draw_state.base_index);
const GLsizei num_vertices = GLsizei(draw_state.index_buffer.count);
const GLvoid* const offset = buffer_cache_runtime.IndexOffset();
const GLenum format = MaxwellToGL::IndexFormat(draw_state.index_buffer.format);
if (num_instances == 1 && base_instance == 0 && base_vertex == 0) {
@ -302,7 +302,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, u32 instance_count) {
}
void RasterizerOpenGL::DrawIndirect() {
const auto& params = maxwell3d->draw_manager->GetIndirectParams();
const auto& params = maxwell3d->draw_manager.indirect_state;
buffer_cache.SetDrawIndirect(&params);
PrepareDraw(params.is_indexed, [this, &params](GLenum primitive_mode) {
if (params.is_byte_count) {
@ -358,12 +358,12 @@ void RasterizerOpenGL::DrawTexture() {
SyncState();
const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState();
const auto& draw_texture_state = maxwell3d->draw_manager.draw_texture_state;
const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler);
const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture);
const auto Scale = [&](auto dim) -> s32 {
return Settings::values.resolution_info.ScaleUp(static_cast<s32>(dim));
return Settings::values.resolution_info.ScaleUp(s32(dim));
};
Region2D dst_region = {

View file

@ -25,7 +25,7 @@
#include "shader_recompiler/frontend/maxwell/control_flow.h"
#include "shader_recompiler/frontend/maxwell/translate_program.h"
#include "shader_recompiler/profile.h"
#include "video_core/engines/draw_manager.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
@ -357,7 +357,7 @@ GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() {
const auto& regs{maxwell3d->regs};
graphics_key.raw = 0;
graphics_key.early_z.Assign(regs.mandated_early_z != 0 ? 1 : 0);
graphics_key.gs_input_topology.Assign(maxwell3d->draw_manager->GetDrawState().topology);
graphics_key.gs_input_topology.Assign(maxwell3d->draw_manager.draw_state.topology);
graphics_key.tessellation_primitive.Assign(regs.tessellation.params.domain_type.Value());
graphics_key.tessellation_spacing.Assign(regs.tessellation.params.spacing.Value());
graphics_key.tessellation_clockwise.Assign(
@ -397,7 +397,7 @@ GraphicsPipeline* ShaderCache::BuiltPipeline(GraphicsPipeline* pipeline) const n
// If games are using a small index count, we can assume these are full screen quads.
// Usually these shaders are only used once for building textures so we can assume they
// can't be built async
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
const auto& draw_state = maxwell3d->draw_manager.draw_state;
if (draw_state.index_buffer.count <= 6 || draw_state.vertex_buffer.count <= 6) {
return pipeline;
}