[scheduler, dma, maxwell] Reduce CPU stalls in the GPU command processing pipeline through multiple targeted optimizations (#3296)

- Scheduler: Reduced lock scope to allow parallel command preparation across channels
- DmaPusher: Added command prefetching (16-command lookahead) to improve cache hit rate
- Maxwell3D: Pre-allocated macro parameter vectors to eliminate dynamic allocations, and unrolled the dirty register tracking loop for better cache locality
- MacroEngine: Added last-executed macro cache to skip hash table lookups on the hot path

Co-authored-by: lizzie <lizzie@eden-emu.dev>
Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3296
Reviewed-by: Maufeat <sahyno1996@gmail.com>
Reviewed-by: DraVee <dravee@eden-emu.dev>
Co-authored-by: CamilleLaVey <camillelavey99@gmail.com>
Co-committed-by: CamilleLaVey <camillelavey99@gmail.com>
This commit is contained in:
CamilleLaVey 2026-01-18 03:45:18 +01:00 committed by crueter
parent 6ec6ca7c37
commit 51cc1bc6be
No known key found for this signature in database
GPG key ID: 425ACD2D4830EBC6
4 changed files with 100 additions and 30 deletions

View file

@ -31,9 +31,8 @@ Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_)
dirty.flags.flip();
InitializeRegisterDefaults();
execution_mask.reset();
for (size_t i = 0; i < execution_mask.size(); i++) {
execution_mask[i] = IsMethodExecutable(static_cast<u32>(i));
}
for (size_t i = 0; i < execution_mask.size(); i++)
execution_mask[i] = IsMethodExecutable(u32(i));
}
Maxwell3D::~Maxwell3D() = default;
@ -292,38 +291,32 @@ u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) {
}
void Maxwell3D::ConsumeSinkImpl() {
SCOPE_EXIT {
method_sink.clear();
};
const auto control = shadow_state.shadow_ram_control;
if (control == Regs::ShadowRamControl::Track ||
control == Regs::ShadowRamControl::TrackWithFilter) {
if (control == Regs::ShadowRamControl::Track || control == Regs::ShadowRamControl::TrackWithFilter) {
for (auto [method, value] : method_sink) {
shadow_state.reg_array[method] = value;
ProcessDirtyRegisters(method, value);
}
return;
}
if (control == Regs::ShadowRamControl::Replay) {
for (auto [method, value] : method_sink) {
} else if (control == Regs::ShadowRamControl::Replay) {
for (auto [method, value] : method_sink)
ProcessDirtyRegisters(method, shadow_state.reg_array[method]);
}
return;
}
for (auto [method, value] : method_sink) {
ProcessDirtyRegisters(method, value);
} else {
for (auto [method, value] : method_sink)
ProcessDirtyRegisters(method, value);
}
method_sink.clear();
}
void Maxwell3D::ProcessDirtyRegisters(u32 method, u32 argument) {
if (regs.reg_array[method] == argument) {
return;
}
regs.reg_array[method] = argument;
for (const auto& table : dirty.tables) {
dirty.flags[table[method]] = true;
if (regs.reg_array[method] != argument) {
regs.reg_array[method] = argument;
auto const& table0 = dirty.tables[0];
auto const& table1 = dirty.tables[1];
u8 const flag0 = table0[method];
u8 const flag1 = table1[method];
dirty.flags[flag0] = true;
if (flag1 != flag0)
dirty.flags[flag1] = true;
}
}