From dc1f5a7ad756b04f045c78b4a418e7645c71d171 Mon Sep 17 00:00:00 2001 From: xbzk Date: Tue, 14 Apr 2026 13:22:56 -0300 Subject: [PATCH] [dma_pusher] 1st dword dirty only safe reads (up to 99% less safe reads) --- src/video_core/dma_pusher.cpp | 67 +++++++++++++++++++++++++++-------- 1 file changed, 53 insertions(+), 14 deletions(-) diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 3844a8e2f9..a361f4c747 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -1,9 +1,10 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "common/logging.h" #include "common/settings.h" #include "core/core.h" #include "video_core/dma_pusher.h" @@ -21,7 +22,7 @@ namespace Tegra { constexpr u32 MacroRegistersStart = 0xE00; -[[maybe_unused]] constexpr u32 ComputeInline = 0x6D; +constexpr u32 ComputeInline = 0x6D; DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_, Control::ChannelState& channel_state_) @@ -31,9 +32,7 @@ DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_man DmaPusher::~DmaPusher() = default; void DmaPusher::DispatchCalls() { - dma_pushbuffer_subindex = 0; - dma_state.is_last_call = true; while (system.IsPoweredOn()) { @@ -51,7 +50,6 @@ bool DmaPusher::Step() { } CommandList& command_list = dma_pushbuffer.front(); - const size_t prefetch_size = command_list.prefetch_command_list.size(); const size_t command_list_size = command_list.command_lists.size(); @@ -78,17 +76,58 @@ bool DmaPusher::Step() { synced = false; } - if (header.size > 0 && dma_state.method >= MacroRegistersStart && subchannels[dma_state.subchannel]) { - subchannels[dma_state.subchannel]->current_dirty = memory_manager.IsMemoryDirty(dma_state.dma_get, header.size * sizeof(u32)); - } - if (header.size > 0) { - if (Settings::IsDMALevelDefault() ? (Settings::IsGPULevelMedium() || Settings::IsGPULevelHigh()) : Settings::IsDMALevelSafe()) { - Tegra::Memory::GpuGuestMemoryheaders(memory_manager, dma_state.dma_get, header.size, &command_headers); - ProcessCommands(headers); + const bool safe_by_settings = + Settings::IsDMALevelDefault() ? !Settings::IsGPULevelLow() : Settings::IsDMALevelSafe(); + + const bool is_inline_dma_cont = dma_state.method_count > 0 && + dma_state.method == ComputeInline; + const bool is_macro_cont = dma_state.method_count > 0 && + dma_state.method >= MacroRegistersStart; + + // Only probe dirty when something actually consumes it: + // - inline DMA source (0x6D cont): determines whether to safe-read + // - macro cont (0xE3B cont): determines current_dirty for the HLE macro Execute path + const bool segment_dirty = (is_inline_dma_cont || is_macro_cont) && !safe_by_settings && + memory_manager.IsMemoryDirty(dma_state.dma_get, sizeof(u32)); + + // Safe-read only KeplerMemory/KeplerCompute inline DMA source buffers (0x6D) when + // dirty. Macro param buffers (0xE3B) don't need it — current_dirty routes the HLE + // macro to Execute() → GetMacroAddress(), which reads directly from GPU memory. + const bool use_safe = safe_by_settings || (is_inline_dma_cont && segment_dirty); + + // Set current_dirty on macro subchannels so HLE macros route to Execute() → + // GetMacroAddress() → read draw params from GPU memory, not Fallback() which reads + // stale register state and produces zero instance counts. + // + // When method_count > 0, dma_state.method is the live ongoing method (continuation). + // When method_count == 0, dma_state.method is stale from the previous ProcessCommands + // call — read the actual method from the first word of the buffer. + auto set_macro_dirty = [&](std::span cmd) { + if (dma_state.method_count > 0) { + if (dma_state.method >= MacroRegistersStart && subchannels[dma_state.subchannel]) { + subchannels[dma_state.subchannel]->current_dirty = safe_by_settings || segment_dirty; + } + } else if (!cmd.empty() && cmd[0].method.Value() >= MacroRegistersStart) { + const u32 subchan = cmd[0].subchannel.Value(); + if (subchannels[subchan]) { + subchannels[subchan]->current_dirty = safe_by_settings || segment_dirty; + } + } + }; + + if (use_safe) { + Tegra::Memory::GpuGuestMemory + headers(memory_manager, dma_state.dma_get, header.size, &command_headers); + const std::span cmd{headers}; + set_macro_dirty(cmd); + ProcessCommands(cmd); } else { - Tegra::Memory::GpuGuestMemoryheaders(memory_manager, dma_state.dma_get, header.size, &command_headers); - ProcessCommands(headers); + Tegra::Memory::GpuGuestMemory + headers(memory_manager, dma_state.dma_get, header.size, &command_headers); + const std::span cmd{headers}; + set_macro_dirty(cmd); + ProcessCommands(cmd); } }