[dma_pusher] 1st dword dirty only safe reads (up to 99% fewer safe reads)

This commit is contained in:
xbzk 2026-04-14 13:22:56 -03:00
parent a0bb6324c0
commit dc1f5a7ad7

View file

@ -1,9 +1,10 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/logging.h"
#include "common/settings.h"
#include "core/core.h"
#include "video_core/dma_pusher.h"
@ -21,7 +22,7 @@
namespace Tegra {
constexpr u32 MacroRegistersStart = 0xE00;
[[maybe_unused]] constexpr u32 ComputeInline = 0x6D;
constexpr u32 ComputeInline = 0x6D;
DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_,
Control::ChannelState& channel_state_)
@ -31,9 +32,7 @@ DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_man
// Defaulted destructor: no custom teardown logic is defined here.
DmaPusher::~DmaPusher() = default;
void DmaPusher::DispatchCalls() {
dma_pushbuffer_subindex = 0;
dma_state.is_last_call = true;
while (system.IsPoweredOn()) {
@ -51,7 +50,6 @@ bool DmaPusher::Step() {
}
CommandList& command_list = dma_pushbuffer.front();
const size_t prefetch_size = command_list.prefetch_command_list.size();
const size_t command_list_size = command_list.command_lists.size();
@ -78,17 +76,58 @@ bool DmaPusher::Step() {
synced = false;
}
if (header.size > 0 && dma_state.method >= MacroRegistersStart && subchannels[dma_state.subchannel]) {
subchannels[dma_state.subchannel]->current_dirty = memory_manager.IsMemoryDirty(dma_state.dma_get, header.size * sizeof(u32));
}
if (header.size > 0) {
if (Settings::IsDMALevelDefault() ? (Settings::IsGPULevelMedium() || Settings::IsGPULevelHigh()) : Settings::IsDMALevelSafe()) {
Tegra::Memory::GpuGuestMemory<Tegra::CommandHeader, Tegra::Memory::GuestMemoryFlags::SafeRead>headers(memory_manager, dma_state.dma_get, header.size, &command_headers);
ProcessCommands(headers);
const bool safe_by_settings =
Settings::IsDMALevelDefault() ? !Settings::IsGPULevelLow() : Settings::IsDMALevelSafe();
const bool is_inline_dma_cont = dma_state.method_count > 0 &&
dma_state.method == ComputeInline;
const bool is_macro_cont = dma_state.method_count > 0 &&
dma_state.method >= MacroRegistersStart;
// Only probe dirty when something actually consumes it (and settings do not already
// force safe reads):
// - inline DMA source (0x6D cont): determines whether to safe-read
// - macro cont (method >= MacroRegistersStart, e.g. 0xE3B): determines current_dirty
//   for the HLE macro Execute path
const bool segment_dirty = (is_inline_dma_cont || is_macro_cont) && !safe_by_settings &&
memory_manager.IsMemoryDirty(dma_state.dma_get, sizeof(u32));
// Safe-read only KeplerMemory/KeplerCompute inline DMA source buffers (0x6D) when
// dirty. Macro param buffers (0xE3B) don't need it — current_dirty routes the HLE
// macro to Execute() → GetMacroAddress(), which reads directly from GPU memory.
const bool use_safe = safe_by_settings || (is_inline_dma_cont && segment_dirty);
// Set current_dirty on macro subchannels so HLE macros route to Execute() →
// GetMacroAddress() → read draw params from GPU memory, not Fallback() which reads
// stale register state and produces zero instance counts.
//
// When method_count > 0, dma_state.method is the live ongoing method (continuation).
// When method_count == 0, dma_state.method is stale from the previous ProcessCommands
// call — read the actual method from the first word of the buffer.
// Publishes the dirty state on the macro subchannel targeted by this segment.
// `cmd` is the command-header span that is passed to ProcessCommands right after this call.
// Nothing is written when the target method is below MacroRegistersStart or when the
// subchannel slot is null.
auto set_macro_dirty = [&](std::span<const CommandHeader> cmd) {
if (dma_state.method_count > 0) {
// Continuation: dma_state.method / dma_state.subchannel describe the method in flight.
if (dma_state.method >= MacroRegistersStart && subchannels[dma_state.subchannel]) {
subchannels[dma_state.subchannel]->current_dirty = safe_by_settings || segment_dirty;
}
} else if (!cmd.empty() && cmd[0].method.Value() >= MacroRegistersStart) {
// Fresh segment: take the method and subchannel from the first header in the buffer.
const u32 subchan = cmd[0].subchannel.Value();
if (subchannels[subchan]) {
// NOTE(review): segment_dirty is only probed when method_count > 0 (see the
// is_inline_dma_cont / is_macro_cont gating), so it is always false on this path
// and the stored value reduces to safe_by_settings. Confirm this is intended.
subchannels[subchan]->current_dirty = safe_by_settings || segment_dirty;
}
}
};
if (use_safe) {
Tegra::Memory::GpuGuestMemory<Tegra::CommandHeader, Tegra::Memory::GuestMemoryFlags::SafeRead>
headers(memory_manager, dma_state.dma_get, header.size, &command_headers);
const std::span<const CommandHeader> cmd{headers};
set_macro_dirty(cmd);
ProcessCommands(cmd);
} else {
Tegra::Memory::GpuGuestMemory<Tegra::CommandHeader, Tegra::Memory::GuestMemoryFlags::UnsafeRead>headers(memory_manager, dma_state.dma_get, header.size, &command_headers);
ProcessCommands(headers);
Tegra::Memory::GpuGuestMemory<Tegra::CommandHeader, Tegra::Memory::GuestMemoryFlags::UnsafeRead>
headers(memory_manager, dma_state.dma_get, header.size, &command_headers);
const std::span<const CommandHeader> cmd{headers};
set_macro_dirty(cmd);
ProcessCommands(cmd);
}
}