original guards + antiflicker knob + bank_base fix

This commit is contained in:
xbzk 2026-04-07 23:39:17 -03:00
parent 19eab4d7aa
commit c0be772ce9
4 changed files with 189 additions and 9 deletions

View file

@ -1,5 +1,7 @@
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
//
#include <limits>
#include "common/settings.h"
#include "video_core/dirty_flags.h"
@ -9,6 +11,123 @@
namespace Tegra::Engines {
DrawManager::DrawManager(Maxwell3D* maxwell3d_) : maxwell3d(maxwell3d_) {}
namespace {
// Heuristic upper bounds used to reject draw parameters that look like the
// product of a corrupted command stream rather than a legitimate draw.
constexpr u32 IndexCountGuardLimit = 1u * 1024u * 1024u;
constexpr u32 DrawOffsetGuardLimit = 256u * 1024u * 1024u;
constexpr size_t IndirectBufferSizeGuardLimit = 64ull * 1024ull * 1024ull;
// Snapshot of the last blocked draw's parameters. Used to warn only once per
// distinct bad draw so repeated submissions do not flood the log.
struct BufferSignature {
// True once this signature holds a previously blocked draw's parameters.
bool valid{};
u32 count{};
u32 first{};
u32 base_index{};
u32 base_instance{};
// Bytes an indexed draw would read vs. bytes the index buffer provides
// (only populated on the indexed path; both stay 0 for vertex draws).
u64 span_bytes{};
u64 available_bytes{};
bool range_overflow{};
bool end_index_overflow{};
bool span_overflow{};
bool bounds_invalid{};
size_t max_draw_counts{};
size_t buffer_size{};
};
// Returns true when the draw described by `state` should be discarded: its
// parameters exceed the guard limits above, the arithmetic would overflow, or
// an indexed draw would read past the end of its index buffer. On a blocked
// draw, logs one warning per distinct parameter set (deduplicated via `last`)
// and records the new signature in `last`. `max_draw_counts`/`buffer_size`
// are only validated when `include_indirect` is set (indirect draw paths).
[[nodiscard]] bool DiscardCorrupted(bool draw_indexed, bool include_indirect,
const DrawManager::State& state,
size_t max_draw_counts, size_t buffer_size,
BufferSignature& last) {
const u32 count = draw_indexed ? state.index_buffer.count : state.vertex_buffer.count;
const u32 first = draw_indexed ? state.index_buffer.first : state.vertex_buffer.first;
const u32 base_index = state.base_index;
const u32 base_instance = state.base_instance;
// Offsets this large are presumed to be garbage rather than intentional.
const bool suspicious_offsets = first > DrawOffsetGuardLimit ||
base_index > DrawOffsetGuardLimit ||
base_instance > DrawOffsetGuardLimit;
bool range_overflow = false;
bool end_index_overflow = false;
bool span_overflow = false;
bool bounds_invalid = false;
u64 span_bytes = 0;
u64 available_bytes = 0;
if (draw_indexed) {
const u64 first64 = state.index_buffer.first;
const u64 count64 = state.index_buffer.count;
const u64 format_bytes = state.index_buffer.FormatSizeInBytes();
const u64 buffer_start = state.index_buffer.StartAddress();
const u64 buffer_end = state.index_buffer.EndAddress();
// Overflow-safe evaluation of (first + count) * format_bytes. The extra
// parentheses around std::numeric_limits<u64>::max defeat the Windows
// max() macro.
end_index_overflow = first64 > ((std::numeric_limits<u64>::max)() - count64);
const u64 end_index = end_index_overflow ? 0 : first64 + count64;
span_overflow = format_bytes == 0 ||
(end_index > ((std::numeric_limits<u64>::max)() / format_bytes));
// Saturate the span on overflow so the span > available check below
// reliably blocks the draw.
span_bytes = (end_index_overflow || span_overflow) ? (std::numeric_limits<u64>::max)()
: end_index * format_bytes;
bounds_invalid = buffer_end < buffer_start;
available_bytes = bounds_invalid ? 0 : (buffer_end - buffer_start);
} else {
const u64 first64 = state.vertex_buffer.first;
const u64 count64 = state.vertex_buffer.count;
range_overflow = first64 > ((std::numeric_limits<u64>::max)() - count64);
}
bool blocked = count > IndexCountGuardLimit || suspicious_offsets || range_overflow ||
end_index_overflow || span_overflow || bounds_invalid;
if (draw_indexed) {
// An indexed draw must not read past the end of the index buffer.
blocked = blocked || span_bytes > available_bytes;
}
if (include_indirect) {
blocked = blocked || max_draw_counts > IndexCountGuardLimit ||
buffer_size > IndirectBufferSizeGuardLimit;
}
if (!blocked) {
return false;
}
// Log-dedup: only warn when this blocked draw differs from the previous one.
const bool same = last.valid && last.count == count && last.first == first &&
last.base_index == base_index && last.base_instance == base_instance &&
last.span_bytes == span_bytes && last.available_bytes == available_bytes &&
last.range_overflow == range_overflow &&
last.end_index_overflow == end_index_overflow &&
last.span_overflow == span_overflow &&
last.bounds_invalid == bounds_invalid &&
last.max_draw_counts == max_draw_counts && last.buffer_size == buffer_size;
if (!same) {
const char* const label =
include_indirect
? (draw_indexed ? "DrawManager: blocked indexed indirect draw"
: "DrawManager: blocked vertex indirect draw")
: (draw_indexed ? "DrawManager: blocked indexed draw"
: "DrawManager: blocked vertex draw");
LOG_WARNING(HW_GPU,
"{} path={} count={} limit={} first=0x{:X} base_index=0x{:X} "
"base_instance=0x{:X} span_bytes={} available={} "
"overflow(range={} index_end={} span={}) bounds_invalid={} "
"offset_limit={} max_draw_count={} buffer_size={} "
"indirect_limits(count={} buffer={})",
label, draw_indexed ? "indexed" : "vertex", count, IndexCountGuardLimit,
first, base_index, base_instance, span_bytes, available_bytes, range_overflow,
end_index_overflow, span_overflow, bounds_invalid, DrawOffsetGuardLimit,
max_draw_counts, buffer_size, IndexCountGuardLimit,
IndirectBufferSizeGuardLimit);
// Remember this draw so an identical repeat is blocked silently.
last = {.valid = true,
.count = count,
.first = first,
.base_index = base_index,
.base_instance = base_instance,
.span_bytes = span_bytes,
.available_bytes = available_bytes,
.range_overflow = range_overflow,
.end_index_overflow = end_index_overflow,
.span_overflow = span_overflow,
.bounds_invalid = bounds_invalid,
.max_draw_counts = max_draw_counts,
.buffer_size = buffer_size};
}
return true;
}
} // namespace
void DrawManager::ProcessMethodCall(u32 method, u32 argument) {
const auto& regs{maxwell3d->regs};
switch (method) {
@ -264,6 +383,15 @@ void DrawManager::ProcessDraw(bool draw_indexed, u32 instance_count) {
LOG_TRACE(HW_GPU, "called, topology={}, count={}", draw_state.topology,
draw_indexed ? draw_state.index_buffer.count : draw_state.vertex_buffer.count);
static thread_local BufferSignature last_direct[2]{};
if (DiscardCorrupted(draw_indexed, false, draw_state, 0, 0,
last_direct[draw_indexed ? 1 : 0])) {
if (draw_indexed) {
draw_state.draw_indexed = false;
}
return;
}
UpdateTopology();
if (maxwell3d->ShouldExecute()) {
@ -278,6 +406,13 @@ void DrawManager::ProcessDrawIndirect() {
draw_state.topology, indirect_state.is_indexed, indirect_state.include_count,
indirect_state.buffer_size, indirect_state.max_draw_counts);
static thread_local BufferSignature last_indirect[2]{};
if (DiscardCorrupted(indirect_state.is_indexed, true, draw_state,
indirect_state.max_draw_counts, indirect_state.buffer_size,
last_indirect[indirect_state.is_indexed ? 1 : 0])) {
return;
}
UpdateTopology();
if (maxwell3d->ShouldExecute()) {

View file

@ -96,7 +96,36 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun
void KeplerCompute::ProcessLaunch() {
// Read the launch (QMD) descriptor from guest memory, then sanity-check its
// grid dimensions before forwarding the dispatch to the rasterizer.
// NOTE: the diff artifact that duplicated the ReadBlockUnsafe continuation
// line has been removed; the call has exactly one size argument.
const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
                               LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32));
// CUDA allows grid x up to 2^31-1 (31-bit field) and y/z up to 65535.
// x is clamped harder here (64M): anything above that is treated as a
// corrupted descriptor rather than a legitimate dispatch.
static constexpr u32 MaxGridDimX = 64u * 1024u * 1024u; // 64M, stricter than CUDA's 2^31-1
static constexpr u32 MaxGridDimYZ = 65535u;             // CUDA spec
const u32 x = launch_description.grid_dim_x;
const u32 y = launch_description.grid_dim_y;
const u32 z = launch_description.grid_dim_z;
if (x == 0 || y == 0 || z == 0 ||
    x > MaxGridDimX || y > MaxGridDimYZ || z > MaxGridDimYZ) {
// Log each distinct bad dispatch only once to avoid flooding the log when
// the same corrupted descriptor is resubmitted every frame.
struct BlockedDispatchSignature {
u32 x = 0, y = 0, z = 0;
bool valid = false;
};
static thread_local BlockedDispatchSignature last_blocked{};
const bool same = last_blocked.valid && last_blocked.x == x &&
last_blocked.y == y && last_blocked.z == z;
if (!same) {
// Dump the raw descriptor words holding the grid dims for diagnosis.
// NOTE(review): assumes launch_description is laid out as contiguous u32
// words and that words 12/13 cover the grid fields — confirm against
// the QMD layout.
const auto* raw = reinterpret_cast<const u32*>(&launch_description);
LOG_WARNING(HW_GPU,
"KeplerCompute: blocked oversized dispatch"
" qmd_addr={:#x} x={}({:#010x}) y={}({:#010x}) z={}({:#010x})"
" word12={:#010x} word13={:#010x}",
launch_desc_loc, x, x, y, y, z, z,
raw[12], raw[13]);
last_blocked = {x, y, z, true};
}
return;
}
rasterizer->DispatchCompute();
}

View file

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
@ -7,6 +10,7 @@
#include <deque>
#include <utility>
#include "common/assert.h"
#include "common/common_types.h"
namespace VideoCommon {
@ -46,10 +50,15 @@ public:
}
// Releases `how_many` references from this bank.
// The rendered diff merged the old load-then-check guard (UNREACHABLE on
// underflow) with the new fetch_sub logic AND kept the old trailing
// fetch_sub, which would decrement the count twice per call. This is the
// reconstructed intended version: exactly one atomic decrement.
void CloseReference(size_t how_many = 1) {
// fetch_sub and inspect the previous value atomically: a separate
// guard-load before the decrement would be a TOCTOU race when several
// threads close references concurrently.
const size_t prev = references.fetch_sub(how_many, std::memory_order_relaxed);
if (prev < how_many) [[unlikely]] {
// Underflow: undo the wrap-around so IsDead() remains meaningful,
// then report the programming error.
references.store(0, std::memory_order_relaxed);
ASSERT_MSG(false, "QueryBank CloseReference underflow: prev={} how_many={}", prev,
how_many);
}
}
void Close() {

View file

@ -260,7 +260,8 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
};
u8* pointer = impl->device_memory.template GetPointer<u8>(cpu_addr);
u8* pointer_timestamp = impl->device_memory.template GetPointer<u8>(cpu_addr + 8);
bool is_synced = !Settings::IsGPULevelHigh() && is_fence;
const bool is_payload = counter_type == QueryType::Payload;
const bool is_synced = !is_payload && !Settings::IsGPULevelHigh() && is_fence;
std::function<void()> operation([this, is_synced, streamer, query_base = query, query_location,
pointer, pointer_timestamp] {
if (True(query_base->flags & QueryFlagBits::IsInvalidated)) {
@ -292,9 +293,15 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
}
});
if (is_fence) {
impl->rasterizer.SignalFence(std::move(operation));
if (Settings::getDebugKnobAt(0) && is_payload) {
impl->rasterizer.SyncOperation(std::move(operation));
std::function<void()> noop([] {});
impl->rasterizer.SignalFence(std::move(noop));
} else {
impl->rasterizer.SignalFence(std::move(operation));
}
} else {
if (!Settings::IsGPULevelHigh() && counter_type == QueryType::Payload) {
if (!Settings::IsGPULevelHigh() && is_payload) {
if (has_timestamp) {
u64 timestamp = impl->gpu.GetTicks();
u64 value = static_cast<u64>(payload);