[video_core] fix out-of-bounds access in the std::bitset<> dirty tracker, fix slightly wrong index buffer size estimate

Signed-off-by: lizzie <lizzie@eden-emu.dev>
This commit is contained in:
lizzie 2026-05-23 00:48:18 +00:00
parent 0d736d49d6
commit a2ca1583f5
5 changed files with 38 additions and 55 deletions

View file

@ -1252,8 +1252,7 @@ void BufferCache<P>::UpdateIndexBuffer() {
const GPUVAddr gpu_addr_end = index_buffer_ref.EndAddress(); const GPUVAddr gpu_addr_end = index_buffer_ref.EndAddress();
const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin); const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
const u32 draw_size = const u32 draw_size = (index_buffer_ref.count + index_buffer_ref.first) * u32(index_buffer_ref.FormatSizeInBytes());
(index_buffer_ref.count + index_buffer_ref.first) * index_buffer_ref.FormatSizeInBytes();
const u32 size = (std::min)(address_size, draw_size); const u32 size = (std::min)(address_size, draw_size);
if (size == 0 || !device_addr) { if (size == 0 || !device_addr) {
channel_state->index_buffer = NULL_BINDING; channel_state->index_buffer = NULL_BINDING;

View file

@ -181,12 +181,12 @@ void DmaPusher::CallMethod(u32 argument) const {
}); });
} else { } else {
auto subchannel = subchannels[dma_state.subchannel]; auto subchannel = subchannels[dma_state.subchannel];
if (!subchannel->execution_mask[dma_state.method]) { if (subchannel->execution_mask[dma_state.method]) {
subchannel->method_sink.emplace_back(dma_state.method, argument);
} else {
subchannel->ConsumeSink(); subchannel->ConsumeSink();
subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset; subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset;
subchannel->CallMethod(dma_state.method, argument, dma_state.is_last_call); subchannel->CallMethod(dma_state.method, argument, dma_state.is_last_call);
} else {
subchannel->method_sink.emplace_back(dma_state.method, argument);
} }
} }
} }

View file

@ -270,31 +270,20 @@ u32 Maxwell3D::GetMaxCurrentVertices() {
size_t Maxwell3D::EstimateIndexBufferSize() { size_t Maxwell3D::EstimateIndexBufferSize() {
GPUVAddr start_address = regs.index_buffer.StartAddress(); GPUVAddr start_address = regs.index_buffer.StartAddress();
GPUVAddr end_address = regs.index_buffer.EndAddress(); GPUVAddr end_address = regs.index_buffer.EndAddress();
static constexpr std::array<size_t, 3> max_sizes = {(std::numeric_limits<u8>::max)(), auto const byte_size = regs.index_buffer.FormatSizeInBytes();
(std::numeric_limits<u16>::max)(), auto const max_size = 1ull << (byte_size * CHAR_BIT);
(std::numeric_limits<u32>::max)()}; auto const upper_cap = GetMaxCurrentVertices() * 4 * byte_size;
const size_t byte_size = regs.index_buffer.FormatSizeInBytes(); auto const lower_cap = std::min<size_t>(size_t(end_address - start_address), upper_cap);
const size_t log2_byte_size = Common::Log2Ceil64(byte_size); return std::min<size_t>(memory_manager.GetMemoryLayoutSize(start_address, byte_size * max_size) / byte_size, lower_cap);
const size_t cap{GetMaxCurrentVertices() * 4 * byte_size};
const size_t lower_cap =
std::min<size_t>(static_cast<size_t>(end_address - start_address), cap);
return std::min<size_t>(
memory_manager.GetMemoryLayoutSize(start_address, byte_size * max_sizes[log2_byte_size]) /
byte_size,
lower_cap);
} }
/// Applies the current shadow RAM control mode to a method write.
///
/// @param method   Register index being written.
/// @param argument Value supplied with the method.
/// @return The value to use for the write: the stored shadow value when the mode
///         is Replay, otherwise `argument` itself.
u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) {
    switch (shadow_state.shadow_ram_control) {
    case Regs::ShadowRamControl::Track:
    case Regs::ShadowRamControl::TrackWithFilter:
        // Keep track of the register value in shadow_state when requested.
        shadow_state.reg_array[method] = argument;
        return argument;
    case Regs::ShadowRamControl::Replay:
        // Hand back whatever was recorded for this register earlier.
        return shadow_state.reg_array[method];
    default:
        // Any other mode leaves the argument untouched.
        return argument;
    }
}
@ -317,10 +306,8 @@ void Maxwell3D::ConsumeSinkImpl() {
/// Stores `argument` in register `method` and raises, for every dirty table,
/// the dirty flag that table associates with that register.
void Maxwell3D::ProcessDirtyRegisters(u32 method, u32 argument) {
    regs.reg_array[method] = argument;
    for (const auto& lookup : dirty.tables) {
        // Each table maps a register index to the index of one dirty flag.
        const auto flag_index = lookup[method];
        dirty.flags[flag_index] = true;
    }
}
void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument, bool is_last_call) { void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument, bool is_last_call) {

View file

@ -2215,7 +2215,7 @@ public:
u32 first; u32 first;
u32 count; u32 count;
unsigned FormatSizeInBytes() const { size_t FormatSizeInBytes() const {
switch (format) { switch (format) {
case IndexFormat::UnsignedByte: case IndexFormat::UnsignedByte:
return 1; return 1;
@ -2224,7 +2224,7 @@ public:
case IndexFormat::UnsignedInt: case IndexFormat::UnsignedInt:
return 4; return 4;
} }
ASSERT(false); UNREACHABLE();
return 1; return 1;
} }
@ -3148,9 +3148,9 @@ public:
} }
struct DirtyState { struct DirtyState {
using Flags = std::bitset<(std::numeric_limits<u8>::max)()>; using Flags = std::bitset<(std::numeric_limits<u8>::max)() + 1>;
using Table = std::array<u8, Regs::NUM_REGS>; using Table = std::array<u8, Regs::NUM_REGS>;
using Tables = std::array<Table, 2>; using Tables = std::array<std::array<u8, Regs::NUM_REGS>, 2>;
Flags flags; Flags flags;
Tables tables{}; Tables tables{};

View file

@ -24,11 +24,8 @@ using namespace Dirty;
using namespace VideoCommon::Dirty; using namespace VideoCommon::Dirty;
using Tegra::Engines::Maxwell3D; using Tegra::Engines::Maxwell3D;
using Regs = Maxwell3D::Regs; using Regs = Maxwell3D::Regs;
using Tables = Maxwell3D::DirtyState::Tables;
using Table = Maxwell3D::DirtyState::Table;
using Flags = Maxwell3D::DirtyState::Flags;
Flags MakeInvalidationFlags() { Maxwell3D::DirtyState::Flags MakeInvalidationFlags() {
static constexpr int INVALIDATION_FLAGS[]{ static constexpr int INVALIDATION_FLAGS[]{
Viewports, Viewports,
Scissors, Scissors,
@ -68,7 +65,7 @@ Flags MakeInvalidationFlags() {
LineStippleEnable, LineStippleEnable,
LineStippleParams, LineStippleParams,
}; };
Flags flags{}; Maxwell3D::DirtyState::Flags flags{};
for (const int flag : INVALIDATION_FLAGS) { for (const int flag : INVALIDATION_FLAGS) {
flags[flag] = true; flags[flag] = true;
} }
@ -84,7 +81,7 @@ Flags MakeInvalidationFlags() {
return flags; return flags;
} }
void SetupDirtyViewports(Tables& tables) { void SetupDirtyViewports(Maxwell3D::DirtyState::Tables& tables) {
FillBlock(tables[0], OFF(viewport_transform), NUM(viewport_transform), Viewports); FillBlock(tables[0], OFF(viewport_transform), NUM(viewport_transform), Viewports);
FillBlock(tables[0], OFF(viewports), NUM(viewports), Viewports); FillBlock(tables[0], OFF(viewports), NUM(viewports), Viewports);
FillBlock(tables[1], OFF(surface_clip), NUM(surface_clip), Viewports); FillBlock(tables[1], OFF(surface_clip), NUM(surface_clip), Viewports);
@ -92,26 +89,26 @@ void SetupDirtyViewports(Tables& tables) {
tables[1][OFF(window_origin)] = Viewports; tables[1][OFF(window_origin)] = Viewports;
} }
void SetupDirtyScissors(Tables& tables) { void SetupDirtyScissors(Maxwell3D::DirtyState::Tables& tables) {
FillBlock(tables[0], OFF(scissor_test), NUM(scissor_test), Scissors); FillBlock(tables[0], OFF(scissor_test), NUM(scissor_test), Scissors);
} }
void SetupDirtyDepthBias(Tables& tables) { void SetupDirtyDepthBias(Maxwell3D::DirtyState::Tables& tables) {
auto& table = tables[0]; auto& table = tables[0];
table[OFF(depth_bias)] = DepthBias; table[OFF(depth_bias)] = DepthBias;
table[OFF(depth_bias_clamp)] = DepthBias; table[OFF(depth_bias_clamp)] = DepthBias;
table[OFF(slope_scale_depth_bias)] = DepthBias; table[OFF(slope_scale_depth_bias)] = DepthBias;
} }
void SetupDirtyBlendConstants(Tables& tables) { void SetupDirtyBlendConstants(Maxwell3D::DirtyState::Tables& tables) {
FillBlock(tables[0], OFF(blend_color), NUM(blend_color), BlendConstants); FillBlock(tables[0], OFF(blend_color), NUM(blend_color), BlendConstants);
} }
void SetupDirtyDepthBounds(Tables& tables) { void SetupDirtyDepthBounds(Maxwell3D::DirtyState::Tables& tables) {
FillBlock(tables[0], OFF(depth_bounds), NUM(depth_bounds), DepthBounds); FillBlock(tables[0], OFF(depth_bounds), NUM(depth_bounds), DepthBounds);
} }
void SetupDirtyStencilProperties(Tables& tables) { void SetupDirtyStencilProperties(Maxwell3D::DirtyState::Tables& tables) {
const auto setup = [&](size_t position, u8 flag) { const auto setup = [&](size_t position, u8 flag) {
tables[0][position] = flag; tables[0][position] = flag;
tables[1][position] = StencilProperties; tables[1][position] = StencilProperties;
@ -125,18 +122,18 @@ void SetupDirtyStencilProperties(Tables& tables) {
setup(OFF(stencil_back_func_mask), StencilCompare); setup(OFF(stencil_back_func_mask), StencilCompare);
} }
void SetupDirtyLineWidth(Tables& tables) { void SetupDirtyLineWidth(Maxwell3D::DirtyState::Tables& tables) {
tables[0][OFF(line_width_smooth)] = LineWidth; tables[0][OFF(line_width_smooth)] = LineWidth;
tables[0][OFF(line_width_aliased)] = LineWidth; tables[0][OFF(line_width_aliased)] = LineWidth;
} }
void SetupDirtyCullMode(Tables& tables) { void SetupDirtyCullMode(Maxwell3D::DirtyState::Tables& tables) {
auto& table = tables[0]; auto& table = tables[0];
table[OFF(gl_cull_face)] = CullMode; table[OFF(gl_cull_face)] = CullMode;
table[OFF(gl_cull_test_enabled)] = CullMode; table[OFF(gl_cull_test_enabled)] = CullMode;
} }
void SetupDirtyStateEnable(Tables& tables) { void SetupDirtyStateEnable(Maxwell3D::DirtyState::Tables& tables) {
const auto setup = [&](size_t position, u8 flag) { const auto setup = [&](size_t position, u8 flag) {
tables[0][position] = flag; tables[0][position] = flag;
tables[1][position] = StateEnable; tables[1][position] = StateEnable;
@ -157,17 +154,17 @@ void SetupDirtyStateEnable(Tables& tables) {
setup(OFF(anti_alias_alpha_control.alpha_to_one), AlphaToOneEnable); setup(OFF(anti_alias_alpha_control.alpha_to_one), AlphaToOneEnable);
} }
void SetupDirtyDepthCompareOp(Tables& tables) { void SetupDirtyDepthCompareOp(Maxwell3D::DirtyState::Tables& tables) {
tables[0][OFF(depth_test_func)] = DepthCompareOp; tables[0][OFF(depth_test_func)] = DepthCompareOp;
} }
void SetupDirtyFrontFace(Tables& tables) { void SetupDirtyFrontFace(Maxwell3D::DirtyState::Tables& tables) {
auto& table = tables[0]; auto& table = tables[0];
table[OFF(gl_front_face)] = FrontFace; table[OFF(gl_front_face)] = FrontFace;
table[OFF(window_origin)] = FrontFace; table[OFF(window_origin)] = FrontFace;
} }
void SetupDirtyStencilOp(Tables& tables) { void SetupDirtyStencilOp(Maxwell3D::DirtyState::Tables& tables) {
auto& table = tables[0]; auto& table = tables[0];
table[OFF(stencil_front_op.fail)] = StencilOp; table[OFF(stencil_front_op.fail)] = StencilOp;
table[OFF(stencil_front_op.zfail)] = StencilOp; table[OFF(stencil_front_op.zfail)] = StencilOp;
@ -182,7 +179,7 @@ void SetupDirtyStencilOp(Tables& tables) {
tables[1][OFF(stencil_two_side_enable)] = StencilOp; tables[1][OFF(stencil_two_side_enable)] = StencilOp;
} }
void SetupDirtyBlending(Tables& tables) { void SetupDirtyBlending(Maxwell3D::DirtyState::Tables& tables) {
tables[0][OFF(color_mask_common)] = Blending; tables[0][OFF(color_mask_common)] = Blending;
tables[1][OFF(color_mask_common)] = ColorMask; tables[1][OFF(color_mask_common)] = ColorMask;
tables[0][OFF(blend_per_target_enabled)] = Blending; tables[0][OFF(blend_per_target_enabled)] = Blending;
@ -196,11 +193,11 @@ void SetupDirtyBlending(Tables& tables) {
FillBlock(tables[1], OFF(blend_per_target), NUM(blend_per_target), BlendEquations); FillBlock(tables[1], OFF(blend_per_target), NUM(blend_per_target), BlendEquations);
} }
void SetupDirtySpecialOps(Tables& tables) { void SetupDirtySpecialOps(Maxwell3D::DirtyState::Tables& tables) {
tables[0][OFF(logic_op.op)] = LogicOp; tables[0][OFF(logic_op.op)] = LogicOp;
} }
void SetupDirtyViewportSwizzles(Tables& tables) { void SetupDirtyViewportSwizzles(Maxwell3D::DirtyState::Tables& tables) {
static constexpr size_t swizzle_offset = 6; static constexpr size_t swizzle_offset = 6;
for (size_t index = 0; index < Regs::NumViewports; ++index) { for (size_t index = 0; index < Regs::NumViewports; ++index) {
tables[1][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] = tables[1][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] =
@ -208,7 +205,7 @@ void SetupDirtyViewportSwizzles(Tables& tables) {
} }
} }
void SetupDirtyVertexAttributes(Tables& tables) { void SetupDirtyVertexAttributes(Maxwell3D::DirtyState::Tables& tables) {
for (size_t i = 0; i < Regs::NumVertexAttributes; ++i) { for (size_t i = 0; i < Regs::NumVertexAttributes; ++i) {
const size_t offset = OFF(vertex_attrib_format) + i * NUM(vertex_attrib_format[0]); const size_t offset = OFF(vertex_attrib_format) + i * NUM(vertex_attrib_format[0]);
FillBlock(tables[0], offset, NUM(vertex_attrib_format[0]), VertexAttribute0 + i); FillBlock(tables[0], offset, NUM(vertex_attrib_format[0]), VertexAttribute0 + i);
@ -216,7 +213,7 @@ void SetupDirtyVertexAttributes(Tables& tables) {
FillBlock(tables[1], OFF(vertex_attrib_format), Regs::NumVertexAttributes, VertexInput); FillBlock(tables[1], OFF(vertex_attrib_format), Regs::NumVertexAttributes, VertexInput);
} }
void SetupDirtyVertexBindings(Tables& tables) { void SetupDirtyVertexBindings(Maxwell3D::DirtyState::Tables& tables) {
// Do NOT include stride here, it's implicit in VertexBuffer // Do NOT include stride here, it's implicit in VertexBuffer
static constexpr size_t divisor_offset = 3; static constexpr size_t divisor_offset = 3;
for (size_t i = 0; i < Regs::NumVertexArrays; ++i) { for (size_t i = 0; i < Regs::NumVertexArrays; ++i) {
@ -228,7 +225,7 @@ void SetupDirtyVertexBindings(Tables& tables) {
} }
} }
void SetupRasterModes(Tables &tables) { void SetupRasterModes(Maxwell3D::DirtyState::Tables &tables) {
auto& table = tables[0]; auto& table = tables[0];
table[OFF(line_stipple_params)] = LineStippleParams; table[OFF(line_stipple_params)] = LineStippleParams;