[video_core] fix std::bitset<> dirty tracker OOB, fix slightly wrong estimate

Signed-off-by: lizzie <lizzie@eden-emu.dev>
This commit is contained in:
lizzie 2026-05-23 00:48:18 +00:00
parent 0d736d49d6
commit a2ca1583f5
5 changed files with 38 additions and 55 deletions

View file

@ -1252,8 +1252,7 @@ void BufferCache<P>::UpdateIndexBuffer() {
const GPUVAddr gpu_addr_end = index_buffer_ref.EndAddress();
const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
const u32 draw_size =
(index_buffer_ref.count + index_buffer_ref.first) * index_buffer_ref.FormatSizeInBytes();
const u32 draw_size = (index_buffer_ref.count + index_buffer_ref.first) * u32(index_buffer_ref.FormatSizeInBytes());
const u32 size = (std::min)(address_size, draw_size);
if (size == 0 || !device_addr) {
channel_state->index_buffer = NULL_BINDING;

View file

@ -181,12 +181,12 @@ void DmaPusher::CallMethod(u32 argument) const {
});
} else {
auto subchannel = subchannels[dma_state.subchannel];
if (!subchannel->execution_mask[dma_state.method]) {
subchannel->method_sink.emplace_back(dma_state.method, argument);
} else {
if (subchannel->execution_mask[dma_state.method]) {
subchannel->ConsumeSink();
subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset;
subchannel->CallMethod(dma_state.method, argument, dma_state.is_last_call);
} else {
subchannel->method_sink.emplace_back(dma_state.method, argument);
}
}
}

View file

@ -270,31 +270,20 @@ u32 Maxwell3D::GetMaxCurrentVertices() {
// Estimates the index buffer size. Both body variants below return the smaller of
//   (a) GetMemoryLayoutSize(start_address, ...) divided by the per-index byte size, and
//   (b) lower_cap: the register-declared range (end_address - start_address) clamped
//       to GetMaxCurrentVertices() * 4 * byte_size.
// NOTE(review): this excerpt looks like a diff rendered without +/- markers, so two
// versions of the body appear back to back (the max_sizes table version first, then
// the `1ull << (byte_size * CHAR_BIT)` version) — confirm which one is live.
// NOTE(review): (a) is divided by byte_size while (b) is not — confirm the intended unit.
size_t Maxwell3D::EstimateIndexBufferSize() {
GPUVAddr start_address = regs.index_buffer.StartAddress();
GPUVAddr end_address = regs.index_buffer.EndAddress();
// Variant 1: the layout query is bounded by the largest value of u8/u16/u32,
// selected by the ceil-log2 of the index byte size.
static constexpr std::array<size_t, 3> max_sizes = {(std::numeric_limits<u8>::max)(),
(std::numeric_limits<u16>::max)(),
(std::numeric_limits<u32>::max)()};
const size_t byte_size = regs.index_buffer.FormatSizeInBytes();
const size_t log2_byte_size = Common::Log2Ceil64(byte_size);
const size_t cap{GetMaxCurrentVertices() * 4 * byte_size};
const size_t lower_cap =
std::min<size_t>(static_cast<size_t>(end_address - start_address), cap);
return std::min<size_t>(
memory_manager.GetMemoryLayoutSize(start_address, byte_size * max_sizes[log2_byte_size]) /
byte_size,
lower_cap);
// Variant 2: the bound is the count of distinct index values instead of the largest
// index value (e.g. 1 << 8 == 256 for a 1-byte index, where variant 1 used 255).
auto const byte_size = regs.index_buffer.FormatSizeInBytes();
// Shift amount is byte_size * CHAR_BIT bits; assumes byte_size < 8 so the shift of
// a 64-bit value stays defined — TODO confirm against FormatSizeInBytes().
auto const max_size = 1ull << (byte_size * CHAR_BIT);
// NOTE(review): GetMaxCurrentVertices() returns u32, so `* 4` is evaluated in 32 bits
// before the multiplication by byte_size — confirm this cannot wrap.
auto const upper_cap = GetMaxCurrentVertices() * 4 * byte_size;
auto const lower_cap = std::min<size_t>(size_t(end_address - start_address), upper_cap);
return std::min<size_t>(memory_manager.GetMemoryLayoutSize(start_address, byte_size * max_size) / byte_size, lower_cap);
}
// Applies the current shadow RAM control mode to a register write and returns the
// value to use for `method`:
//   - Track / TrackWithFilter: stores `argument` in shadow_state.reg_array[method]
//     and returns it;
//   - Replay: returns the value already stored in shadow_state.reg_array[method];
//   - any other mode: returns `argument` unchanged.
// NOTE(review): this excerpt looks like a diff rendered without +/- markers, so a
// braced variant (local `control`) and a compact variant (local `c`) of the same
// logic are interleaved below — confirm which one is live.
u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) {
// Keep track of the register value in shadow_state when requested.
const auto control = shadow_state.shadow_ram_control;
if (control == Regs::ShadowRamControl::Track ||
control == Regs::ShadowRamControl::TrackWithFilter) {
shadow_state.reg_array[method] = argument;
return argument;
}
if (control == Regs::ShadowRamControl::Replay) {
auto const c = shadow_state.shadow_ram_control;
if (c == Regs::ShadowRamControl::Track || c == Regs::ShadowRamControl::TrackWithFilter)
// The assignment expression yields the stored value, so this stores and returns `argument`.
return shadow_state.reg_array[method] = argument;
else if (c == Regs::ShadowRamControl::Replay)
return shadow_state.reg_array[method];
}
return argument;
}
@ -317,10 +306,8 @@ void Maxwell3D::ConsumeSinkImpl() {
// Writes `argument` into regs.reg_array[method], then for every table in
// dirty.tables sets the dirty flag whose index is that table's entry for `method`.
// NOTE(review): this excerpt looks like a diff rendered without +/- markers, so the
// loop header appears twice (braced and brace-less form) — confirm which one is live.
void Maxwell3D::ProcessDirtyRegisters(u32 method, u32 argument) {
regs.reg_array[method] = argument;
for (const auto& table : dirty.tables) {
for (auto const& table : dirty.tables)
// table[method] is used directly as the bit index into dirty.flags.
dirty.flags[table[method]] = true;
}
}
void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument, bool is_last_call) {

View file

@ -2215,7 +2215,7 @@ public:
u32 first;
u32 count;
unsigned FormatSizeInBytes() const {
size_t FormatSizeInBytes() const {
switch (format) {
case IndexFormat::UnsignedByte:
return 1;
@ -2224,7 +2224,7 @@ public:
case IndexFormat::UnsignedInt:
return 4;
}
ASSERT(false);
UNREACHABLE();
return 1;
}
@ -3148,9 +3148,9 @@ public:
}
struct DirtyState {
// Bitset of dirty flags. Table entries are u8, which can hold 0..255; a bitset sized
// with numeric_limits<u8>::max() has only 255 bits (valid positions 0..254), so
// position 255 needs the `+ 1` form (256 bits) to be in range.
// NOTE(review): this excerpt looks like a diff rendered without +/- markers, so two
// alternatives for Flags and for Tables appear — confirm which ones are live.
using Flags = std::bitset<(std::numeric_limits<u8>::max)()>;
using Flags = std::bitset<(std::numeric_limits<u8>::max)() + 1>;
// One u8 entry per register (Regs::NUM_REGS entries).
using Table = std::array<u8, Regs::NUM_REGS>;
// Two such tables; the second alternative spells out the same type without the Table alias.
using Tables = std::array<Table, 2>;
using Tables = std::array<std::array<u8, Regs::NUM_REGS>, 2>;
Flags flags;
Tables tables{};

View file

@ -24,11 +24,8 @@ using namespace Dirty;
using namespace VideoCommon::Dirty;
using Tegra::Engines::Maxwell3D;
using Regs = Maxwell3D::Regs;
using Tables = Maxwell3D::DirtyState::Tables;
using Table = Maxwell3D::DirtyState::Table;
using Flags = Maxwell3D::DirtyState::Flags;
Flags MakeInvalidationFlags() {
Maxwell3D::DirtyState::Flags MakeInvalidationFlags() {
static constexpr int INVALIDATION_FLAGS[]{
Viewports,
Scissors,
@ -68,7 +65,7 @@ Flags MakeInvalidationFlags() {
LineStippleEnable,
LineStippleParams,
};
Flags flags{};
Maxwell3D::DirtyState::Flags flags{};
for (const int flag : INVALIDATION_FLAGS) {
flags[flag] = true;
}
@ -84,7 +81,7 @@ Flags MakeInvalidationFlags() {
return flags;
}
void SetupDirtyViewports(Tables& tables) {
void SetupDirtyViewports(Maxwell3D::DirtyState::Tables& tables) {
FillBlock(tables[0], OFF(viewport_transform), NUM(viewport_transform), Viewports);
FillBlock(tables[0], OFF(viewports), NUM(viewports), Viewports);
FillBlock(tables[1], OFF(surface_clip), NUM(surface_clip), Viewports);
@ -92,26 +89,26 @@ void SetupDirtyViewports(Tables& tables) {
tables[1][OFF(window_origin)] = Viewports;
}
// Calls FillBlock on tables[0] for the scissor_test registers (offset OFF, count NUM)
// with the Scissors flag id. FillBlock is defined elsewhere; presumably it writes the
// flag id into each entry of that range — verify.
// NOTE(review): two signature lines appear because this excerpt looks like a diff
// rendered without +/- markers; they differ only in how the Tables type is spelled.
void SetupDirtyScissors(Tables& tables) {
void SetupDirtyScissors(Maxwell3D::DirtyState::Tables& tables) {
FillBlock(tables[0], OFF(scissor_test), NUM(scissor_test), Scissors);
}
// Assigns the DepthBias flag id to the tables[0] entries at the offsets of
// depth_bias, depth_bias_clamp and slope_scale_depth_bias.
// NOTE(review): two signature lines appear because this excerpt looks like a diff
// rendered without +/- markers; they differ only in how the Tables type is spelled.
void SetupDirtyDepthBias(Tables& tables) {
void SetupDirtyDepthBias(Maxwell3D::DirtyState::Tables& tables) {
auto& table = tables[0];
table[OFF(depth_bias)] = DepthBias;
table[OFF(depth_bias_clamp)] = DepthBias;
table[OFF(slope_scale_depth_bias)] = DepthBias;
}
// Calls FillBlock on tables[0] for the blend_color registers (offset OFF, count NUM)
// with the BlendConstants flag id. FillBlock is defined elsewhere; presumably it
// writes the flag id into each entry of that range — verify.
// NOTE(review): two signature lines appear because this excerpt looks like a diff
// rendered without +/- markers; they differ only in how the Tables type is spelled.
void SetupDirtyBlendConstants(Tables& tables) {
void SetupDirtyBlendConstants(Maxwell3D::DirtyState::Tables& tables) {
FillBlock(tables[0], OFF(blend_color), NUM(blend_color), BlendConstants);
}
// Calls FillBlock on tables[0] for the depth_bounds registers (offset OFF, count NUM)
// with the DepthBounds flag id. FillBlock is defined elsewhere; presumably it writes
// the flag id into each entry of that range — verify.
// NOTE(review): two signature lines appear because this excerpt looks like a diff
// rendered without +/- markers; they differ only in how the Tables type is spelled.
void SetupDirtyDepthBounds(Tables& tables) {
void SetupDirtyDepthBounds(Maxwell3D::DirtyState::Tables& tables) {
FillBlock(tables[0], OFF(depth_bounds), NUM(depth_bounds), DepthBounds);
}
void SetupDirtyStencilProperties(Tables& tables) {
void SetupDirtyStencilProperties(Maxwell3D::DirtyState::Tables& tables) {
const auto setup = [&](size_t position, u8 flag) {
tables[0][position] = flag;
tables[1][position] = StencilProperties;
@ -125,18 +122,18 @@ void SetupDirtyStencilProperties(Tables& tables) {
setup(OFF(stencil_back_func_mask), StencilCompare);
}
// Assigns the LineWidth flag id to the tables[0] entries at the offsets of
// line_width_smooth and line_width_aliased.
// NOTE(review): two signature lines appear because this excerpt looks like a diff
// rendered without +/- markers; they differ only in how the Tables type is spelled.
void SetupDirtyLineWidth(Tables& tables) {
void SetupDirtyLineWidth(Maxwell3D::DirtyState::Tables& tables) {
tables[0][OFF(line_width_smooth)] = LineWidth;
tables[0][OFF(line_width_aliased)] = LineWidth;
}
// Assigns the CullMode flag id to the tables[0] entries at the offsets of
// gl_cull_face and gl_cull_test_enabled.
// NOTE(review): two signature lines appear because this excerpt looks like a diff
// rendered without +/- markers; they differ only in how the Tables type is spelled.
void SetupDirtyCullMode(Tables& tables) {
void SetupDirtyCullMode(Maxwell3D::DirtyState::Tables& tables) {
auto& table = tables[0];
table[OFF(gl_cull_face)] = CullMode;
table[OFF(gl_cull_test_enabled)] = CullMode;
}
void SetupDirtyStateEnable(Tables& tables) {
void SetupDirtyStateEnable(Maxwell3D::DirtyState::Tables& tables) {
const auto setup = [&](size_t position, u8 flag) {
tables[0][position] = flag;
tables[1][position] = StateEnable;
@ -157,17 +154,17 @@ void SetupDirtyStateEnable(Tables& tables) {
setup(OFF(anti_alias_alpha_control.alpha_to_one), AlphaToOneEnable);
}
// Assigns the DepthCompareOp flag id to the tables[0] entry at the offset of
// depth_test_func.
// NOTE(review): two signature lines appear because this excerpt looks like a diff
// rendered without +/- markers; they differ only in how the Tables type is spelled.
void SetupDirtyDepthCompareOp(Tables& tables) {
void SetupDirtyDepthCompareOp(Maxwell3D::DirtyState::Tables& tables) {
tables[0][OFF(depth_test_func)] = DepthCompareOp;
}
// Assigns the FrontFace flag id to the tables[0] entries at the offsets of
// gl_front_face and window_origin.
// NOTE(review): two signature lines appear because this excerpt looks like a diff
// rendered without +/- markers; they differ only in how the Tables type is spelled.
void SetupDirtyFrontFace(Tables& tables) {
void SetupDirtyFrontFace(Maxwell3D::DirtyState::Tables& tables) {
auto& table = tables[0];
table[OFF(gl_front_face)] = FrontFace;
table[OFF(window_origin)] = FrontFace;
}
void SetupDirtyStencilOp(Tables& tables) {
void SetupDirtyStencilOp(Maxwell3D::DirtyState::Tables& tables) {
auto& table = tables[0];
table[OFF(stencil_front_op.fail)] = StencilOp;
table[OFF(stencil_front_op.zfail)] = StencilOp;
@ -182,7 +179,7 @@ void SetupDirtyStencilOp(Tables& tables) {
tables[1][OFF(stencil_two_side_enable)] = StencilOp;
}
void SetupDirtyBlending(Tables& tables) {
void SetupDirtyBlending(Maxwell3D::DirtyState::Tables& tables) {
tables[0][OFF(color_mask_common)] = Blending;
tables[1][OFF(color_mask_common)] = ColorMask;
tables[0][OFF(blend_per_target_enabled)] = Blending;
@ -196,11 +193,11 @@ void SetupDirtyBlending(Tables& tables) {
FillBlock(tables[1], OFF(blend_per_target), NUM(blend_per_target), BlendEquations);
}
// Assigns the LogicOp flag id to the tables[0] entry at the offset of logic_op.op.
// NOTE(review): two signature lines appear because this excerpt looks like a diff
// rendered without +/- markers; they differ only in how the Tables type is spelled.
void SetupDirtySpecialOps(Tables& tables) {
void SetupDirtySpecialOps(Maxwell3D::DirtyState::Tables& tables) {
tables[0][OFF(logic_op.op)] = LogicOp;
}
void SetupDirtyViewportSwizzles(Tables& tables) {
void SetupDirtyViewportSwizzles(Maxwell3D::DirtyState::Tables& tables) {
static constexpr size_t swizzle_offset = 6;
for (size_t index = 0; index < Regs::NumViewports; ++index) {
tables[1][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] =
@ -208,7 +205,7 @@ void SetupDirtyViewportSwizzles(Tables& tables) {
}
}
void SetupDirtyVertexAttributes(Tables& tables) {
void SetupDirtyVertexAttributes(Maxwell3D::DirtyState::Tables& tables) {
for (size_t i = 0; i < Regs::NumVertexAttributes; ++i) {
const size_t offset = OFF(vertex_attrib_format) + i * NUM(vertex_attrib_format[0]);
FillBlock(tables[0], offset, NUM(vertex_attrib_format[0]), VertexAttribute0 + i);
@ -216,7 +213,7 @@ void SetupDirtyVertexAttributes(Tables& tables) {
FillBlock(tables[1], OFF(vertex_attrib_format), Regs::NumVertexAttributes, VertexInput);
}
void SetupDirtyVertexBindings(Tables& tables) {
void SetupDirtyVertexBindings(Maxwell3D::DirtyState::Tables& tables) {
// Do NOT include stride here, it's implicit in VertexBuffer
static constexpr size_t divisor_offset = 3;
for (size_t i = 0; i < Regs::NumVertexArrays; ++i) {
@ -228,7 +225,7 @@ void SetupDirtyVertexBindings(Tables& tables) {
}
}
void SetupRasterModes(Tables &tables) {
void SetupRasterModes(Maxwell3D::DirtyState::Tables &tables) {
auto& table = tables[0];
table[OFF(line_stipple_params)] = LineStippleParams;