From a2ca1583f531be9e08e9ee9b62583f43ce4ee18d Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 23 May 2026 00:48:18 +0000 Subject: [PATCH] [video_core] fix std::bitset<> dirty tracker OOB, fix slightly wrong estimate Signed-off-by: lizzie --- src/video_core/buffer_cache/buffer_cache.h | 3 +- src/video_core/dma_pusher.cpp | 6 +-- src/video_core/engines/maxwell_3d.cpp | 33 +++++--------- src/video_core/engines/maxwell_3d.h | 8 ++-- .../renderer_vulkan/vk_state_tracker.cpp | 43 +++++++++---------- 5 files changed, 38 insertions(+), 55 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 60e0e8449b..bc5a5b6e56 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -1252,8 +1252,7 @@ void BufferCache

::UpdateIndexBuffer() { const GPUVAddr gpu_addr_end = index_buffer_ref.EndAddress(); const std::optional device_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin); const u32 address_size = static_cast(gpu_addr_end - gpu_addr_begin); - const u32 draw_size = - (index_buffer_ref.count + index_buffer_ref.first) * index_buffer_ref.FormatSizeInBytes(); + const u32 draw_size = (index_buffer_ref.count + index_buffer_ref.first) * u32(index_buffer_ref.FormatSizeInBytes()); const u32 size = (std::min)(address_size, draw_size); if (size == 0 || !device_addr) { channel_state->index_buffer = NULL_BINDING; diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index f98e20171a..8a7798e035 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -181,12 +181,12 @@ void DmaPusher::CallMethod(u32 argument) const { }); } else { auto subchannel = subchannels[dma_state.subchannel]; - if (!subchannel->execution_mask[dma_state.method]) { - subchannel->method_sink.emplace_back(dma_state.method, argument); - } else { + if (subchannel->execution_mask[dma_state.method]) { subchannel->ConsumeSink(); subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset; subchannel->CallMethod(dma_state.method, argument, dma_state.is_last_call); + } else { + subchannel->method_sink.emplace_back(dma_state.method, argument); } } } diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 9aaa99f7ff..7cf351e458 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -270,31 +270,20 @@ u32 Maxwell3D::GetMaxCurrentVertices() { size_t Maxwell3D::EstimateIndexBufferSize() { GPUVAddr start_address = regs.index_buffer.StartAddress(); GPUVAddr end_address = regs.index_buffer.EndAddress(); - static constexpr std::array max_sizes = {(std::numeric_limits::max)(), - (std::numeric_limits::max)(), - (std::numeric_limits::max)()}; - const size_t byte_size = regs.index_buffer.FormatSizeInBytes(); - const size_t log2_byte_size = Common::Log2Ceil64(byte_size); - const size_t cap{GetMaxCurrentVertices() * 4 * byte_size}; - const size_t lower_cap = - std::min(static_cast(end_address - start_address), cap); - return std::min( - memory_manager.GetMemoryLayoutSize(start_address, byte_size * max_sizes[log2_byte_size]) / - byte_size, - lower_cap); + auto const byte_size = regs.index_buffer.FormatSizeInBytes(); + auto const max_size = 1ull << (byte_size * CHAR_BIT); + auto const upper_cap = GetMaxCurrentVertices() * 4 * byte_size; + auto const lower_cap = std::min(size_t(end_address - start_address), upper_cap); + return std::min(memory_manager.GetMemoryLayoutSize(start_address, byte_size * max_size) / byte_size, lower_cap); } u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) { // Keep track of the register value in shadow_state when requested. - const auto control = shadow_state.shadow_ram_control; - if (control == Regs::ShadowRamControl::Track || - control == Regs::ShadowRamControl::TrackWithFilter) { - shadow_state.reg_array[method] = argument; - return argument; - } - if (control == Regs::ShadowRamControl::Replay) { + auto const c = shadow_state.shadow_ram_control; + if (c == Regs::ShadowRamControl::Track || c == Regs::ShadowRamControl::TrackWithFilter) + return shadow_state.reg_array[method] = argument; + else if (c == Regs::ShadowRamControl::Replay) return shadow_state.reg_array[method]; - } return argument; } @@ -317,10 +306,8 @@ void Maxwell3D::ConsumeSinkImpl() { void Maxwell3D::ProcessDirtyRegisters(u32 method, u32 argument) { regs.reg_array[method] = argument; - - for (const auto& table : dirty.tables) { + for (auto const& table : dirty.tables) dirty.flags[table[method]] = true; - } } void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument, bool is_last_call) { diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 3ac79e0eb8..864ee27fb6 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -2215,7 +2215,7 @@ public: u32 first; u32 count; - unsigned FormatSizeInBytes() const { + size_t FormatSizeInBytes() const { switch (format) { case IndexFormat::UnsignedByte: return 1; @@ -2224,7 +2224,7 @@ public: case IndexFormat::UnsignedInt: return 4; } - ASSERT(false); + UNREACHABLE(); return 1; } @@ -3148,9 +3148,9 @@ public: } struct DirtyState { - using Flags = std::bitset<(std::numeric_limits::max)()>; + using Flags = std::bitset<(std::numeric_limits::max)() + 1>; using Table = std::array; - using Tables = std::array; + using Tables = std::array, 2>; Flags flags; Tables tables{}; diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index 3f4dd89c7e..a7b7c46f39 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -24,11 +24,8 @@ using namespace Dirty; using namespace VideoCommon::Dirty; using Tegra::Engines::Maxwell3D; using Regs = Maxwell3D::Regs; -using Tables = Maxwell3D::DirtyState::Tables; -using Table = Maxwell3D::DirtyState::Table; -using Flags = Maxwell3D::DirtyState::Flags; -Flags MakeInvalidationFlags() { +Maxwell3D::DirtyState::Flags MakeInvalidationFlags() { static constexpr int INVALIDATION_FLAGS[]{ Viewports, Scissors, @@ -68,7 +65,7 @@ Flags MakeInvalidationFlags() { LineStippleEnable, LineStippleParams, }; - Flags flags{}; + Maxwell3D::DirtyState::Flags flags{}; for (const int flag : INVALIDATION_FLAGS) { flags[flag] = true; } @@ -84,7 +81,7 @@ Flags MakeInvalidationFlags() { return flags; } -void SetupDirtyViewports(Tables& tables) { +void SetupDirtyViewports(Maxwell3D::DirtyState::Tables& tables) { FillBlock(tables[0], OFF(viewport_transform), NUM(viewport_transform), Viewports); FillBlock(tables[0], OFF(viewports), NUM(viewports), Viewports); FillBlock(tables[1], OFF(surface_clip), NUM(surface_clip), Viewports); @@ -92,26 +89,26 @@ void SetupDirtyViewports(Tables& tables) { tables[1][OFF(window_origin)] = Viewports; } -void SetupDirtyScissors(Tables& tables) { +void SetupDirtyScissors(Maxwell3D::DirtyState::Tables& tables) { FillBlock(tables[0], OFF(scissor_test), NUM(scissor_test), Scissors); } -void SetupDirtyDepthBias(Tables& tables) { +void SetupDirtyDepthBias(Maxwell3D::DirtyState::Tables& tables) { auto& table = tables[0]; table[OFF(depth_bias)] = DepthBias; table[OFF(depth_bias_clamp)] = DepthBias; table[OFF(slope_scale_depth_bias)] = DepthBias; } -void SetupDirtyBlendConstants(Tables& tables) { +void SetupDirtyBlendConstants(Maxwell3D::DirtyState::Tables& tables) { FillBlock(tables[0], OFF(blend_color), NUM(blend_color), BlendConstants); } -void SetupDirtyDepthBounds(Tables& tables) { +void SetupDirtyDepthBounds(Maxwell3D::DirtyState::Tables& tables) { FillBlock(tables[0], OFF(depth_bounds), NUM(depth_bounds), DepthBounds); } -void SetupDirtyStencilProperties(Tables& tables) { +void SetupDirtyStencilProperties(Maxwell3D::DirtyState::Tables& tables) { const auto setup = [&](size_t position, u8 flag) { tables[0][position] = flag; tables[1][position] = StencilProperties; @@ -125,18 +122,18 @@ void SetupDirtyStencilProperties(Tables& tables) { setup(OFF(stencil_back_func_mask), StencilCompare); } -void SetupDirtyLineWidth(Tables& tables) { +void SetupDirtyLineWidth(Maxwell3D::DirtyState::Tables& tables) { tables[0][OFF(line_width_smooth)] = LineWidth; tables[0][OFF(line_width_aliased)] = LineWidth; } -void SetupDirtyCullMode(Tables& tables) { +void SetupDirtyCullMode(Maxwell3D::DirtyState::Tables& tables) { auto& table = tables[0]; table[OFF(gl_cull_face)] = CullMode; table[OFF(gl_cull_test_enabled)] = CullMode; } -void SetupDirtyStateEnable(Tables& tables) { +void SetupDirtyStateEnable(Maxwell3D::DirtyState::Tables& tables) { const auto setup = [&](size_t position, u8 flag) { tables[0][position] = flag; tables[1][position] = StateEnable; @@ -157,17 +154,17 @@ void SetupDirtyStateEnable(Tables& tables) { setup(OFF(anti_alias_alpha_control.alpha_to_one), AlphaToOneEnable); } -void SetupDirtyDepthCompareOp(Tables& tables) { +void SetupDirtyDepthCompareOp(Maxwell3D::DirtyState::Tables& tables) { tables[0][OFF(depth_test_func)] = DepthCompareOp; } -void SetupDirtyFrontFace(Tables& tables) { +void SetupDirtyFrontFace(Maxwell3D::DirtyState::Tables& tables) { auto& table = tables[0]; table[OFF(gl_front_face)] = FrontFace; table[OFF(window_origin)] = FrontFace; } -void SetupDirtyStencilOp(Tables& tables) { +void SetupDirtyStencilOp(Maxwell3D::DirtyState::Tables& tables) { auto& table = tables[0]; table[OFF(stencil_front_op.fail)] = StencilOp; table[OFF(stencil_front_op.zfail)] = StencilOp; @@ -182,7 +179,7 @@ void SetupDirtyStencilOp(Tables& tables) { tables[1][OFF(stencil_two_side_enable)] = StencilOp; } -void SetupDirtyBlending(Tables& tables) { +void SetupDirtyBlending(Maxwell3D::DirtyState::Tables& tables) { tables[0][OFF(color_mask_common)] = Blending; tables[1][OFF(color_mask_common)] = ColorMask; tables[0][OFF(blend_per_target_enabled)] = Blending; @@ -196,11 +193,11 @@ void SetupDirtyBlending(Tables& tables) { FillBlock(tables[1], OFF(blend_per_target), NUM(blend_per_target), BlendEquations); } -void SetupDirtySpecialOps(Tables& tables) { +void SetupDirtySpecialOps(Maxwell3D::DirtyState::Tables& tables) { tables[0][OFF(logic_op.op)] = LogicOp; } -void SetupDirtyViewportSwizzles(Tables& tables) { +void SetupDirtyViewportSwizzles(Maxwell3D::DirtyState::Tables& tables) { static constexpr size_t swizzle_offset = 6; for (size_t index = 0; index < Regs::NumViewports; ++index) { tables[1][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] = @@ -208,7 +205,7 @@ void SetupDirtyViewportSwizzles(Tables& tables) { } } -void SetupDirtyVertexAttributes(Tables& tables) { +void SetupDirtyVertexAttributes(Maxwell3D::DirtyState::Tables& tables) { for (size_t i = 0; i < Regs::NumVertexAttributes; ++i) { const size_t offset = OFF(vertex_attrib_format) + i * NUM(vertex_attrib_format[0]); FillBlock(tables[0], offset, NUM(vertex_attrib_format[0]), VertexAttribute0 + i); @@ -216,7 +213,7 @@ void SetupDirtyVertexAttributes(Tables& tables) { FillBlock(tables[1], OFF(vertex_attrib_format), Regs::NumVertexAttributes, VertexInput); } -void SetupDirtyVertexBindings(Tables& tables) { +void SetupDirtyVertexBindings(Maxwell3D::DirtyState::Tables& tables) { // Do NOT include stride here, it's implicit in VertexBuffer static constexpr size_t divisor_offset = 3; for (size_t i = 0; i < Regs::NumVertexArrays; ++i) { @@ -228,7 +225,7 @@ void SetupDirtyVertexBindings(Tables& tables) { } } -void SetupRasterModes(Tables &tables) { +void SetupRasterModes(Maxwell3D::DirtyState::Tables &tables) { auto& table = tables[0]; table[OFF(line_stipple_params)] = LineStippleParams;