From def03f6589007026572816baed73f762818f05bb Mon Sep 17 00:00:00 2001 From: lizzie Date: Fri, 29 May 2026 03:28:47 +0200 Subject: [PATCH] [video_core] fix redundant resize-copy overload and just use default-init resize, to reduce stutter on Mario BP (#3874) Before vs. after (see the images below). Mario Brothership kept remaking vectors of sizes 256 and 4095 (TIC) and 1215 and 524287 (TSC) every single frame, which resulted in noticeable overhead. The main cause was using `resize(n, c)` instead of `resize(n)` (this change also aggressively resizes for more room beforehand): the copy overload of resize copies the value over the entire vector. Additionally, __append() kept getting called because the capacity kept changing all over the place. ![image](/attachments/e3ba07fb-1c85-4d56-9b81-bb16a8150c15) ![image](/attachments/5c4eba26-015a-4c95-9b24-b41695a62e51) Signed-off-by: lizzie Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3874 Reviewed-by: crueter Reviewed-by: CamilleLaVey Reviewed-by: MaranBr --- src/common/slot_vector.h | 38 ++- .../touch_screen/touch_screen_resource.cpp | 24 +- .../renderer_opengl/gl_compute_pipeline.cpp | 6 +- .../renderer_opengl/gl_graphics_pipeline.cpp | 6 +- .../renderer_opengl/gl_rasterizer.cpp | 4 +- .../renderer_vulkan/vk_compute_pipeline.cpp | 6 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 10 +- .../renderer_vulkan/vk_rasterizer.cpp | 4 +- .../texture_cache/descriptor_table.h | 63 ++-- .../texture_cache/texture_cache.cpp | 7 +- src/video_core/texture_cache/texture_cache.h | 314 +++++++----------- .../texture_cache/texture_cache_base.h | 64 ++-- 12 files changed, 219 insertions(+), 327 deletions(-) diff --git a/src/common/slot_vector.h b/src/common/slot_vector.h index e464d3d948..f6da7a59d1 100644 --- a/src/common/slot_vector.h +++ b/src/common/slot_vector.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // 
SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project @@ -20,10 +20,14 @@ namespace Common { struct SlotId { + static constexpr u32 TAGGED_MASK = 0x7fffffff; + static constexpr u32 TAGGED_VALUE = 0x80000000; static constexpr u32 INVALID_INDEX = (std::numeric_limits::max)(); + constexpr u32 Value() const noexcept { + return index & (~TAGGED_VALUE); + } constexpr auto operator<=>(const SlotId&) const noexcept = default; - constexpr explicit operator bool() const noexcept { return index != INVALID_INDEX; } @@ -47,12 +51,12 @@ public: Iterator& operator++() noexcept { const u64* const bitset = slot_vector->stored_bitset.data(); const u32 size = static_cast(slot_vector->stored_bitset.size()) * 64; - if (id.index < size) { + if (id.Value() < size) { do { ++id.index; - } while (id.index < size && !IsValid(bitset)); - if (id.index == size) { - id.index = SlotId::INVALID_INDEX; + } while (id.Value() < size && !IsValid(bitset)); + if (id.Value() == size) { + id = SlotId{}; } } return *this; @@ -85,7 +89,7 @@ public: : slot_vector{slot_vector_}, id{id_} {} bool IsValid(const u64* bitset) const noexcept { - return ((bitset[id.index / 64] >> (id.index % 64)) & 1) != 0; + return ((bitset[id.Value() / 64] >> (id.Value() % 64)) & 1) != 0; } SlotVector* slot_vector; @@ -107,12 +111,12 @@ public: [[nodiscard]] T& operator[](SlotId id) noexcept { ValidateIndex(id); - return values[id.index].object; + return values[id.Value()].object; } [[nodiscard]] const T& operator[](SlotId id) const noexcept { ValidateIndex(id); - return values[id.index].object; + return values[id.Value()].object; } template @@ -125,9 +129,9 @@ public: } void erase(SlotId id) noexcept { - values[id.index].object.~T(); - free_list.push_back(id.index); - ResetStorageBit(id.index); + values[id.Value()].object.~T(); + free_list.push_back(id.Value()); + ResetStorageBit(id.Value()); } [[nodiscard]] Iterator begin() noexcept { @@ -141,7 +145,7 @@ public: } [[nodiscard]] 
Iterator end() noexcept { - return Iterator(this, SlotId{SlotId::INVALID_INDEX}); + return Iterator(this, SlotId{}); } [[nodiscard]] size_t size() const noexcept { @@ -175,8 +179,8 @@ private: void ValidateIndex(SlotId id) const noexcept { DEBUG_ASSERT(id); - DEBUG_ASSERT(id.index / 64 < stored_bitset.size()); - DEBUG_ASSERT(((stored_bitset[id.index / 64] >> (id.index % 64)) & 1) != 0); + DEBUG_ASSERT(id.Value() / 64 < stored_bitset.size()); + DEBUG_ASSERT(((stored_bitset[id.Value() / 64] >> (id.Value() % 64)) & 1) != 0); } [[nodiscard]] u32 FreeValueIndex() noexcept { @@ -208,9 +212,7 @@ private: const size_t old_free_size = free_list.size(); free_list.resize(old_free_size + (new_capacity - values_capacity)); - std::iota(free_list.begin() + old_free_size, free_list.end(), - static_cast(values_capacity)); - + std::iota(free_list.begin() + old_free_size, free_list.end(), u32(values_capacity)); delete[] values; values = new_values; values_capacity = new_capacity; diff --git a/src/hid_core/resources/touch_screen/touch_screen_resource.cpp b/src/hid_core/resources/touch_screen/touch_screen_resource.cpp index 5d77fe5719..018a43b6c0 100644 --- a/src/hid_core/resources/touch_screen/touch_screen_resource.cpp +++ b/src/hid_core/resources/touch_screen/touch_screen_resource.cpp @@ -486,27 +486,17 @@ void TouchResource::ReadTouchInput() { SanitizeInput(current_touch_state); std::scoped_lock lock{*input_mutex}; - if (current_touch_state.entry_count == previous_touch_state.entry_count) { - if (current_touch_state.entry_count < 1) { - return; - } + if (current_touch_state.entry_count == previous_touch_state.entry_count && current_touch_state.entry_count >= 1) { bool has_moved = false; - for (std::size_t i = 0; i < static_cast(current_touch_state.entry_count); - i++) { - s32 delta_x = std::abs(static_cast(current_touch_state.states[i].position.x) - - static_cast(previous_touch_state.states[i].position.x)); - s32 delta_y = 
std::abs(static_cast(current_touch_state.states[i].position.y) - - static_cast(previous_touch_state.states[i].position.y)); - if (delta_x > 1 || delta_y > 1) { - has_moved = true; - } + for (std::size_t i = 0; !has_moved && i < std::size_t(current_touch_state.entry_count); i++) { + s32 delta_x = std::abs(s32(current_touch_state.states[i].position.x) - s32(previous_touch_state.states[i].position.x)); + s32 delta_y = std::abs(s32(current_touch_state.states[i].position.y) - s32(previous_touch_state.states[i].position.y)); + has_moved |= (delta_x > 1 || delta_y > 1); } - if (!has_moved) { - return; + if (has_moved) { + input_event->Signal(); } } - - input_event->Signal(); } void TouchResource::OnTouchUpdate(s64 timestamp) { diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index d1c61be743..f0a7baf9aa 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -90,7 +90,7 @@ void ComputePipeline::Configure() { desc.is_written); ++ssbo_index; } - texture_cache.SynchronizeComputeDescriptors(); + texture_cache.SynchronizeDescriptors(true); boost::container::static_vector views; boost::container::static_vector samplers; @@ -148,14 +148,14 @@ void ComputePipeline::Configure() { const auto handle{read_handle(desc, index)}; views.push_back({handle.first}); - VideoCommon::SamplerId sampler = texture_cache.GetComputeSamplerId(handle.second); + VideoCommon::SamplerId sampler = texture_cache.GetSamplerId(handle.second, true); samplers.push_back(sampler); } } for (const auto& desc : info.image_descriptors) { add_image(desc, desc.is_written); } - texture_cache.FillComputeImageViews(std::span(views.data(), views.size())); + texture_cache.FillImageViews(std::span(views.data(), views.size()), true); if (!is_built) { WaitForBuild(); diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp 
b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index ee3498428e..83545463ac 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -283,7 +283,7 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) { size_t views_index{}; size_t samplers_index{}; - texture_cache.SynchronizeGraphicsDescriptors(); + texture_cache.SynchronizeDescriptors(false); buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes); buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings); @@ -354,7 +354,7 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) { const auto handle{read_handle(desc, index)}; views[views_index++] = {handle.first}; - VideoCommon::SamplerId sampler{texture_cache.GetGraphicsSamplerId(handle.second)}; + VideoCommon::SamplerId sampler{texture_cache.GetSamplerId(handle.second, false)}; samplers[samplers_index++] = sampler; } } @@ -379,7 +379,7 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) { if constexpr (Spec::enabled_stages[4]) { config_stage(4); } - texture_cache.FillGraphicsImageViews(std::span(views.data(), views_index)); + texture_cache.FillImageViews(std::span(views.data(), views_index), false, Spec::has_images); texture_cache.UpdateRenderTargets(false); state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 70f244809f..1d77d28c46 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -353,13 +353,13 @@ void RasterizerOpenGL::DrawTexture() { gpu.TickWork(); }; - texture_cache.SynchronizeGraphicsDescriptors(); + texture_cache.SynchronizeDescriptors(false); texture_cache.UpdateRenderTargets(false); SyncState(); const auto& draw_texture_state = maxwell3d->draw_manager.draw_texture_state; - const auto& sampler = 
texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler); + const auto& sampler = texture_cache.GetSampler(draw_texture_state.src_sampler, true); const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture); const auto Scale = [&](auto dim) -> s32 { diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 490dd7acfe..81ff8fe31a 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -125,7 +125,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, ++ssbo_index; } - texture_cache.SynchronizeComputeDescriptors(); + texture_cache.SynchronizeDescriptors(true); boost::container::small_vector views; boost::container::small_vector samplers; @@ -173,14 +173,14 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, const auto handle{read_handle(desc, index)}; views.push_back({handle.first}); - VideoCommon::SamplerId sampler = texture_cache.GetComputeSamplerId(handle.second); + VideoCommon::SamplerId sampler = texture_cache.GetSamplerId(handle.second, true); samplers.push_back(sampler); } } for (const auto& desc : info.image_descriptors) { add_image(desc, desc.is_written); } - texture_cache.FillComputeImageViews(std::span(views.data(), views.size())); + texture_cache.FillImageViews(std::span(views.data(), views.size()), true); buffer_cache.UnbindComputeTextureBuffers(); size_t index{}; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 43fbefe425..9609965637 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -314,12 +314,12 @@ void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) { template bool GraphicsPipeline::ConfigureImpl(bool is_indexed) { - small_vector views; - 
small_vector samplers; + boost::container::small_vector views; + boost::container::small_vector samplers; views.reserve(num_image_elements); samplers.reserve(num_textures); - texture_cache.SynchronizeGraphicsDescriptors(); + texture_cache.SynchronizeDescriptors(false); buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes); @@ -384,7 +384,7 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) { const auto handle{read_handle(desc, index)}; views.push_back({handle.first}); - VideoCommon::SamplerId sampler{texture_cache.GetGraphicsSamplerId(handle.second)}; + VideoCommon::SamplerId sampler{texture_cache.GetSamplerId(handle.second, false)}; samplers.push_back(sampler); } } @@ -413,7 +413,7 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) { } ASSERT(views.size() == num_image_elements); ASSERT(samplers.size() == num_textures); - texture_cache.FillGraphicsImageViews(std::span(views.data(), views.size())); + texture_cache.FillImageViews(std::span(views.data(), views.size()), false, Spec::has_images); VideoCommon::ImageViewInOut* texture_buffer_it{views.data()}; const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 3c3367cfd8..d99a650acc 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -351,7 +351,7 @@ void RasterizerVulkan::DrawTexture() { FlushWork(); std::scoped_lock l{texture_cache.mutex}; - texture_cache.SynchronizeGraphicsDescriptors(); + texture_cache.SynchronizeDescriptors(false); texture_cache.UpdateRenderTargets(false); UpdateDynamicStates(); @@ -359,7 +359,7 @@ void RasterizerVulkan::DrawTexture() { query_cache.NotifySegment(true); query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, maxwell3d->regs.zpass_pixel_count_enable); const auto& draw_texture_state = maxwell3d->draw_manager.draw_texture_state; - 
const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler); + const auto& sampler = texture_cache.GetSampler(draw_texture_state.src_sampler, true); const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture); const auto* framebuffer = texture_cache.GetFramebuffer(); diff --git a/src/video_core/texture_cache/descriptor_table.h b/src/video_core/texture_cache/descriptor_table.h index 1bad83fb4a..e40c128ab5 100644 --- a/src/video_core/texture_cache/descriptor_table.h +++ b/src/video_core/texture_cache/descriptor_table.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -6,37 +9,39 @@ #include #include +#include "common/alignment.h" #include "common/common_types.h" #include "common/div_ceil.h" +#include "common/assert.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" namespace VideoCommon { -template +template class DescriptorTable { public: - explicit DescriptorTable(Tegra::MemoryManager& gpu_memory_) : gpu_memory{gpu_memory_} {} - - [[nodiscard]] bool Synchronize(GPUVAddr gpu_addr, u32 limit) { - [[likely]] if (current_gpu_addr == gpu_addr && current_limit == limit) { return false; } - Refresh(gpu_addr, limit); - return true; + [[nodiscard]] bool Synchronize(GPUVAddr gpu_addr, u32 limit) noexcept { + bool ret = !(current_gpu_addr == gpu_addr && current_limit == limit); + if (ret) { + Refresh(gpu_addr, limit); + } + return ret; } void Invalidate() noexcept { std::ranges::fill(read_descriptors, 0); } - [[nodiscard]] std::pair Read(u32 index) { + [[nodiscard]] std::pair Read(Tegra::MemoryManager const& gpu_memory, u32 index) noexcept { DEBUG_ASSERT(index <= current_limit); - const GPUVAddr gpu_addr = current_gpu_addr + index * sizeof(Descriptor); - std::pair result; - 
gpu_memory.ReadBlockUnsafe(gpu_addr, &result.first, sizeof(Descriptor)); - if (IsDescriptorRead(index)) { + const GPUVAddr gpu_addr = current_gpu_addr + index * sizeof(T); + std::pair result; + gpu_memory.ReadBlockUnsafe(gpu_addr, std::addressof(result.first), sizeof(T)); + if ((read_descriptors[index / 64] & (1ULL << (index % 64))) != 0) { result.second = result.first != descriptors[index]; } else { - MarkDescriptorAsRead(index); + read_descriptors[index / 64] |= 1ULL << (index % 64); result.second = true; } if (result.second) { @@ -45,34 +50,24 @@ public: return result; } - [[nodiscard]] u32 Limit() const noexcept { - return current_limit; - } - -private: - void Refresh(GPUVAddr gpu_addr, u32 limit) { + void Refresh(GPUVAddr gpu_addr, u32 limit) noexcept { current_gpu_addr = gpu_addr; current_limit = limit; - - const size_t num_descriptors = static_cast(limit) + 1; - read_descriptors.clear(); - read_descriptors.resize(Common::DivCeil(num_descriptors, 64U), 0); + // Mario Brothership reallocates many times, so use aggressive pre-alloc sizes + // std::vector by default uses geometric growth, but that isn't even enough to satisfy Brothership + const size_t num_descriptors = ((limit + 0x80000) & (~0x7ffff)) + 1; + size_t old_size = read_descriptors.size(); + read_descriptors.resize(Common::DivCeil(num_descriptors, 64U)); + old_size = (std::min)(old_size, read_descriptors.size()); + std::fill(read_descriptors.begin(), read_descriptors.begin() + old_size, 0); + // descriptors.resize(num_descriptors); } - void MarkDescriptorAsRead(u32 index) noexcept { - read_descriptors[index / 64] |= 1ULL << (index % 64); - } - - [[nodiscard]] bool IsDescriptorRead(u32 index) const noexcept { - return (read_descriptors[index / 64] & (1ULL << (index % 64))) != 0; - } - - Tegra::MemoryManager& gpu_memory; + std::vector read_descriptors; + std::vector descriptors; GPUVAddr current_gpu_addr{}; u32 current_limit{}; - std::vector read_descriptors; - std::vector descriptors; }; } // 
namespace VideoCommon diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 8a9a32f44a..d1728603bf 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later @@ -7,8 +10,8 @@ namespace VideoCommon { TextureCacheChannelInfo::TextureCacheChannelInfo(Tegra::Control::ChannelState& state) noexcept - : ChannelInfo(state), graphics_image_table{gpu_memory}, graphics_sampler_table{gpu_memory}, - compute_image_table{gpu_memory}, compute_sampler_table{gpu_memory} {} + : ChannelInfo(state) +{} template class VideoCommon::ChannelSetupCaches; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index cb6b5b0a94..c0a0c3736b 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -14,6 +14,7 @@ #include "common/alignment.h" #include "common/settings.h" +#include "common/slot_vector.h" #include "video_core/control/channel_state.h" #include "video_core/dirty_flags.h" #include "video_core/engines/kepler_compute.h" @@ -204,8 +205,8 @@ typename P::ImageView& TextureCache

::GetImageView(ImageViewId id) noexcept { template typename P::ImageView& TextureCache

::GetImageView(u32 index) noexcept { - const auto image_view_id = VisitImageView(channel_state->graphics_image_table, - channel_state->graphics_image_view_ids, index); + // Not compute! + const auto image_view_id = VisitImageView(index, false); return slot_image_views[image_view_id]; } @@ -215,16 +216,25 @@ void TextureCache

::MarkModification(ImageId id) noexcept { } template -template -void TextureCache

::FillGraphicsImageViews(std::span views) { - FillImageViews(channel_state->graphics_image_table, - channel_state->graphics_image_view_ids, views); -} - -template -void TextureCache

::FillComputeImageViews(std::span views) { - FillImageViews(channel_state->compute_image_table, channel_state->compute_image_view_ids, - views); +void TextureCache

::FillImageViews(std::span views, bool compute, bool blacklist) { + bool has_blacklisted = false; + do { + has_deleted_images = false; + if (blacklist) { + has_blacklisted = false; + } + for (ImageViewInOut& view : views) { + view.id = VisitImageView(view.index, compute); + if (blacklist) { + if (view.blacklist && view.id != NULL_IMAGE_VIEW_ID) { + const ImageViewBase& image_view = slot_image_views[view.id]; + auto& image = slot_images[image_view.image_id]; + has_blacklisted |= ScaleDown(image); + image.scale_rating = 0; + } + } + } + } while (has_deleted_images || (blacklist && has_blacklisted)); } template @@ -292,41 +302,24 @@ void TextureCache

::CheckFeedbackLoop(std::span views) { } template -typename P::Sampler* TextureCache

::GetGraphicsSampler(u32 index) { - return &slot_samplers[GetGraphicsSamplerId(index)]; +typename P::Sampler* TextureCache

::GetSampler(u32 index, bool compute) { + return &slot_samplers[GetSamplerId(index, compute)]; } template -typename P::Sampler* TextureCache

::GetComputeSampler(u32 index) { - return &slot_samplers[GetComputeSamplerId(index)]; -} - -template -SamplerId TextureCache

::GetGraphicsSamplerId(u32 index) { - if (index > channel_state->graphics_sampler_table.Limit()) { +SamplerId TextureCache

::GetSamplerId(u32 index, bool compute) { + auto& table = compute ? channel_state->compute_sampler_table : channel_state->graphics_sampler_table; + if (index > table.current_limit) { LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); return NULL_SAMPLER_ID; } - const auto [descriptor, is_new] = channel_state->graphics_sampler_table.Read(index); - SamplerId& id = channel_state->graphics_sampler_ids[index]; + auto const [descriptor, is_new] = table.Read(*gpu_memory, index); if (is_new) { - id = FindSampler(descriptor); + auto const id = FindSampler(descriptor, compute); + channel_state->sampler_ids.insert_or_assign(index | (compute ? Common::SlotId::TAGGED_VALUE : 0), id); + return id; } - return id; -} - -template -SamplerId TextureCache

::GetComputeSamplerId(u32 index) { - if (index > channel_state->compute_sampler_table.Limit()) { - LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); - return NULL_SAMPLER_ID; - } - const auto [descriptor, is_new] = channel_state->compute_sampler_table.Read(index); - SamplerId& id = channel_state->compute_sampler_ids[index]; - if (is_new) { - id = FindSampler(descriptor); - } - return id; + return channel_state->sampler_ids.find(index | (compute ? Common::SlotId::TAGGED_VALUE : 0))->second; } template @@ -340,45 +333,31 @@ typename P::Sampler& TextureCache

::GetSampler(SamplerId id) noexcept { } template -void TextureCache

::SynchronizeGraphicsDescriptors() { - using SamplerBinding = Tegra::Engines::Maxwell3D::Regs::SamplerBinding; - const bool linked_tsc = maxwell3d->regs.sampler_binding == SamplerBinding::ViaHeaderBinding; - const u32 tic_limit = maxwell3d->regs.tex_header.limit; - const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d->regs.tex_sampler.limit; - bool bindings_changed = false; - if (channel_state->graphics_sampler_table.Synchronize(maxwell3d->regs.tex_sampler.Address(), - tsc_limit)) { - channel_state->graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); - bindings_changed = true; - } - if (channel_state->graphics_image_table.Synchronize(maxwell3d->regs.tex_header.Address(), - tic_limit)) { - channel_state->graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); - bindings_changed = true; - } - if (bindings_changed) { - ++texture_bindings_serial; - } -} - -template -void TextureCache

::SynchronizeComputeDescriptors() { - const bool linked_tsc = kepler_compute->launch_description.linked_tsc; - const u32 tic_limit = kepler_compute->regs.tic.limit; - const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute->regs.tsc.limit; - const GPUVAddr tsc_gpu_addr = kepler_compute->regs.tsc.Address(); - bool bindings_changed = false; - if (channel_state->compute_sampler_table.Synchronize(tsc_gpu_addr, tsc_limit)) { - channel_state->compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); - bindings_changed = true; - } - if (channel_state->compute_image_table.Synchronize(kepler_compute->regs.tic.Address(), - tic_limit)) { - channel_state->compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); - bindings_changed = true; - } - if (bindings_changed) { - ++texture_bindings_serial; +void TextureCache

::SynchronizeDescriptors(bool compute) { + if (compute) { + const bool linked_tsc = kepler_compute->launch_description.linked_tsc; + const u32 tic_limit = kepler_compute->regs.tic.limit; + const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute->regs.tsc.limit; + bool bindings_changed = false; + if (channel_state->compute_sampler_table.Synchronize(kepler_compute->regs.tsc.Address(), tsc_limit)) + bindings_changed = true; + if (channel_state->compute_image_table.Synchronize(kepler_compute->regs.tic.Address(), tic_limit)) + bindings_changed = true; + if (bindings_changed) { + ++texture_bindings_serial; + } + } else { + const bool linked_tsc = maxwell3d->regs.sampler_binding == Tegra::Engines::Maxwell3D::Regs::SamplerBinding::ViaHeaderBinding; + const u32 tic_limit = maxwell3d->regs.tex_header.limit; + const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d->regs.tex_sampler.limit; + bool bindings_changed = false; + if (channel_state->graphics_sampler_table.Synchronize(maxwell3d->regs.tex_sampler.Address(), tsc_limit)) + bindings_changed = true; + if (channel_state->graphics_image_table.Synchronize(maxwell3d->regs.tex_header.Address(), tic_limit)) + bindings_changed = true; + if (bindings_changed) { + ++texture_bindings_serial; + } } } @@ -557,47 +536,30 @@ typename P::Framebuffer* TextureCache

::GetFramebuffer() { } template -template -void TextureCache

::FillImageViews(DescriptorTable& table, - std::span cached_image_view_ids, - std::span views) { - bool has_blacklisted = false; - do { - has_deleted_images = false; - if constexpr (has_blacklists) { - has_blacklisted = false; - } - for (ImageViewInOut& view : views) { - view.id = VisitImageView(table, cached_image_view_ids, view.index); - if constexpr (has_blacklists) { - if (view.blacklist && view.id != NULL_IMAGE_VIEW_ID) { - const ImageViewBase& image_view{slot_image_views[view.id]}; - auto& image = slot_images[image_view.image_id]; - has_blacklisted |= ScaleDown(image); - image.scale_rating = 0; - } - } - } - } while (has_deleted_images || (has_blacklists && has_blacklisted)); -} - -template -ImageViewId TextureCache

::VisitImageView(DescriptorTable& table, - std::span cached_image_view_ids, - u32 index) { - if (index > table.Limit()) { +ImageViewId TextureCache

::VisitImageView(u32 index, bool compute) { + auto& table = compute ? channel_state->compute_image_table : channel_state->graphics_image_table; + if (index > table.current_limit) { LOG_DEBUG(HW_GPU, "Invalid image view index={}", index); return NULL_IMAGE_VIEW_ID; } - const auto [descriptor, is_new] = table.Read(index); - ImageViewId& image_view_id = cached_image_view_ids[index]; + // Is new (on the tegra engine side)? + auto const [descriptor, is_new] = table.Read(*gpu_memory, index); if (is_new) { - image_view_id = FindImageView(descriptor); + if (IsValidEntry(*gpu_memory, descriptor)) { + // Is new (registered view) on the texture cache side? + const auto [pair, is_new_tc] = channel_state->image_views.try_emplace(descriptor); + if (is_new_tc) + pair->second = CreateImageView(descriptor); + PrepareImageView(pair->second, false, false); + channel_state->image_view_ids.insert_or_assign(index | (compute ? Common::SlotId::TAGGED_VALUE : 0), pair->second); + return pair->second; + } + channel_state->image_view_ids.insert_or_assign(index | (compute ? Common::SlotId::TAGGED_VALUE : 0), NULL_IMAGE_VIEW_ID); + return NULL_IMAGE_VIEW_ID; } - if (image_view_id != NULL_IMAGE_VIEW_ID) { - PrepareImageView(image_view_id, false, false); - } - return image_view_id; + auto const it = channel_state->image_view_ids.find(index | (compute ? Common::SlotId::TAGGED_VALUE : 0)); + PrepareImageView(it->second, false, false); + return it->second; } template @@ -1196,19 +1158,6 @@ void TextureCache

::UploadImageContents(Image& image, StagingBuffer& staging) } } -template -ImageViewId TextureCache

::FindImageView(const TICEntry& config) { - if (!IsValidEntry(*gpu_memory, config)) { - return NULL_IMAGE_VIEW_ID; - } - const auto [pair, is_new] = channel_state->image_views.try_emplace(config); - ImageViewId& image_view_id = pair->second; - if (is_new) { - image_view_id = CreateImageView(config); - } - return image_view_id; -} - template ImageViewId TextureCache

::CreateImageView(const TICEntry& config) { const ImageInfo info(config); @@ -1350,10 +1299,10 @@ void TextureCache

::InvalidateScale(Image& image) { image.image_view_infos.clear(); for (size_t c : active_channel_ids) { auto& channel_info = channel_storage[c]; - if constexpr (ENABLE_VALIDATION) { - std::ranges::fill(channel_info.graphics_image_view_ids, CORRUPT_ID); - std::ranges::fill(channel_info.compute_image_view_ids, CORRUPT_ID); - } + + if constexpr (ENABLE_VALIDATION) + for (auto& e : channel_info.image_view_ids) + e.second = CORRUPT_ID; channel_info.graphics_image_table.Invalidate(); channel_info.compute_image_table.Invalidate(); } @@ -1918,7 +1867,7 @@ std::pair TextureCache

::PrepareDmaImage(ImageId dst_id, GPUVAddr ba } template -SamplerId TextureCache

::FindSampler(const TSCEntry& config) { +SamplerId TextureCache

::FindSampler(const TSCEntry& config, bool compute) { if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) { return NULL_SAMPLER_ID; } @@ -1941,69 +1890,48 @@ std::optional TextureCache

::QuerySamplerBudget() const { template void TextureCache

::EnforceSamplerBudget() { - const auto budget = QuerySamplerBudget(); - if (!budget) { - return; + if (auto const budget = QuerySamplerBudget(); budget) { + if (slot_samplers.size() < *budget) { + return; + } + if (!channel_state) { + return; + } + if (last_sampler_gc_frame == frame_tick) { + return; + } + last_sampler_gc_frame = frame_tick; + TrimInactiveSamplers(*budget); } - if (slot_samplers.size() < *budget) { - return; - } - if (!channel_state) { - return; - } - if (last_sampler_gc_frame == frame_tick) { - return; - } - last_sampler_gc_frame = frame_tick; - TrimInactiveSamplers(*budget); } template void TextureCache

::TrimInactiveSamplers(size_t budget) { - if (channel_state->samplers.empty()) { - return; - } - constexpr size_t SAMPLER_GC_SLACK = 1024; - auto mark_active = [](auto& set, SamplerId id) { - if (!id || id == CORRUPT_ID || id == NULL_SAMPLER_ID) { - return; + if (channel_state->samplers.size() > 0) { + constexpr size_t SAMPLER_GC_SLACK = 1024; + ankerl::unordered_dense::set active_sampler_ids; + for (auto const& e : channel_state->sampler_ids) + active_sampler_ids.insert(e.second); + // Elements in the map must necessarily be valid + size_t removed = 0; + for (auto it = channel_state->samplers.begin(); it != channel_state->samplers.end();) { + const SamplerId sampler_id = it->second; + if (!sampler_id || sampler_id == CORRUPT_ID) { + it = channel_state->samplers.erase(it); + } else if (std::ranges::find(active_sampler_ids, sampler_id) != active_sampler_ids.end()) { + ++it; + } else { + slot_samplers.erase(sampler_id); + it = channel_state->samplers.erase(it); + ++removed; + if (slot_samplers.size() + SAMPLER_GC_SLACK <= budget) { + break; + } + } } - set.insert(id); - }; - ankerl::unordered_dense::set active; - active.reserve(channel_state->graphics_sampler_ids.size() + - channel_state->compute_sampler_ids.size()); - for (const SamplerId id : channel_state->graphics_sampler_ids) { - mark_active(active, id); - } - for (const SamplerId id : channel_state->compute_sampler_ids) { - mark_active(active, id); - } - - size_t removed = 0; - auto& sampler_map = channel_state->samplers; - for (auto it = sampler_map.begin(); it != sampler_map.end();) { - const SamplerId sampler_id = it->second; - if (!sampler_id || sampler_id == CORRUPT_ID) { - it = sampler_map.erase(it); - continue; + if (removed != 0) { + LOG_WARNING(HW_GPU, "Sampler cache exceeded {} entries on this driver; reclaimed {} inactive samplers", budget, removed); } - if (active.find(sampler_id) != active.end()) { - ++it; - continue; - } - slot_samplers.erase(sampler_id); - it = sampler_map.erase(it); - ++removed; 
- if (slot_samplers.size() + SAMPLER_GC_SLACK <= budget) { - break; - } - } - - if (removed != 0) { - LOG_WARNING(HW_GPU, - "Sampler cache exceeded {} entries on this driver; reclaimed {} inactive samplers", - budget, removed); } } @@ -2243,8 +2171,7 @@ ImageViewId TextureCache

::FindOrEmplaceImageView(ImageId image_id, const Imag if (const ImageViewId image_view_id = image.FindView(info); image_view_id) { return image_view_id; } - const ImageViewId image_view_id = - slot_image_views.insert(runtime, info, image_id, image, slot_images); + const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image, slot_images); image.InsertView(info, image_view_id); return image_view_id; } @@ -2504,10 +2431,9 @@ void TextureCache

::DeleteImage(ImageId image_id, bool immediate_delete) { } for (size_t c : active_channel_ids) { auto& channel_info = channel_storage[c]; - if constexpr (ENABLE_VALIDATION) { - std::ranges::fill(channel_info.graphics_image_view_ids, CORRUPT_ID); - std::ranges::fill(channel_info.compute_image_view_ids, CORRUPT_ID); - } + if constexpr (ENABLE_VALIDATION) + for (auto& e : channel_info.image_view_ids) + e.second = CORRUPT_ID; channel_info.graphics_image_table.Invalidate(); channel_info.compute_image_table.Invalidate(); } diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 47f52c5c99..fbc2bb4cf7 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include "common/common_types.h" @@ -76,22 +77,20 @@ public: TextureCacheChannelInfo(const TextureCacheChannelInfo& state) = delete; TextureCacheChannelInfo& operator=(const TextureCacheChannelInfo&) = delete; - DescriptorTable graphics_image_table{gpu_memory}; - DescriptorTable graphics_sampler_table{gpu_memory}; - std::vector graphics_sampler_ids; - std::vector graphics_image_view_ids; - - DescriptorTable compute_image_table{gpu_memory}; - DescriptorTable compute_sampler_table{gpu_memory}; - std::vector compute_sampler_ids; - std::vector compute_image_view_ids; + DescriptorTable graphics_image_table; + DescriptorTable graphics_sampler_table; + DescriptorTable compute_image_table; + DescriptorTable compute_sampler_table; // TODO: still relies on bad iterators :( std::unordered_map image_views; std::unordered_map samplers; - TextureCacheGPUMap* gpu_page_table; - TextureCacheGPUMap* sparse_page_table; + ankerl::unordered_dense::map sampler_ids; + ankerl::unordered_dense::map image_view_ids; + + TextureCacheGPUMap* gpu_page_table = nullptr; + TextureCacheGPUMap* sparse_page_table = nullptr; }; template @@ -167,27 +166,17 @@ public: 
/// Mark an image as modified from the GPU void MarkModification(ImageId id) noexcept; - /// Fill image_view_ids with the graphics images in indices - template - void FillGraphicsImageViews(std::span views); - - /// Fill image_view_ids with the compute images in indices - void FillComputeImageViews(std::span views); + /// Fill image_view_ids with the graphics/compute images in indices + void FillImageViews(std::span views, bool compute, bool blacklist = true); /// Handle feedback loops during draws. void CheckFeedbackLoop(std::span views); - /// Get the sampler from the graphics descriptor table in the specified index - Sampler* GetGraphicsSampler(u32 index); + /// Get the sampler from the graphics/compute descriptor table in the specified index + Sampler* GetSampler(u32 index, bool compute); - /// Get the sampler from the compute descriptor table in the specified index - Sampler* GetComputeSampler(u32 index); - - /// Get the sampler id from the graphics descriptor table in the specified index - SamplerId GetGraphicsSamplerId(u32 index); - - /// Get the sampler id from the compute descriptor table in the specified index - SamplerId GetComputeSamplerId(u32 index); + /// Get the sampler id from the graphics/compute descriptor table in the specified index + SamplerId GetSamplerId(u32 index, bool compute); /// Return a constant reference to the given sampler id [[nodiscard]] const Sampler& GetSampler(SamplerId id) const noexcept; @@ -195,11 +184,8 @@ public: /// Return a reference to the given sampler id [[nodiscard]] Sampler& GetSampler(SamplerId id) noexcept; - /// Refresh the state for graphics image view and sampler descriptors - void SynchronizeGraphicsDescriptors(); - - /// Refresh the state for compute image view and sampler descriptors - void SynchronizeComputeDescriptors(); + /// Refresh the state for graphics/compute image view and sampler descriptors + void SynchronizeDescriptors(bool compute); /// Updates the Render Targets if they can be rescaled /// 
@retval True if the Render Targets have been rescaled. @@ -310,15 +296,8 @@ private: /// Runs the Garbage Collector. void RunGarbageCollector(); - /// Fills image_view_ids in the image views in indices - template - void FillImageViews(DescriptorTable& table, - std::span cached_image_view_ids, - std::span views); - /// Find or create an image view in the guest descriptor table - ImageViewId VisitImageView(DescriptorTable& table, - std::span cached_image_view_ids, u32 index); + ImageViewId VisitImageView(u32 index, bool compute); /// Find or create a framebuffer with the given render target parameters FramebufferId GetFramebufferId(const RenderTargets& key); @@ -330,9 +309,6 @@ private: template void UploadImageContents(Image& image, StagingBuffer& staging_buffer); - /// Find or create an image view from a guest descriptor - [[nodiscard]] ImageViewId FindImageView(const TICEntry& config); - /// Create a new image view from a guest descriptor [[nodiscard]] ImageViewId CreateImageView(const TICEntry& config); @@ -360,7 +336,7 @@ private: const Tegra::Engines::Fermi2D::Config& copy); /// Find or create a sampler from a guest descriptor sampler - [[nodiscard]] SamplerId FindSampler(const TSCEntry& config); + [[nodiscard]] SamplerId FindSampler(const TSCEntry& config, bool compute); /// Find or create an image view for the given color buffer index [[nodiscard]] ImageViewId FindColorBuffer(size_t index);