From 71ee160a7bd010ccb61be7455df8521970209611 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Wed, 1 Apr 2026 00:13:41 -0400 Subject: [PATCH 01/10] [maxwell] Refactor execution mask initialization to use fill() instead of reset() --- src/video_core/dma_pusher.cpp | 36 ++++++++++++++- src/video_core/engines/engine_interface.h | 13 +++--- src/video_core/engines/fermi_2d.cpp | 2 +- src/video_core/engines/kepler_compute.cpp | 2 +- src/video_core/engines/kepler_memory.cpp | 2 +- src/video_core/engines/maxwell_3d.cpp | 53 ++++++++++++++++++++--- src/video_core/engines/maxwell_dma.cpp | 2 +- 7 files changed, 94 insertions(+), 16 deletions(-) diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 3844a8e2f9..d6d44e66b4 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -122,7 +122,35 @@ void DmaPusher::ProcessCommands(std::span commands) { dma_state.is_last_call = true; index += max_write; } else if (dma_state.method_count) { - auto const command_header = commands[index]; //can copy + if (!dma_state.non_incrementing && !dma_increment_once && + dma_state.method >= non_puller_methods) { + auto subchannel = subchannels[dma_state.subchannel]; + const u32 available = u32(std::min( + index + dma_state.method_count, commands.size()) - index); + u32 batch = 0; + u32 method = dma_state.method; + while (batch < available) { + const bool needs_exec = + (method < Engines::EngineInterface::EXECUTION_MASK_TABLE_SIZE) + ? 
subchannel->execution_mask[method] + : subchannel->execution_mask_default; + if (needs_exec) break; + batch++; + method++; + } + if (batch > 0) { + auto& sink = subchannel->method_sink; + sink.reserve(sink.size() + batch); + for (u32 j = 0; j < batch; j++) { + sink.emplace_back(dma_state.method + j, commands[index + j].argument); + } + dma_state.method += batch; + dma_state.method_count -= batch; + index += batch; + continue; + } + } + auto const command_header = commands[index]; dma_state.dma_word_offset = u32(index * sizeof(u32)); dma_state.is_last_call = dma_state.method_count <= 1; CallMethod(command_header.argument); @@ -181,7 +209,11 @@ void DmaPusher::CallMethod(u32 argument) const { }); } else { auto subchannel = subchannels[dma_state.subchannel]; - if (!subchannel->execution_mask[dma_state.method]) { + const bool needs_execution = + (dma_state.method < Engines::EngineInterface::EXECUTION_MASK_TABLE_SIZE) + ? subchannel->execution_mask[dma_state.method] + : subchannel->execution_mask_default; + if (!needs_execution) { subchannel->method_sink.emplace_back(dma_state.method, argument); } else { subchannel->ConsumeSink(); diff --git a/src/video_core/engines/engine_interface.h b/src/video_core/engines/engine_interface.h index bf3bd66aca..292f0a5738 100644 --- a/src/video_core/engines/engine_interface.h +++ b/src/video_core/engines/engine_interface.h @@ -6,9 +6,9 @@ #pragma once -#include -#include -#include +#include + +#include #include "common/common_types.h" @@ -41,8 +41,11 @@ public: ConsumeSinkImpl(); } - std::bitset<(std::numeric_limits::max)()> execution_mask{}; - std::vector> method_sink{}; + static constexpr size_t EXECUTION_MASK_TABLE_SIZE = 0xE00; + + std::array execution_mask{}; + bool execution_mask_default{}; + boost::container::small_vector, 64> method_sink{}; bool current_dirty{}; GPUVAddr current_dma_segment; diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index b442c5cc76..11f60ef32b 100644 --- 
a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -26,7 +26,7 @@ Fermi2D::Fermi2D(MemoryManager& memory_manager_) : memory_manager{memory_manager regs.src.depth = 1; regs.dst.depth = 1; - execution_mask.reset(); + execution_mask.fill(0); execution_mask[FERMI2D_REG_INDEX(pixels_from_memory.src_y0) + 1] = true; } diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 7b4efeb1e0..d6ee80f6e2 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -18,7 +18,7 @@ namespace Tegra::Engines { KeplerCompute::KeplerCompute(Core::System& system_, MemoryManager& memory_manager_) : system{system_}, memory_manager{memory_manager_}, upload_state{memory_manager, regs.upload} { - execution_mask.reset(); + execution_mask.fill(0); execution_mask[KEPLER_COMPUTE_REG_INDEX(exec_upload)] = true; execution_mask[KEPLER_COMPUTE_REG_INDEX(data_upload)] = true; execution_mask[KEPLER_COMPUTE_REG_INDEX(launch)] = true; diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 5d4c4720d3..013a644c1b 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp @@ -22,7 +22,7 @@ KeplerMemory::~KeplerMemory() = default; void KeplerMemory::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { upload_state.BindRasterizer(rasterizer_); - execution_mask.reset(); + execution_mask.fill(0); execution_mask[KEPLERMEMORY_REG_INDEX(exec)] = true; execution_mask[KEPLERMEMORY_REG_INDEX(data)] = true; } diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 6d9ebd6296..88869917fd 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -4,8 +4,14 @@ // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include #include + +#if defined(_MSC_VER) 
&& !defined(__clang__) +#include +#endif + #include "common/assert.h" #include "common/bit_util.h" #include "common/scope_exit.h" @@ -22,6 +28,16 @@ namespace Tegra::Engines { +namespace { +inline void PrefetchLine(const void* addr) { +#if defined(_MSC_VER) && !defined(__clang__) + _mm_prefetch(static_cast(addr), _MM_HINT_T0); +#else + __builtin_prefetch(addr, 0, 1); +#endif +} +} // namespace + /// First register id that is actually a Macro call. constexpr u32 MacroRegistersStart = 0xE00; @@ -37,9 +53,10 @@ Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_) { dirty.flags.flip(); InitializeRegisterDefaults(); - execution_mask.reset(); - for (size_t i = 0; i < execution_mask.size(); i++) + execution_mask.fill(0); + for (size_t i = 0; i < EXECUTION_MASK_TABLE_SIZE; i++) execution_mask[i] = IsMethodExecutable(u32(i)); + execution_mask_default = true; } Maxwell3D::~Maxwell3D() = default; @@ -298,18 +315,44 @@ u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) { } void Maxwell3D::ConsumeSinkImpl() { + std::stable_sort(method_sink.begin(), method_sink.end(), + [](const auto& a, const auto& b) { return a.first < b.first; }); + + const auto sink_size = method_sink.size(); const auto control = shadow_state.shadow_ram_control; if (control == Regs::ShadowRamControl::Track || control == Regs::ShadowRamControl::TrackWithFilter) { - for (auto [method, value] : method_sink) { + for (size_t i = 0; i < sink_size; ++i) { + const auto [method, value] = method_sink[i]; + if (i + 1 < sink_size) { + const u32 next = method_sink[i + 1].first; + PrefetchLine(&regs.reg_array[next]); + PrefetchLine(&shadow_state.reg_array[next]); + PrefetchLine(&dirty.tables[0][next]); + } shadow_state.reg_array[method] = value; ProcessDirtyRegisters(method, value); } } else if (control == Regs::ShadowRamControl::Replay) { - for (auto [method, value] : method_sink) + for (size_t i = 0; i < sink_size; ++i) { + const auto [method, value] = method_sink[i]; + if (i + 1 < sink_size) 
{ + const u32 next = method_sink[i + 1].first; + PrefetchLine(&regs.reg_array[next]); + PrefetchLine(&shadow_state.reg_array[next]); + PrefetchLine(&dirty.tables[0][next]); + } ProcessDirtyRegisters(method, shadow_state.reg_array[method]); + } } else { - for (auto [method, value] : method_sink) + for (size_t i = 0; i < sink_size; ++i) { + const auto [method, value] = method_sink[i]; + if (i + 1 < sink_size) { + const u32 next = method_sink[i + 1].first; + PrefetchLine(&regs.reg_array[next]); + PrefetchLine(&dirty.tables[0][next]); + } ProcessDirtyRegisters(method, value); + } } method_sink.clear(); } diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 089d118a09..c99039cda7 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -23,7 +23,7 @@ using namespace Texture; MaxwellDMA::MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_) : system{system_}, memory_manager{memory_manager_} { - execution_mask.reset(); + execution_mask.fill(0); execution_mask[offsetof(Regs, launch_dma) / sizeof(u32)] = true; } From 0949fea64e472995f82f39922e73b3bf8f432949 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Wed, 1 Apr 2026 01:21:55 -0400 Subject: [PATCH 02/10] [maxwell] Removed prefetching for ProcessCommands --- src/video_core/engines/maxwell_3d.cpp | 31 --------------------------- 1 file changed, 31 deletions(-) diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 88869917fd..431da33559 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -8,10 +8,6 @@ #include #include -#if defined(_MSC_VER) && !defined(__clang__) -#include -#endif - #include "common/assert.h" #include "common/bit_util.h" #include "common/scope_exit.h" @@ -28,16 +24,6 @@ namespace Tegra::Engines { -namespace { -inline void PrefetchLine(const void* addr) { -#if defined(_MSC_VER) && !defined(__clang__) - 
_mm_prefetch(static_cast(addr), _MM_HINT_T0); -#else - __builtin_prefetch(addr, 0, 1); -#endif -} -} // namespace - /// First register id that is actually a Macro call. constexpr u32 MacroRegistersStart = 0xE00; @@ -323,34 +309,17 @@ void Maxwell3D::ConsumeSinkImpl() { if (control == Regs::ShadowRamControl::Track || control == Regs::ShadowRamControl::TrackWithFilter) { for (size_t i = 0; i < sink_size; ++i) { const auto [method, value] = method_sink[i]; - if (i + 1 < sink_size) { - const u32 next = method_sink[i + 1].first; - PrefetchLine(&regs.reg_array[next]); - PrefetchLine(&shadow_state.reg_array[next]); - PrefetchLine(&dirty.tables[0][next]); - } shadow_state.reg_array[method] = value; ProcessDirtyRegisters(method, value); } } else if (control == Regs::ShadowRamControl::Replay) { for (size_t i = 0; i < sink_size; ++i) { const auto [method, value] = method_sink[i]; - if (i + 1 < sink_size) { - const u32 next = method_sink[i + 1].first; - PrefetchLine(&regs.reg_array[next]); - PrefetchLine(&shadow_state.reg_array[next]); - PrefetchLine(&dirty.tables[0][next]); - } ProcessDirtyRegisters(method, shadow_state.reg_array[method]); } } else { for (size_t i = 0; i < sink_size; ++i) { const auto [method, value] = method_sink[i]; - if (i + 1 < sink_size) { - const u32 next = method_sink[i + 1].first; - PrefetchLine(&regs.reg_array[next]); - PrefetchLine(&dirty.tables[0][next]); - } ProcessDirtyRegisters(method, value); } } From 1b45b4f37919d931068deefc2946e81344ae2e38 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Wed, 1 Apr 2026 02:10:00 -0400 Subject: [PATCH 03/10] [buffer_cache] Removal of LRU inside buffer cache and replaced with tick operations inside frames. 
--- src/video_core/buffer_cache/buffer_base.h | 10 +++++----- src/video_core/buffer_cache/buffer_cache.h | 20 +++++++++++-------- .../buffer_cache/buffer_cache_base.h | 6 ------ 3 files changed, 17 insertions(+), 19 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h index bec2dac246..f080f4503c 100644 --- a/src/video_core/buffer_cache/buffer_base.h +++ b/src/video_core/buffer_cache/buffer_base.h @@ -109,12 +109,12 @@ public: return static_cast(other_cpu_addr - cpu_addr); } - size_t getLRUID() const noexcept { - return lru_id; + u64 GetFrameTick() const noexcept { + return frame_tick; } - void setLRUID(size_t lru_id_) { - lru_id = lru_id_; + void SetFrameTick(u64 tick) noexcept { + frame_tick = tick; } size_t SizeBytes() const { @@ -125,7 +125,7 @@ private: VAddr cpu_addr = 0; BufferFlagBits flags{}; int stream_score = 0; - size_t lru_id = SIZE_MAX; + u64 frame_tick = 0; size_t size_bytes = 0; }; diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 014b4a318e..672ecb80eb 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -58,17 +58,22 @@ void BufferCache

::RunGarbageCollector() { const bool aggressive_gc = total_used_memory >= critical_memory; const u64 ticks_to_destroy = aggressive_gc ? 60 : 120; int num_iterations = aggressive_gc ? 64 : 32; - const auto clean_up = [this, &num_iterations](BufferId buffer_id) { + const u64 threshold = frame_tick - ticks_to_destroy; + boost::container::small_vector expired; + for (auto [id, buffer] : slot_buffers) { + if (buffer.GetFrameTick() < threshold) { + expired.push_back(id); + } + } + for (const auto buffer_id : expired) { if (num_iterations == 0) { - return true; + break; } --num_iterations; auto& buffer = slot_buffers[buffer_id]; DownloadBufferMemory(buffer); DeleteBuffer(buffer_id); - return false; - }; - lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up); + } } template @@ -1595,10 +1600,9 @@ void BufferCache

::ChangeRegister(BufferId buffer_id) { const auto size = buffer.SizeBytes(); if (insert) { total_used_memory += Common::AlignUp(size, 1024); - buffer.setLRUID(lru_cache.Insert(buffer_id, frame_tick)); + buffer.SetFrameTick(frame_tick); } else { total_used_memory -= Common::AlignUp(size, 1024); - lru_cache.Free(buffer.getLRUID()); } const DAddr device_addr_begin = buffer.CpuAddr(); const DAddr device_addr_end = device_addr_begin + size; @@ -1616,7 +1620,7 @@ void BufferCache

::ChangeRegister(BufferId buffer_id) { template void BufferCache

::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept { if (buffer_id != NULL_BUFFER_ID) { - lru_cache.Touch(buffer.getLRUID(), frame_tick); + buffer.SetFrameTick(frame_tick); } } diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index 08524bd854..b17fed1b6d 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h @@ -23,7 +23,6 @@ #include "common/common_types.h" #include "common/div_ceil.h" #include "common/literals.h" -#include "common/lru_cache.h" #include "common/range_sets.h" #include "common/scope_exit.h" #include "common/settings.h" @@ -506,11 +505,6 @@ private: size_t immediate_buffer_capacity = 0; Common::ScratchBuffer immediate_buffer_alloc; - struct LRUItemParams { - using ObjectType = BufferId; - using TickType = u64; - }; - Common::LeastRecentlyUsedCache lru_cache; u64 frame_tick = 0; u64 total_used_memory = 0; u64 minimum_memory = 0; From f59e3f8d57da7a1abd07aa59c5ff2a337345b643 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Wed, 1 Apr 2026 02:18:59 -0400 Subject: [PATCH 04/10] I got meowed by Gidoly --- src/video_core/buffer_cache/buffer_cache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 672ecb80eb..422e542421 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -61,7 +61,7 @@ void BufferCache

::RunGarbageCollector() { const u64 threshold = frame_tick - ticks_to_destroy; boost::container::small_vector expired; for (auto [id, buffer] : slot_buffers) { - if (buffer.GetFrameTick() < threshold) { + if (buffer->GetFrameTick() < threshold) { expired.push_back(id); } } From 524eda6b80948bc4180cda83980639f22ad3eaad Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Thu, 2 Apr 2026 23:24:41 -0400 Subject: [PATCH 05/10] [texture_cache] Replace LRU index with frame tick in ImageBase + update garbage collection logic --- src/common/lru_cache.h | 139 ------------------ src/video_core/texture_cache/image_base.h | 2 +- src/video_core/texture_cache/texture_cache.h | 45 ++++-- .../texture_cache/texture_cache_base.h | 8 +- 4 files changed, 37 insertions(+), 157 deletions(-) delete mode 100644 src/common/lru_cache.h diff --git a/src/common/lru_cache.h b/src/common/lru_cache.h deleted file mode 100644 index 36cea5d27e..0000000000 --- a/src/common/lru_cache.h +++ /dev/null @@ -1,139 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include -#include -#include - -#include "common/common_types.h" - -namespace Common { - -template -class LeastRecentlyUsedCache { - using ObjectType = typename Traits::ObjectType; - using TickType = typename Traits::TickType; - - struct Item { - ObjectType obj; - TickType tick; - Item* next{}; - Item* prev{}; - }; - -public: - LeastRecentlyUsedCache() : first_item{}, last_item{} {} - ~LeastRecentlyUsedCache() = default; - - size_t Insert(ObjectType obj, TickType tick) { - const auto new_id = Build(); - auto& item = item_pool[new_id]; - item.obj = obj; - item.tick = tick; - Attach(item); - return new_id; - } - - void Touch(size_t id, TickType tick) { - auto& item = item_pool[id]; - if (item.tick >= tick) { - return; - } - item.tick = tick; - if (&item == last_item) { - return; - } - Detach(item); - Attach(item); - } - - void Free(size_t id) { - auto& item = 
item_pool[id]; - Detach(item); - item.prev = nullptr; - item.next = nullptr; - free_items.push_back(id); - } - - template - void ForEachItemBelow(TickType tick, Func&& func) { - static constexpr bool RETURNS_BOOL = - std::is_same_v, bool>; - Item* iterator = first_item; - while (iterator) { - if (static_cast(tick) - static_cast(iterator->tick) < 0) { - return; - } - Item* next = iterator->next; - if constexpr (RETURNS_BOOL) { - if (func(iterator->obj)) { - return; - } - } else { - func(iterator->obj); - } - iterator = next; - } - } - -private: - size_t Build() { - if (free_items.empty()) { - const size_t item_id = item_pool.size(); - auto& item = item_pool.emplace_back(); - item.next = nullptr; - item.prev = nullptr; - return item_id; - } - const size_t item_id = free_items.front(); - free_items.pop_front(); - auto& item = item_pool[item_id]; - item.next = nullptr; - item.prev = nullptr; - return item_id; - } - - void Attach(Item& item) { - if (!first_item) { - first_item = &item; - } - if (!last_item) { - last_item = &item; - } else { - item.prev = last_item; - last_item->next = &item; - item.next = nullptr; - last_item = &item; - } - } - - void Detach(Item& item) { - if (item.prev) { - item.prev->next = item.next; - } - if (item.next) { - item.next->prev = item.prev; - } - if (&item == first_item) { - first_item = item.next; - if (first_item) { - first_item->prev = nullptr; - } - } - if (&item == last_item) { - last_item = item.prev; - if (last_item) { - last_item->next = nullptr; - } - } - } - - std::deque item_pool; - std::deque free_items; - Item* first_item{}; - Item* last_item{}; -}; - -} // namespace Common diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 0587d7b724..e121361f9b 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -102,7 +102,7 @@ struct ImageBase { VAddr cpu_addr_end = 0; u64 modification_tick = 0; - size_t lru_index = SIZE_MAX; + 
u64 last_use_tick = 0; std::array mip_level_offsets{}; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 71210ffe6e..0784bf6339 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -196,11 +196,21 @@ void TextureCache

::RunGarbageCollector() { return false; }; + const auto CollectBelow = [this](u64 threshold) { + boost::container::small_vector expired; + for (auto [id, image] : slot_images) { + if (image->last_use_tick < threshold) { + expired.push_back(id); + } + } + return expired; + }; + // Aggressively clear massive sparse textures if (total_used_memory >= expected_memory) { - lru_cache.ForEachItemBelow(frame_tick, [&](ImageId image_id) { + auto candidates = CollectBelow(frame_tick); + for (const auto image_id : candidates) { auto& image = slot_images[image_id]; - // Only target sparse textures that are old enough if (lowmemorydevice && image.info.is_sparse && image.guest_size_bytes >= 256_MiB && @@ -208,19 +218,32 @@ void TextureCache

::RunGarbageCollector() { LOG_DEBUG(HW_GPU, "GC targeting old sparse texture at 0x{:X} ({} MiB, age: {} frames)", image.gpu_addr, image.guest_size_bytes / (1024 * 1024), frame_tick - image.allocation_tick); - return Cleanup(image_id); + if (Cleanup(image_id)) { + break; + } } - return false; - }); + } } Configure(false); - lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, Cleanup); + { + auto expired = CollectBelow(frame_tick - ticks_to_destroy); + for (const auto image_id : expired) { + if (Cleanup(image_id)) { + break; + } + } + } // If pressure is still too high, prune aggressively. if (total_used_memory >= critical_memory) { Configure(true); - lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, Cleanup); + auto expired = CollectBelow(frame_tick - ticks_to_destroy); + for (const auto image_id : expired) { + if (Cleanup(image_id)) { + break; + } + } } } @@ -2028,7 +2051,7 @@ std::pair TextureCache

::PrepareDmaImage(ImageId dst_id, GPUVAddr ba const auto base = image.TryFindBase(base_addr); PrepareImage(dst_id, mark_as_modified, false); const auto& new_image = slot_images[dst_id]; - lru_cache.Touch(new_image.lru_index, frame_tick); + new_image.last_use_tick = frame_tick; return std::make_pair(base->level, base->layer); } @@ -2377,7 +2400,7 @@ void TextureCache

::RegisterImage(ImageId image_id) { tentative_size = TranscodedAstcSize(tentative_size, image.info.format); } total_used_memory += Common::AlignUp(tentative_size, 1024); - image.lru_index = lru_cache.Insert(image_id, frame_tick); + image.last_use_tick = frame_tick; ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { (*channel_state->gpu_page_table)[page].push_back(image_id); @@ -2411,7 +2434,7 @@ void TextureCache

::UnregisterImage(ImageId image_id) { "Trying to unregister an already registered image"); image.flags &= ~ImageFlagBits::Registered; image.flags &= ~ImageFlagBits::BadOverlap; - lru_cache.Free(image.lru_index); + const auto& clear_page_table = [image_id](u64 page, ankerl::unordered_dense::map, Common::IdentityHash>& selected_page_table) { const auto page_it = selected_page_table.find(page); @@ -2738,7 +2761,7 @@ void TextureCache

::PrepareImage(ImageId image_id, bool is_modification, bool if (is_modification) { MarkModification(image); } - lru_cache.Touch(image.lru_index, frame_tick); + image.last_use_tick = frame_tick; } template diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 4b4061f21d..ba2af1bf44 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -22,7 +22,7 @@ #include "common/common_types.h" #include "common/hash.h" #include "common/literals.h" -#include "common/lru_cache.h" + #include #include "common/scratch_buffer.h" #include "common/slot_vector.h" @@ -510,11 +510,7 @@ private: std::deque> async_buffers; std::deque async_buffers_death_ring; - struct LRUItemParams { - using ObjectType = ImageId; - using TickType = u64; - }; - Common::LeastRecentlyUsedCache lru_cache; + #ifdef YUZU_LEGACY static constexpr size_t TICKS_TO_DESTROY = 6; From 0af0893754c5f64fc523ce448492d0bf48ffdc3e Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Thu, 2 Apr 2026 23:50:34 -0400 Subject: [PATCH 06/10] Gido MEOW --- src/video_core/texture_cache/image_base.h | 3 +++ src/video_core/texture_cache/texture_cache.h | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index e121361f9b..14b30d5d5b 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 0784bf6339..b767b6b262 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ 
-2050,7 +2050,7 @@ std::pair TextureCache

::PrepareDmaImage(ImageId dst_id, GPUVAddr ba const auto& image = slot_images[dst_id]; const auto base = image.TryFindBase(base_addr); PrepareImage(dst_id, mark_as_modified, false); - const auto& new_image = slot_images[dst_id]; + auto& new_image = slot_images[dst_id]; new_image.last_use_tick = frame_tick; return std::make_pair(base->level, base->layer); } From 0c55c57ed9e1c0e32f39ba14d4bc8aaf242076af Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Fri, 3 Apr 2026 00:07:04 -0400 Subject: [PATCH 07/10] small fix for the softlock after lru cache removal --- src/video_core/texture_cache/texture_cache.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index b767b6b262..5165f78749 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -199,7 +199,8 @@ void TextureCache

::RunGarbageCollector() { const auto CollectBelow = [this](u64 threshold) { boost::container::small_vector expired; for (auto [id, image] : slot_images) { - if (image->last_use_tick < threshold) { + if (True(image->flags & ImageFlagBits::Registered) && + image->last_use_tick < threshold) { expired.push_back(id); } } @@ -226,7 +227,7 @@ void TextureCache

::RunGarbageCollector() { } Configure(false); - { + if (frame_tick > ticks_to_destroy) { auto expired = CollectBelow(frame_tick - ticks_to_destroy); for (const auto image_id : expired) { if (Cleanup(image_id)) { @@ -238,10 +239,12 @@ void TextureCache

::RunGarbageCollector() { // If pressure is still too high, prune aggressively. if (total_used_memory >= critical_memory) { Configure(true); - auto expired = CollectBelow(frame_tick - ticks_to_destroy); - for (const auto image_id : expired) { - if (Cleanup(image_id)) { - break; + if (frame_tick > ticks_to_destroy) { + auto expired = CollectBelow(frame_tick - ticks_to_destroy); + for (const auto image_id : expired) { + if (Cleanup(image_id)) { + break; + } } } } From c4bdeae58650c93e464b13fb1fcf8a9fccebfad6 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Fri, 3 Apr 2026 12:52:22 -0400 Subject: [PATCH 08/10] [texture_cache] Reduce garbage collection logic by simplifying conditions and thresholds --- src/video_core/texture_cache/texture_cache.h | 91 +++++++++----------- 1 file changed, 39 insertions(+), 52 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 5165f78749..eae1c11ce7 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -118,17 +118,10 @@ TextureCache

::TextureCache(Runtime& runtime_, Tegra::MaxwellDeviceMemoryManag template void TextureCache

::RunGarbageCollector() { - bool high_priority_mode = false; + bool high_priority_mode = total_used_memory >= expected_memory; bool aggressive_mode = false; - u64 ticks_to_destroy = 0; - size_t num_iterations = 0; - - const auto Configure = [&](bool allow_aggressive) { - high_priority_mode = total_used_memory >= expected_memory; - aggressive_mode = allow_aggressive && total_used_memory >= critical_memory; - ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 50ULL; - num_iterations = aggressive_mode ? 40 : (high_priority_mode ? 20 : 10); - }; + u64 ticks_to_destroy = high_priority_mode ? 25ULL : 50ULL; + size_t num_iterations = high_priority_mode ? 20 : 10; const auto Cleanup = [this, &num_iterations, &high_priority_mode, &aggressive_mode](ImageId image_id) { @@ -145,12 +138,9 @@ void TextureCache

::RunGarbageCollector() { } if (True(image.flags & ImageFlagBits::IsDecoding)) { - // This image is still being decoded, deleting it will invalidate the slot - // used by the async decoder thread. return false; } - // Prioritize large sparse textures for cleanup const bool is_large_sparse = lowmemorydevice && image.info.is_sparse && image.guest_size_bytes >= 256_MiB; @@ -183,7 +173,6 @@ void TextureCache

::RunGarbageCollector() { if (total_used_memory < critical_memory) { if (aggressive_mode) { - // Sink the aggresiveness. num_iterations >>= 2; aggressive_mode = false; return false; @@ -196,55 +185,53 @@ void TextureCache

::RunGarbageCollector() { return false; }; - const auto CollectBelow = [this](u64 threshold) { - boost::container::small_vector expired; - for (auto [id, image] : slot_images) { - if (True(image->flags & ImageFlagBits::Registered) && - image->last_use_tick < threshold) { - expired.push_back(id); - } - } - return expired; - }; + // Single pass: collect all candidates, classified by tier + const u64 normal_threshold = frame_tick > ticks_to_destroy ? frame_tick - ticks_to_destroy : 0; + const u64 aggressive_threshold = frame_tick > 10 ? frame_tick - 10 : 0; + boost::container::small_vector sparse_candidates; + boost::container::small_vector expired; + boost::container::small_vector aggressive_expired; - // Aggressively clear massive sparse textures - if (total_used_memory >= expected_memory) { - auto candidates = CollectBelow(frame_tick); - for (const auto image_id : candidates) { - auto& image = slot_images[image_id]; - if (lowmemorydevice && - image.info.is_sparse && - image.guest_size_bytes >= 256_MiB && - image.allocation_tick < frame_tick - 3) { - LOG_DEBUG(HW_GPU, "GC targeting old sparse texture at 0x{:X} ({} MiB, age: {} frames)", - image.gpu_addr, image.guest_size_bytes / (1024 * 1024), - frame_tick - image.allocation_tick); - if (Cleanup(image_id)) { - break; - } - } + for (auto [id, image] : slot_images) { + if (False(image->flags & ImageFlagBits::Registered)) { + continue; + } + const u64 tick = image->last_use_tick; + if (tick < normal_threshold) { + expired.push_back(id); + } else if (tick < aggressive_threshold) { + aggressive_expired.push_back(id); + } else if (high_priority_mode && tick < frame_tick && + lowmemorydevice && image->info.is_sparse && + image->guest_size_bytes >= 256_MiB) { + sparse_candidates.push_back(id); } } - Configure(false); - if (frame_tick > ticks_to_destroy) { - auto expired = CollectBelow(frame_tick - ticks_to_destroy); - for (const auto image_id : expired) { + // Tier 1: large sparse textures under memory pressure + for (const 
auto image_id : sparse_candidates) { + auto& image = slot_images[image_id]; + if (image.allocation_tick < frame_tick - 3) { if (Cleanup(image_id)) { break; } } } - // If pressure is still too high, prune aggressively. + // Tier 2: normal expiration + for (const auto image_id : expired) { + if (Cleanup(image_id)) { + break; + } + } + + // Tier 3: if still critical, use aggressive threshold with more iterations if (total_used_memory >= critical_memory) { - Configure(true); - if (frame_tick > ticks_to_destroy) { - auto expired = CollectBelow(frame_tick - ticks_to_destroy); - for (const auto image_id : expired) { - if (Cleanup(image_id)) { - break; - } + aggressive_mode = true; + num_iterations = 40; + for (const auto image_id : aggressive_expired) { + if (Cleanup(image_id)) { + break; } } } From b5151948d2c1a1d12ae6b2ce05b1e8f66b4242ea Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Fri, 3 Apr 2026 23:53:49 -0400 Subject: [PATCH 09/10] [texture_cache] Adjusted GC logic for the iterations with older or obsolete textures --- src/video_core/texture_cache/texture_cache.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index eae1c11ce7..3d5a4a153b 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -6,6 +6,7 @@ #pragma once +#include #include #include #include @@ -128,7 +129,7 @@ void TextureCache

::RunGarbageCollector() { if (num_iterations == 0) { return true; } - --num_iterations; + auto& image = slot_images[image_id]; // Never delete recently allocated sparse textures (within 3 frames) @@ -156,6 +157,8 @@ void TextureCache

::RunGarbageCollector() { return false; } + --num_iterations; + if (must_download && !is_large_sparse) { auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); const auto copies = FixSmallVectorADL(FullDownloadCopies(image.info)); @@ -185,6 +188,12 @@ void TextureCache

::RunGarbageCollector() { return false; }; + const auto SortByAge = [this](auto& vec) { + std::sort(vec.begin(), vec.end(), [this](ImageId a, ImageId b) { + return slot_images[a].last_use_tick < slot_images[b].last_use_tick; + }); + }; + // Single pass: collect all candidates, classified by tier const u64 normal_threshold = frame_tick > ticks_to_destroy ? frame_tick - ticks_to_destroy : 0; const u64 aggressive_threshold = frame_tick > 10 ? frame_tick - 10 : 0; @@ -208,6 +217,9 @@ void TextureCache

::RunGarbageCollector() { } } + SortByAge(expired); + SortByAge(aggressive_expired); + // Tier 1: large sparse textures under memory pressure for (const auto image_id : sparse_candidates) { auto& image = slot_images[image_id]; From 9e078aaffb4d727a735177375bd8bd39091ec69a Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Sat, 4 Apr 2026 18:40:14 -0400 Subject: [PATCH 10/10] fix license headers+ --- src/video_core/dma_pusher.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index d6d44e66b4..1469aef6ef 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project