From ee124f328400ad4f59d19e40d81ca167d8de555b Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Wed, 1 Apr 2026 00:13:41 -0400 Subject: [PATCH 01/12] [maxwell] Refactor execution mask initialization to use fill() instead of reset() --- src/video_core/dma_pusher.cpp | 36 ++++++++++++++- src/video_core/engines/engine_interface.h | 13 +++--- src/video_core/engines/fermi_2d.cpp | 2 +- src/video_core/engines/kepler_compute.cpp | 2 +- src/video_core/engines/kepler_memory.cpp | 2 +- src/video_core/engines/maxwell_3d.cpp | 53 ++++++++++++++++++++--- src/video_core/engines/maxwell_dma.cpp | 2 +- 7 files changed, 94 insertions(+), 16 deletions(-) diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 3844a8e2f9..d6d44e66b4 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -122,7 +122,35 @@ void DmaPusher::ProcessCommands(std::span commands) { dma_state.is_last_call = true; index += max_write; } else if (dma_state.method_count) { - auto const command_header = commands[index]; //can copy + if (!dma_state.non_incrementing && !dma_increment_once && + dma_state.method >= non_puller_methods) { + auto subchannel = subchannels[dma_state.subchannel]; + const u32 available = u32(std::min( + index + dma_state.method_count, commands.size()) - index); + u32 batch = 0; + u32 method = dma_state.method; + while (batch < available) { + const bool needs_exec = + (method < Engines::EngineInterface::EXECUTION_MASK_TABLE_SIZE) + ? subchannel->execution_mask[method] + : subchannel->execution_mask_default; + if (needs_exec) break; + batch++; + method++; + } + if (batch > 0) { + auto& sink = subchannel->method_sink; + sink.reserve(sink.size() + batch); + for (u32 j = 0; j < batch; j++) { + sink.emplace_back(dma_state.method + j, commands[index + j].argument); + } + dma_state.method += batch; + dma_state.method_count -= batch; + index += batch; + continue; + } + } + auto const command_header = commands[index]; dma_state.dma_word_offset = u32(index * sizeof(u32)); dma_state.is_last_call = dma_state.method_count <= 1; CallMethod(command_header.argument); @@ -181,7 +209,11 @@ void DmaPusher::CallMethod(u32 argument) const { }); } else { auto subchannel = subchannels[dma_state.subchannel]; - if (!subchannel->execution_mask[dma_state.method]) { + const bool needs_execution = + (dma_state.method < Engines::EngineInterface::EXECUTION_MASK_TABLE_SIZE) + ? subchannel->execution_mask[dma_state.method] + : subchannel->execution_mask_default; + if (!needs_execution) { subchannel->method_sink.emplace_back(dma_state.method, argument); } else { subchannel->ConsumeSink(); diff --git a/src/video_core/engines/engine_interface.h b/src/video_core/engines/engine_interface.h index bf3bd66aca..292f0a5738 100644 --- a/src/video_core/engines/engine_interface.h +++ b/src/video_core/engines/engine_interface.h @@ -6,9 +6,9 @@ #pragma once -#include -#include -#include +#include + +#include #include "common/common_types.h" @@ -41,8 +41,11 @@ public: ConsumeSinkImpl(); } - std::bitset<(std::numeric_limits::max)()> execution_mask{}; - std::vector> method_sink{}; + static constexpr size_t EXECUTION_MASK_TABLE_SIZE = 0xE00; + + std::array execution_mask{}; + bool execution_mask_default{}; + boost::container::small_vector, 64> method_sink{}; bool current_dirty{}; GPUVAddr current_dma_segment; diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index b442c5cc76..11f60ef32b 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -26,7 +26,7 @@ Fermi2D::Fermi2D(MemoryManager& memory_manager_) : memory_manager{memory_manager regs.src.depth = 1; regs.dst.depth = 1; - execution_mask.reset(); + execution_mask.fill(0); execution_mask[FERMI2D_REG_INDEX(pixels_from_memory.src_y0) + 1] = true; } diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 7b4efeb1e0..d6ee80f6e2 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -18,7 +18,7 @@ namespace Tegra::Engines { KeplerCompute::KeplerCompute(Core::System& system_, MemoryManager& memory_manager_) : system{system_}, memory_manager{memory_manager_}, upload_state{memory_manager, regs.upload} { - execution_mask.reset(); + execution_mask.fill(0); execution_mask[KEPLER_COMPUTE_REG_INDEX(exec_upload)] = true; execution_mask[KEPLER_COMPUTE_REG_INDEX(data_upload)] = true; execution_mask[KEPLER_COMPUTE_REG_INDEX(launch)] = true; diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 5d4c4720d3..013a644c1b 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp @@ -22,7 +22,7 @@ KeplerMemory::~KeplerMemory() = default; void KeplerMemory::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { upload_state.BindRasterizer(rasterizer_); - execution_mask.reset(); + execution_mask.fill(0); execution_mask[KEPLERMEMORY_REG_INDEX(exec)] = true; execution_mask[KEPLERMEMORY_REG_INDEX(data)] = true; } diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 6d9ebd6296..88869917fd 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -4,8 +4,14 @@ // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include #include + +#if defined(_MSC_VER) && !defined(__clang__) +#include +#endif + #include "common/assert.h" #include "common/bit_util.h" #include "common/scope_exit.h" @@ -22,6 +28,16 @@ namespace Tegra::Engines { +namespace { +inline void PrefetchLine(const void* addr) { +#if defined(_MSC_VER) && !defined(__clang__) + _mm_prefetch(static_cast(addr), _MM_HINT_T0); +#else + __builtin_prefetch(addr, 0, 1); +#endif +} +} // namespace + /// First register id that is actually a Macro call. constexpr u32 MacroRegistersStart = 0xE00; @@ -37,9 +53,10 @@ Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_) { dirty.flags.flip(); InitializeRegisterDefaults(); - execution_mask.reset(); - for (size_t i = 0; i < execution_mask.size(); i++) + execution_mask.fill(0); + for (size_t i = 0; i < EXECUTION_MASK_TABLE_SIZE; i++) execution_mask[i] = IsMethodExecutable(u32(i)); + execution_mask_default = true; } Maxwell3D::~Maxwell3D() = default; @@ -298,18 +315,44 @@ u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) { } void Maxwell3D::ConsumeSinkImpl() { + std::stable_sort(method_sink.begin(), method_sink.end(), + [](const auto& a, const auto& b) { return a.first < b.first; }); + + const auto sink_size = method_sink.size(); const auto control = shadow_state.shadow_ram_control; if (control == Regs::ShadowRamControl::Track || control == Regs::ShadowRamControl::TrackWithFilter) { - for (auto [method, value] : method_sink) { + for (size_t i = 0; i < sink_size; ++i) { + const auto [method, value] = method_sink[i]; + if (i + 1 < sink_size) { + const u32 next = method_sink[i + 1].first; + PrefetchLine(®s.reg_array[next]); + PrefetchLine(&shadow_state.reg_array[next]); + PrefetchLine(&dirty.tables[0][next]); + } shadow_state.reg_array[method] = value; ProcessDirtyRegisters(method, value); } } else if (control == Regs::ShadowRamControl::Replay) { - for (auto [method, value] : method_sink) + for (size_t i = 0; i < sink_size; ++i) { + const auto [method, value] = method_sink[i]; + if (i + 1 < sink_size) { + const u32 next = method_sink[i + 1].first; + PrefetchLine(®s.reg_array[next]); + PrefetchLine(&shadow_state.reg_array[next]); + PrefetchLine(&dirty.tables[0][next]); + } ProcessDirtyRegisters(method, shadow_state.reg_array[method]); + } } else { - for (auto [method, value] : method_sink) + for (size_t i = 0; i < sink_size; ++i) { + const auto [method, value] = method_sink[i]; + if (i + 1 < sink_size) { + const u32 next = method_sink[i + 1].first; + PrefetchLine(®s.reg_array[next]); + PrefetchLine(&dirty.tables[0][next]); + } ProcessDirtyRegisters(method, value); + } } method_sink.clear(); } diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 089d118a09..c99039cda7 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -23,7 +23,7 @@ using namespace Texture; MaxwellDMA::MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_) : system{system_}, memory_manager{memory_manager_} { - execution_mask.reset(); + execution_mask.fill(0); execution_mask[offsetof(Regs, launch_dma) / sizeof(u32)] = true; } From 6fa854001db45c6a1c046ab61bd1afb346f25c0d Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Wed, 1 Apr 2026 01:21:55 -0400 Subject: [PATCH 02/12] [maxwell] Removed prefetching for ProcessCommands --- src/video_core/engines/maxwell_3d.cpp | 31 --------------------------- 1 file changed, 31 deletions(-) diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 88869917fd..431da33559 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -8,10 +8,6 @@ #include #include -#if defined(_MSC_VER) && !defined(__clang__) -#include -#endif - #include "common/assert.h" #include "common/bit_util.h" #include "common/scope_exit.h" @@ -28,16 +24,6 @@ namespace Tegra::Engines { -namespace { -inline void PrefetchLine(const void* addr) { -#if defined(_MSC_VER) && !defined(__clang__) - _mm_prefetch(static_cast(addr), _MM_HINT_T0); -#else - __builtin_prefetch(addr, 0, 1); -#endif -} -} // namespace - /// First register id that is actually a Macro call. constexpr u32 MacroRegistersStart = 0xE00; @@ -323,34 +309,17 @@ void Maxwell3D::ConsumeSinkImpl() { if (control == Regs::ShadowRamControl::Track || control == Regs::ShadowRamControl::TrackWithFilter) { for (size_t i = 0; i < sink_size; ++i) { const auto [method, value] = method_sink[i]; - if (i + 1 < sink_size) { - const u32 next = method_sink[i + 1].first; - PrefetchLine(®s.reg_array[next]); - PrefetchLine(&shadow_state.reg_array[next]); - PrefetchLine(&dirty.tables[0][next]); - } shadow_state.reg_array[method] = value; ProcessDirtyRegisters(method, value); } } else if (control == Regs::ShadowRamControl::Replay) { for (size_t i = 0; i < sink_size; ++i) { const auto [method, value] = method_sink[i]; - if (i + 1 < sink_size) { - const u32 next = method_sink[i + 1].first; - PrefetchLine(®s.reg_array[next]); - PrefetchLine(&shadow_state.reg_array[next]); - PrefetchLine(&dirty.tables[0][next]); - } ProcessDirtyRegisters(method, shadow_state.reg_array[method]); } } else { for (size_t i = 0; i < sink_size; ++i) { const auto [method, value] = method_sink[i]; - if (i + 1 < sink_size) { - const u32 next = method_sink[i + 1].first; - PrefetchLine(®s.reg_array[next]); - PrefetchLine(&dirty.tables[0][next]); - } ProcessDirtyRegisters(method, value); } } From fb53c236b259921462266436f3a7362b84753e54 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Wed, 1 Apr 2026 02:10:00 -0400 Subject: [PATCH 03/12] [buffer_cache] Removal of LRU inside buffer cache and replaced with tick operations inside frames. --- src/video_core/buffer_cache/buffer_base.h | 10 +++++----- src/video_core/buffer_cache/buffer_cache.h | 20 +++++++++++-------- .../buffer_cache/buffer_cache_base.h | 6 ------ 3 files changed, 17 insertions(+), 19 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h index bec2dac246..f080f4503c 100644 --- a/src/video_core/buffer_cache/buffer_base.h +++ b/src/video_core/buffer_cache/buffer_base.h @@ -109,12 +109,12 @@ public: return static_cast(other_cpu_addr - cpu_addr); } - size_t getLRUID() const noexcept { - return lru_id; + u64 GetFrameTick() const noexcept { + return frame_tick; } - void setLRUID(size_t lru_id_) { - lru_id = lru_id_; + void SetFrameTick(u64 tick) noexcept { + frame_tick = tick; } size_t SizeBytes() const { @@ -125,7 +125,7 @@ private: VAddr cpu_addr = 0; BufferFlagBits flags{}; int stream_score = 0; - size_t lru_id = SIZE_MAX; + u64 frame_tick = 0; size_t size_bytes = 0; }; diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 014b4a318e..672ecb80eb 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -58,17 +58,22 @@ void BufferCache

::RunGarbageCollector() { const bool aggressive_gc = total_used_memory >= critical_memory; const u64 ticks_to_destroy = aggressive_gc ? 60 : 120; int num_iterations = aggressive_gc ? 64 : 32; - const auto clean_up = [this, &num_iterations](BufferId buffer_id) { + const u64 threshold = frame_tick - ticks_to_destroy; + boost::container::small_vector expired; + for (auto [id, buffer] : slot_buffers) { + if (buffer.GetFrameTick() < threshold) { + expired.push_back(id); + } + } + for (const auto buffer_id : expired) { if (num_iterations == 0) { - return true; + break; } --num_iterations; auto& buffer = slot_buffers[buffer_id]; DownloadBufferMemory(buffer); DeleteBuffer(buffer_id); - return false; - }; - lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up); + } } template @@ -1595,10 +1600,9 @@ void BufferCache

::ChangeRegister(BufferId buffer_id) { const auto size = buffer.SizeBytes(); if (insert) { total_used_memory += Common::AlignUp(size, 1024); - buffer.setLRUID(lru_cache.Insert(buffer_id, frame_tick)); + buffer.SetFrameTick(frame_tick); } else { total_used_memory -= Common::AlignUp(size, 1024); - lru_cache.Free(buffer.getLRUID()); } const DAddr device_addr_begin = buffer.CpuAddr(); const DAddr device_addr_end = device_addr_begin + size; @@ -1616,7 +1620,7 @@ void BufferCache

::ChangeRegister(BufferId buffer_id) { template void BufferCache

::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept { if (buffer_id != NULL_BUFFER_ID) { - lru_cache.Touch(buffer.getLRUID(), frame_tick); + buffer.SetFrameTick(frame_tick); } } diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index 08524bd854..b17fed1b6d 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h @@ -23,7 +23,6 @@ #include "common/common_types.h" #include "common/div_ceil.h" #include "common/literals.h" -#include "common/lru_cache.h" #include "common/range_sets.h" #include "common/scope_exit.h" #include "common/settings.h" @@ -506,11 +505,6 @@ private: size_t immediate_buffer_capacity = 0; Common::ScratchBuffer immediate_buffer_alloc; - struct LRUItemParams { - using ObjectType = BufferId; - using TickType = u64; - }; - Common::LeastRecentlyUsedCache lru_cache; u64 frame_tick = 0; u64 total_used_memory = 0; u64 minimum_memory = 0; From 1d844296f400843879016b31c7d90f7968b8f2e7 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Wed, 1 Apr 2026 02:18:59 -0400 Subject: [PATCH 04/12] I got meowed by Gidoly --- src/video_core/buffer_cache/buffer_cache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 672ecb80eb..422e542421 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -61,7 +61,7 @@ void BufferCache

::RunGarbageCollector() { const u64 threshold = frame_tick - ticks_to_destroy; boost::container::small_vector expired; for (auto [id, buffer] : slot_buffers) { - if (buffer.GetFrameTick() < threshold) { + if (buffer->GetFrameTick() < threshold) { expired.push_back(id); } } From df113ea18bef3ccef9a08be67130d7d628bafbdd Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Thu, 2 Apr 2026 23:24:41 -0400 Subject: [PATCH 05/12] [texture_cache] Replace LRU index with frame tick in ImageBase + update garbage collection logic --- src/common/lru_cache.h | 139 ------------------ src/video_core/texture_cache/image_base.h | 2 +- src/video_core/texture_cache/texture_cache.h | 45 ++++-- .../texture_cache/texture_cache_base.h | 8 +- 4 files changed, 37 insertions(+), 157 deletions(-) delete mode 100644 src/common/lru_cache.h diff --git a/src/common/lru_cache.h b/src/common/lru_cache.h deleted file mode 100644 index 36cea5d27e..0000000000 --- a/src/common/lru_cache.h +++ /dev/null @@ -1,139 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include -#include -#include - -#include "common/common_types.h" - -namespace Common { - -template -class LeastRecentlyUsedCache { - using ObjectType = typename Traits::ObjectType; - using TickType = typename Traits::TickType; - - struct Item { - ObjectType obj; - TickType tick; - Item* next{}; - Item* prev{}; - }; - -public: - LeastRecentlyUsedCache() : first_item{}, last_item{} {} - ~LeastRecentlyUsedCache() = default; - - size_t Insert(ObjectType obj, TickType tick) { - const auto new_id = Build(); - auto& item = item_pool[new_id]; - item.obj = obj; - item.tick = tick; - Attach(item); - return new_id; - } - - void Touch(size_t id, TickType tick) { - auto& item = item_pool[id]; - if (item.tick >= tick) { - return; - } - item.tick = tick; - if (&item == last_item) { - return; - } - Detach(item); - Attach(item); - } - - void Free(size_t id) { - auto& item = item_pool[id]; - Detach(item); - item.prev = nullptr; - item.next = nullptr; - free_items.push_back(id); - } - - template - void ForEachItemBelow(TickType tick, Func&& func) { - static constexpr bool RETURNS_BOOL = - std::is_same_v, bool>; - Item* iterator = first_item; - while (iterator) { - if (static_cast(tick) - static_cast(iterator->tick) < 0) { - return; - } - Item* next = iterator->next; - if constexpr (RETURNS_BOOL) { - if (func(iterator->obj)) { - return; - } - } else { - func(iterator->obj); - } - iterator = next; - } - } - -private: - size_t Build() { - if (free_items.empty()) { - const size_t item_id = item_pool.size(); - auto& item = item_pool.emplace_back(); - item.next = nullptr; - item.prev = nullptr; - return item_id; - } - const size_t item_id = free_items.front(); - free_items.pop_front(); - auto& item = item_pool[item_id]; - item.next = nullptr; - item.prev = nullptr; - return item_id; - } - - void Attach(Item& item) { - if (!first_item) { - first_item = &item; - } - if (!last_item) { - last_item = &item; - } else { - item.prev = last_item; - last_item->next = &item; - item.next = nullptr; - last_item = &item; - } - } - - void Detach(Item& item) { - if (item.prev) { - item.prev->next = item.next; - } - if (item.next) { - item.next->prev = item.prev; - } - if (&item == first_item) { - first_item = item.next; - if (first_item) { - first_item->prev = nullptr; - } - } - if (&item == last_item) { - last_item = item.prev; - if (last_item) { - last_item->next = nullptr; - } - } - } - - std::deque item_pool; - std::deque free_items; - Item* first_item{}; - Item* last_item{}; -}; - -} // namespace Common diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 0587d7b724..e121361f9b 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -102,7 +102,7 @@ struct ImageBase { VAddr cpu_addr_end = 0; u64 modification_tick = 0; - size_t lru_index = SIZE_MAX; + u64 last_use_tick = 0; std::array mip_level_offsets{}; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index efae825885..46f02b7f90 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -196,11 +196,21 @@ void TextureCache

::RunGarbageCollector() { return false; }; + const auto CollectBelow = [this](u64 threshold) { + boost::container::small_vector expired; + for (auto [id, image] : slot_images) { + if (image->last_use_tick < threshold) { + expired.push_back(id); + } + } + return expired; + }; + // Aggressively clear massive sparse textures if (total_used_memory >= expected_memory) { - lru_cache.ForEachItemBelow(frame_tick, [&](ImageId image_id) { + auto candidates = CollectBelow(frame_tick); + for (const auto image_id : candidates) { auto& image = slot_images[image_id]; - // Only target sparse textures that are old enough if (lowmemorydevice && image.info.is_sparse && image.guest_size_bytes >= 256_MiB && @@ -208,19 +218,32 @@ void TextureCache

::RunGarbageCollector() { LOG_DEBUG(HW_GPU, "GC targeting old sparse texture at 0x{:X} ({} MiB, age: {} frames)", image.gpu_addr, image.guest_size_bytes / (1024 * 1024), frame_tick - image.allocation_tick); - return Cleanup(image_id); + if (Cleanup(image_id)) { + break; + } } - return false; - }); + } } Configure(false); - lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, Cleanup); + { + auto expired = CollectBelow(frame_tick - ticks_to_destroy); + for (const auto image_id : expired) { + if (Cleanup(image_id)) { + break; + } + } + } // If pressure is still too high, prune aggressively. if (total_used_memory >= critical_memory) { Configure(true); - lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, Cleanup); + auto expired = CollectBelow(frame_tick - ticks_to_destroy); + for (const auto image_id : expired) { + if (Cleanup(image_id)) { + break; + } + } } } @@ -2028,7 +2051,7 @@ std::pair TextureCache

::PrepareDmaImage(ImageId dst_id, GPUVAddr ba const auto base = image.TryFindBase(base_addr); PrepareImage(dst_id, mark_as_modified, false); const auto& new_image = slot_images[dst_id]; - lru_cache.Touch(new_image.lru_index, frame_tick); + new_image.last_use_tick = frame_tick; return std::make_pair(base->level, base->layer); } @@ -2377,7 +2400,7 @@ void TextureCache

::RegisterImage(ImageId image_id) { tentative_size = TranscodedAstcSize(tentative_size, image.info.format); } total_used_memory += Common::AlignUp(tentative_size, 1024); - image.lru_index = lru_cache.Insert(image_id, frame_tick); + image.last_use_tick = frame_tick; ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { (*channel_state->gpu_page_table)[page].push_back(image_id); @@ -2411,7 +2434,7 @@ void TextureCache

::UnregisterImage(ImageId image_id) { "Trying to unregister an already registered image"); image.flags &= ~ImageFlagBits::Registered; image.flags &= ~ImageFlagBits::BadOverlap; - lru_cache.Free(image.lru_index); + const auto& clear_page_table = [image_id](u64 page, ankerl::unordered_dense::map, Common::IdentityHash>& selected_page_table) { const auto page_it = selected_page_table.find(page); @@ -2740,7 +2763,7 @@ void TextureCache

::PrepareImage(ImageId image_id, bool is_modification, bool if (is_modification) { MarkModification(image); } - lru_cache.Touch(image.lru_index, frame_tick); + image.last_use_tick = frame_tick; } template diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 4b4061f21d..ba2af1bf44 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -22,7 +22,7 @@ #include "common/common_types.h" #include "common/hash.h" #include "common/literals.h" -#include "common/lru_cache.h" + #include #include "common/scratch_buffer.h" #include "common/slot_vector.h" @@ -510,11 +510,7 @@ private: std::deque> async_buffers; std::deque async_buffers_death_ring; - struct LRUItemParams { - using ObjectType = ImageId; - using TickType = u64; - }; - Common::LeastRecentlyUsedCache lru_cache; + #ifdef YUZU_LEGACY static constexpr size_t TICKS_TO_DESTROY = 6; From cc553379d8438e15ea5cc4caa7ed96f62a18928f Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Thu, 2 Apr 2026 23:50:34 -0400 Subject: [PATCH 06/12] Gido MEOW --- src/video_core/texture_cache/image_base.h | 3 +++ src/video_core/texture_cache/texture_cache.h | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index e121361f9b..14b30d5d5b 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 46f02b7f90..5c0c58b9d3 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -2050,7 +2050,7 @@ std::pair TextureCache

::PrepareDmaImage(ImageId dst_id, GPUVAddr ba const auto& image = slot_images[dst_id]; const auto base = image.TryFindBase(base_addr); PrepareImage(dst_id, mark_as_modified, false); - const auto& new_image = slot_images[dst_id]; + auto& new_image = slot_images[dst_id]; new_image.last_use_tick = frame_tick; return std::make_pair(base->level, base->layer); } From 18ad42f9963ed2a5ce0b8ca2aed29db3e01d3e1d Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Fri, 3 Apr 2026 00:07:04 -0400 Subject: [PATCH 07/12] small fix for the softlock after lru cache removal --- src/video_core/texture_cache/texture_cache.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 5c0c58b9d3..72beb843a6 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -199,7 +199,8 @@ void TextureCache

::RunGarbageCollector() { const auto CollectBelow = [this](u64 threshold) { boost::container::small_vector expired; for (auto [id, image] : slot_images) { - if (image->last_use_tick < threshold) { + if (True(image->flags & ImageFlagBits::Registered) && + image->last_use_tick < threshold) { expired.push_back(id); } } @@ -226,7 +227,7 @@ void TextureCache

::RunGarbageCollector() { } Configure(false); - { + if (frame_tick > ticks_to_destroy) { auto expired = CollectBelow(frame_tick - ticks_to_destroy); for (const auto image_id : expired) { if (Cleanup(image_id)) { @@ -238,10 +239,12 @@ void TextureCache

::RunGarbageCollector() { // If pressure is still too high, prune aggressively. if (total_used_memory >= critical_memory) { Configure(true); - auto expired = CollectBelow(frame_tick - ticks_to_destroy); - for (const auto image_id : expired) { - if (Cleanup(image_id)) { - break; + if (frame_tick > ticks_to_destroy) { + auto expired = CollectBelow(frame_tick - ticks_to_destroy); + for (const auto image_id : expired) { + if (Cleanup(image_id)) { + break; + } } } } From e343ee952479b711abd00b463ca983865477151b Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Fri, 3 Apr 2026 12:52:22 -0400 Subject: [PATCH 08/12] [texture_cache] Reduce garbage collection logic by simplifying conditions and thresholds --- src/video_core/texture_cache/texture_cache.h | 91 +++++++++----------- 1 file changed, 39 insertions(+), 52 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 72beb843a6..e69a085ad4 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -118,17 +118,10 @@ TextureCache

::TextureCache(Runtime& runtime_, Tegra::MaxwellDeviceMemoryManag template void TextureCache

::RunGarbageCollector() { - bool high_priority_mode = false; + bool high_priority_mode = total_used_memory >= expected_memory; bool aggressive_mode = false; - u64 ticks_to_destroy = 0; - size_t num_iterations = 0; - - const auto Configure = [&](bool allow_aggressive) { - high_priority_mode = total_used_memory >= expected_memory; - aggressive_mode = allow_aggressive && total_used_memory >= critical_memory; - ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 50ULL; - num_iterations = aggressive_mode ? 40 : (high_priority_mode ? 20 : 10); - }; + u64 ticks_to_destroy = high_priority_mode ? 25ULL : 50ULL; + size_t num_iterations = high_priority_mode ? 20 : 10; const auto Cleanup = [this, &num_iterations, &high_priority_mode, &aggressive_mode](ImageId image_id) { @@ -145,12 +138,9 @@ void TextureCache

::RunGarbageCollector() { } if (True(image.flags & ImageFlagBits::IsDecoding)) { - // This image is still being decoded, deleting it will invalidate the slot - // used by the async decoder thread. return false; } - // Prioritize large sparse textures for cleanup const bool is_large_sparse = lowmemorydevice && image.info.is_sparse && image.guest_size_bytes >= 256_MiB; @@ -183,7 +173,6 @@ void TextureCache

::RunGarbageCollector() { if (total_used_memory < critical_memory) { if (aggressive_mode) { - // Sink the aggresiveness. num_iterations >>= 2; aggressive_mode = false; return false; @@ -196,55 +185,53 @@ void TextureCache

::RunGarbageCollector() { return false; }; - const auto CollectBelow = [this](u64 threshold) { - boost::container::small_vector expired; - for (auto [id, image] : slot_images) { - if (True(image->flags & ImageFlagBits::Registered) && - image->last_use_tick < threshold) { - expired.push_back(id); - } - } - return expired; - }; + // Single pass: collect all candidates, classified by tier + const u64 normal_threshold = frame_tick > ticks_to_destroy ? frame_tick - ticks_to_destroy : 0; + const u64 aggressive_threshold = frame_tick > 10 ? frame_tick - 10 : 0; + boost::container::small_vector sparse_candidates; + boost::container::small_vector expired; + boost::container::small_vector aggressive_expired; - // Aggressively clear massive sparse textures - if (total_used_memory >= expected_memory) { - auto candidates = CollectBelow(frame_tick); - for (const auto image_id : candidates) { - auto& image = slot_images[image_id]; - if (lowmemorydevice && - image.info.is_sparse && - image.guest_size_bytes >= 256_MiB && - image.allocation_tick < frame_tick - 3) { - LOG_DEBUG(HW_GPU, "GC targeting old sparse texture at 0x{:X} ({} MiB, age: {} frames)", - image.gpu_addr, image.guest_size_bytes / (1024 * 1024), - frame_tick - image.allocation_tick); - if (Cleanup(image_id)) { - break; - } - } + for (auto [id, image] : slot_images) { + if (False(image->flags & ImageFlagBits::Registered)) { + continue; + } + const u64 tick = image->last_use_tick; + if (tick < normal_threshold) { + expired.push_back(id); + } else if (tick < aggressive_threshold) { + aggressive_expired.push_back(id); + } else if (high_priority_mode && tick < frame_tick && + lowmemorydevice && image->info.is_sparse && + image->guest_size_bytes >= 256_MiB) { + sparse_candidates.push_back(id); } } - Configure(false); - if (frame_tick > ticks_to_destroy) { - auto expired = CollectBelow(frame_tick - ticks_to_destroy); - for (const auto image_id : expired) { + // Tier 1: large sparse textures under memory pressure + for (const auto image_id : sparse_candidates) { + auto& image = slot_images[image_id]; + if (image.allocation_tick < frame_tick - 3) { if (Cleanup(image_id)) { break; } } } - // If pressure is still too high, prune aggressively. + // Tier 2: normal expiration + for (const auto image_id : expired) { + if (Cleanup(image_id)) { + break; + } + } + + // Tier 3: if still critical, use aggressive threshold with more iterations if (total_used_memory >= critical_memory) { - Configure(true); - if (frame_tick > ticks_to_destroy) { - auto expired = CollectBelow(frame_tick - ticks_to_destroy); - for (const auto image_id : expired) { - if (Cleanup(image_id)) { - break; - } + aggressive_mode = true; + num_iterations = 40; + for (const auto image_id : aggressive_expired) { + if (Cleanup(image_id)) { + break; } } } From d646a57f7545428c7b215e79365ed546f6c80412 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Fri, 3 Apr 2026 23:53:49 -0400 Subject: [PATCH 09/12] [texture_cache] Adjusted GC logic for the iterations with older or obsolete textures --- src/video_core/texture_cache/texture_cache.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index e69a085ad4..fe5f37548d 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -6,6 +6,7 @@ #pragma once +#include #include #include #include @@ -128,7 +129,7 @@ void TextureCache

::RunGarbageCollector() { if (num_iterations == 0) { return true; } - --num_iterations; + auto& image = slot_images[image_id]; // Never delete recently allocated sparse textures (within 3 frames) @@ -156,6 +157,8 @@ void TextureCache

::RunGarbageCollector() { return false; } + --num_iterations; + if (must_download && !is_large_sparse) { auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); const auto copies = FixSmallVectorADL(FullDownloadCopies(image.info)); @@ -185,6 +188,12 @@ void TextureCache

::RunGarbageCollector() { return false; }; + const auto SortByAge = [this](auto& vec) { + std::sort(vec.begin(), vec.end(), [this](ImageId a, ImageId b) { + return slot_images[a].last_use_tick < slot_images[b].last_use_tick; + }); + }; + // Single pass: collect all candidates, classified by tier const u64 normal_threshold = frame_tick > ticks_to_destroy ? frame_tick - ticks_to_destroy : 0; const u64 aggressive_threshold = frame_tick > 10 ? frame_tick - 10 : 0; @@ -208,6 +217,9 @@ void TextureCache

::RunGarbageCollector() { } } + SortByAge(expired); + SortByAge(aggressive_expired); + // Tier 1: large sparse textures under memory pressure for (const auto image_id : sparse_candidates) { auto& image = slot_images[image_id]; From 4c0916915f26f41b3b0461ea0b7779ba9e39dbd4 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Sat, 4 Apr 2026 18:40:14 -0400 Subject: [PATCH 10/12] fix license headers+ --- src/video_core/dma_pusher.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index d6d44e66b4..1469aef6ef 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project From 2ec925d67067a20221ecb047ffc969705d9ebcc1 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Wed, 8 Apr 2026 03:36:02 -0400 Subject: [PATCH 11/12] [texture_cache] Removal of lowmemorydevice dead code --- src/video_core/texture_cache/texture_cache.h | 89 ++----------------- .../texture_cache/texture_cache_base.h | 1 - 2 files changed, 5 insertions(+), 85 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index fe5f37548d..7a8f78cde1 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -71,14 +71,10 @@ TextureCache

::TextureCache(Runtime& runtime_, Tegra::MaxwellDeviceMemoryManag (std::max)((std::min)(device_local_memory - min_vacancy_critical, min_spacing_critical), DEFAULT_CRITICAL_MEMORY)); minimum_memory = static_cast((device_local_memory - mem_threshold) / 2); - - lowmemorydevice = false; } else { expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; minimum_memory = 0; - - lowmemorydevice = true; } const bool gpu_unswizzle_enabled = Settings::values.gpu_unswizzle_enabled.GetValue(); @@ -142,24 +138,19 @@ void TextureCache

::RunGarbageCollector() { return false; } - const bool is_large_sparse = lowmemorydevice && - image.info.is_sparse && - image.guest_size_bytes >= 256_MiB; - - if (!aggressive_mode && !is_large_sparse && - True(image.flags & ImageFlagBits::CostlyLoad)) { + if (!aggressive_mode && True(image.flags & ImageFlagBits::CostlyLoad)) { return false; } const bool must_download = image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap); - if (!high_priority_mode && !is_large_sparse && must_download) { + if (!high_priority_mode && must_download) { return false; } --num_iterations; - if (must_download && !is_large_sparse) { + if (must_download) { auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); const auto copies = FixSmallVectorADL(FullDownloadCopies(image.info)); image.DownloadMemory(map, copies); @@ -197,7 +188,6 @@ void TextureCache

::RunGarbageCollector() { // Single pass: collect all candidates, classified by tier const u64 normal_threshold = frame_tick > ticks_to_destroy ? frame_tick - ticks_to_destroy : 0; const u64 aggressive_threshold = frame_tick > 10 ? frame_tick - 10 : 0; - boost::container::small_vector sparse_candidates; boost::container::small_vector expired; boost::container::small_vector aggressive_expired; @@ -210,34 +200,20 @@ void TextureCache

::RunGarbageCollector() { expired.push_back(id); } else if (tick < aggressive_threshold) { aggressive_expired.push_back(id); - } else if (high_priority_mode && tick < frame_tick && - lowmemorydevice && image->info.is_sparse && - image->guest_size_bytes >= 256_MiB) { - sparse_candidates.push_back(id); } } SortByAge(expired); SortByAge(aggressive_expired); - // Tier 1: large sparse textures under memory pressure - for (const auto image_id : sparse_candidates) { - auto& image = slot_images[image_id]; - if (image.allocation_tick < frame_tick - 3) { - if (Cleanup(image_id)) { - break; - } - } - } - - // Tier 2: normal expiration + // Tier 1: normal expiration for (const auto image_id : expired) { if (Cleanup(image_id)) { break; } } - // Tier 3: if still critical, use aggressive threshold with more iterations + // Tier 2: if still critical, use aggressive threshold with more iterations if (total_used_memory >= critical_memory) { aggressive_mode = true; num_iterations = 40; @@ -1221,9 +1197,6 @@ void TextureCache

::RefreshContents(Image& image, ImageId image_id) { } image.flags &= ~ImageFlagBits::CpuModified; - if( lowmemorydevice && image.info.format == PixelFormat::BC1_RGBA_UNORM && MapSizeBytes(image) >= 256_MiB ) { - return; - } TrackImage(image, image_id); @@ -1645,38 +1618,6 @@ ImageId TextureCache

::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, } ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); - // For large sparse textures, aggressively clean up old allocations at same address - if (lowmemorydevice && info.is_sparse && CalculateGuestSizeInBytes(info) >= 256_MiB) { - const auto alloc_it = image_allocs_table.find(gpu_addr); - if (alloc_it != image_allocs_table.end()) { - const ImageAllocId alloc_id = alloc_it->second; - auto& alloc_images = slot_image_allocs[alloc_id].images; - - // Collect old images at this address that were created more than 2 frames ago - boost::container::small_vector to_delete; - for (ImageId old_image_id : alloc_images) { - Image& old_image = slot_images[old_image_id]; - if (old_image.info.is_sparse && - old_image.gpu_addr == gpu_addr && - old_image.allocation_tick < frame_tick - 2) { // Try not to delete fresh textures - to_delete.push_back(old_image_id); - } - } - - // Delete old images immediately - for (ImageId old_id : to_delete) { - Image& old_image = slot_images[old_id]; - LOG_DEBUG(HW_GPU, "Immediately deleting old sparse texture at 0x{:X} ({} MiB)", - gpu_addr, old_image.guest_size_bytes / (1024 * 1024)); - if (True(old_image.flags & ImageFlagBits::Tracked)) { - UntrackImage(old_image, old_id); - } - UnregisterImage(old_id); - DeleteImage(old_id, true); - } - } - } - const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr); const Image& image = slot_images[image_id]; // Using "image.gpu_addr" instead of "gpu_addr" is important because it might be different @@ -1693,26 +1634,6 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DA ImageInfo new_info = info; const size_t size_bytes = CalculateGuestSizeInBytes(new_info); - // Proactive cleanup for large sparse texture allocations - if (lowmemorydevice && new_info.is_sparse && size_bytes >= 256_MiB) { - const u64 estimated_alloc_size = size_bytes; - - if (total_used_memory + estimated_alloc_size >= critical_memory) { - LOG_DEBUG(HW_GPU, "Large sparse texture allocation ({} MiB) - running aggressive GC. " - "Current memory: {} MiB, Critical: {} MiB", - size_bytes / (1024 * 1024), - total_used_memory / (1024 * 1024), - critical_memory / (1024 * 1024)); - RunGarbageCollector(); - - // If still over threshold after GC, try one more aggressive pass - if (total_used_memory + estimated_alloc_size >= critical_memory) { - LOG_DEBUG(HW_GPU, "Still critically low on memory, running second GC pass"); - RunGarbageCollector(); - } - } - } - const bool broken_views = runtime.HasBrokenTextureViewFormats(); const bool native_bgr = runtime.HasNativeBgr(); join_overlap_ids.clear(); diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index ba2af1bf44..0cdeb9fdc5 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -478,7 +478,6 @@ private: u64 minimum_memory; u64 expected_memory; u64 critical_memory; - bool lowmemorydevice = false; size_t gpu_unswizzle_maxsize = 0; size_t swizzle_chunk_size = 0; u32 swizzle_slices_per_batch = 0; From 684397110468b1fee49abb2b8efd752cfdabc316 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Wed, 8 Apr 2026 04:21:52 -0400 Subject: [PATCH 12/12] Small experiment --- src/video_core/texture_cache/texture_cache.h | 58 ++++++++++++++++++- .../texture_cache/texture_cache_base.h | 6 +- 2 files changed, 60 insertions(+), 4 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 7a8f78cde1..b80af2b480 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -113,6 +113,54 @@ TextureCache

::TextureCache(Runtime& runtime_, Tegra::MaxwellDeviceMemoryManag } } +template +void TextureCache

::RunAllocationGarbageCollector(size_t requested_bytes) { + if (requested_bytes == 0) { + return; + } + + if (allocation_gc_frame != frame_tick) { + allocation_gc_frame = frame_tick; + allocation_gc_passes = 0; + } + if (allocation_gc_passes >= MAX_ALLOCATION_GC_PASSES_PER_FRAME) { + return; + } + + if (runtime.CanReportMemoryUsage()) { + total_used_memory = runtime.GetDeviceMemoryUsage(); + } + + const u64 request = static_cast(requested_bytes); + const u64 max_u64 = (std::numeric_limits::max)(); + const u64 projected_usage = request > (max_u64 - total_used_memory) + ? max_u64 + : total_used_memory + request; + if (projected_usage < expected_memory) { + return; + } + + RunGarbageCollector(); + ++allocation_gc_passes; + + if (runtime.CanReportMemoryUsage()) { + total_used_memory = runtime.GetDeviceMemoryUsage(); + } + + const u64 projected_after_gc = request > (max_u64 - total_used_memory) + ? max_u64 + : total_used_memory + request; + if (projected_after_gc >= critical_memory && + allocation_gc_passes < MAX_ALLOCATION_GC_PASSES_PER_FRAME) { + RunGarbageCollector(); + ++allocation_gc_passes; + + if (runtime.CanReportMemoryUsage()) { + total_used_memory = runtime.GetDeviceMemoryUsage(); + } + } +} + template void TextureCache

::RunGarbageCollector() { bool high_priority_mode = total_used_memory >= expected_memory; @@ -1606,18 +1654,20 @@ bool TextureCache

::ScaleDown(Image& image) { template ImageId TextureCache

::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, RelaxedOptions options) { + const size_t requested_size = CalculateGuestSizeInBytes(info); std::optional cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); if (!cpu_addr) { - const auto size = CalculateGuestSizeInBytes(info); - cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, size); + cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, requested_size); if (!cpu_addr) { const DAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space; - virtual_invalid_space += Common::AlignUp(size, 32); + virtual_invalid_space += Common::AlignUp(requested_size, 32); cpu_addr = std::optional(fake_addr); } } ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); + RunAllocationGarbageCollector(requested_size); + const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr); const Image& image = slot_images[image_id]; // Using "image.gpu_addr" instead of "gpu_addr" is important because it might be different @@ -1634,6 +1684,8 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DA ImageInfo new_info = info; const size_t size_bytes = CalculateGuestSizeInBytes(new_info); + RunAllocationGarbageCollector(size_bytes); + const bool broken_views = runtime.HasBrokenTextureViewFormats(); const bool native_bgr = runtime.HasNativeBgr(); join_overlap_ids.clear(); diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 0cdeb9fdc5..e2c2c5d7d9 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -120,7 +120,7 @@ class TextureCache : public VideoCommon::ChannelSetupCaches void FillImageViews(DescriptorTable& table, @@ -527,6 +529,8 @@ private: u64 modification_tick = 0; u64 frame_tick = 0; + u64 allocation_gc_frame = (std::numeric_limits::max)(); + u32 allocation_gc_passes = 0; u64 last_sampler_gc_frame = (std::numeric_limits::max)(); Common::ThreadWorker texture_decode_worker{1, "TextureDecoder"};