diff --git a/src/common/lru_cache.h b/src/common/lru_cache.h deleted file mode 100644 index 36cea5d27e..0000000000 --- a/src/common/lru_cache.h +++ /dev/null @@ -1,139 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include -#include -#include - -#include "common/common_types.h" - -namespace Common { - -template -class LeastRecentlyUsedCache { - using ObjectType = typename Traits::ObjectType; - using TickType = typename Traits::TickType; - - struct Item { - ObjectType obj; - TickType tick; - Item* next{}; - Item* prev{}; - }; - -public: - LeastRecentlyUsedCache() : first_item{}, last_item{} {} - ~LeastRecentlyUsedCache() = default; - - size_t Insert(ObjectType obj, TickType tick) { - const auto new_id = Build(); - auto& item = item_pool[new_id]; - item.obj = obj; - item.tick = tick; - Attach(item); - return new_id; - } - - void Touch(size_t id, TickType tick) { - auto& item = item_pool[id]; - if (item.tick >= tick) { - return; - } - item.tick = tick; - if (&item == last_item) { - return; - } - Detach(item); - Attach(item); - } - - void Free(size_t id) { - auto& item = item_pool[id]; - Detach(item); - item.prev = nullptr; - item.next = nullptr; - free_items.push_back(id); - } - - template - void ForEachItemBelow(TickType tick, Func&& func) { - static constexpr bool RETURNS_BOOL = - std::is_same_v, bool>; - Item* iterator = first_item; - while (iterator) { - if (static_cast(tick) - static_cast(iterator->tick) < 0) { - return; - } - Item* next = iterator->next; - if constexpr (RETURNS_BOOL) { - if (func(iterator->obj)) { - return; - } - } else { - func(iterator->obj); - } - iterator = next; - } - } - -private: - size_t Build() { - if (free_items.empty()) { - const size_t item_id = item_pool.size(); - auto& item = item_pool.emplace_back(); - item.next = nullptr; - item.prev = nullptr; - return item_id; - } - const size_t item_id = free_items.front(); - free_items.pop_front(); - 
auto& item = item_pool[item_id]; - item.next = nullptr; - item.prev = nullptr; - return item_id; - } - - void Attach(Item& item) { - if (!first_item) { - first_item = &item; - } - if (!last_item) { - last_item = &item; - } else { - item.prev = last_item; - last_item->next = &item; - item.next = nullptr; - last_item = &item; - } - } - - void Detach(Item& item) { - if (item.prev) { - item.prev->next = item.next; - } - if (item.next) { - item.next->prev = item.prev; - } - if (&item == first_item) { - first_item = item.next; - if (first_item) { - first_item->prev = nullptr; - } - } - if (&item == last_item) { - last_item = item.prev; - if (last_item) { - last_item->next = nullptr; - } - } - } - - std::deque item_pool; - std::deque free_items; - Item* first_item{}; - Item* last_item{}; -}; - -} // namespace Common diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h index bec2dac246..f080f4503c 100644 --- a/src/video_core/buffer_cache/buffer_base.h +++ b/src/video_core/buffer_cache/buffer_base.h @@ -109,12 +109,12 @@ public: return static_cast(other_cpu_addr - cpu_addr); } - size_t getLRUID() const noexcept { - return lru_id; + u64 GetFrameTick() const noexcept { + return frame_tick; } - void setLRUID(size_t lru_id_) { - lru_id = lru_id_; + void SetFrameTick(u64 tick) noexcept { + frame_tick = tick; } size_t SizeBytes() const { @@ -125,7 +125,7 @@ private: VAddr cpu_addr = 0; BufferFlagBits flags{}; int stream_score = 0; - size_t lru_id = SIZE_MAX; + u64 frame_tick = 0; size_t size_bytes = 0; }; diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 014b4a318e..422e542421 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -58,17 +58,22 @@ void BufferCache

::RunGarbageCollector() {
     const bool aggressive_gc = total_used_memory >= critical_memory;
     const u64 ticks_to_destroy = aggressive_gc ? 60 : 120;
     int num_iterations = aggressive_gc ? 64 : 32;
-    const auto clean_up = [this, &num_iterations](BufferId buffer_id) {
+    const u64 threshold = frame_tick > ticks_to_destroy ? frame_tick - ticks_to_destroy : 0;
+    boost::container::small_vector expired;
+    for (auto [id, buffer] : slot_buffers) { // clamped threshold: early frames expire nothing
+        if (id != NULL_BUFFER_ID && buffer->GetFrameTick() < threshold) {
+            expired.push_back(id); // NOTE(review): null buffer is never ticked; confirm skip
+        }
+    }
+    for (const auto buffer_id : expired) {
         if (num_iterations == 0) {
-            return true;
+            break;
         }
         --num_iterations;
         auto& buffer = slot_buffers[buffer_id];
         DownloadBufferMemory(buffer);
         DeleteBuffer(buffer_id);
-        return false;
-    };
-    lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up);
+    }
 }
 
 template
@@ -1595,10 +1600,9 @@ void BufferCache

::ChangeRegister(BufferId buffer_id) { const auto size = buffer.SizeBytes(); if (insert) { total_used_memory += Common::AlignUp(size, 1024); - buffer.setLRUID(lru_cache.Insert(buffer_id, frame_tick)); + buffer.SetFrameTick(frame_tick); } else { total_used_memory -= Common::AlignUp(size, 1024); - lru_cache.Free(buffer.getLRUID()); } const DAddr device_addr_begin = buffer.CpuAddr(); const DAddr device_addr_end = device_addr_begin + size; @@ -1616,7 +1620,7 @@ void BufferCache

::ChangeRegister(BufferId buffer_id) { template void BufferCache

::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept { if (buffer_id != NULL_BUFFER_ID) { - lru_cache.Touch(buffer.getLRUID(), frame_tick); + buffer.SetFrameTick(frame_tick); } } diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index 08524bd854..b17fed1b6d 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h @@ -23,7 +23,6 @@ #include "common/common_types.h" #include "common/div_ceil.h" #include "common/literals.h" -#include "common/lru_cache.h" #include "common/range_sets.h" #include "common/scope_exit.h" #include "common/settings.h" @@ -506,11 +505,6 @@ private: size_t immediate_buffer_capacity = 0; Common::ScratchBuffer immediate_buffer_alloc; - struct LRUItemParams { - using ObjectType = BufferId; - using TickType = u64; - }; - Common::LeastRecentlyUsedCache lru_cache; u64 frame_tick = 0; u64 total_used_memory = 0; u64 minimum_memory = 0; diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 3844a8e2f9..1469aef6ef 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project @@ -122,7 +122,35 @@ void DmaPusher::ProcessCommands(std::span commands) { dma_state.is_last_call = true; index += max_write; } else if (dma_state.method_count) { - auto const command_header = commands[index]; //can copy + if (!dma_state.non_incrementing && !dma_increment_once && + dma_state.method >= non_puller_methods) { + auto subchannel = subchannels[dma_state.subchannel]; + const u32 available = u32(std::min( + index + dma_state.method_count, commands.size()) - index); + u32 batch = 0; + u32 method = dma_state.method; + while (batch < available) { + const bool 
needs_exec = + (method < Engines::EngineInterface::EXECUTION_MASK_TABLE_SIZE) + ? subchannel->execution_mask[method] + : subchannel->execution_mask_default; + if (needs_exec) break; + batch++; + method++; + } + if (batch > 0) { + auto& sink = subchannel->method_sink; + sink.reserve(sink.size() + batch); + for (u32 j = 0; j < batch; j++) { + sink.emplace_back(dma_state.method + j, commands[index + j].argument); + } + dma_state.method += batch; + dma_state.method_count -= batch; + index += batch; + continue; + } + } + auto const command_header = commands[index]; dma_state.dma_word_offset = u32(index * sizeof(u32)); dma_state.is_last_call = dma_state.method_count <= 1; CallMethod(command_header.argument); @@ -181,7 +209,11 @@ void DmaPusher::CallMethod(u32 argument) const { }); } else { auto subchannel = subchannels[dma_state.subchannel]; - if (!subchannel->execution_mask[dma_state.method]) { + const bool needs_execution = + (dma_state.method < Engines::EngineInterface::EXECUTION_MASK_TABLE_SIZE) + ? 
subchannel->execution_mask[dma_state.method] + : subchannel->execution_mask_default; + if (!needs_execution) { subchannel->method_sink.emplace_back(dma_state.method, argument); } else { subchannel->ConsumeSink(); diff --git a/src/video_core/engines/engine_interface.h b/src/video_core/engines/engine_interface.h index bf3bd66aca..292f0a5738 100644 --- a/src/video_core/engines/engine_interface.h +++ b/src/video_core/engines/engine_interface.h @@ -6,9 +6,9 @@ #pragma once -#include -#include -#include +#include + +#include #include "common/common_types.h" @@ -41,8 +41,11 @@ public: ConsumeSinkImpl(); } - std::bitset<(std::numeric_limits::max)()> execution_mask{}; - std::vector> method_sink{}; + static constexpr size_t EXECUTION_MASK_TABLE_SIZE = 0xE00; + + std::array execution_mask{}; + bool execution_mask_default{}; + boost::container::small_vector, 64> method_sink{}; bool current_dirty{}; GPUVAddr current_dma_segment; diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index b442c5cc76..11f60ef32b 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -26,7 +26,7 @@ Fermi2D::Fermi2D(MemoryManager& memory_manager_) : memory_manager{memory_manager regs.src.depth = 1; regs.dst.depth = 1; - execution_mask.reset(); + execution_mask.fill(0); execution_mask[FERMI2D_REG_INDEX(pixels_from_memory.src_y0) + 1] = true; } diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 7b4efeb1e0..d6ee80f6e2 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -18,7 +18,7 @@ namespace Tegra::Engines { KeplerCompute::KeplerCompute(Core::System& system_, MemoryManager& memory_manager_) : system{system_}, memory_manager{memory_manager_}, upload_state{memory_manager, regs.upload} { - execution_mask.reset(); + execution_mask.fill(0); execution_mask[KEPLER_COMPUTE_REG_INDEX(exec_upload)] = true; 
execution_mask[KEPLER_COMPUTE_REG_INDEX(data_upload)] = true; execution_mask[KEPLER_COMPUTE_REG_INDEX(launch)] = true; diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 5d4c4720d3..013a644c1b 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp @@ -22,7 +22,7 @@ KeplerMemory::~KeplerMemory() = default; void KeplerMemory::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { upload_state.BindRasterizer(rasterizer_); - execution_mask.reset(); + execution_mask.fill(0); execution_mask[KEPLERMEMORY_REG_INDEX(exec)] = true; execution_mask[KEPLERMEMORY_REG_INDEX(data)] = true; } diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 6d9ebd6296..431da33559 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -4,8 +4,10 @@ // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include #include + #include "common/assert.h" #include "common/bit_util.h" #include "common/scope_exit.h" @@ -37,9 +39,10 @@ Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_) { dirty.flags.flip(); InitializeRegisterDefaults(); - execution_mask.reset(); - for (size_t i = 0; i < execution_mask.size(); i++) + execution_mask.fill(0); + for (size_t i = 0; i < EXECUTION_MASK_TABLE_SIZE; i++) execution_mask[i] = IsMethodExecutable(u32(i)); + execution_mask_default = true; } Maxwell3D::~Maxwell3D() = default; @@ -298,18 +301,27 @@ u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) { } void Maxwell3D::ConsumeSinkImpl() { + std::stable_sort(method_sink.begin(), method_sink.end(), + [](const auto& a, const auto& b) { return a.first < b.first; }); + + const auto sink_size = method_sink.size(); const auto control = shadow_state.shadow_ram_control; if (control == Regs::ShadowRamControl::Track || control == 
Regs::ShadowRamControl::TrackWithFilter) { - for (auto [method, value] : method_sink) { + for (size_t i = 0; i < sink_size; ++i) { + const auto [method, value] = method_sink[i]; shadow_state.reg_array[method] = value; ProcessDirtyRegisters(method, value); } } else if (control == Regs::ShadowRamControl::Replay) { - for (auto [method, value] : method_sink) + for (size_t i = 0; i < sink_size; ++i) { + const auto [method, value] = method_sink[i]; ProcessDirtyRegisters(method, shadow_state.reg_array[method]); + } } else { - for (auto [method, value] : method_sink) + for (size_t i = 0; i < sink_size; ++i) { + const auto [method, value] = method_sink[i]; ProcessDirtyRegisters(method, value); + } } method_sink.clear(); } diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 089d118a09..c99039cda7 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -23,7 +23,7 @@ using namespace Texture; MaxwellDMA::MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_) : system{system_}, memory_manager{memory_manager_} { - execution_mask.reset(); + execution_mask.fill(0); execution_mask[offsetof(Regs, launch_dma) / sizeof(u32)] = true; } diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 0587d7b724..14b30d5d5b 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -102,7 +105,7 @@ struct ImageBase { VAddr cpu_addr_end = 0; u64 modification_tick = 0; - size_t lru_index = SIZE_MAX; + u64 last_use_tick = 0; std::array mip_level_offsets{}; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 
71210ffe6e..3d5a4a153b 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -6,6 +6,7 @@ #pragma once +#include #include #include #include @@ -118,24 +119,17 @@ TextureCache

::TextureCache(Runtime& runtime_, Tegra::MaxwellDeviceMemoryManag template void TextureCache

::RunGarbageCollector() { - bool high_priority_mode = false; + bool high_priority_mode = total_used_memory >= expected_memory; bool aggressive_mode = false; - u64 ticks_to_destroy = 0; - size_t num_iterations = 0; - - const auto Configure = [&](bool allow_aggressive) { - high_priority_mode = total_used_memory >= expected_memory; - aggressive_mode = allow_aggressive && total_used_memory >= critical_memory; - ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 50ULL; - num_iterations = aggressive_mode ? 40 : (high_priority_mode ? 20 : 10); - }; + u64 ticks_to_destroy = high_priority_mode ? 25ULL : 50ULL; + size_t num_iterations = high_priority_mode ? 20 : 10; const auto Cleanup = [this, &num_iterations, &high_priority_mode, &aggressive_mode](ImageId image_id) { if (num_iterations == 0) { return true; } - --num_iterations; + auto& image = slot_images[image_id]; // Never delete recently allocated sparse textures (within 3 frames) @@ -145,12 +139,9 @@ void TextureCache

::RunGarbageCollector() { } if (True(image.flags & ImageFlagBits::IsDecoding)) { - // This image is still being decoded, deleting it will invalidate the slot - // used by the async decoder thread. return false; } - // Prioritize large sparse textures for cleanup const bool is_large_sparse = lowmemorydevice && image.info.is_sparse && image.guest_size_bytes >= 256_MiB; @@ -166,6 +157,8 @@ void TextureCache

::RunGarbageCollector() { return false; } + --num_iterations; + if (must_download && !is_large_sparse) { auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); const auto copies = FixSmallVectorADL(FullDownloadCopies(image.info)); @@ -183,7 +176,6 @@ void TextureCache

::RunGarbageCollector() { if (total_used_memory < critical_memory) { if (aggressive_mode) { - // Sink the aggresiveness. num_iterations >>= 2; aggressive_mode = false; return false; @@ -196,31 +188,64 @@ void TextureCache

::RunGarbageCollector() { return false; }; - // Aggressively clear massive sparse textures - if (total_used_memory >= expected_memory) { - lru_cache.ForEachItemBelow(frame_tick, [&](ImageId image_id) { - auto& image = slot_images[image_id]; - // Only target sparse textures that are old enough - if (lowmemorydevice && - image.info.is_sparse && - image.guest_size_bytes >= 256_MiB && - image.allocation_tick < frame_tick - 3) { - LOG_DEBUG(HW_GPU, "GC targeting old sparse texture at 0x{:X} ({} MiB, age: {} frames)", - image.gpu_addr, image.guest_size_bytes / (1024 * 1024), - frame_tick - image.allocation_tick); - return Cleanup(image_id); - } - return false; + const auto SortByAge = [this](auto& vec) { + std::sort(vec.begin(), vec.end(), [this](ImageId a, ImageId b) { + return slot_images[a].last_use_tick < slot_images[b].last_use_tick; }); + }; + + // Single pass: collect all candidates, classified by tier + const u64 normal_threshold = frame_tick > ticks_to_destroy ? frame_tick - ticks_to_destroy : 0; + const u64 aggressive_threshold = frame_tick > 10 ? frame_tick - 10 : 0; + boost::container::small_vector sparse_candidates; + boost::container::small_vector expired; + boost::container::small_vector aggressive_expired; + + for (auto [id, image] : slot_images) { + if (False(image->flags & ImageFlagBits::Registered)) { + continue; + } + const u64 tick = image->last_use_tick; + if (tick < normal_threshold) { + expired.push_back(id); + } else if (tick < aggressive_threshold) { + aggressive_expired.push_back(id); + } else if (high_priority_mode && tick < frame_tick && + lowmemorydevice && image->info.is_sparse && + image->guest_size_bytes >= 256_MiB) { + sparse_candidates.push_back(id); + } } - Configure(false); - lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, Cleanup); + SortByAge(expired); + SortByAge(aggressive_expired); - // If pressure is still too high, prune aggressively. 
+    // Tier 1: large sparse textures under memory pressure
+    for (const auto image_id : sparse_candidates) {
+        auto& image = slot_images[image_id];
+        if (image.allocation_tick + 3 < frame_tick) { // no u64 wrap while frame_tick < 3
+            if (Cleanup(image_id)) {
+                break;
+            }
+        }
+    }
+
+    // Tier 2: normal expiration
+    for (const auto image_id : expired) {
+        if (Cleanup(image_id)) {
+            break;
+        }
+    }
+
+    // Tier 3: if still critical, use aggressive threshold with more iterations
     if (total_used_memory >= critical_memory) {
-        Configure(true);
-        lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, Cleanup);
+        aggressive_mode = true;
+        num_iterations = 40;
+        for (const auto image_id : aggressive_expired) {
+            if (Cleanup(image_id)) {
+                break;
+            }
+        }
     }
 }
 
@@ -2027,8 +2052,8 @@ std::pair TextureCache

::PrepareDmaImage(ImageId dst_id, GPUVAddr ba const auto& image = slot_images[dst_id]; const auto base = image.TryFindBase(base_addr); PrepareImage(dst_id, mark_as_modified, false); - const auto& new_image = slot_images[dst_id]; - lru_cache.Touch(new_image.lru_index, frame_tick); + auto& new_image = slot_images[dst_id]; + new_image.last_use_tick = frame_tick; return std::make_pair(base->level, base->layer); } @@ -2377,7 +2402,7 @@ void TextureCache

::RegisterImage(ImageId image_id) { tentative_size = TranscodedAstcSize(tentative_size, image.info.format); } total_used_memory += Common::AlignUp(tentative_size, 1024); - image.lru_index = lru_cache.Insert(image_id, frame_tick); + image.last_use_tick = frame_tick; ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { (*channel_state->gpu_page_table)[page].push_back(image_id); @@ -2411,7 +2436,7 @@ void TextureCache

::UnregisterImage(ImageId image_id) { "Trying to unregister an already registered image"); image.flags &= ~ImageFlagBits::Registered; image.flags &= ~ImageFlagBits::BadOverlap; - lru_cache.Free(image.lru_index); + const auto& clear_page_table = [image_id](u64 page, ankerl::unordered_dense::map, Common::IdentityHash>& selected_page_table) { const auto page_it = selected_page_table.find(page); @@ -2738,7 +2763,7 @@ void TextureCache

::PrepareImage(ImageId image_id, bool is_modification, bool if (is_modification) { MarkModification(image); } - lru_cache.Touch(image.lru_index, frame_tick); + image.last_use_tick = frame_tick; } template diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 4b4061f21d..ba2af1bf44 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -22,7 +22,7 @@ #include "common/common_types.h" #include "common/hash.h" #include "common/literals.h" -#include "common/lru_cache.h" + #include #include "common/scratch_buffer.h" #include "common/slot_vector.h" @@ -510,11 +510,7 @@ private: std::deque> async_buffers; std::deque async_buffers_death_ring; - struct LRUItemParams { - using ObjectType = ImageId; - using TickType = u64; - }; - Common::LeastRecentlyUsedCache lru_cache; + #ifdef YUZU_LEGACY static constexpr size_t TICKS_TO_DESTROY = 6;