diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index fe5f37548d..b80af2b480 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -71,14 +71,10 @@ TextureCache

::TextureCache(Runtime& runtime_, Tegra::MaxwellDeviceMemoryManag (std::max)((std::min)(device_local_memory - min_vacancy_critical, min_spacing_critical), DEFAULT_CRITICAL_MEMORY)); minimum_memory = static_cast((device_local_memory - mem_threshold) / 2); - - lowmemorydevice = false; } else { expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; minimum_memory = 0; - - lowmemorydevice = true; } const bool gpu_unswizzle_enabled = Settings::values.gpu_unswizzle_enabled.GetValue(); @@ -117,6 +113,54 @@ TextureCache

::TextureCache(Runtime& runtime_, Tegra::MaxwellDeviceMemoryManag } } +template +void TextureCache

/*
 * RunAllocationGarbageCollector(requested_bytes)
 *
 * Pre-allocation memory check: runs RunGarbageCollector() when the current
 * usage plus the pending request would reach expected_memory.
 *
 * Steps, in order:
 *  1. A zero-byte request returns immediately.
 *  2. allocation_gc_passes is reset to 0 the first time this runs with a
 *     frame_tick different from allocation_gc_frame; once the counter reaches
 *     MAX_ALLOCATION_GC_PASSES_PER_FRAME the call returns without collecting.
 *  3. total_used_memory is re-read from runtime.GetDeviceMemoryUsage() when
 *     runtime.CanReportMemoryUsage() is true (before the check and again
 *     after every collection pass).
 *  4. The projected usage is total_used_memory + request, clamped to max_u64
 *     instead of wrapping around. If it is below expected_memory, return.
 *  5. One RunGarbageCollector() pass is executed and counted. A second pass
 *     follows only if the recomputed projection is still >= critical_memory
 *     and the per-frame pass budget is not exhausted.
 *
 * NOTE(review): static_cast(...) and std::numeric_limits::max carry no
 * template argument in this text; the angle-bracketed part looks lost in
 * extraction (presumably u64) - confirm against the upstream file.
 * NOTE(review): in this patch both InsertImage and JoinImages call this
 * function, and InsertImage then calls JoinImages, so a single insert appears
 * to run the check twice - confirm that is intended.
 * NOTE(review): RunGarbageCollector() derives high_priority_mode from
 * total_used_memory alone, so a pass triggered only by the projected figure
 * may not run in high-priority mode - confirm that is intended.
 */
::RunAllocationGarbageCollector(size_t requested_bytes) { + if (requested_bytes == 0) { + return; + } + + if (allocation_gc_frame != frame_tick) { + allocation_gc_frame = frame_tick; + allocation_gc_passes = 0; + } + if (allocation_gc_passes >= MAX_ALLOCATION_GC_PASSES_PER_FRAME) { + return; + } + + if (runtime.CanReportMemoryUsage()) { + total_used_memory = runtime.GetDeviceMemoryUsage(); + } + + const u64 request = static_cast(requested_bytes); + const u64 max_u64 = (std::numeric_limits::max)(); + const u64 projected_usage = request > (max_u64 - total_used_memory) + ? max_u64 + : total_used_memory + request; + if (projected_usage < expected_memory) { + return; + } + + RunGarbageCollector(); + ++allocation_gc_passes; + + if (runtime.CanReportMemoryUsage()) { + total_used_memory = runtime.GetDeviceMemoryUsage(); + } + + const u64 projected_after_gc = request > (max_u64 - total_used_memory) + ? max_u64 + : total_used_memory + request; + if (projected_after_gc >= critical_memory && + allocation_gc_passes < MAX_ALLOCATION_GC_PASSES_PER_FRAME) { + RunGarbageCollector(); + ++allocation_gc_passes; + + if (runtime.CanReportMemoryUsage()) { + total_used_memory = runtime.GetDeviceMemoryUsage(); + } + } +} + template void TextureCache

::RunGarbageCollector() { bool high_priority_mode = total_used_memory >= expected_memory; @@ -142,24 +186,19 @@ void TextureCache

::RunGarbageCollector() { return false; } - const bool is_large_sparse = lowmemorydevice && - image.info.is_sparse && - image.guest_size_bytes >= 256_MiB; - - if (!aggressive_mode && !is_large_sparse && - True(image.flags & ImageFlagBits::CostlyLoad)) { + if (!aggressive_mode && True(image.flags & ImageFlagBits::CostlyLoad)) { return false; } const bool must_download = image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap); - if (!high_priority_mode && !is_large_sparse && must_download) { + if (!high_priority_mode && must_download) { return false; } --num_iterations; - if (must_download && !is_large_sparse) { + if (must_download) { auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); const auto copies = FixSmallVectorADL(FullDownloadCopies(image.info)); image.DownloadMemory(map, copies); @@ -197,7 +236,6 @@ void TextureCache

::RunGarbageCollector() { // Single pass: collect all candidates, classified by tier const u64 normal_threshold = frame_tick > ticks_to_destroy ? frame_tick - ticks_to_destroy : 0; const u64 aggressive_threshold = frame_tick > 10 ? frame_tick - 10 : 0; - boost::container::small_vector sparse_candidates; boost::container::small_vector expired; boost::container::small_vector aggressive_expired; @@ -210,34 +248,20 @@ void TextureCache

::RunGarbageCollector() { expired.push_back(id); } else if (tick < aggressive_threshold) { aggressive_expired.push_back(id); - } else if (high_priority_mode && tick < frame_tick && - lowmemorydevice && image->info.is_sparse && - image->guest_size_bytes >= 256_MiB) { - sparse_candidates.push_back(id); } } SortByAge(expired); SortByAge(aggressive_expired); - // Tier 1: large sparse textures under memory pressure - for (const auto image_id : sparse_candidates) { - auto& image = slot_images[image_id]; - if (image.allocation_tick < frame_tick - 3) { - if (Cleanup(image_id)) { - break; - } - } - } - - // Tier 2: normal expiration + // Tier 1: normal expiration for (const auto image_id : expired) { if (Cleanup(image_id)) { break; } } - // Tier 3: if still critical, use aggressive threshold with more iterations + // Tier 2: if still critical, use aggressive threshold with more iterations if (total_used_memory >= critical_memory) { aggressive_mode = true; num_iterations = 40; @@ -1221,9 +1245,6 @@ void TextureCache

::RefreshContents(Image& image, ImageId image_id) { } image.flags &= ~ImageFlagBits::CpuModified; - if( lowmemorydevice && image.info.format == PixelFormat::BC1_RGBA_UNORM && MapSizeBytes(image) >= 256_MiB ) { - return; - } TrackImage(image, image_id); @@ -1633,49 +1654,19 @@ bool TextureCache

::ScaleDown(Image& image) { template ImageId TextureCache

::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, RelaxedOptions options) { + const size_t requested_size = CalculateGuestSizeInBytes(info); std::optional cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); if (!cpu_addr) { - const auto size = CalculateGuestSizeInBytes(info); - cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, size); + cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, requested_size); if (!cpu_addr) { const DAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space; - virtual_invalid_space += Common::AlignUp(size, 32); + virtual_invalid_space += Common::AlignUp(requested_size, 32); cpu_addr = std::optional(fake_addr); } } ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); - // For large sparse textures, aggressively clean up old allocations at same address - if (lowmemorydevice && info.is_sparse && CalculateGuestSizeInBytes(info) >= 256_MiB) { - const auto alloc_it = image_allocs_table.find(gpu_addr); - if (alloc_it != image_allocs_table.end()) { - const ImageAllocId alloc_id = alloc_it->second; - auto& alloc_images = slot_image_allocs[alloc_id].images; - - // Collect old images at this address that were created more than 2 frames ago - boost::container::small_vector to_delete; - for (ImageId old_image_id : alloc_images) { - Image& old_image = slot_images[old_image_id]; - if (old_image.info.is_sparse && - old_image.gpu_addr == gpu_addr && - old_image.allocation_tick < frame_tick - 2) { // Try not to delete fresh textures - to_delete.push_back(old_image_id); - } - } - - // Delete old images immediately - for (ImageId old_id : to_delete) { - Image& old_image = slot_images[old_id]; - LOG_DEBUG(HW_GPU, "Immediately deleting old sparse texture at 0x{:X} ({} MiB)", - gpu_addr, old_image.guest_size_bytes / (1024 * 1024)); - if (True(old_image.flags & ImageFlagBits::Tracked)) { - UntrackImage(old_image, old_id); - } - UnregisterImage(old_id); - DeleteImage(old_id, true); - } - } - } + 
RunAllocationGarbageCollector(requested_size); const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr); const Image& image = slot_images[image_id]; @@ -1693,25 +1684,7 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DA ImageInfo new_info = info; const size_t size_bytes = CalculateGuestSizeInBytes(new_info); - // Proactive cleanup for large sparse texture allocations - if (lowmemorydevice && new_info.is_sparse && size_bytes >= 256_MiB) { - const u64 estimated_alloc_size = size_bytes; - - if (total_used_memory + estimated_alloc_size >= critical_memory) { - LOG_DEBUG(HW_GPU, "Large sparse texture allocation ({} MiB) - running aggressive GC. " - "Current memory: {} MiB, Critical: {} MiB", - size_bytes / (1024 * 1024), - total_used_memory / (1024 * 1024), - critical_memory / (1024 * 1024)); - RunGarbageCollector(); - - // If still over threshold after GC, try one more aggressive pass - if (total_used_memory + estimated_alloc_size >= critical_memory) { - LOG_DEBUG(HW_GPU, "Still critically low on memory, running second GC pass"); - RunGarbageCollector(); - } - } - } + RunAllocationGarbageCollector(size_bytes); const bool broken_views = runtime.HasBrokenTextureViewFormats(); const bool native_bgr = runtime.HasNativeBgr(); diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index ba2af1bf44..e2c2c5d7d9 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -120,7 +120,7 @@ class TextureCache : public VideoCommon::ChannelSetupCaches void FillImageViews(DescriptorTable& table, @@ -478,7 +480,6 @@ private: u64 minimum_memory; u64 expected_memory; u64 critical_memory; - bool lowmemorydevice = false; size_t gpu_unswizzle_maxsize = 0; size_t swizzle_chunk_size = 0; u32 swizzle_slices_per_batch = 0; @@ -528,6 +529,8 @@ private: u64 modification_tick = 0; u64 frame_tick = 0; + u64 allocation_gc_frame = (std::numeric_limits::max)(); + u32 allocation_gc_passes = 0; u64 last_sampler_gc_frame = (std::numeric_limits::max)(); Common::ThreadWorker texture_decode_worker{1, "TextureDecoder"};