diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake index 5b0adad8dd..adc9902fcf 100644 --- a/CMakeModules/GenerateSCMRev.cmake +++ b/CMakeModules/GenerateSCMRev.cmake @@ -38,13 +38,13 @@ set(GIT_DESC ${BUILD_VERSION}) # Auto-updater metadata! Must somewhat mirror GitHub API endpoint if (NIGHTLY_BUILD) set(BUILD_AUTO_UPDATE_WEBSITE "https://github.com") - set(BUILD_AUTO_UPDATE_API "api.github.com") + set(BUILD_AUTO_UPDATE_API "https://api.github.com") set(BUILD_AUTO_UPDATE_API_PATH "/repos/") set(BUILD_AUTO_UPDATE_REPO "Eden-CI/Nightly") set(REPO_NAME "Eden Nightly") else() set(BUILD_AUTO_UPDATE_WEBSITE "https://git.eden-emu.dev") - set(BUILD_AUTO_UPDATE_API "git.eden-emu.dev") + set(BUILD_AUTO_UPDATE_API "https://git.eden-emu.dev") set(BUILD_AUTO_UPDATE_API_PATH "/api/v1/repos/") set(BUILD_AUTO_UPDATE_REPO "eden-emu/eden") set(REPO_NAME "Eden") diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 5c2c77e10a..5c57df424c 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -6,7 +6,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include -#include #include #include #include @@ -128,14 +127,83 @@ struct Memory::Impl { } } - [[nodiscard]] inline u8* GetPointerFromRasterizerCachedMemory(u64 vaddr) const { - auto const paddr = current_page_table->entries[vaddr >> YUZU_PAGEBITS].addr; - return paddr ? system.DeviceMemory().GetPointer(paddr + vaddr) : nullptr; + [[nodiscard]] u8* GetPointerFromRasterizerCachedMemory(u64 vaddr) const { + Common::PhysicalAddress const paddr = current_page_table->entries[vaddr >> YUZU_PAGEBITS].addr; + if (paddr) + return system.DeviceMemory().GetPointer(paddr + vaddr); + return {}; } - [[nodiscard]] inline u8* GetPointerFromDebugMemory(u64 vaddr) const { - auto const paddr = current_page_table->entries[vaddr >> YUZU_PAGEBITS].addr; - return paddr ? system.DeviceMemory().GetPointer(paddr + vaddr) : nullptr; + [[nodiscard]] u8* GetPointerFromDebugMemory(u64 vaddr) const { + const Common::PhysicalAddress paddr = current_page_table->entries[vaddr >> YUZU_PAGEBITS].addr; + if (paddr != 0) + return system.DeviceMemory().GetPointer(paddr + vaddr); + return {}; + } + + u8 Read8(const Common::ProcessAddress addr) { + return Read(addr); + } + + u16 Read16(const Common::ProcessAddress addr) { + if ((addr & 1) == 0) { + return Read(addr); + } else { + const u32 a{Read(addr)}; + const u32 b{Read(addr + sizeof(u8))}; + return static_cast((b << 8) | a); + } + } + + u32 Read32(const Common::ProcessAddress addr) { + if ((addr & 3) == 0) { + return Read(addr); + } else { + const u32 a{Read16(addr)}; + const u32 b{Read16(addr + sizeof(u16))}; + return (b << 16) | a; + } + } + + u64 Read64(const Common::ProcessAddress addr) { + if ((addr & 7) == 0) { + return Read(addr); + } else { + const u32 a{Read32(addr)}; + const u32 b{Read32(addr + sizeof(u32))}; + return (static_cast(b) << 32) | a; + } + } + + void Write8(const Common::ProcessAddress addr, const u8 data) { + Write(addr, data); + } + + void Write16(const Common::ProcessAddress addr, const u16 data) { + if ((addr & 1) == 0) { + Write(addr, data); + } else { + Write(addr, static_cast(data)); + Write(addr + sizeof(u8), static_cast(data >> 8)); + } + } + + void Write32(const Common::ProcessAddress addr, const u32 data) { + if ((addr & 3) == 0) { + Write(addr, data); + } else { + Write16(addr, static_cast(data)); + Write16(addr + sizeof(u16), static_cast(data >> 16)); + } + } + + void Write64(const Common::ProcessAddress addr, const u64 data) { + if ((addr & 7) == 0) { + Write(addr, data); + } else { + Write32(addr, static_cast(data)); + Write32(addr + sizeof(u32), static_cast(data >> 32)); + } } bool WriteExclusive8(const Common::ProcessAddress addr, const u8 data, const u8 expected) { @@ -590,7 +658,7 @@ struct Memory::Impl { } template - [[nodiscard]] inline u8* GetPointerImpl(u64 vaddr, F&& on_unmapped, G&& on_rasterizer) const { + [[nodiscard]] u8* GetPointerImpl(u64 vaddr, F&& on_unmapped, G&& on_rasterizer) const { // AARCH64 masks the upper 16 bit of all memory accesses vaddr &= 0xffffffffffffULL; if (AddressSpaceContains(*current_page_table, vaddr, 1)) [[likely]] { @@ -645,42 +713,18 @@ struct Memory::Impl { /// @returns The instance of T read from the specified virtual address. template inline T Read(Common::ProcessAddress vaddr) noexcept requires(std::is_trivially_copyable_v) { - auto const addr_c1 = GetInteger(vaddr); - if (!(sizeof(T) > 1 && (addr_c1 & 4095) + sizeof(T) > 4096)) { - if (auto const ptr_c1 = GetPointerImpl(addr_c1, [addr_c1] { - LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:016X}", sizeof(T) * 8, addr_c1); - }, [&] { - HandleRasterizerDownload(addr_c1, sizeof(T)); - }); ptr_c1) { - // It may be tempting to rewrite this particular section to use "reinterpret_cast"; - // afterall, it's trivially copyable so surely it can be copied ov- Alignment. - // Remember, alignment. memcpy() will deal with all the alignment extremely fast. - T result{}; - std::memcpy(&result, ptr_c1, sizeof(T)); - return result; - } - } else { - auto const addr_c2 = (addr_c1 & (~0xfff)) + 0x1000; - // page crossing: say if sizeof(T) = 2, vaddr = 4095 - // 4095 + 2 mod 4096 = 1 => 2 - 1 = 1, thus c1=1, c2=1 - auto const count_c2 = (addr_c1 + sizeof(T)) & 4095; - auto const count_c1 = sizeof(T) - count_c2; - if (auto const ptr_c1 = GetPointerImpl(addr_c1, [addr_c1] { - LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:016X}", sizeof(T) * 8, addr_c1); - }, [&] { - HandleRasterizerDownload(addr_c1, count_c1); - }); ptr_c1) { - if (auto const ptr_c2 = GetPointerImpl(addr_c2, [addr_c2] { - LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:016X}", sizeof(T) * 8, addr_c2); - }, [&] { - HandleRasterizerDownload(addr_c2, count_c2); - }); ptr_c2) { - std::array result{}; - std::memcpy(result.data() + 0, ptr_c1, count_c1); - std::memcpy(result.data() + count_c1, ptr_c2, count_c2); - return std::bit_cast(result); - } - } + const u64 addr = GetInteger(vaddr); + if (auto const ptr = GetPointerImpl(addr, [addr]() { + LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:016X}", sizeof(T) * 8, addr); + }, [&]() { + HandleRasterizerDownload(addr, sizeof(T)); + }); ptr) [[likely]] { + // It may be tempting to rewrite this particular section to use "reinterpret_cast"; + // afterall, it's trivially copyable so surely it can be copied ov- Alignment. + // Remember, alignment. memcpy() will deal with all the alignment extremely fast. + T result{}; + std::memcpy(&result, ptr, sizeof(T)); + return result; } return T{}; } @@ -690,37 +734,11 @@ struct Memory::Impl { /// @tparam T The data type to write to memory. template inline void Write(Common::ProcessAddress vaddr, const T data) noexcept requires(std::is_trivially_copyable_v) { - auto const addr_c1 = GetInteger(vaddr); - if (!(sizeof(T) > 1 && (addr_c1 & 4095) + sizeof(T) > 4096)) { - if (auto const ptr_c1 = GetPointerImpl(addr_c1, [addr_c1] { - LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:016X}", sizeof(T) * 8, addr_c1); - }, [&] { - HandleRasterizerWrite(addr_c1, sizeof(T)); - }); ptr_c1) { - std::memcpy(ptr_c1, &data, sizeof(T)); - } - } else { - auto const addr_c2 = (addr_c1 & (~0xfff)) + 0x1000; - // page crossing: say if sizeof(T) = 2, vaddr = 4095 - // 4095 + 2 mod 4096 = 1 => 2 - 1 = 1, thus c1=1, c2=1 - auto const count_c2 = (addr_c1 + sizeof(T)) & 4095; - auto const count_c1 = sizeof(T) - count_c2; - if (auto const ptr_c1 = GetPointerImpl(addr_c1, [addr_c1] { - LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X}", sizeof(T) * 8, addr_c1); - }, [&] { - HandleRasterizerWrite(addr_c1, count_c1); - }); ptr_c1) { - if (auto const ptr_c2 = GetPointerImpl(addr_c2, [addr_c2] { - LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X}", sizeof(T) * 8, addr_c2); - }, [&] { - HandleRasterizerWrite(addr_c2, count_c2); - }); ptr_c2) { - std::array tmp = std::bit_cast>(data); - std::memcpy(ptr_c1, tmp.data() + 0, count_c1); - std::memcpy(ptr_c2, tmp.data() + count_c1, count_c2); - } - } - } + const u64 addr = GetInteger(vaddr); + if (auto const ptr = GetPointerImpl(addr, [addr, data]() { + LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8, addr, u64(data)); + }, [&]() { HandleRasterizerWrite(addr, sizeof(T)); }); ptr) [[likely]] + std::memcpy(ptr, &data, sizeof(T)); } template @@ -924,35 +942,35 @@ const u8* Memory::GetPointer(Common::ProcessAddress vaddr) const { } u8 Memory::Read8(const Common::ProcessAddress addr) { - return impl->Read(addr); + return impl->Read8(addr); } u16 Memory::Read16(const Common::ProcessAddress addr) { - return impl->Read(addr); + return impl->Read16(addr); } u32 Memory::Read32(const Common::ProcessAddress addr) { - return impl->Read(addr); + return impl->Read32(addr); } u64 Memory::Read64(const Common::ProcessAddress addr) { - return impl->Read(addr); + return impl->Read64(addr); } void Memory::Write8(Common::ProcessAddress addr, u8 data) { - impl->Write(addr, data); + impl->Write8(addr, data); } void Memory::Write16(Common::ProcessAddress addr, u16 data) { - impl->Write(addr, data); + impl->Write16(addr, data); } void Memory::Write32(Common::ProcessAddress addr, u32 data) { - impl->Write(addr, data); + impl->Write32(addr, data); } void Memory::Write64(Common::ProcessAddress addr, u64 data) { - impl->Write(addr, data); + impl->Write64(addr, data); } bool Memory::WriteExclusive8(Common::ProcessAddress addr, u8 data, u8 expected) { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 71210ffe6e..fbdeba9346 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -70,14 +70,10 @@ TextureCache

::TextureCache(Runtime& runtime_, Tegra::MaxwellDeviceMemoryManag (std::max)((std::min)(device_local_memory - min_vacancy_critical, min_spacing_critical), DEFAULT_CRITICAL_MEMORY)); minimum_memory = static_cast((device_local_memory - mem_threshold) / 2); - - lowmemorydevice = false; } else { expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; minimum_memory = 0; - - lowmemorydevice = true; } const bool gpu_unswizzle_enabled = Settings::values.gpu_unswizzle_enabled.GetValue(); @@ -122,102 +118,44 @@ void TextureCache

::RunGarbageCollector() { bool aggressive_mode = false; u64 ticks_to_destroy = 0; size_t num_iterations = 0; - const auto Configure = [&](bool allow_aggressive) { high_priority_mode = total_used_memory >= expected_memory; aggressive_mode = allow_aggressive && total_used_memory >= critical_memory; ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 50ULL; num_iterations = aggressive_mode ? 40 : (high_priority_mode ? 20 : 10); }; - - const auto Cleanup = [this, &num_iterations, &high_priority_mode, - &aggressive_mode](ImageId image_id) { + const auto Cleanup = [this, &num_iterations, &high_priority_mode, &aggressive_mode](ImageId image_id) { if (num_iterations == 0) { return true; } --num_iterations; auto& image = slot_images[image_id]; - - // Never delete recently allocated sparse textures (within 3 frames) - const bool is_recently_allocated = image.allocation_tick >= frame_tick - 3; - if (is_recently_allocated && image.info.is_sparse) { - return false; - } - if (True(image.flags & ImageFlagBits::IsDecoding)) { - // This image is still being decoded, deleting it will invalidate the slot - // used by the async decoder thread. return false; } - - // Prioritize large sparse textures for cleanup - const bool is_large_sparse = lowmemorydevice && - image.info.is_sparse && - image.guest_size_bytes >= 256_MiB; - - if (!aggressive_mode && !is_large_sparse && - True(image.flags & ImageFlagBits::CostlyLoad)) { - return false; - } - - const bool must_download = - image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap); - if (!high_priority_mode && !is_large_sparse && must_download) { - return false; - } - - if (must_download && !is_large_sparse) { + if (image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap)) { auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); const auto copies = FixSmallVectorADL(FullDownloadCopies(image.info)); image.DownloadMemory(map, copies); runtime.Finish(); - SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span, - swizzle_data_buffer); + SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span, swizzle_data_buffer); } - if (True(image.flags & ImageFlagBits::Tracked)) { UntrackImage(image, image_id); } UnregisterImage(image_id); DeleteImage(image_id, image.scale_tick > frame_tick + 5); - - if (total_used_memory < critical_memory) { - if (aggressive_mode) { - // Sink the aggresiveness. - num_iterations >>= 2; - aggressive_mode = false; - return false; - } - if (high_priority_mode && total_used_memory < expected_memory) { - num_iterations >>= 1; - high_priority_mode = false; - } + if (aggressive_mode && total_used_memory < critical_memory) { + num_iterations >>= 2; + aggressive_mode = false; + } else if (high_priority_mode && total_used_memory < expected_memory) { + num_iterations >>= 1; + high_priority_mode = false; } return false; }; - - // Aggressively clear massive sparse textures - if (total_used_memory >= expected_memory) { - lru_cache.ForEachItemBelow(frame_tick, [&](ImageId image_id) { - auto& image = slot_images[image_id]; - // Only target sparse textures that are old enough - if (lowmemorydevice && - image.info.is_sparse && - image.guest_size_bytes >= 256_MiB && - image.allocation_tick < frame_tick - 3) { - LOG_DEBUG(HW_GPU, "GC targeting old sparse texture at 0x{:X} ({} MiB, age: {} frames)", - image.gpu_addr, image.guest_size_bytes / (1024 * 1024), - frame_tick - image.allocation_tick); - return Cleanup(image_id); - } - return false; - }); - } - Configure(false); lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, Cleanup); - - // If pressure is still too high, prune aggressively. if (total_used_memory >= critical_memory) { Configure(true); lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, Cleanup); @@ -1196,9 +1134,6 @@ void TextureCache

::RefreshContents(Image& image, ImageId image_id) { } image.flags &= ~ImageFlagBits::CpuModified; - if( lowmemorydevice && image.info.format == PixelFormat::BC1_RGBA_UNORM && MapSizeBytes(image) >= 256_MiB ) { - return; - } TrackImage(image, image_id); @@ -1619,39 +1554,6 @@ ImageId TextureCache

::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, } } ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); - - // For large sparse textures, aggressively clean up old allocations at same address - if (lowmemorydevice && info.is_sparse && CalculateGuestSizeInBytes(info) >= 256_MiB) { - const auto alloc_it = image_allocs_table.find(gpu_addr); - if (alloc_it != image_allocs_table.end()) { - const ImageAllocId alloc_id = alloc_it->second; - auto& alloc_images = slot_image_allocs[alloc_id].images; - - // Collect old images at this address that were created more than 2 frames ago - boost::container::small_vector to_delete; - for (ImageId old_image_id : alloc_images) { - Image& old_image = slot_images[old_image_id]; - if (old_image.info.is_sparse && - old_image.gpu_addr == gpu_addr && - old_image.allocation_tick < frame_tick - 2) { // Try not to delete fresh textures - to_delete.push_back(old_image_id); - } - } - - // Delete old images immediately - for (ImageId old_id : to_delete) { - Image& old_image = slot_images[old_id]; - LOG_DEBUG(HW_GPU, "Immediately deleting old sparse texture at 0x{:X} ({} MiB)", - gpu_addr, old_image.guest_size_bytes / (1024 * 1024)); - if (True(old_image.flags & ImageFlagBits::Tracked)) { - UntrackImage(old_image, old_id); - } - UnregisterImage(old_id); - DeleteImage(old_id, true); - } - } - } - const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr); const Image& image = slot_images[image_id]; // Using "image.gpu_addr" instead of "gpu_addr" is important because it might be different @@ -1667,27 +1569,6 @@ template ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DAddr cpu_addr) { ImageInfo new_info = info; const size_t size_bytes = CalculateGuestSizeInBytes(new_info); - - // Proactive cleanup for large sparse texture allocations - if (lowmemorydevice && new_info.is_sparse && size_bytes >= 256_MiB) { - const u64 estimated_alloc_size = size_bytes; - - if (total_used_memory + estimated_alloc_size >= critical_memory) { - LOG_DEBUG(HW_GPU, "Large sparse texture allocation ({} MiB) - running aggressive GC. " - "Current memory: {} MiB, Critical: {} MiB", - size_bytes / (1024 * 1024), - total_used_memory / (1024 * 1024), - critical_memory / (1024 * 1024)); - RunGarbageCollector(); - - // If still over threshold after GC, try one more aggressive pass - if (total_used_memory + estimated_alloc_size >= critical_memory) { - LOG_DEBUG(HW_GPU, "Still critically low on memory, running second GC pass"); - RunGarbageCollector(); - } - } - } - const bool broken_views = runtime.HasBrokenTextureViewFormats(); const bool native_bgr = runtime.HasNativeBgr(); join_overlap_ids.clear(); diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 4b4061f21d..47f52c5c99 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -478,7 +478,6 @@ private: u64 minimum_memory; u64 expected_memory; u64 critical_memory; - bool lowmemorydevice = false; size_t gpu_unswizzle_maxsize = 0; size_t swizzle_chunk_size = 0; u32 swizzle_slices_per_batch = 0;