diff --git a/src/core/device_memory_manager.h b/src/core/device_memory_manager.h index 3d97fdcc5c..24465f802d 100644 --- a/src/core/device_memory_manager.h +++ b/src/core/device_memory_manager.h @@ -14,12 +14,17 @@ #include #include #include +#include #include "common/common_types.h" #include "common/range_mutex.h" #include "common/scratch_buffer.h" #include "common/virtual_buffer.h" +#if defined(__linux__) +#include +#endif + namespace Core { constexpr size_t DEVICE_PAGEBITS = 12ULL; @@ -45,6 +50,74 @@ class DeviceMemoryManager { using DeviceMethods = typename Traits::DeviceMethods; public: + class MirrorMapping { + public: + MirrorMapping() = default; + MirrorMapping(u8* mapped_base_, size_t mapped_size_, size_t data_offset_) + : mapped_base{mapped_base_}, mapped_size{mapped_size_}, data_offset{data_offset_} {} + + MirrorMapping(const MirrorMapping&) = delete; + MirrorMapping& operator=(const MirrorMapping&) = delete; + + MirrorMapping(MirrorMapping&& other) noexcept { + MoveFrom(other); + } + + MirrorMapping& operator=(MirrorMapping&& other) noexcept { + if (this != &other) { + Release(); + MoveFrom(other); + } + return *this; + } + + ~MirrorMapping() { + Release(); + } + + [[nodiscard]] bool IsValid() const noexcept { + return mapped_base != nullptr; + } + + [[nodiscard]] explicit operator bool() const noexcept { + return IsValid(); + } + + [[nodiscard]] u8* Data() noexcept { + return mapped_base ? mapped_base + data_offset : nullptr; + } + + [[nodiscard]] const u8* Data() const noexcept { + return mapped_base ? mapped_base + data_offset : nullptr; + } + + [[nodiscard]] size_t Size() const noexcept { + return mapped_size >= data_offset ? mapped_size - data_offset : 0; + } + + private: + void MoveFrom(MirrorMapping& other) noexcept { + mapped_base = std::exchange(other.mapped_base, nullptr); + mapped_size = std::exchange(other.mapped_size, 0); + data_offset = std::exchange(other.data_offset, 0); + } + + void Release() noexcept { +#if defined(__linux__) + if (mapped_base) { + munmap(mapped_base, mapped_size); + } +#endif + mapped_base = nullptr; + mapped_size = 0; + data_offset = 0; + } + + u8* mapped_base{}; + size_t mapped_size{}; + size_t data_offset{}; + }; + DeviceMemoryManager(const DeviceMemory& device_memory); ~DeviceMemoryManager(); @@ -118,6 +191,11 @@ public: void WriteBlock(DAddr address, const void* src_pointer, size_t size); void WriteBlockUnsafe(DAddr address, const void* src_pointer, size_t size); + [[nodiscard]] MirrorMapping CreateMirrorMapping(DAddr address, size_t size) const; + [[nodiscard]] u64 GetMappingVersion() const noexcept { + return mapping_version.load(std::memory_order_acquire); + } + Asid RegisterProcess(Memory::Memory* memory); void UnregisterProcess(Asid id); @@ -236,6 +314,7 @@ private: std::unique_ptr cached_pages; Common::RangeMutex counter_guard; std::mutex mapping_guard; + std::atomic mapping_version{1}; }; diff --git a/src/core/device_memory_manager.inc b/src/core/device_memory_manager.inc index 15e3a1ad52..7ff381dfa0 100644 --- a/src/core/device_memory_manager.inc +++ b/src/core/device_memory_manager.inc @@ -4,6 +4,10 @@ // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#if defined(__linux__) && !defined(_GNU_SOURCE) +#define _GNU_SOURCE +#endif + #include #include #include @@ -11,6 +15,17 @@ #include #include +#if defined(__linux__) +#include +#ifndef MREMAP_MAYMOVE +#define MREMAP_MAYMOVE 1 +#endif +#ifndef MREMAP_FIXED +#define MREMAP_FIXED 2 +#endif +extern "C" void* mremap(void* old_address, size_t old_size, size_t new_size, int flags, ...); +#endif + #include "common/address_space.h" #include "common/address_space.inc" #include "common/alignment.h" @@ -240,6 +255,7 @@ void DeviceMemoryManager::Map(DAddr address, VAddr virtual_address, size impl->multi_dev_address.Register(new_dev, start_id); } t_slot = {}; + mapping_version.fetch_add(1, std::memory_order_release); if (track) { TrackContinuityImpl(address, virtual_address, size, asid); } @@ -272,6 +288,7 @@ void DeviceMemoryManager::Unmap(DAddr address, size_t size) { } } t_slot = {}; + mapping_version.fetch_add(1, std::memory_order_release); } template void DeviceMemoryManager::TrackContinuityImpl(DAddr address, VAddr virtual_address, @@ -315,6 +332,78 @@ const u8* DeviceMemoryManager::GetSpan(const DAddr src_addr, const std:: return nullptr; } +template +typename DeviceMemoryManager::MirrorMapping DeviceMemoryManager::CreateMirrorMapping( + DAddr address, size_t size) const { +#if !defined(__linux__) + return {}; +#else + if (size == 0) { + return {}; + } + + const DAddr aligned_address = Common::AlignDown(address, DAddr{page_size}); + const size_t data_offset = static_cast(address - aligned_address); + const size_t mapped_size = Common::AlignUp(size + data_offset, page_size); + + struct Segment { + const u8* source; + size_t size; + }; + + std::vector segments; + segments.reserve(Common::DivCeil(mapped_size, page_size)); + + size_t remaining_size = mapped_size; + size_t page_index = aligned_address >> page_bits; + while (remaining_size > 0) { + const size_t next_pages = std::size_t(tracked_entries[page_index].continuity_tracker); + const size_t copy_amount = (std::min)(next_pages << page_bits, remaining_size); + + const auto phys_addr = tracked_entries[page_index].compressed_physical_ptr; + if (phys_addr == 0) { + return {}; + } + + const auto* source = + GetPointerFromRaw(PAddr(phys_addr - 1U) << Memory::YUZU_PAGEBITS); + + if (!segments.empty() && segments.back().source + segments.back().size == source) { + segments.back().size += copy_amount; + } else { + segments.push_back({source, copy_amount}); + } + + page_index += next_pages; + remaining_size -= copy_amount; + } + + void* const mirror_base = + mmap(nullptr, mapped_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mirror_base == MAP_FAILED) { + return {}; + } + + size_t mirror_offset = 0; + for (const auto& segment : segments) { + void* const target = static_cast(mirror_base) + mirror_offset; + void* const result = mremap(const_cast(segment.source), 0, segment.size, + MREMAP_MAYMOVE | MREMAP_FIXED, target); + if (result == MAP_FAILED) { + munmap(mirror_base, mapped_size); + return {}; + } + if (mprotect(result, segment.size, PROT_READ | PROT_WRITE) != 0) { + munmap(mirror_base, mapped_size); + return {}; + } + mirror_offset += segment.size; + } + + return MirrorMapping{static_cast(mirror_base), mapped_size, data_offset}; +#endif +} + template void DeviceMemoryManager::InnerGatherDeviceAddresses(Common::ScratchBuffer& buffer, PAddr address) { diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 014b4a318e..802afaf197 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -104,6 +104,51 @@ void BufferCache

::TickFrame() { RunGarbageCollector(); } ++frame_tick; + static constexpr u64 mirror_stats_log_interval = 300; + if ((frame_tick % mirror_stats_log_interval) == 0) { + const u64 upload_hit_copies = mirror_upload_hit_copies - mirror_upload_hit_copies_last; + const u64 upload_miss_copies = mirror_upload_miss_copies - mirror_upload_miss_copies_last; + const u64 upload_hit_bytes = mirror_upload_hit_bytes - mirror_upload_hit_bytes_last; + const u64 upload_miss_bytes = mirror_upload_miss_bytes - mirror_upload_miss_bytes_last; + const u64 download_hit_copies = + mirror_download_hit_copies - mirror_download_hit_copies_last; + const u64 download_miss_copies = + mirror_download_miss_copies - mirror_download_miss_copies_last; + const u64 download_hit_bytes = mirror_download_hit_bytes - mirror_download_hit_bytes_last; + const u64 download_miss_bytes = + mirror_download_miss_bytes - mirror_download_miss_bytes_last; + + const u64 upload_total_copies = upload_hit_copies + upload_miss_copies; + const u64 download_total_copies = download_hit_copies + download_miss_copies; + if (upload_total_copies > 0 || download_total_copies > 0) { + const double upload_hit_ratio = upload_total_copies > 0 + ? (100.0 * static_cast(upload_hit_copies) / + static_cast(upload_total_copies)) + : 0.0; + const double download_hit_ratio = + download_total_copies > 0 + ? (100.0 * static_cast(download_hit_copies) / + static_cast(download_total_copies)) + : 0.0; + LOG_INFO(HW_GPU, + "Buffer mirror counters (last {} frames): upload hit/miss copies = {}/{}, " + "hit ratio = {:.2f}%, bytes hit/miss = {}/{}, download hit/miss copies = " + "{}/{}, hit ratio = {:.2f}%, bytes hit/miss = {}/{}", + mirror_stats_log_interval, upload_hit_copies, upload_miss_copies, + upload_hit_ratio, upload_hit_bytes, upload_miss_bytes, download_hit_copies, + download_miss_copies, download_hit_ratio, download_hit_bytes, + download_miss_bytes); + } + + mirror_upload_hit_copies_last = mirror_upload_hit_copies; + mirror_upload_miss_copies_last = mirror_upload_miss_copies; + mirror_upload_hit_bytes_last = mirror_upload_hit_bytes; + mirror_upload_miss_bytes_last = mirror_upload_miss_bytes; + mirror_download_hit_copies_last = mirror_download_hit_copies; + mirror_download_miss_copies_last = mirror_download_miss_copies; + mirror_download_hit_bytes_last = mirror_download_hit_bytes; + mirror_download_miss_bytes_last = mirror_download_miss_bytes; + } delayed_destruction_ring.Tick(); for (auto& buffer : async_buffers_death_ring) { @@ -1567,6 +1612,21 @@ BufferId BufferCache

::CreateBuffer(DAddr device_addr, u32 wanted_size) { const u32 size = static_cast(overlap.end - overlap.begin); const BufferId new_buffer_id = slot_buffers.insert(runtime, overlap.begin, size); auto& new_buffer = slot_buffers[new_buffer_id]; + const u64 current_mapping_version = device_memory.GetMappingVersion(); + if (mirror_mapping_version != current_mapping_version) { + buffer_mirrors.clear(); + mirror_mapping_version = current_mapping_version; + } + buffer_mirrors.erase(new_buffer.CpuAddr()); + if (auto mirror = + device_memory.CreateMirrorMapping(new_buffer.CpuAddr(), new_buffer.SizeBytes()); + mirror) { + buffer_mirrors.emplace(new_buffer.CpuAddr(), std::move(mirror)); + if (!mirror_creation_logged) [[unlikely]] { + LOG_INFO(HW_GPU, "Buffer mirror mapping enabled (first successful mapping)"); + mirror_creation_logged = true; + } + } const size_t size_bytes = new_buffer.SizeBytes(); runtime.ClearBuffer(new_buffer, 0, size_bytes, 0); new_buffer.MarkUsage(0, size_bytes); @@ -1660,15 +1720,52 @@ void BufferCache

::ImmediateUploadMemory([[maybe_unused]] Buffer& buffer, [[maybe_unused]] std::span copies) { if constexpr (!USE_MEMORY_MAPS_FOR_UPLOADS) { std::span immediate_buffer; + const auto resolve_mirror_pointer = [&]() -> const u8* { + const u64 current_mapping_version = device_memory.GetMappingVersion(); + if (mirror_mapping_version != current_mapping_version) { + buffer_mirrors.clear(); + mirror_mapping_version = current_mapping_version; + } + + auto mirror_it = buffer_mirrors.find(buffer.CpuAddr()); + if (mirror_it == buffer_mirrors.end()) { + if (auto mirror = + device_memory.CreateMirrorMapping(buffer.CpuAddr(), buffer.SizeBytes()); + mirror) { + auto [it, inserted] = + buffer_mirrors.emplace(buffer.CpuAddr(), std::move(mirror)); + mirror_it = it; + if (inserted && !mirror_creation_logged) [[unlikely]] { + LOG_INFO(HW_GPU, "Buffer mirror mapping enabled (first successful mapping)"); + mirror_creation_logged = true; + } + } + } + return mirror_it != buffer_mirrors.end() ? mirror_it->second.Data() : nullptr; + }; + const u8* const mirror_pointer = resolve_mirror_pointer(); for (const BufferCopy& copy : copies) { std::span upload_span; const DAddr device_addr = buffer.CpuAddr() + copy.dst_offset; - if (IsRangeGranular(device_addr, copy.size)) { + if (mirror_pointer != nullptr) { + mirror_upload_hit_copies++; + mirror_upload_hit_bytes += copy.size; + if (!mirror_upload_logged) [[unlikely]] { + LOG_INFO(HW_GPU, "Buffer mirror fast path active for upload sync"); + mirror_upload_logged = true; + } + upload_span = + std::span(mirror_pointer + static_cast(copy.dst_offset), copy.size); + } else if (IsRangeGranular(device_addr, copy.size)) { + mirror_upload_miss_copies++; + mirror_upload_miss_bytes += copy.size; auto* const ptr = device_memory.GetPointer(device_addr); if (ptr != nullptr) { upload_span = std::span(ptr, copy.size); } } else { + mirror_upload_miss_copies++; + mirror_upload_miss_bytes += copy.size; if (immediate_buffer.empty()) { immediate_buffer = ImmediateBuffer(largest_copy); } @@ -1687,10 +1784,47 @@ void BufferCache

::MappedUploadMemory([[maybe_unused]] Buffer& buffer, if constexpr (USE_MEMORY_MAPS) { auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes); const std::span staging_pointer = upload_staging.mapped_span; + const auto resolve_mirror_pointer = [&]() -> const u8* { + const u64 current_mapping_version = device_memory.GetMappingVersion(); + if (mirror_mapping_version != current_mapping_version) { + buffer_mirrors.clear(); + mirror_mapping_version = current_mapping_version; + } + + auto mirror_it = buffer_mirrors.find(buffer.CpuAddr()); + if (mirror_it == buffer_mirrors.end()) { + if (auto mirror = + device_memory.CreateMirrorMapping(buffer.CpuAddr(), buffer.SizeBytes()); + mirror) { + auto [it, inserted] = + buffer_mirrors.emplace(buffer.CpuAddr(), std::move(mirror)); + mirror_it = it; + if (inserted && !mirror_creation_logged) [[unlikely]] { + LOG_INFO(HW_GPU, "Buffer mirror mapping enabled (first successful mapping)"); + mirror_creation_logged = true; + } + } + } + return mirror_it != buffer_mirrors.end() ? mirror_it->second.Data() : nullptr; + }; + const u8* const mirror_pointer = resolve_mirror_pointer(); for (BufferCopy& copy : copies) { u8* const src_pointer = staging_pointer.data() + copy.src_offset; const DAddr device_addr = buffer.CpuAddr() + copy.dst_offset; - device_memory.ReadBlockUnsafe(device_addr, src_pointer, copy.size); + if (mirror_pointer != nullptr) { + mirror_upload_hit_copies++; + mirror_upload_hit_bytes += copy.size; + if (!mirror_upload_logged) [[unlikely]] { + LOG_INFO(HW_GPU, "Buffer mirror fast path active for upload sync"); + mirror_upload_logged = true; + } + std::memcpy(src_pointer, mirror_pointer + static_cast(copy.dst_offset), + copy.size); + } else { + mirror_upload_miss_copies++; + mirror_upload_miss_bytes += copy.size; + device_memory.ReadBlockUnsafe(device_addr, src_pointer, copy.size); + } // Apply the staging offset copy.src_offset += upload_staging.offset; @@ -1783,6 +1917,30 @@ void BufferCache

::DownloadBufferMemory(Buffer& buffer, DAddr device_addr, u64 if constexpr (USE_MEMORY_MAPS) { auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); const u8* const mapped_memory = download_staging.mapped_span.data(); + const auto resolve_mirror_pointer = [&]() -> u8* { + const u64 current_mapping_version = device_memory.GetMappingVersion(); + if (mirror_mapping_version != current_mapping_version) { + buffer_mirrors.clear(); + mirror_mapping_version = current_mapping_version; + } + + auto mirror_it = buffer_mirrors.find(buffer.CpuAddr()); + if (mirror_it == buffer_mirrors.end()) { + if (auto mirror = + device_memory.CreateMirrorMapping(buffer.CpuAddr(), buffer.SizeBytes()); + mirror) { + auto [it, inserted] = + buffer_mirrors.emplace(buffer.CpuAddr(), std::move(mirror)); + mirror_it = it; + if (inserted && !mirror_creation_logged) [[unlikely]] { + LOG_INFO(HW_GPU, "Buffer mirror mapping enabled (first successful mapping)"); + mirror_creation_logged = true; + } + } + } + return mirror_it != buffer_mirrors.end() ? mirror_it->second.Data() : nullptr; + }; + u8* const mirror_pointer = resolve_mirror_pointer(); const std::span copies_span(copies.data(), copies.data() + copies.size()); for (BufferCopy& copy : copies) { // Modify copies to have the staging offset in mind @@ -1796,14 +1954,65 @@ void BufferCache

::DownloadBufferMemory(Buffer& buffer, DAddr device_addr, u64 // Undo the modified offset const u64 dst_offset = copy.dst_offset - download_staging.offset; const u8* copy_mapped_memory = mapped_memory + dst_offset; - device_memory.WriteBlockUnsafe(copy_device_addr, copy_mapped_memory, copy.size); + if (mirror_pointer != nullptr) { + mirror_download_hit_copies++; + mirror_download_hit_bytes += copy.size; + if (!mirror_download_logged) [[unlikely]] { + LOG_INFO(HW_GPU, "Buffer mirror fast path active for download sync"); + mirror_download_logged = true; + } + std::memcpy(mirror_pointer + static_cast(copy.src_offset), + copy_mapped_memory, copy.size); + } else { + mirror_download_miss_copies++; + mirror_download_miss_bytes += copy.size; + device_memory.WriteBlockUnsafe(copy_device_addr, copy_mapped_memory, copy.size); + } } } else { const std::span immediate_buffer = ImmediateBuffer(largest_copy); + const auto resolve_mirror_pointer = [&]() -> u8* { + const u64 current_mapping_version = device_memory.GetMappingVersion(); + if (mirror_mapping_version != current_mapping_version) { + buffer_mirrors.clear(); + mirror_mapping_version = current_mapping_version; + } + + auto mirror_it = buffer_mirrors.find(buffer.CpuAddr()); + if (mirror_it == buffer_mirrors.end()) { + if (auto mirror = + device_memory.CreateMirrorMapping(buffer.CpuAddr(), buffer.SizeBytes()); + mirror) { + auto [it, inserted] = + buffer_mirrors.emplace(buffer.CpuAddr(), std::move(mirror)); + mirror_it = it; + if (inserted && !mirror_creation_logged) [[unlikely]] { + LOG_INFO(HW_GPU, "Buffer mirror mapping enabled (first successful mapping)"); + mirror_creation_logged = true; + } + } + } + return mirror_it != buffer_mirrors.end() ? mirror_it->second.Data() : nullptr; + }; + u8* const mirror_pointer = resolve_mirror_pointer(); for (const BufferCopy& copy : copies) { buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); const DAddr copy_device_addr = buffer.CpuAddr() + copy.src_offset; - device_memory.WriteBlockUnsafe(copy_device_addr, immediate_buffer.data(), copy.size); + if (mirror_pointer != nullptr) { + mirror_download_hit_copies++; + mirror_download_hit_bytes += copy.size; + if (!mirror_download_logged) [[unlikely]] { + LOG_INFO(HW_GPU, "Buffer mirror fast path active for download sync"); + mirror_download_logged = true; + } + std::memcpy(mirror_pointer + static_cast(copy.src_offset), + immediate_buffer.data(), copy.size); + } else { + mirror_download_miss_copies++; + mirror_download_miss_bytes += copy.size; + device_memory.WriteBlockUnsafe(copy_device_addr, immediate_buffer.data(), + copy.size); + } } } } @@ -1844,6 +2053,10 @@ void BufferCache

::DeleteBuffer(BufferId buffer_id, bool do_not_mark) { if (!do_not_mark) { Buffer& buffer = slot_buffers[buffer_id]; memory_tracker.MarkRegionAsCpuModified(buffer.CpuAddr(), buffer.SizeBytes()); + buffer_mirrors.erase(buffer.CpuAddr()); + } else { + const Buffer& buffer = slot_buffers[buffer_id]; + buffer_mirrors.erase(buffer.CpuAddr()); } Unregister(buffer_id); diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index 08524bd854..9ed1d8c35a 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h @@ -473,6 +473,8 @@ private: Tegra::MaxwellDeviceMemoryManager& device_memory; Common::SlotVector slot_buffers; + ankerl::unordered_dense::map + buffer_mirrors; #ifdef YUZU_LEGACY static constexpr size_t TICKS_TO_DESTROY = 6; #else @@ -522,6 +524,26 @@ private: std::array> CACHING_PAGEBITS)> page_table; Common::ScratchBuffer tmp_buffer; + bool mirror_creation_logged = false; + bool mirror_upload_logged = false; + bool mirror_download_logged = false; + u64 mirror_mapping_version = 0; + u64 mirror_upload_hit_copies = 0; + u64 mirror_upload_miss_copies = 0; + u64 mirror_upload_hit_bytes = 0; + u64 mirror_upload_miss_bytes = 0; + u64 mirror_download_hit_copies = 0; + u64 mirror_download_miss_copies = 0; + u64 mirror_download_hit_bytes = 0; + u64 mirror_download_miss_bytes = 0; + u64 mirror_upload_hit_copies_last = 0; + u64 mirror_upload_miss_copies_last = 0; + u64 mirror_upload_hit_bytes_last = 0; + u64 mirror_upload_miss_bytes_last = 0; + u64 mirror_download_hit_copies_last = 0; + u64 mirror_download_miss_copies_last = 0; + u64 mirror_download_hit_bytes_last = 0; + u64 mirror_download_miss_bytes_last = 0; }; } // namespace VideoCommon