[memory] Add mirror mapping for buffer sync

The mirror-backed fast paths will be enabled once they are determined safe for testing; the diagnostic logging must be removed before merge.
This commit is contained in:
wildcard 2026-03-25 12:23:51 +01:00
parent f0d77e86e3
commit 83f3150dc1
4 changed files with 407 additions and 4 deletions

View file

@ -14,12 +14,17 @@
#include <mutex>
#include <vector>
#include <span>
#include <utility>
#include "common/common_types.h"
#include "common/range_mutex.h"
#include "common/scratch_buffer.h"
#include "common/virtual_buffer.h"
#if defined(__linux__)
#include <sys/mman.h>
#endif
namespace Core {
constexpr size_t DEVICE_PAGEBITS = 12ULL;
@ -45,6 +50,74 @@ class DeviceMemoryManager {
using DeviceMethods = typename Traits::DeviceMethods;
public:
class MirrorMapping {
public:
MirrorMapping() = default;
MirrorMapping(u8* mapped_base_, size_t mapped_size_, size_t data_offset_)
: mapped_base{mapped_base_}, mapped_size{mapped_size_}, data_offset{data_offset_} {}
MirrorMapping(const MirrorMapping&) = delete;
MirrorMapping& operator=(const MirrorMapping&) = delete;
MirrorMapping(MirrorMapping&& other) noexcept {
MoveFrom(other);
}
MirrorMapping& operator=(MirrorMapping&& other) noexcept {
if (this != &other) {
Release();
MoveFrom(other);
}
return *this;
}
~MirrorMapping() {
Release();
}
[[nodiscard]] bool IsValid() const noexcept {
return mapped_base != nullptr;
}
[[nodiscard]] explicit operator bool() const noexcept {
return IsValid();
}
[[nodiscard]] u8* Data() noexcept {
return mapped_base ? mapped_base + data_offset : nullptr;
}
[[nodiscard]] const u8* Data() const noexcept {
return mapped_base ? mapped_base + data_offset : nullptr;
}
[[nodiscard]] size_t Size() const noexcept {
return mapped_size >= data_offset ? mapped_size - data_offset : 0;
}
private:
void MoveFrom(MirrorMapping& other) noexcept {
mapped_base = std::exchange(other.mapped_base, nullptr);
mapped_size = std::exchange(other.mapped_size, 0);
data_offset = std::exchange(other.data_offset, 0);
}
void Release() noexcept {
#if defined(__linux__)
if (mapped_base) {
munmap(mapped_base, mapped_size);
}
#endif
mapped_base = nullptr;
mapped_size = 0;
data_offset = 0;
}
u8* mapped_base{};
size_t mapped_size{};
size_t data_offset{};
};
DeviceMemoryManager(const DeviceMemory& device_memory);
~DeviceMemoryManager();
@ -118,6 +191,11 @@ public:
void WriteBlock(DAddr address, const void* src_pointer, size_t size);
void WriteBlockUnsafe(DAddr address, const void* src_pointer, size_t size);
[[nodiscard]] MirrorMapping CreateMirrorMapping(DAddr address, size_t size) const;
// Monotonic counter that Map/Unmap bump on every page-table change; callers
// cache it to detect when previously created mirror mappings may be stale.
[[nodiscard]] u64 GetMappingVersion() const noexcept {
// Acquire pairs with the release fetch_add in Map/Unmap so a reader that
// observes a new version also observes the mapping changes that produced it.
return mapping_version.load(std::memory_order_acquire);
}
Asid RegisterProcess(Memory::Memory* memory);
void UnregisterProcess(Asid id);
@ -236,6 +314,7 @@ private:
std::unique_ptr<CachedPages> cached_pages;
Common::RangeMutex counter_guard;
std::mutex mapping_guard;
std::atomic<u64> mapping_version{1};
};

View file

@ -4,6 +4,10 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#if defined(__linux__) && !defined(_GNU_SOURCE)
#define _GNU_SOURCE
#endif
#include <atomic>
#include <limits>
#include <memory>
@ -11,6 +15,17 @@
#include <algorithm>
#include <vector>
#if defined(__linux__)
#include <sys/mman.h>
#ifndef MREMAP_MAYMOVE
#define MREMAP_MAYMOVE 1
#endif
#ifndef MREMAP_FIXED
#define MREMAP_FIXED 2
#endif
extern "C" void* mremap(void* old_address, size_t old_size, size_t new_size, int flags, ...);
#endif
#include "common/address_space.h"
#include "common/address_space.inc"
#include "common/alignment.h"
@ -240,6 +255,7 @@ void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size
impl->multi_dev_address.Register(new_dev, start_id);
}
t_slot = {};
mapping_version.fetch_add(1, std::memory_order_release);
if (track) {
TrackContinuityImpl(address, virtual_address, size, asid);
}
@ -272,6 +288,7 @@ void DeviceMemoryManager<Traits>::Unmap(DAddr address, size_t size) {
}
}
t_slot = {};
mapping_version.fetch_add(1, std::memory_order_release);
}
template <typename Traits>
void DeviceMemoryManager<Traits>::TrackContinuityImpl(DAddr address, VAddr virtual_address,
@ -315,6 +332,78 @@ const u8* DeviceMemoryManager<Traits>::GetSpan(const DAddr src_addr, const std::
return nullptr;
}
template <typename Traits>
// Builds a contiguous host-virtual view ("mirror") of the possibly scattered
// host pages backing the device range [address, address + size). Returns an
// empty mapping on any failure or on non-Linux platforms. The caller owns the
// returned MirrorMapping; its destructor unmaps the region.
typename DeviceMemoryManager<Traits>::MirrorMapping DeviceMemoryManager<Traits>::CreateMirrorMapping(
DAddr address, size_t size) const {
#if !defined(__linux__)
// mremap-based mirroring is Linux-only; other platforms get the slow path.
return {};
#else
if (size == 0) {
return {};
}
// Work on whole pages: the requested range is widened to page boundaries and
// data_offset records where the caller's data starts inside the first page.
const DAddr aligned_address = Common::AlignDown(address, DAddr{page_size});
const size_t data_offset = static_cast<size_t>(address - aligned_address);
const size_t mapped_size = Common::AlignUp(size + data_offset, page_size);
// A run of device pages whose backing host memory is contiguous.
struct Segment {
const u8* source;
size_t size;
};
std::vector<Segment> segments;
segments.reserve(Common::DivCeil(mapped_size, page_size));
// First pass: walk the page table and coalesce host-contiguous runs.
// NOTE(review): tracked_entries is read without holding mapping_guard; a
// concurrent Map/Unmap could race this walk — confirm callers serialize, or
// rely on GetMappingVersion invalidation being sufficient.
size_t remaining_size = mapped_size;
size_t page_index = aligned_address >> page_bits;
while (remaining_size > 0) {
// NOTE(review): if continuity_tracker can ever be 0 this loop never
// advances — verify the tracker invariant guarantees >= 1 page.
const size_t next_pages = std::size_t(tracked_entries[page_index].continuity_tracker);
const size_t copy_amount = (std::min)(next_pages << page_bits, remaining_size);
const auto phys_addr = tracked_entries[page_index].compressed_physical_ptr;
if (phys_addr == 0) {
// Unmapped page inside the range: cannot mirror it, bail out.
return {};
}
// compressed_physical_ptr is stored biased by +1; undo that here.
const auto* source =
GetPointerFromRaw<u8>(PAddr(phys_addr - 1U) << Memory::YUZU_PAGEBITS);
if (!segments.empty() && segments.back().source + segments.back().size == source) {
// Host-contiguous with the previous run: extend it.
segments.back().size += copy_amount;
} else {
segments.push_back({source, copy_amount});
}
page_index += next_pages;
remaining_size -= copy_amount;
}
// Reserve one contiguous address range; PROT_NONE until each slice is
// populated below.
void* const mirror_base =
mmap(nullptr, mapped_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (mirror_base == MAP_FAILED) {
return {};
}
// Second pass: splice each source run into the reservation.
// NOTE(review): mremap with old_size == 0 duplicates a mapping only when the
// source is MAP_SHARED on Linux; confirm the backing device memory is mapped
// shared, otherwise this fails (or worse on old kernels).
size_t mirror_offset = 0;
for (const auto& segment : segments) {
void* const target = static_cast<u8*>(mirror_base) + mirror_offset;
void* const result = mremap(const_cast<u8*>(segment.source), 0, segment.size,
MREMAP_MAYMOVE | MREMAP_FIXED, target);
if (result == MAP_FAILED) {
// Unmapping the whole reservation also drops the slices already
// spliced in; the original source mappings are untouched.
munmap(mirror_base, mapped_size);
return {};
}
if (mprotect(result, segment.size, PROT_READ | PROT_WRITE) != 0) {
munmap(mirror_base, mapped_size);
return {};
}
mirror_offset += segment.size;
}
return MirrorMapping{static_cast<u8*>(mirror_base), mapped_size, data_offset};
#endif
}
template <typename Traits>
void DeviceMemoryManager<Traits>::InnerGatherDeviceAddresses(Common::ScratchBuffer<u32>& buffer,
PAddr address) {

View file

@ -104,6 +104,51 @@ void BufferCache<P>::TickFrame() {
RunGarbageCollector();
}
++frame_tick;
static constexpr u64 mirror_stats_log_interval = 300;
if ((frame_tick % mirror_stats_log_interval) == 0) {
const u64 upload_hit_copies = mirror_upload_hit_copies - mirror_upload_hit_copies_last;
const u64 upload_miss_copies = mirror_upload_miss_copies - mirror_upload_miss_copies_last;
const u64 upload_hit_bytes = mirror_upload_hit_bytes - mirror_upload_hit_bytes_last;
const u64 upload_miss_bytes = mirror_upload_miss_bytes - mirror_upload_miss_bytes_last;
const u64 download_hit_copies =
mirror_download_hit_copies - mirror_download_hit_copies_last;
const u64 download_miss_copies =
mirror_download_miss_copies - mirror_download_miss_copies_last;
const u64 download_hit_bytes = mirror_download_hit_bytes - mirror_download_hit_bytes_last;
const u64 download_miss_bytes =
mirror_download_miss_bytes - mirror_download_miss_bytes_last;
const u64 upload_total_copies = upload_hit_copies + upload_miss_copies;
const u64 download_total_copies = download_hit_copies + download_miss_copies;
if (upload_total_copies > 0 || download_total_copies > 0) {
const double upload_hit_ratio = upload_total_copies > 0
? (100.0 * static_cast<double>(upload_hit_copies) /
static_cast<double>(upload_total_copies))
: 0.0;
const double download_hit_ratio =
download_total_copies > 0
? (100.0 * static_cast<double>(download_hit_copies) /
static_cast<double>(download_total_copies))
: 0.0;
LOG_INFO(HW_GPU,
"Buffer mirror counters (last {} frames): upload hit/miss copies = {}/{}, "
"hit ratio = {:.2f}%, bytes hit/miss = {}/{}, download hit/miss copies = "
"{}/{}, hit ratio = {:.2f}%, bytes hit/miss = {}/{}",
mirror_stats_log_interval, upload_hit_copies, upload_miss_copies,
upload_hit_ratio, upload_hit_bytes, upload_miss_bytes, download_hit_copies,
download_miss_copies, download_hit_ratio, download_hit_bytes,
download_miss_bytes);
}
mirror_upload_hit_copies_last = mirror_upload_hit_copies;
mirror_upload_miss_copies_last = mirror_upload_miss_copies;
mirror_upload_hit_bytes_last = mirror_upload_hit_bytes;
mirror_upload_miss_bytes_last = mirror_upload_miss_bytes;
mirror_download_hit_copies_last = mirror_download_hit_copies;
mirror_download_miss_copies_last = mirror_download_miss_copies;
mirror_download_hit_bytes_last = mirror_download_hit_bytes;
mirror_download_miss_bytes_last = mirror_download_miss_bytes;
}
delayed_destruction_ring.Tick();
for (auto& buffer : async_buffers_death_ring) {
@ -1567,6 +1612,21 @@ BufferId BufferCache<P>::CreateBuffer(DAddr device_addr, u32 wanted_size) {
const u32 size = static_cast<u32>(overlap.end - overlap.begin);
const BufferId new_buffer_id = slot_buffers.insert(runtime, overlap.begin, size);
auto& new_buffer = slot_buffers[new_buffer_id];
const u64 current_mapping_version = device_memory.GetMappingVersion();
if (mirror_mapping_version != current_mapping_version) {
buffer_mirrors.clear();
mirror_mapping_version = current_mapping_version;
}
buffer_mirrors.erase(new_buffer.CpuAddr());
if (auto mirror =
device_memory.CreateMirrorMapping(new_buffer.CpuAddr(), new_buffer.SizeBytes());
mirror) {
buffer_mirrors.emplace(new_buffer.CpuAddr(), std::move(mirror));
if (!mirror_creation_logged) [[unlikely]] {
LOG_INFO(HW_GPU, "Buffer mirror mapping enabled (first successful mapping)");
mirror_creation_logged = true;
}
}
const size_t size_bytes = new_buffer.SizeBytes();
runtime.ClearBuffer(new_buffer, 0, size_bytes, 0);
new_buffer.MarkUsage(0, size_bytes);
@ -1660,15 +1720,52 @@ void BufferCache<P>::ImmediateUploadMemory([[maybe_unused]] Buffer& buffer,
[[maybe_unused]] std::span<const BufferCopy> copies) {
if constexpr (!USE_MEMORY_MAPS_FOR_UPLOADS) {
std::span<u8> immediate_buffer;
const auto resolve_mirror_pointer = [&]() -> const u8* {
const u64 current_mapping_version = device_memory.GetMappingVersion();
if (mirror_mapping_version != current_mapping_version) {
buffer_mirrors.clear();
mirror_mapping_version = current_mapping_version;
}
auto mirror_it = buffer_mirrors.find(buffer.CpuAddr());
if (mirror_it == buffer_mirrors.end()) {
if (auto mirror =
device_memory.CreateMirrorMapping(buffer.CpuAddr(), buffer.SizeBytes());
mirror) {
auto [it, inserted] =
buffer_mirrors.emplace(buffer.CpuAddr(), std::move(mirror));
mirror_it = it;
if (inserted && !mirror_creation_logged) [[unlikely]] {
LOG_INFO(HW_GPU, "Buffer mirror mapping enabled (first successful mapping)");
mirror_creation_logged = true;
}
}
}
return mirror_it != buffer_mirrors.end() ? mirror_it->second.Data() : nullptr;
};
const u8* const mirror_pointer = resolve_mirror_pointer();
for (const BufferCopy& copy : copies) {
std::span<const u8> upload_span;
const DAddr device_addr = buffer.CpuAddr() + copy.dst_offset;
if (IsRangeGranular(device_addr, copy.size)) {
if (mirror_pointer != nullptr) {
mirror_upload_hit_copies++;
mirror_upload_hit_bytes += copy.size;
if (!mirror_upload_logged) [[unlikely]] {
LOG_INFO(HW_GPU, "Buffer mirror fast path active for upload sync");
mirror_upload_logged = true;
}
upload_span =
std::span(mirror_pointer + static_cast<size_t>(copy.dst_offset), copy.size);
} else if (IsRangeGranular(device_addr, copy.size)) {
mirror_upload_miss_copies++;
mirror_upload_miss_bytes += copy.size;
auto* const ptr = device_memory.GetPointer<u8>(device_addr);
if (ptr != nullptr) {
upload_span = std::span(ptr, copy.size);
}
} else {
mirror_upload_miss_copies++;
mirror_upload_miss_bytes += copy.size;
if (immediate_buffer.empty()) {
immediate_buffer = ImmediateBuffer(largest_copy);
}
@ -1687,10 +1784,47 @@ void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer,
if constexpr (USE_MEMORY_MAPS) {
auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes);
const std::span<u8> staging_pointer = upload_staging.mapped_span;
const auto resolve_mirror_pointer = [&]() -> const u8* {
const u64 current_mapping_version = device_memory.GetMappingVersion();
if (mirror_mapping_version != current_mapping_version) {
buffer_mirrors.clear();
mirror_mapping_version = current_mapping_version;
}
auto mirror_it = buffer_mirrors.find(buffer.CpuAddr());
if (mirror_it == buffer_mirrors.end()) {
if (auto mirror =
device_memory.CreateMirrorMapping(buffer.CpuAddr(), buffer.SizeBytes());
mirror) {
auto [it, inserted] =
buffer_mirrors.emplace(buffer.CpuAddr(), std::move(mirror));
mirror_it = it;
if (inserted && !mirror_creation_logged) [[unlikely]] {
LOG_INFO(HW_GPU, "Buffer mirror mapping enabled (first successful mapping)");
mirror_creation_logged = true;
}
}
}
return mirror_it != buffer_mirrors.end() ? mirror_it->second.Data() : nullptr;
};
const u8* const mirror_pointer = resolve_mirror_pointer();
for (BufferCopy& copy : copies) {
u8* const src_pointer = staging_pointer.data() + copy.src_offset;
const DAddr device_addr = buffer.CpuAddr() + copy.dst_offset;
if (mirror_pointer != nullptr) {
mirror_upload_hit_copies++;
mirror_upload_hit_bytes += copy.size;
if (!mirror_upload_logged) [[unlikely]] {
LOG_INFO(HW_GPU, "Buffer mirror fast path active for upload sync");
mirror_upload_logged = true;
}
std::memcpy(src_pointer, mirror_pointer + static_cast<size_t>(copy.dst_offset),
copy.size);
} else {
mirror_upload_miss_copies++;
mirror_upload_miss_bytes += copy.size;
device_memory.ReadBlockUnsafe(device_addr, src_pointer, copy.size);
}
// Apply the staging offset
copy.src_offset += upload_staging.offset;
@ -1783,6 +1917,30 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, DAddr device_addr, u64
if constexpr (USE_MEMORY_MAPS) {
auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
const u8* const mapped_memory = download_staging.mapped_span.data();
const auto resolve_mirror_pointer = [&]() -> u8* {
const u64 current_mapping_version = device_memory.GetMappingVersion();
if (mirror_mapping_version != current_mapping_version) {
buffer_mirrors.clear();
mirror_mapping_version = current_mapping_version;
}
auto mirror_it = buffer_mirrors.find(buffer.CpuAddr());
if (mirror_it == buffer_mirrors.end()) {
if (auto mirror =
device_memory.CreateMirrorMapping(buffer.CpuAddr(), buffer.SizeBytes());
mirror) {
auto [it, inserted] =
buffer_mirrors.emplace(buffer.CpuAddr(), std::move(mirror));
mirror_it = it;
if (inserted && !mirror_creation_logged) [[unlikely]] {
LOG_INFO(HW_GPU, "Buffer mirror mapping enabled (first successful mapping)");
mirror_creation_logged = true;
}
}
}
return mirror_it != buffer_mirrors.end() ? mirror_it->second.Data() : nullptr;
};
u8* const mirror_pointer = resolve_mirror_pointer();
const std::span<BufferCopy> copies_span(copies.data(), copies.data() + copies.size());
for (BufferCopy& copy : copies) {
// Modify copies to have the staging offset in mind
@ -1796,14 +1954,65 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, DAddr device_addr, u64
// Undo the modified offset
const u64 dst_offset = copy.dst_offset - download_staging.offset;
const u8* copy_mapped_memory = mapped_memory + dst_offset;
if (mirror_pointer != nullptr) {
mirror_download_hit_copies++;
mirror_download_hit_bytes += copy.size;
if (!mirror_download_logged) [[unlikely]] {
LOG_INFO(HW_GPU, "Buffer mirror fast path active for download sync");
mirror_download_logged = true;
}
std::memcpy(mirror_pointer + static_cast<size_t>(copy.src_offset),
copy_mapped_memory, copy.size);
} else {
mirror_download_miss_copies++;
mirror_download_miss_bytes += copy.size;
device_memory.WriteBlockUnsafe(copy_device_addr, copy_mapped_memory, copy.size);
}
}
} else {
const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
const auto resolve_mirror_pointer = [&]() -> u8* {
const u64 current_mapping_version = device_memory.GetMappingVersion();
if (mirror_mapping_version != current_mapping_version) {
buffer_mirrors.clear();
mirror_mapping_version = current_mapping_version;
}
auto mirror_it = buffer_mirrors.find(buffer.CpuAddr());
if (mirror_it == buffer_mirrors.end()) {
if (auto mirror =
device_memory.CreateMirrorMapping(buffer.CpuAddr(), buffer.SizeBytes());
mirror) {
auto [it, inserted] =
buffer_mirrors.emplace(buffer.CpuAddr(), std::move(mirror));
mirror_it = it;
if (inserted && !mirror_creation_logged) [[unlikely]] {
LOG_INFO(HW_GPU, "Buffer mirror mapping enabled (first successful mapping)");
mirror_creation_logged = true;
}
}
}
return mirror_it != buffer_mirrors.end() ? mirror_it->second.Data() : nullptr;
};
u8* const mirror_pointer = resolve_mirror_pointer();
for (const BufferCopy& copy : copies) {
buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
const DAddr copy_device_addr = buffer.CpuAddr() + copy.src_offset;
device_memory.WriteBlockUnsafe(copy_device_addr, immediate_buffer.data(), copy.size);
if (mirror_pointer != nullptr) {
mirror_download_hit_copies++;
mirror_download_hit_bytes += copy.size;
if (!mirror_download_logged) [[unlikely]] {
LOG_INFO(HW_GPU, "Buffer mirror fast path active for download sync");
mirror_download_logged = true;
}
std::memcpy(mirror_pointer + static_cast<size_t>(copy.src_offset),
immediate_buffer.data(), copy.size);
} else {
mirror_download_miss_copies++;
mirror_download_miss_bytes += copy.size;
device_memory.WriteBlockUnsafe(copy_device_addr, immediate_buffer.data(),
copy.size);
}
}
}
}
@ -1844,6 +2053,10 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id, bool do_not_mark) {
if (!do_not_mark) {
Buffer& buffer = slot_buffers[buffer_id];
memory_tracker.MarkRegionAsCpuModified(buffer.CpuAddr(), buffer.SizeBytes());
buffer_mirrors.erase(buffer.CpuAddr());
} else {
const Buffer& buffer = slot_buffers[buffer_id];
buffer_mirrors.erase(buffer.CpuAddr());
}
Unregister(buffer_id);

View file

@ -473,6 +473,8 @@ private:
Tegra::MaxwellDeviceMemoryManager& device_memory;
Common::SlotVector<Buffer> slot_buffers;
ankerl::unordered_dense::map<DAddr, Tegra::MaxwellDeviceMemoryManager::MirrorMapping>
buffer_mirrors;
#ifdef YUZU_LEGACY
static constexpr size_t TICKS_TO_DESTROY = 6;
#else
@ -522,6 +524,26 @@ private:
std::array<BufferId, ((1ULL << 34) >> CACHING_PAGEBITS)> page_table;
Common::ScratchBuffer<u8> tmp_buffer;
bool mirror_creation_logged = false;
bool mirror_upload_logged = false;
bool mirror_download_logged = false;
u64 mirror_mapping_version = 0;
u64 mirror_upload_hit_copies = 0;
u64 mirror_upload_miss_copies = 0;
u64 mirror_upload_hit_bytes = 0;
u64 mirror_upload_miss_bytes = 0;
u64 mirror_download_hit_copies = 0;
u64 mirror_download_miss_copies = 0;
u64 mirror_download_hit_bytes = 0;
u64 mirror_download_miss_bytes = 0;
u64 mirror_upload_hit_copies_last = 0;
u64 mirror_upload_miss_copies_last = 0;
u64 mirror_upload_hit_bytes_last = 0;
u64 mirror_upload_miss_bytes_last = 0;
u64 mirror_download_hit_copies_last = 0;
u64 mirror_download_miss_copies_last = 0;
u64 mirror_download_hit_bytes_last = 0;
u64 mirror_download_miss_bytes_last = 0;
};
} // namespace VideoCommon