mirror of
https://git.eden-emu.dev/eden-emu/eden
synced 2026-04-10 03:18:55 +02:00
[memory] Add mirror mapping for buffer sync
will be added once determined safe for testing, plus remove the logging before merge
This commit is contained in:
parent
f0d77e86e3
commit
83f3150dc1
4 changed files with 407 additions and 4 deletions
|
|
@ -14,12 +14,17 @@
|
|||
#include <mutex>
|
||||
#include <vector>
|
||||
#include <span>
|
||||
#include <utility>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "common/range_mutex.h"
|
||||
#include "common/scratch_buffer.h"
|
||||
#include "common/virtual_buffer.h"
|
||||
|
||||
#if defined(__linux__)
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
|
||||
namespace Core {
|
||||
|
||||
constexpr size_t DEVICE_PAGEBITS = 12ULL;
|
||||
|
|
@ -45,6 +50,74 @@ class DeviceMemoryManager {
|
|||
using DeviceMethods = typename Traits::DeviceMethods;
|
||||
|
||||
public:
|
||||
class MirrorMapping {
|
||||
public:
|
||||
MirrorMapping() = default;
|
||||
MirrorMapping(u8* mapped_base_, size_t mapped_size_, size_t data_offset_)
|
||||
: mapped_base{mapped_base_}, mapped_size{mapped_size_}, data_offset{data_offset_} {}
|
||||
|
||||
MirrorMapping(const MirrorMapping&) = delete;
|
||||
MirrorMapping& operator=(const MirrorMapping&) = delete;
|
||||
|
||||
MirrorMapping(MirrorMapping&& other) noexcept {
|
||||
MoveFrom(other);
|
||||
}
|
||||
|
||||
MirrorMapping& operator=(MirrorMapping&& other) noexcept {
|
||||
if (this != &other) {
|
||||
Release();
|
||||
MoveFrom(other);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
~MirrorMapping() {
|
||||
Release();
|
||||
}
|
||||
|
||||
[[nodiscard]] bool IsValid() const noexcept {
|
||||
return mapped_base != nullptr;
|
||||
}
|
||||
|
||||
[[nodiscard]] explicit operator bool() const noexcept {
|
||||
return IsValid();
|
||||
}
|
||||
|
||||
[[nodiscard]] u8* Data() noexcept {
|
||||
return mapped_base ? mapped_base + data_offset : nullptr;
|
||||
}
|
||||
|
||||
[[nodiscard]] const u8* Data() const noexcept {
|
||||
return mapped_base ? mapped_base + data_offset : nullptr;
|
||||
}
|
||||
|
||||
[[nodiscard]] size_t Size() const noexcept {
|
||||
return mapped_size >= data_offset ? mapped_size - data_offset : 0;
|
||||
}
|
||||
|
||||
private:
|
||||
void MoveFrom(MirrorMapping& other) noexcept {
|
||||
mapped_base = std::exchange(other.mapped_base, nullptr);
|
||||
mapped_size = std::exchange(other.mapped_size, 0);
|
||||
data_offset = std::exchange(other.data_offset, 0);
|
||||
}
|
||||
|
||||
void Release() noexcept {
|
||||
#if defined(__linux__)
|
||||
if (mapped_base) {
|
||||
munmap(mapped_base, mapped_size);
|
||||
}
|
||||
#endif
|
||||
mapped_base = nullptr;
|
||||
mapped_size = 0;
|
||||
data_offset = 0;
|
||||
}
|
||||
|
||||
u8* mapped_base{};
|
||||
size_t mapped_size{};
|
||||
size_t data_offset{};
|
||||
};
|
||||
|
||||
DeviceMemoryManager(const DeviceMemory& device_memory);
|
||||
~DeviceMemoryManager();
|
||||
|
||||
|
|
@ -118,6 +191,11 @@ public:
|
|||
void WriteBlock(DAddr address, const void* src_pointer, size_t size);
|
||||
void WriteBlockUnsafe(DAddr address, const void* src_pointer, size_t size);
|
||||
|
||||
[[nodiscard]] MirrorMapping CreateMirrorMapping(DAddr address, size_t size) const;
|
||||
[[nodiscard]] u64 GetMappingVersion() const noexcept {
|
||||
return mapping_version.load(std::memory_order_acquire);
|
||||
}
|
||||
|
||||
Asid RegisterProcess(Memory::Memory* memory);
|
||||
void UnregisterProcess(Asid id);
|
||||
|
||||
|
|
@ -236,6 +314,7 @@ private:
|
|||
std::unique_ptr<CachedPages> cached_pages;
|
||||
Common::RangeMutex counter_guard;
|
||||
std::mutex mapping_guard;
|
||||
std::atomic<u64> mapping_version{1};
|
||||
|
||||
|
||||
};
|
||||
|
|
|
|||
|
|
@ -4,6 +4,10 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#if defined(__linux__) && !defined(_GNU_SOURCE)
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
|
||||
#include <atomic>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
|
|
@ -11,6 +15,17 @@
|
|||
#include <algorithm>
|
||||
#include <vector>
|
||||
|
||||
#if defined(__linux__)
|
||||
#include <sys/mman.h>
|
||||
#ifndef MREMAP_MAYMOVE
|
||||
#define MREMAP_MAYMOVE 1
|
||||
#endif
|
||||
#ifndef MREMAP_FIXED
|
||||
#define MREMAP_FIXED 2
|
||||
#endif
|
||||
extern "C" void* mremap(void* old_address, size_t old_size, size_t new_size, int flags, ...);
|
||||
#endif
|
||||
|
||||
#include "common/address_space.h"
|
||||
#include "common/address_space.inc"
|
||||
#include "common/alignment.h"
|
||||
|
|
@ -240,6 +255,7 @@ void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size
|
|||
impl->multi_dev_address.Register(new_dev, start_id);
|
||||
}
|
||||
t_slot = {};
|
||||
mapping_version.fetch_add(1, std::memory_order_release);
|
||||
if (track) {
|
||||
TrackContinuityImpl(address, virtual_address, size, asid);
|
||||
}
|
||||
|
|
@ -272,6 +288,7 @@ void DeviceMemoryManager<Traits>::Unmap(DAddr address, size_t size) {
|
|||
}
|
||||
}
|
||||
t_slot = {};
|
||||
mapping_version.fetch_add(1, std::memory_order_release);
|
||||
}
|
||||
template <typename Traits>
|
||||
void DeviceMemoryManager<Traits>::TrackContinuityImpl(DAddr address, VAddr virtual_address,
|
||||
|
|
@ -315,6 +332,78 @@ const u8* DeviceMemoryManager<Traits>::GetSpan(const DAddr src_addr, const std::
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
// Builds a single contiguous host-visible view ("mirror") of the device range
// [address, address + size), which may be backed by physically scattered
// pages. Returns an invalid MirrorMapping on any failure (non-Linux build,
// zero size, unmapped page, or a failed mmap/mremap/mprotect), in which case
// callers fall back to the slow ReadBlock/WriteBlock path.
template <typename Traits>
typename DeviceMemoryManager<Traits>::MirrorMapping DeviceMemoryManager<Traits>::CreateMirrorMapping(
    DAddr address, size_t size) const {
#if !defined(__linux__)
    // The implementation relies on Linux mremap(old_size = 0) duplication;
    // other platforms always take the fallback path.
    return {};
#else
    if (size == 0) {
        return {};
    }

    // Page-align the request: data_offset is the slack between the aligned
    // base and the caller's address, later skipped by MirrorMapping::Data().
    const DAddr aligned_address = Common::AlignDown(address, DAddr{page_size});
    const size_t data_offset = static_cast<size_t>(address - aligned_address);
    const size_t mapped_size = Common::AlignUp(size + data_offset, page_size);

    // A physically contiguous run of source pages to be remapped as one unit.
    struct Segment {
        const u8* source;
        size_t size;
    };

    std::vector<Segment> segments;
    segments.reserve(Common::DivCeil(mapped_size, page_size));

    // Walk the range page by page, coalescing host-contiguous runs. The
    // continuity tracker gives the number of pages known contiguous from
    // page_index, so each iteration can advance multiple pages at once.
    size_t remaining_size = mapped_size;
    size_t page_index = aligned_address >> page_bits;
    while (remaining_size > 0) {
        const size_t next_pages = std::size_t(tracked_entries[page_index].continuity_tracker);
        const size_t copy_amount = (std::min)(next_pages << page_bits, remaining_size);

        const auto phys_addr = tracked_entries[page_index].compressed_physical_ptr;
        if (phys_addr == 0) {
            // 0 encodes "unmapped" in the compressed pointer scheme; a hole
            // in the range makes a linear mirror impossible.
            return {};
        }

        // Compressed physical pointers are stored biased by +1 (so that 0 can
        // mean unmapped); undo the bias before shifting back to a byte address.
        const auto* source =
            GetPointerFromRaw<u8>(PAddr(phys_addr - 1U) << Memory::YUZU_PAGEBITS);

        // Merge with the previous segment when the host addresses abut.
        if (!segments.empty() && segments.back().source + segments.back().size == source) {
            segments.back().size += copy_amount;
        } else {
            segments.push_back({source, copy_amount});
        }

        page_index += next_pages;
        remaining_size -= copy_amount;
    }

    // Reserve a contiguous PROT_NONE region to remap the segments into.
    void* const mirror_base =
        mmap(nullptr, mapped_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (mirror_base == MAP_FAILED) {
        return {};
    }

    size_t mirror_offset = 0;
    for (const auto& segment : segments) {
        void* const target = static_cast<u8*>(mirror_base) + mirror_offset;
        // mremap with old_size == 0 duplicates the source mapping at `target`
        // instead of moving it, leaving the original mapping intact.
        // NOTE(review): per mremap(2) this duplication only works when the
        // source pages belong to a MAP_SHARED mapping — confirm the backing
        // device memory is mapped shared, otherwise this fails at runtime.
        void* const result = mremap(const_cast<u8*>(segment.source), 0, segment.size,
                                    MREMAP_MAYMOVE | MREMAP_FIXED, target);
        if (result == MAP_FAILED) {
            // Unmapping mirror_base also releases any segments already
            // remapped into it; the duplicated-source originals are untouched.
            munmap(mirror_base, mapped_size);
            return {};
        }
        // The freshly placed pages inherit protection from the remap; force
        // read/write so the buffer cache can memcpy through the mirror.
        if (mprotect(result, segment.size, PROT_READ | PROT_WRITE) != 0) {
            munmap(mirror_base, mapped_size);
            return {};
        }
        mirror_offset += segment.size;
    }

    return MirrorMapping{static_cast<u8*>(mirror_base), mapped_size, data_offset};
#endif
}
|
||||
|
||||
template <typename Traits>
|
||||
void DeviceMemoryManager<Traits>::InnerGatherDeviceAddresses(Common::ScratchBuffer<u32>& buffer,
|
||||
PAddr address) {
|
||||
|
|
|
|||
|
|
@ -104,6 +104,51 @@ void BufferCache<P>::TickFrame() {
|
|||
RunGarbageCollector();
|
||||
}
|
||||
++frame_tick;
|
||||
static constexpr u64 mirror_stats_log_interval = 300;
|
||||
if ((frame_tick % mirror_stats_log_interval) == 0) {
|
||||
const u64 upload_hit_copies = mirror_upload_hit_copies - mirror_upload_hit_copies_last;
|
||||
const u64 upload_miss_copies = mirror_upload_miss_copies - mirror_upload_miss_copies_last;
|
||||
const u64 upload_hit_bytes = mirror_upload_hit_bytes - mirror_upload_hit_bytes_last;
|
||||
const u64 upload_miss_bytes = mirror_upload_miss_bytes - mirror_upload_miss_bytes_last;
|
||||
const u64 download_hit_copies =
|
||||
mirror_download_hit_copies - mirror_download_hit_copies_last;
|
||||
const u64 download_miss_copies =
|
||||
mirror_download_miss_copies - mirror_download_miss_copies_last;
|
||||
const u64 download_hit_bytes = mirror_download_hit_bytes - mirror_download_hit_bytes_last;
|
||||
const u64 download_miss_bytes =
|
||||
mirror_download_miss_bytes - mirror_download_miss_bytes_last;
|
||||
|
||||
const u64 upload_total_copies = upload_hit_copies + upload_miss_copies;
|
||||
const u64 download_total_copies = download_hit_copies + download_miss_copies;
|
||||
if (upload_total_copies > 0 || download_total_copies > 0) {
|
||||
const double upload_hit_ratio = upload_total_copies > 0
|
||||
? (100.0 * static_cast<double>(upload_hit_copies) /
|
||||
static_cast<double>(upload_total_copies))
|
||||
: 0.0;
|
||||
const double download_hit_ratio =
|
||||
download_total_copies > 0
|
||||
? (100.0 * static_cast<double>(download_hit_copies) /
|
||||
static_cast<double>(download_total_copies))
|
||||
: 0.0;
|
||||
LOG_INFO(HW_GPU,
|
||||
"Buffer mirror counters (last {} frames): upload hit/miss copies = {}/{}, "
|
||||
"hit ratio = {:.2f}%, bytes hit/miss = {}/{}, download hit/miss copies = "
|
||||
"{}/{}, hit ratio = {:.2f}%, bytes hit/miss = {}/{}",
|
||||
mirror_stats_log_interval, upload_hit_copies, upload_miss_copies,
|
||||
upload_hit_ratio, upload_hit_bytes, upload_miss_bytes, download_hit_copies,
|
||||
download_miss_copies, download_hit_ratio, download_hit_bytes,
|
||||
download_miss_bytes);
|
||||
}
|
||||
|
||||
mirror_upload_hit_copies_last = mirror_upload_hit_copies;
|
||||
mirror_upload_miss_copies_last = mirror_upload_miss_copies;
|
||||
mirror_upload_hit_bytes_last = mirror_upload_hit_bytes;
|
||||
mirror_upload_miss_bytes_last = mirror_upload_miss_bytes;
|
||||
mirror_download_hit_copies_last = mirror_download_hit_copies;
|
||||
mirror_download_miss_copies_last = mirror_download_miss_copies;
|
||||
mirror_download_hit_bytes_last = mirror_download_hit_bytes;
|
||||
mirror_download_miss_bytes_last = mirror_download_miss_bytes;
|
||||
}
|
||||
delayed_destruction_ring.Tick();
|
||||
|
||||
for (auto& buffer : async_buffers_death_ring) {
|
||||
|
|
@ -1567,6 +1612,21 @@ BufferId BufferCache<P>::CreateBuffer(DAddr device_addr, u32 wanted_size) {
|
|||
const u32 size = static_cast<u32>(overlap.end - overlap.begin);
|
||||
const BufferId new_buffer_id = slot_buffers.insert(runtime, overlap.begin, size);
|
||||
auto& new_buffer = slot_buffers[new_buffer_id];
|
||||
const u64 current_mapping_version = device_memory.GetMappingVersion();
|
||||
if (mirror_mapping_version != current_mapping_version) {
|
||||
buffer_mirrors.clear();
|
||||
mirror_mapping_version = current_mapping_version;
|
||||
}
|
||||
buffer_mirrors.erase(new_buffer.CpuAddr());
|
||||
if (auto mirror =
|
||||
device_memory.CreateMirrorMapping(new_buffer.CpuAddr(), new_buffer.SizeBytes());
|
||||
mirror) {
|
||||
buffer_mirrors.emplace(new_buffer.CpuAddr(), std::move(mirror));
|
||||
if (!mirror_creation_logged) [[unlikely]] {
|
||||
LOG_INFO(HW_GPU, "Buffer mirror mapping enabled (first successful mapping)");
|
||||
mirror_creation_logged = true;
|
||||
}
|
||||
}
|
||||
const size_t size_bytes = new_buffer.SizeBytes();
|
||||
runtime.ClearBuffer(new_buffer, 0, size_bytes, 0);
|
||||
new_buffer.MarkUsage(0, size_bytes);
|
||||
|
|
@ -1660,15 +1720,52 @@ void BufferCache<P>::ImmediateUploadMemory([[maybe_unused]] Buffer& buffer,
|
|||
[[maybe_unused]] std::span<const BufferCopy> copies) {
|
||||
if constexpr (!USE_MEMORY_MAPS_FOR_UPLOADS) {
|
||||
std::span<u8> immediate_buffer;
|
||||
const auto resolve_mirror_pointer = [&]() -> const u8* {
|
||||
const u64 current_mapping_version = device_memory.GetMappingVersion();
|
||||
if (mirror_mapping_version != current_mapping_version) {
|
||||
buffer_mirrors.clear();
|
||||
mirror_mapping_version = current_mapping_version;
|
||||
}
|
||||
|
||||
auto mirror_it = buffer_mirrors.find(buffer.CpuAddr());
|
||||
if (mirror_it == buffer_mirrors.end()) {
|
||||
if (auto mirror =
|
||||
device_memory.CreateMirrorMapping(buffer.CpuAddr(), buffer.SizeBytes());
|
||||
mirror) {
|
||||
auto [it, inserted] =
|
||||
buffer_mirrors.emplace(buffer.CpuAddr(), std::move(mirror));
|
||||
mirror_it = it;
|
||||
if (inserted && !mirror_creation_logged) [[unlikely]] {
|
||||
LOG_INFO(HW_GPU, "Buffer mirror mapping enabled (first successful mapping)");
|
||||
mirror_creation_logged = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return mirror_it != buffer_mirrors.end() ? mirror_it->second.Data() : nullptr;
|
||||
};
|
||||
const u8* const mirror_pointer = resolve_mirror_pointer();
|
||||
for (const BufferCopy& copy : copies) {
|
||||
std::span<const u8> upload_span;
|
||||
const DAddr device_addr = buffer.CpuAddr() + copy.dst_offset;
|
||||
if (IsRangeGranular(device_addr, copy.size)) {
|
||||
if (mirror_pointer != nullptr) {
|
||||
mirror_upload_hit_copies++;
|
||||
mirror_upload_hit_bytes += copy.size;
|
||||
if (!mirror_upload_logged) [[unlikely]] {
|
||||
LOG_INFO(HW_GPU, "Buffer mirror fast path active for upload sync");
|
||||
mirror_upload_logged = true;
|
||||
}
|
||||
upload_span =
|
||||
std::span(mirror_pointer + static_cast<size_t>(copy.dst_offset), copy.size);
|
||||
} else if (IsRangeGranular(device_addr, copy.size)) {
|
||||
mirror_upload_miss_copies++;
|
||||
mirror_upload_miss_bytes += copy.size;
|
||||
auto* const ptr = device_memory.GetPointer<u8>(device_addr);
|
||||
if (ptr != nullptr) {
|
||||
upload_span = std::span(ptr, copy.size);
|
||||
}
|
||||
} else {
|
||||
mirror_upload_miss_copies++;
|
||||
mirror_upload_miss_bytes += copy.size;
|
||||
if (immediate_buffer.empty()) {
|
||||
immediate_buffer = ImmediateBuffer(largest_copy);
|
||||
}
|
||||
|
|
@ -1687,10 +1784,47 @@ void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer,
|
|||
if constexpr (USE_MEMORY_MAPS) {
|
||||
auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes);
|
||||
const std::span<u8> staging_pointer = upload_staging.mapped_span;
|
||||
const auto resolve_mirror_pointer = [&]() -> const u8* {
|
||||
const u64 current_mapping_version = device_memory.GetMappingVersion();
|
||||
if (mirror_mapping_version != current_mapping_version) {
|
||||
buffer_mirrors.clear();
|
||||
mirror_mapping_version = current_mapping_version;
|
||||
}
|
||||
|
||||
auto mirror_it = buffer_mirrors.find(buffer.CpuAddr());
|
||||
if (mirror_it == buffer_mirrors.end()) {
|
||||
if (auto mirror =
|
||||
device_memory.CreateMirrorMapping(buffer.CpuAddr(), buffer.SizeBytes());
|
||||
mirror) {
|
||||
auto [it, inserted] =
|
||||
buffer_mirrors.emplace(buffer.CpuAddr(), std::move(mirror));
|
||||
mirror_it = it;
|
||||
if (inserted && !mirror_creation_logged) [[unlikely]] {
|
||||
LOG_INFO(HW_GPU, "Buffer mirror mapping enabled (first successful mapping)");
|
||||
mirror_creation_logged = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return mirror_it != buffer_mirrors.end() ? mirror_it->second.Data() : nullptr;
|
||||
};
|
||||
const u8* const mirror_pointer = resolve_mirror_pointer();
|
||||
for (BufferCopy& copy : copies) {
|
||||
u8* const src_pointer = staging_pointer.data() + copy.src_offset;
|
||||
const DAddr device_addr = buffer.CpuAddr() + copy.dst_offset;
|
||||
device_memory.ReadBlockUnsafe(device_addr, src_pointer, copy.size);
|
||||
if (mirror_pointer != nullptr) {
|
||||
mirror_upload_hit_copies++;
|
||||
mirror_upload_hit_bytes += copy.size;
|
||||
if (!mirror_upload_logged) [[unlikely]] {
|
||||
LOG_INFO(HW_GPU, "Buffer mirror fast path active for upload sync");
|
||||
mirror_upload_logged = true;
|
||||
}
|
||||
std::memcpy(src_pointer, mirror_pointer + static_cast<size_t>(copy.dst_offset),
|
||||
copy.size);
|
||||
} else {
|
||||
mirror_upload_miss_copies++;
|
||||
mirror_upload_miss_bytes += copy.size;
|
||||
device_memory.ReadBlockUnsafe(device_addr, src_pointer, copy.size);
|
||||
}
|
||||
|
||||
// Apply the staging offset
|
||||
copy.src_offset += upload_staging.offset;
|
||||
|
|
@ -1783,6 +1917,30 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, DAddr device_addr, u64
|
|||
if constexpr (USE_MEMORY_MAPS) {
|
||||
auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
|
||||
const u8* const mapped_memory = download_staging.mapped_span.data();
|
||||
const auto resolve_mirror_pointer = [&]() -> u8* {
|
||||
const u64 current_mapping_version = device_memory.GetMappingVersion();
|
||||
if (mirror_mapping_version != current_mapping_version) {
|
||||
buffer_mirrors.clear();
|
||||
mirror_mapping_version = current_mapping_version;
|
||||
}
|
||||
|
||||
auto mirror_it = buffer_mirrors.find(buffer.CpuAddr());
|
||||
if (mirror_it == buffer_mirrors.end()) {
|
||||
if (auto mirror =
|
||||
device_memory.CreateMirrorMapping(buffer.CpuAddr(), buffer.SizeBytes());
|
||||
mirror) {
|
||||
auto [it, inserted] =
|
||||
buffer_mirrors.emplace(buffer.CpuAddr(), std::move(mirror));
|
||||
mirror_it = it;
|
||||
if (inserted && !mirror_creation_logged) [[unlikely]] {
|
||||
LOG_INFO(HW_GPU, "Buffer mirror mapping enabled (first successful mapping)");
|
||||
mirror_creation_logged = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return mirror_it != buffer_mirrors.end() ? mirror_it->second.Data() : nullptr;
|
||||
};
|
||||
u8* const mirror_pointer = resolve_mirror_pointer();
|
||||
const std::span<BufferCopy> copies_span(copies.data(), copies.data() + copies.size());
|
||||
for (BufferCopy& copy : copies) {
|
||||
// Modify copies to have the staging offset in mind
|
||||
|
|
@ -1796,14 +1954,65 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, DAddr device_addr, u64
|
|||
// Undo the modified offset
|
||||
const u64 dst_offset = copy.dst_offset - download_staging.offset;
|
||||
const u8* copy_mapped_memory = mapped_memory + dst_offset;
|
||||
device_memory.WriteBlockUnsafe(copy_device_addr, copy_mapped_memory, copy.size);
|
||||
if (mirror_pointer != nullptr) {
|
||||
mirror_download_hit_copies++;
|
||||
mirror_download_hit_bytes += copy.size;
|
||||
if (!mirror_download_logged) [[unlikely]] {
|
||||
LOG_INFO(HW_GPU, "Buffer mirror fast path active for download sync");
|
||||
mirror_download_logged = true;
|
||||
}
|
||||
std::memcpy(mirror_pointer + static_cast<size_t>(copy.src_offset),
|
||||
copy_mapped_memory, copy.size);
|
||||
} else {
|
||||
mirror_download_miss_copies++;
|
||||
mirror_download_miss_bytes += copy.size;
|
||||
device_memory.WriteBlockUnsafe(copy_device_addr, copy_mapped_memory, copy.size);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
|
||||
const auto resolve_mirror_pointer = [&]() -> u8* {
|
||||
const u64 current_mapping_version = device_memory.GetMappingVersion();
|
||||
if (mirror_mapping_version != current_mapping_version) {
|
||||
buffer_mirrors.clear();
|
||||
mirror_mapping_version = current_mapping_version;
|
||||
}
|
||||
|
||||
auto mirror_it = buffer_mirrors.find(buffer.CpuAddr());
|
||||
if (mirror_it == buffer_mirrors.end()) {
|
||||
if (auto mirror =
|
||||
device_memory.CreateMirrorMapping(buffer.CpuAddr(), buffer.SizeBytes());
|
||||
mirror) {
|
||||
auto [it, inserted] =
|
||||
buffer_mirrors.emplace(buffer.CpuAddr(), std::move(mirror));
|
||||
mirror_it = it;
|
||||
if (inserted && !mirror_creation_logged) [[unlikely]] {
|
||||
LOG_INFO(HW_GPU, "Buffer mirror mapping enabled (first successful mapping)");
|
||||
mirror_creation_logged = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return mirror_it != buffer_mirrors.end() ? mirror_it->second.Data() : nullptr;
|
||||
};
|
||||
u8* const mirror_pointer = resolve_mirror_pointer();
|
||||
for (const BufferCopy& copy : copies) {
|
||||
buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
|
||||
const DAddr copy_device_addr = buffer.CpuAddr() + copy.src_offset;
|
||||
device_memory.WriteBlockUnsafe(copy_device_addr, immediate_buffer.data(), copy.size);
|
||||
if (mirror_pointer != nullptr) {
|
||||
mirror_download_hit_copies++;
|
||||
mirror_download_hit_bytes += copy.size;
|
||||
if (!mirror_download_logged) [[unlikely]] {
|
||||
LOG_INFO(HW_GPU, "Buffer mirror fast path active for download sync");
|
||||
mirror_download_logged = true;
|
||||
}
|
||||
std::memcpy(mirror_pointer + static_cast<size_t>(copy.src_offset),
|
||||
immediate_buffer.data(), copy.size);
|
||||
} else {
|
||||
mirror_download_miss_copies++;
|
||||
mirror_download_miss_bytes += copy.size;
|
||||
device_memory.WriteBlockUnsafe(copy_device_addr, immediate_buffer.data(),
|
||||
copy.size);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1844,6 +2053,10 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id, bool do_not_mark) {
|
|||
if (!do_not_mark) {
|
||||
Buffer& buffer = slot_buffers[buffer_id];
|
||||
memory_tracker.MarkRegionAsCpuModified(buffer.CpuAddr(), buffer.SizeBytes());
|
||||
buffer_mirrors.erase(buffer.CpuAddr());
|
||||
} else {
|
||||
const Buffer& buffer = slot_buffers[buffer_id];
|
||||
buffer_mirrors.erase(buffer.CpuAddr());
|
||||
}
|
||||
|
||||
Unregister(buffer_id);
|
||||
|
|
|
|||
|
|
@ -473,6 +473,8 @@ private:
|
|||
Tegra::MaxwellDeviceMemoryManager& device_memory;
|
||||
|
||||
Common::SlotVector<Buffer> slot_buffers;
|
||||
ankerl::unordered_dense::map<DAddr, Tegra::MaxwellDeviceMemoryManager::MirrorMapping>
|
||||
buffer_mirrors;
|
||||
#ifdef YUZU_LEGACY
|
||||
static constexpr size_t TICKS_TO_DESTROY = 6;
|
||||
#else
|
||||
|
|
@ -522,6 +524,26 @@ private:
|
|||
|
||||
std::array<BufferId, ((1ULL << 34) >> CACHING_PAGEBITS)> page_table;
|
||||
Common::ScratchBuffer<u8> tmp_buffer;
|
||||
bool mirror_creation_logged = false;
|
||||
bool mirror_upload_logged = false;
|
||||
bool mirror_download_logged = false;
|
||||
u64 mirror_mapping_version = 0;
|
||||
u64 mirror_upload_hit_copies = 0;
|
||||
u64 mirror_upload_miss_copies = 0;
|
||||
u64 mirror_upload_hit_bytes = 0;
|
||||
u64 mirror_upload_miss_bytes = 0;
|
||||
u64 mirror_download_hit_copies = 0;
|
||||
u64 mirror_download_miss_copies = 0;
|
||||
u64 mirror_download_hit_bytes = 0;
|
||||
u64 mirror_download_miss_bytes = 0;
|
||||
u64 mirror_upload_hit_copies_last = 0;
|
||||
u64 mirror_upload_miss_copies_last = 0;
|
||||
u64 mirror_upload_hit_bytes_last = 0;
|
||||
u64 mirror_upload_miss_bytes_last = 0;
|
||||
u64 mirror_download_hit_copies_last = 0;
|
||||
u64 mirror_download_miss_copies_last = 0;
|
||||
u64 mirror_download_hit_bytes_last = 0;
|
||||
u64 mirror_download_miss_bytes_last = 0;
|
||||
};
|
||||
|
||||
} // namespace VideoCommon
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue