From 473613089196229f79e6bf17e89a303846fde11a Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 28 Mar 2026 03:02:59 +0000 Subject: [PATCH] [vk/vma] force anv to have HOST_CACHED stream buffers Signed-off-by: lizzie --- .../vulkan_common/vulkan_memory_allocator.cpp | 215 ++++++++---------- 1 file changed, 89 insertions(+), 126 deletions(-) diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp index 9c7cd8a61f..c861300eea 100644 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp @@ -26,84 +26,58 @@ #include "common/settings.h" namespace Vulkan { - namespace { +namespace { -// Helpers translating MemoryUsage to flags/usage +[[nodiscard]] VkMemoryPropertyFlags MemoryUsagePreferredVmaFlags(MemoryUsage usage) { + if (usage == MemoryUsage::Download) + return VK_MEMORY_PROPERTY_HOST_CACHED_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + return usage != MemoryUsage::DeviceLocal ? VK_MEMORY_PROPERTY_HOST_COHERENT_BIT : VkMemoryPropertyFlagBits{}; +} - [[maybe_unused]] VkMemoryPropertyFlags MemoryUsagePropertyFlags(MemoryUsage usage) { - switch (usage) { - case MemoryUsage::DeviceLocal: - return VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; - case MemoryUsage::Upload: - return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; - case MemoryUsage::Download: - return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | - VK_MEMORY_PROPERTY_HOST_CACHED_BIT; - case MemoryUsage::Stream: - return VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; - } - ASSERT_MSG(false, "Invalid memory usage={}", usage); - return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; - } +[[nodiscard]] VmaAllocationCreateFlags MemoryUsageVmaFlags(MemoryUsage usage) { + switch (usage) { + case MemoryUsage::Upload: + case MemoryUsage::Stream: + return VMA_ALLOCATION_CREATE_MAPPED_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; + case MemoryUsage::Download: + return VMA_ALLOCATION_CREATE_MAPPED_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; + case MemoryUsage::DeviceLocal: + return {}; + } + return {}; +} - [[nodiscard]] VkMemoryPropertyFlags MemoryUsagePreferredVmaFlags(MemoryUsage usage) { - if (usage == MemoryUsage::Download) { - return VK_MEMORY_PROPERTY_HOST_CACHED_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; - } - return usage != MemoryUsage::DeviceLocal ? VK_MEMORY_PROPERTY_HOST_COHERENT_BIT - : VkMemoryPropertyFlagBits{}; - } - - [[nodiscard]] VmaAllocationCreateFlags MemoryUsageVmaFlags(MemoryUsage usage) { - switch (usage) { - case MemoryUsage::Upload: - case MemoryUsage::Stream: - return VMA_ALLOCATION_CREATE_MAPPED_BIT | - VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; - case MemoryUsage::Download: - return VMA_ALLOCATION_CREATE_MAPPED_BIT | - VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; - case MemoryUsage::DeviceLocal: - return {}; - } - return {}; - } - - [[nodiscard]] VmaMemoryUsage MemoryUsageVma(MemoryUsage usage) { - switch (usage) { - case MemoryUsage::DeviceLocal: - case MemoryUsage::Stream: - return VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; - case MemoryUsage::Upload: - case MemoryUsage::Download: - return VMA_MEMORY_USAGE_AUTO_PREFER_HOST; - } - return VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; - } +[[nodiscard]] VmaMemoryUsage MemoryUsageVma(MemoryUsage usage) { + switch (usage) { + case MemoryUsage::DeviceLocal: + case MemoryUsage::Stream: + return VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; + case MemoryUsage::Upload: + case MemoryUsage::Download: + return VMA_MEMORY_USAGE_AUTO_PREFER_HOST; + } + return VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; +} // This avoids calling vkGetBufferMemoryRequirements* directly. - template - static VkBuffer GetVkHandleFromBuffer(const T &buf) { - if constexpr (requires { static_cast(buf); }) { - return static_cast(buf); - } else if constexpr (requires {{ buf.GetHandle() } -> std::convertible_to; }) { - return buf.GetHandle(); - } else if constexpr (requires {{ buf.Handle() } -> std::convertible_to; }) { - return buf.Handle(); - } else if constexpr (requires {{ buf.vk_handle() } -> std::convertible_to; }) { - return buf.vk_handle(); - } else { - static_assert(sizeof(T) == 0, "Cannot extract VkBuffer handle from vk::Buffer"); - return VK_NULL_HANDLE; - } - } +template +static VkBuffer GetVkHandleFromBuffer(const T &buf) { + if constexpr (requires { static_cast(buf); }) { + return static_cast(buf); + } else if constexpr (requires {{ buf.GetHandle() } -> std::convertible_to; }) { + return buf.GetHandle(); + } else if constexpr (requires {{ buf.Handle() } -> std::convertible_to; }) { + return buf.Handle(); + } else if constexpr (requires {{ buf.vk_handle() } -> std::convertible_to; }) { + return buf.vk_handle(); + } else { + static_assert(sizeof(T) == 0, "Cannot extract VkBuffer handle from vk::Buffer"); + return VK_NULL_HANDLE; + } +} - } // namespace +} // namespace //MemoryCommit is now VMA-backed MemoryCommit::MemoryCommit(VmaAllocator alloc, VmaAllocation a, @@ -124,12 +98,12 @@ namespace Vulkan { MemoryCommit::~MemoryCommit() { Release(); } MemoryCommit::MemoryCommit(MemoryCommit &&rhs) noexcept - : allocator{std::exchange(rhs.allocator, nullptr)}, - allocation{std::exchange(rhs.allocation, nullptr)}, - memory{std::exchange(rhs.memory, VK_NULL_HANDLE)}, - offset{std::exchange(rhs.offset, 0)}, - size{std::exchange(rhs.size, 0)}, - mapped_ptr{std::exchange(rhs.mapped_ptr, nullptr)} {} + : allocator{std::exchange(rhs.allocator, nullptr)}, + allocation{std::exchange(rhs.allocation, nullptr)}, + memory{std::exchange(rhs.memory, VK_NULL_HANDLE)}, + offset{std::exchange(rhs.offset, 0)}, + size{std::exchange(rhs.size, 0)}, + mapped_ptr{std::exchange(rhs.mapped_ptr, nullptr)} {} MemoryCommit &MemoryCommit::operator=(MemoryCommit &&rhs) noexcept { if (this != &rhs) { @@ -144,32 +118,27 @@ namespace Vulkan { return *this; } - std::span MemoryCommit::Map() - { + std::span MemoryCommit::Map() { if (!allocation) return {}; if (!mapped_ptr) { if (vmaMapMemory(allocator, allocation, &mapped_ptr) != VK_SUCCESS) return {}; } - const size_t n = static_cast(std::min(size, - (std::numeric_limits::max)())); + const size_t n = size_t(std::min(size, (std::numeric_limits::max)())); return std::span{static_cast(mapped_ptr), n}; } - std::span MemoryCommit::Map() const - { + std::span MemoryCommit::Map() const { if (!allocation) return {}; if (!mapped_ptr) { void *p = nullptr; if (vmaMapMemory(allocator, allocation, &p) != VK_SUCCESS) return {}; const_cast(this)->mapped_ptr = p; } - const size_t n = static_cast(std::min(size, - (std::numeric_limits::max)())); + const size_t n = size_t(std::min(size, (std::numeric_limits::max)())); return std::span{static_cast(mapped_ptr), n}; } - void MemoryCommit::Unmap() - { + void MemoryCommit::Unmap() { if (allocation && mapped_ptr) { vmaUnmapMemory(allocator, allocation); mapped_ptr = nullptr; @@ -182,9 +151,7 @@ namespace Vulkan { if (Settings::values.gpu_logging_enabled.GetValue() && Settings::values.gpu_log_memory_tracking.GetValue() && memory != VK_NULL_HANDLE) { - GPU::Logging::GPULogger::GetInstance().LogMemoryDeallocation( - reinterpret_cast(memory) - ); + GPU::Logging::GPULogger::GetInstance().LogMemoryDeallocation(uintptr_t(memory)); } if (mapped_ptr) { @@ -201,22 +168,18 @@ namespace Vulkan { } MemoryAllocator::MemoryAllocator(const Device &device_) - : device{device_}, allocator{device.GetAllocator()}, - properties{device_.GetPhysical().GetMemoryProperties().memoryProperties}, - buffer_image_granularity{ - device_.GetPhysical().GetProperties().limits.bufferImageGranularity} { - + : device{device_}, allocator{device.GetAllocator()} + , properties{device_.GetPhysical().GetMemoryProperties().memoryProperties} + , buffer_image_granularity{device_.GetPhysical().GetProperties().limits.bufferImageGranularity} + { // Preserve the previous "RenderDoc small heap" trimming behavior that we had in original vma minus the heap bug - if (device.HasDebuggingToolAttached()) - { + if (device.HasDebuggingToolAttached()) { using namespace Common::Literals; ForEachDeviceLocalHostVisibleHeap(device, [this](size_t heap_idx, VkMemoryHeap &heap) { if (heap.size <= 256_MiB) { - for (u32 t = 0; t < properties.memoryTypeCount; ++t) { - if (properties.memoryTypes[t].heapIndex == heap_idx) { + for (u32 t = 0; t < properties.memoryTypeCount; ++t) + if (properties.memoryTypes[t].heapIndex == heap_idx) valid_memory_types &= ~(1u << t); - } - } } }); } @@ -224,17 +187,16 @@ namespace Vulkan { MemoryAllocator::~MemoryAllocator() = default; - vk::Image MemoryAllocator::CreateImage(const VkImageCreateInfo &ci) const - { + vk::Image MemoryAllocator::CreateImage(const VkImageCreateInfo &ci) const { const VmaAllocationCreateInfo alloc_ci = { - .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT, - .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, - .requiredFlags = 0, - .preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, - .memoryTypeBits = 0, - .pool = VK_NULL_HANDLE, - .pUserData = nullptr, - .priority = 0.f, + .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT, + .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, + .requiredFlags = 0, + .preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + .memoryTypeBits = 0, + .pool = VK_NULL_HANDLE, + .pUserData = nullptr, + .priority = 0.f, }; VkImage handle{}; @@ -246,28 +208,29 @@ namespace Vulkan { if (Settings::values.gpu_logging_enabled.GetValue() && Settings::values.gpu_log_memory_tracking.GetValue()) { GPU::Logging::GPULogger::GetInstance().LogMemoryAllocation( - reinterpret_cast(alloc_info.deviceMemory), - static_cast(alloc_info.size), + uintptr_t(alloc_info.deviceMemory), + u64(alloc_info.size), VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT ); } - return vk::Image(handle, ci.usage, *device.GetLogical(), allocator, allocation, - device.GetDispatchLoader()); + return vk::Image(handle, ci.usage, *device.GetLogical(), allocator, allocation, device.GetDispatchLoader()); } - vk::Buffer - MemoryAllocator::CreateBuffer(const VkBufferCreateInfo &ci, MemoryUsage usage) const - { + vk::Buffer MemoryAllocator::CreateBuffer(const VkBufferCreateInfo &ci, MemoryUsage usage) const { + // MESA will do memcpy() if not marked as host cached, so just force mark it for most buffers + auto const anv_flags = (usage == MemoryUsage::Stream + && device.GetDriverID() == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA) + ? VK_MEMORY_PROPERTY_HOST_CACHED_BIT : 0; const VmaAllocationCreateInfo alloc_ci = { - .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | MemoryUsageVmaFlags(usage), - .usage = MemoryUsageVma(usage), - .requiredFlags = 0, - .preferredFlags = MemoryUsagePreferredVmaFlags(usage), - .memoryTypeBits = usage == MemoryUsage::Stream ? 0u : valid_memory_types, - .pool = VK_NULL_HANDLE, - .pUserData = nullptr, - .priority = 0.f, + .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | MemoryUsageVmaFlags(usage), + .usage = MemoryUsageVma(usage), + .requiredFlags = 0, + .preferredFlags = MemoryUsagePreferredVmaFlags(usage) | anv_flags, + .memoryTypeBits = usage == MemoryUsage::Stream ? 0u : valid_memory_types, + .pool = VK_NULL_HANDLE, + .pUserData = nullptr, + .priority = 0.f, }; VkBuffer handle{};