[vk/vma] force anv to have HOST_CACHED stream buffers

Signed-off-by: lizzie <lizzie@eden-emu.dev>
This commit is contained in:
lizzie 2026-03-28 03:02:59 +00:00
parent c984c387d7
commit 4736130891

View file

@ -28,45 +28,19 @@
namespace Vulkan {
namespace {
// Helpers translating MemoryUsage to flags/usage
// Translates a MemoryUsage category into the Vulkan memory property flags associated with it.
// An unrecognised value trips the assertion below and falls back to host-visible,
// host-coherent memory.
[[maybe_unused]] VkMemoryPropertyFlags MemoryUsagePropertyFlags(MemoryUsage usage) {
    // Flag pair shared by every host-accessible category and by the fallback path.
    constexpr VkMemoryPropertyFlags host_coherent =
        VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;

    // No default label: control leaves the switch only for values outside the enum.
    switch (usage) {
    case MemoryUsage::DeviceLocal:
        return VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
    case MemoryUsage::Upload:
        return host_coherent;
    case MemoryUsage::Download:
        return host_coherent | VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
    case MemoryUsage::Stream:
        return VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | host_coherent;
    }

    ASSERT_MSG(false, "Invalid memory usage={}", usage);
    return host_coherent;
}
// Returns the memory property flags that CreateBuffer passes to VMA as
// VmaAllocationCreateInfo::preferredFlags for the given usage:
//   - Download:    host-cached and host-coherent
//   - DeviceLocal: no preference (empty flag set)
//   - otherwise:   host-coherent
[[nodiscard]] VkMemoryPropertyFlags MemoryUsagePreferredVmaFlags(MemoryUsage usage) {
    if (usage == MemoryUsage::Download)
        return VK_MEMORY_PROPERTY_HOST_CACHED_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
    return usage != MemoryUsage::DeviceLocal ? VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
                                             : VkMemoryPropertyFlagBits{};
}
[[nodiscard]] VmaAllocationCreateFlags MemoryUsageVmaFlags(MemoryUsage usage) {
switch (usage) {
case MemoryUsage::Upload:
case MemoryUsage::Stream:
return VMA_ALLOCATION_CREATE_MAPPED_BIT |
VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
return VMA_ALLOCATION_CREATE_MAPPED_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
case MemoryUsage::Download:
return VMA_ALLOCATION_CREATE_MAPPED_BIT |
VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT;
return VMA_ALLOCATION_CREATE_MAPPED_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT;
case MemoryUsage::DeviceLocal:
return {};
}
@ -144,32 +118,27 @@ namespace Vulkan {
return *this;
}
// Returns a writable view over this commit's memory, mapping the allocation through VMA
// the first time it is requested and reusing the cached pointer afterwards.
// Returns an empty span when there is no allocation or when vmaMapMemory fails.
std::span<u8> MemoryCommit::Map() {
    if (!allocation) return {};
    if (!mapped_ptr) {
        // Map into a local first so mapped_ptr is only ever written on success,
        // the same way the const overload does it.
        void *p = nullptr;
        if (vmaMapMemory(allocator, allocation, &p) != VK_SUCCESS) return {};
        mapped_ptr = p;
    }
    // Clamp the byte count to what size_t can represent before narrowing it.
    const size_t n = static_cast<size_t>(
        std::min<VkDeviceSize>(size, (std::numeric_limits<size_t>::max)()));
    return std::span<u8>{static_cast<u8 *>(mapped_ptr), n};
}
// Read-only counterpart of Map(): returns a const view over this commit's memory, mapping
// the allocation through VMA on first use.
// Returns an empty span when there is no allocation or when vmaMapMemory fails.
std::span<const u8> MemoryCommit::Map() const {
    if (!allocation) return {};
    if (!mapped_ptr) {
        void *p = nullptr;
        if (vmaMapMemory(allocator, allocation, &p) != VK_SUCCESS) return {};
        // The mapping is cached on the object even though this overload is const.
        // NOTE(review): this writes through const_cast; declaring mapped_ptr `mutable`
        // would express the same intent without the cast — confirm against the class.
        const_cast<MemoryCommit *>(this)->mapped_ptr = p;
    }
    // Clamp the byte count to what size_t can represent before narrowing it.
    const size_t n = static_cast<size_t>(
        std::min<VkDeviceSize>(size, (std::numeric_limits<size_t>::max)()));
    return std::span<const u8>{static_cast<const u8 *>(mapped_ptr), n};
}
void MemoryCommit::Unmap()
{
void MemoryCommit::Unmap() {
if (allocation && mapped_ptr) {
vmaUnmapMemory(allocator, allocation);
mapped_ptr = nullptr;
@ -182,9 +151,7 @@ namespace Vulkan {
if (Settings::values.gpu_logging_enabled.GetValue() &&
Settings::values.gpu_log_memory_tracking.GetValue() &&
memory != VK_NULL_HANDLE) {
GPU::Logging::GPULogger::GetInstance().LogMemoryDeallocation(
reinterpret_cast<uintptr_t>(memory)
);
GPU::Logging::GPULogger::GetInstance().LogMemoryDeallocation(uintptr_t(memory));
}
if (mapped_ptr) {
@ -201,31 +168,26 @@ namespace Vulkan {
}
// Stores the device reference, its VMA allocator, the physical device's memory properties
// and the bufferImageGranularity limit.
// When a debugging tool is attached, every memory type that belongs to a heap of at most
// 256 MiB visited by ForEachDeviceLocalHostVisibleHeap is cleared from valid_memory_types.
MemoryAllocator::MemoryAllocator(const Device &device_)
    : device{device_}, allocator{device.GetAllocator()}
    , properties{device_.GetPhysical().GetMemoryProperties().memoryProperties}
    , buffer_image_granularity{device_.GetPhysical().GetProperties().limits.bufferImageGranularity}
{
    // Preserve the previous "RenderDoc small heap" trimming behavior that we had in original vma minus the heap bug
    if (device.HasDebuggingToolAttached()) {
        using namespace Common::Literals;
        ForEachDeviceLocalHostVisibleHeap(device, [this](size_t heap_idx, VkMemoryHeap &heap) {
            if (heap.size <= 256_MiB) {
                // Drop each memory type backed by this heap from the allowed-type mask.
                for (u32 t = 0; t < properties.memoryTypeCount; ++t)
                    if (properties.memoryTypes[t].heapIndex == heap_idx)
                        valid_memory_types &= ~(1u << t);
            }
        });
    }
}
MemoryAllocator::~MemoryAllocator() = default;
vk::Image MemoryAllocator::CreateImage(const VkImageCreateInfo &ci) const
{
vk::Image MemoryAllocator::CreateImage(const VkImageCreateInfo &ci) const {
const VmaAllocationCreateInfo alloc_ci = {
.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT,
.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE,
@ -246,24 +208,25 @@ namespace Vulkan {
if (Settings::values.gpu_logging_enabled.GetValue() &&
Settings::values.gpu_log_memory_tracking.GetValue()) {
GPU::Logging::GPULogger::GetInstance().LogMemoryAllocation(
reinterpret_cast<uintptr_t>(alloc_info.deviceMemory),
static_cast<u64>(alloc_info.size),
uintptr_t(alloc_info.deviceMemory),
u64(alloc_info.size),
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT
);
}
return vk::Image(handle, ci.usage, *device.GetLogical(), allocator, allocation,
device.GetDispatchLoader());
return vk::Image(handle, ci.usage, *device.GetLogical(), allocator, allocation, device.GetDispatchLoader());
}
vk::Buffer
MemoryAllocator::CreateBuffer(const VkBufferCreateInfo &ci, MemoryUsage usage) const
{
vk::Buffer MemoryAllocator::CreateBuffer(const VkBufferCreateInfo &ci, MemoryUsage usage) const {
// Mesa (ANV) will do memcpy() if the memory is not marked as host cached, so prefer HOST_CACHED for stream buffers on that driver
auto const anv_flags = (usage == MemoryUsage::Stream
&& device.GetDriverID() == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA)
? VK_MEMORY_PROPERTY_HOST_CACHED_BIT : 0;
const VmaAllocationCreateInfo alloc_ci = {
.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | MemoryUsageVmaFlags(usage),
.usage = MemoryUsageVma(usage),
.requiredFlags = 0,
.preferredFlags = MemoryUsagePreferredVmaFlags(usage),
.preferredFlags = MemoryUsagePreferredVmaFlags(usage) | anv_flags,
.memoryTypeBits = usage == MemoryUsage::Stream ? 0u : valid_memory_types,
.pool = VK_NULL_HANDLE,
.pUserData = nullptr,