revert [vk] Fast UBO: fix tracking, resize heuristics, add debug guard (#2695) (#2706)

revert [vk] Fast UBO: fix tracking, resize heuristics, add debug guard (#2695)

Well, stuff showed up after testing phase, that showed us this change break SMO and some mods after being merged directly into master, we will keep stuying why happens this and add a better handling later.

Co-authored-by: Ribbit <ribbit@placeholder.com>
Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2695
Reviewed-by: CamilleLaVey <camillelavey99@gmail.com>
Co-authored-by: Ribbit <ribbit@eden-emu.dev>
Co-committed-by: Ribbit <ribbit@eden-emu.dev>

Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2706
Co-authored-by: CamilleLaVey <camillelavey99@gmail.com>
Co-committed-by: CamilleLaVey <camillelavey99@gmail.com>
This commit is contained in:
CamilleLaVey 2025-10-09 21:37:27 +02:00 committed by crueter
parent 954c17c18a
commit 3c6ef765af
No known key found for this signature in database
GPG key ID: 425ACD2D4830EBC6
6 changed files with 33 additions and 76 deletions

View file

@ -386,10 +386,11 @@ void BufferCache<P>::BindHostComputeBuffers() {
template <class P>
void BufferCache<P>::SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask,
const UniformBufferSizes* sizes) {
const bool mask_changed = channel_state->enabled_uniform_buffer_masks != mask;
if (mask_changed) {
channel_state->fast_bound_uniform_buffers.fill(0);
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
if (channel_state->enabled_uniform_buffer_masks != mask) {
if constexpr (IS_OPENGL) {
channel_state->fast_bound_uniform_buffers.fill(0);
}
channel_state->dirty_uniform_buffers.fill(~u32{0});
channel_state->uniform_buffer_binding_sizes.fill({});
}
@ -805,7 +806,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
channel_state->uniform_buffer_binding_sizes[stage][binding_index] != size;
if (should_fast_bind) {
// We only have to bind when the currently bound buffer is not the fast version
channel_state->fast_bound_uniform_buffers[stage] |= 1u << binding_index;
channel_state->fast_bound_uniform_buffers[stage] |= 1U << binding_index;
channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
runtime.BindFastUniformBuffer(stage, binding_index, size);
}
@ -814,22 +815,13 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
return;
}
}
channel_state->fast_bound_uniform_buffers[stage] |= 1u << binding_index;
channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
if constexpr (IS_OPENGL) {
channel_state->fast_bound_uniform_buffers[stage] |= 1U << binding_index;
channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
}
// Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan
const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size);
#ifdef YUZU_DEBUG
ASSERT(binding_index < NUM_GRAPHICS_UNIFORM_BUFFERS);
ASSERT(span.size() >= size && "UBO stream span too small");
if (!device_memory.ReadBlockFastChecked(device_addr, span.data(), size)) {
LOG_CRITICAL(Render, "DeviceMemory OOB/unmapped: addr=0x{:x} size={}", device_addr, size);
channel_state->fast_bound_uniform_buffers[stage] &= ~(1u << binding_index);
ASSERT(false);
return;
}
#else
device_memory.ReadBlockUnsafe(device_addr, span.data(), size);
#endif
return;
}
// Classic cached path
@ -838,8 +830,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
}
// Skip binding if it's not needed and if the bound buffer is not the fast version
// This exists to avoid instances where the fast buffer is bound and a GPU write happens
const bool was_fast_bound = HasFastUniformBufferBound(stage, binding_index);
needs_bind |= was_fast_bound;
needs_bind |= HasFastUniformBufferBound(stage, binding_index);
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
needs_bind |= channel_state->uniform_buffer_binding_sizes[stage][binding_index] != size;
}
@ -848,6 +839,9 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
}
const u32 offset = buffer.Offset(device_addr);
if constexpr (IS_OPENGL) {
// Fast buffer will be unbound
channel_state->fast_bound_uniform_buffers[stage] &= ~(1U << binding_index);
// Mark the index as dirty if offset doesn't match
const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset();
channel_state->dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index;
@ -861,7 +855,6 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
} else {
runtime.BindUniformBuffer(buffer, offset, size);
}
channel_state->fast_bound_uniform_buffers[stage] &= ~(1u << binding_index);
}
template <class P>
@ -1796,7 +1789,12 @@ std::span<u8> BufferCache<P>::ImmediateBuffer(size_t wanted_capacity) {
template <class P>
bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept {
return ((channel_state->fast_bound_uniform_buffers[stage] >> binding_index) & 1u) != 0;
if constexpr (IS_OPENGL) {
return ((channel_state->fast_bound_uniform_buffers[stage] >> binding_index) & 1) != 0;
} else {
// Only OpenGL has fast uniform buffers
return false;
}
}
template <class P>

View file

@ -53,7 +53,6 @@ constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8;
constexpr u32 NUM_STORAGE_BUFFERS = 16;
constexpr u32 NUM_TEXTURE_BUFFERS = 32;
constexpr u32 NUM_STAGES = 5;
static_assert(NUM_GRAPHICS_UNIFORM_BUFFERS <= 32, "fast bitmask must fit u32");
using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>;
using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>;
@ -138,8 +137,8 @@ public:
u32 written_compute_texture_buffers = 0;
u32 image_compute_texture_buffers = 0;
std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS> uniform_cache_hits{};
std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS> uniform_cache_shots{};
std::array<u32, 16> uniform_cache_hits{};
std::array<u32, 16> uniform_cache_shots{};
u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE;