From 63aa33a54427c0965bb38c36652ad8e74c44602b Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Fri, 26 Jun 2026 21:58:43 -0400 Subject: [PATCH] [vulkan] Simplification of Acquire policy for descriptors usage --- .../renderer_vulkan/pipeline_helper.h | 9 +++++++++ .../renderer_vulkan/vk_compute_pass.cpp | 14 +++++++------- .../renderer_vulkan/vk_compute_pipeline.cpp | 3 ++- .../renderer_vulkan/vk_compute_pipeline.h | 1 + .../renderer_vulkan/vk_graphics_pipeline.cpp | 3 ++- .../renderer_vulkan/vk_graphics_pipeline.h | 1 + .../renderer_vulkan/vk_update_descriptor.cpp | 17 ++++++++++------- .../renderer_vulkan/vk_update_descriptor.h | 2 +- 8 files changed, 33 insertions(+), 17 deletions(-) diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index 451a02c7e6..35c9379987 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -22,6 +22,15 @@ namespace Vulkan { using Shader::Backend::SPIRV::NUM_TEXTURE_AND_IMAGE_SCALING_WORDS; +[[nodiscard]] inline u32 NumDescriptorEntries(const Shader::Info& info) { + return Shader::NumDescriptors(info.constant_buffer_descriptors) + + Shader::NumDescriptors(info.storage_buffers_descriptors) + + Shader::NumDescriptors(info.texture_buffer_descriptors) + + Shader::NumDescriptors(info.image_buffer_descriptors) + + Shader::NumDescriptors(info.texture_descriptors) + + Shader::NumDescriptors(info.image_descriptors); +} + class DescriptorLayoutBuilder { public: DescriptorLayoutBuilder(const Device& device_) : device{&device_} {} diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index c993700249..471cbbb0df 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -326,7 +326,7 @@ std::pair Uint8Pass::Assemble(u32 num_vertices, VkBuffer const u32 staging_size = static_cast<u32>(num_vertices * 
sizeof(u16)); const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); - compute_pass_descriptor_queue.Acquire(scheduler); + compute_pass_descriptor_queue.Acquire(scheduler, 2); compute_pass_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); compute_pass_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()}; @@ -384,7 +384,7 @@ std::pair QuadIndexedPass::Assemble( const std::size_t staging_size = num_tri_vertices * sizeof(u32); const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); - compute_pass_descriptor_queue.Acquire(scheduler); + compute_pass_descriptor_queue.Acquire(scheduler, 2); compute_pass_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); compute_pass_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()}; @@ -429,7 +429,7 @@ void ConditionalRenderingResolvePass::Resolve(VkBuffer dst_buffer, VkBuffer src_ } const size_t compare_size = compare_to_zero ? 
8 : 24; - compute_pass_descriptor_queue.Acquire(scheduler); + compute_pass_descriptor_queue.Acquire(scheduler, 2); compute_pass_descriptor_queue.AddBuffer(src_buffer, src_offset, compare_size); compute_pass_descriptor_queue.AddBuffer(dst_buffer, 0, sizeof(u32)); const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()}; @@ -498,7 +498,7 @@ void QueriesPrefixScanPass::Run(VkBuffer accumulation_buffer, VkBuffer dst_buffe static constexpr size_t DISPATCH_SIZE = 2048U; size_t runs_to_do = std::min(current_runs, DISPATCH_SIZE); current_runs -= runs_to_do; - compute_pass_descriptor_queue.Acquire(scheduler); + compute_pass_descriptor_queue.Acquire(scheduler, 3); compute_pass_descriptor_queue.AddBuffer(src_buffer, 0, number_of_sums * sizeof(u64)); compute_pass_descriptor_queue.AddBuffer(dst_buffer, 0, number_of_sums * sizeof(u64)); compute_pass_descriptor_queue.AddBuffer(accumulation_buffer, 0, sizeof(u64)); @@ -600,7 +600,7 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 8U); const u32 num_dispatches_z = image.info.resources.layers; - compute_pass_descriptor_queue.Acquire(scheduler); + compute_pass_descriptor_queue.Acquire(scheduler, 2); compute_pass_descriptor_queue.AddBuffer(map.buffer, input_offset, image.guest_size_bytes - swizzle.buffer_offset); compute_pass_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); @@ -821,7 +821,7 @@ void BlockLinearUnswizzle3DPass::UnswizzleChunk( pc.blocks_dim[1] = blocks_y; pc.blocks_dim[2] = z_count; // Only process the count - compute_pass_descriptor_queue.Acquire(scheduler); + compute_pass_descriptor_queue.Acquire(scheduler, 3); compute_pass_descriptor_queue.AddBuffer(*image.runtime->swizzle_table_buffer, 0, image.runtime->swizzle_table_size); compute_pass_descriptor_queue.AddBuffer(swizzled.buffer, @@ -989,7 +989,7 @@ void MSAACopyPass::CopyImage(Image& dst_image, Image& src_image, 
ASSERT(copy.dst_subresource.base_layer == 0); ASSERT(copy.dst_subresource.num_layers == 1); - compute_pass_descriptor_queue.Acquire(scheduler); + compute_pass_descriptor_queue.Acquire(scheduler, 2); compute_pass_descriptor_queue.AddImage( src_image.StorageImageView(copy.src_subresource.base_level)); compute_pass_descriptor_queue.AddImage( diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index a3a3390d53..0076de8cc0 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -45,6 +45,7 @@ ComputePipeline::ComputePipeline(const Device& device_, Scheduler& scheduler, vk } std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), uniform_buffer_sizes.begin()); + num_descriptor_entries = NumDescriptorEntries(info); auto func{[this, &scheduler, &descriptor_pool, shader_notify, pipeline_statistics] { DescriptorLayoutBuilder builder{device}; @@ -113,7 +114,7 @@ ComputePipeline::ComputePipeline(const Device& device_, Scheduler& scheduler, vk void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, Tegra::MemoryManager& gpu_memory, Scheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache) { - guest_descriptor_queue.Acquire(scheduler); + guest_descriptor_queue.Acquire(scheduler, num_descriptor_entries); buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes); buffer_cache.UnbindComputeStorageBuffers(); diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index f3abe4c931..1feeed4840 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -53,6 +53,7 @@ private: vk::PipelineCache& pipeline_cache; GuestDescriptorQueue& guest_descriptor_queue; Shader::Info info; + u32 num_descriptor_entries{}; 
VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{}; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 3e272e4cba..b1447e9e18 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -268,6 +268,7 @@ GraphicsPipeline::GraphicsPipeline( num_textures += Shader::NumDescriptors(info->texture_descriptors); num_image_elements += Shader::NumDescriptors(info->texture_descriptors); num_image_elements += Shader::NumDescriptors(info->image_descriptors); + num_descriptor_entries += NumDescriptorEntries(*info); } fragment_has_color0_output = stage_infos[NUM_STAGES - 1].stores_frag_color[0]; auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool, pipeline_statistics] { @@ -473,7 +474,7 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) { buffer_cache.UpdateGraphicsBuffers(is_indexed); buffer_cache.BindHostGeometryBuffers(is_indexed); - guest_descriptor_queue.Acquire(scheduler); + guest_descriptor_queue.Acquire(scheduler, num_descriptor_entries); RescalingPushConstant rescaling; RenderAreaPushConstant render_area; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 1a41e50a36..d1caeaee8e 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -159,6 +159,7 @@ private: std::array stage_infos; std::array enabled_uniform_buffer_masks{}; VideoCommon::UniformBufferSizes uniform_buffer_sizes{}; + u32 num_descriptor_entries{}; size_t num_image_elements{}; u32 num_textures{}; bool fragment_has_color0_output{}; diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp index 7de4bfd938..b0227ac908 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp +++ 
b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp @@ -7,6 +7,7 @@ #include #include +#include "common/assert.h" #include "common/logging.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" @@ -32,13 +33,15 @@ void UpdateDescriptorQueue::TickFrame() { payload_cursor = payload_start; } -void UpdateDescriptorQueue::Acquire(Scheduler& scheduler) { - // Minimum number of entries required. - // This is the maximum number of entries a single draw call might use. - static constexpr size_t MIN_ENTRIES = 0x400; - - if (std::distance(payload_start, payload_cursor) + MIN_ENTRIES >= FRAME_PAYLOAD_SIZE) { - LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread"); +void UpdateDescriptorQueue::Acquire(Scheduler& scheduler, size_t required_entries) { + static constexpr size_t DEFAULT_REQUIRED_ENTRIES = 0x400; + const size_t reserve = required_entries > 0 ? required_entries : DEFAULT_REQUIRED_ENTRIES; + ASSERT_MSG(reserve < FRAME_PAYLOAD_SIZE, "Descriptor reservation {} >= frame capacity {}", + reserve, FRAME_PAYLOAD_SIZE); + const size_t used = static_cast<size_t>(std::distance(payload_start, payload_cursor)); + if (used + reserve >= FRAME_PAYLOAD_SIZE) { + LOG_WARNING(Render_Vulkan, "Payload overflow (used={}, reserve={}, capacity={})", + used, reserve, FRAME_PAYLOAD_SIZE); scheduler.WaitWorker(); payload_cursor = payload_start; } diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h index 6e9f5d2829..39b2b5a688 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.h +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h @@ -38,7 +38,7 @@ public: ~UpdateDescriptorQueue(); void TickFrame(); - void Acquire(Scheduler& scheduler); + void Acquire(Scheduler& scheduler, size_t required_entries = 0); const DescriptorUpdateEntry* UpdateData() const noexcept { return upload_start;