[vk] Clamp dynamic descriptors based on device limits (#4115)
Some checks failed
tx-src / sources (push) Has been cancelled
Check Strings / check-strings (push) Has been cancelled

Should fix this specific crash
```
* thread #82, name = 'GPU', stop reason = Exception 0xc0000005 encountered at address 0x7ff7e5193e89: Access violation reading location 0x00000098
  * frame #0: 0x00007ff7e5193e89 eden.exe`std::unique_ptr<Vulkan::Scheduler::CommandChunk,std::default_delete<Vulkan::Scheduler::CommandChunk> >::operator->(this=<unavailable>) at memory:3453 [inlined]
    frame #1: 0x00007ff7e5193e81 eden.exe`void Vulkan::Scheduler::DispatchWork(this=0x0000000000000000) at vk_scheduler.cpp:146
    frame #2: 0x00007ff7e5193d8f eden.exe`void Vulkan::Scheduler::WaitWorker(this=<unavailable>) at vk_scheduler.cpp:133
    frame #3: 0x00007ff7e54f472e eden.exe`void Vulkan::UpdateDescriptorQueue::Acquire(this=0x0000026c77cfb4b8) at vk_update_descriptor.cpp:41
    frame #4: 0x00007ff7e5537a43 eden.exe`void Vulkan::ASTCDecoderPass::Assemble(this=0x0000026c794fba10, image=0x0000026cfb794850, map=0x00000025e28fcb30, swizzles=<unavailable>) at vk_compute_pass.cpp:603
    frame #5: 0x00007ff7e55043e1 eden.exe`void Vulkan::TextureCacheRuntime::AccelerateImageUpload(this=<unavailable>, image=0x0000026cfb794850, map=0x00000025e28fcb30, swizzles=size=10, z_start=0, z_count=0) at vk_texture_cache.cpp:2482
    frame #6: 0x00007ff7e5516592 eden.exe`void VideoCommon::TextureCache<struct Vulkan::TextureCacheParams>::UploadImageContents<struct Vulkan::StagingBufferRef>(this=<unavailable>, image=0x0000026cfb794850, staging=0x00000025e28fcb30) at texture_cache.h:1147
    frame #7: 0x00007ff7e551523d eden.exe`void VideoCommon::TextureCache<struct Vulkan::TextureCacheParams>::RefreshContents(this=0x0000026c794fd800, image=0x0000026cfb794850, image_id=(index = 3801072224)) at texture_cache.h:1133
    frame #8: 0x00007ff7e5517b50 eden.exe`struct Common::SlotId VideoCommon::TextureCache<struct Vulkan::TextureCacheParams>::JoinImages(this=0x0000026c794fd800, info=<unavailable>, gpu_addr=25374426112, cpu_addr=2254181376) at texture_cache.h:1644
    frame #9: 0x00007ff7e5516ff6 eden.exe`struct Common::SlotId VideoCommon::TextureCache<struct Vulkan::TextureCacheParams>::InsertImage(this=0x0000026c794fd800, info=0x00000025e28fd270, gpu_addr=25374426112, options=0x0) at texture_cache.h:1513
    frame #10: 0x00007ff7e5516a60 eden.exe`struct Common::SlotId VideoCommon::TextureCache<struct Vulkan::TextureCacheParams>::FindOrInsertImage(this=0x0000026c794fd800, info=0x00000025e28fd270, gpu_addr=25374426112, options=0x0) at texture_cache.h:1194
    frame #11: 0x00007ff7e5515a13 eden.exe`struct Common::SlotId VideoCommon::TextureCache<struct Vulkan::TextureCacheParams>::CreateImageView(this=0x0000026c794fd800, config=0x00000025e28fd370) at texture_cache.h:1173
    frame #12: 0x00007ff7e550cd64 eden.exe`struct Common::SlotId VideoCommon::TextureCache<struct Vulkan::TextureCacheParams>::VisitImageView(this=0x0000026c794fd800, index=4586, compute=<unavailable>) at texture_cache.h:554
    frame #13: 0x00007ff7e550d181 eden.exe`void VideoCommon::TextureCache<struct Vulkan::TextureCacheParams>::FillImageViews(this=0x0000026c794fd800, views=size=5, compute=<unavailable>, blacklist=<unavailable>) at texture_cache.h:227
    frame #14: 0x00007ff7e58896e4 eden.exe`Vulkan::GraphicsPipeline::ConfigureImpl<Vulkan::(anonymous namespace)::SimpleStorageSpec>(this=0x0000026f865c1a60, is_indexed=<unavailable>) at vk_graphics_pipeline.cpp:415
    frame #15: 0x00007ff7e5889479 eden.exe`<lambda_1>::operator(pl=<unavailable>, is_indexed=<unavailable>) at vk_graphics_pipeline.h:123 [inlined]
    frame #16: 0x00007ff7e5889474 eden.exe`<lambda_1>::__invoke(pl=<unavailable>, is_indexed=<unavailable>) at vk_graphics_pipeline.h:123
    frame #17: 0x00007ff7e519d2a8 eden.exe`Vulkan::GraphicsPipeline::Configure(this=0x0000026f865c1a60, is_indexed=<unavailable>) at vk_graphics_pipeline.h:105 [inlined]
    frame #18: 0x00007ff7e519d29d eden.exe`Vulkan::RasterizerVulkan::PrepareDraw<`lambda at D:\a\g\g\eden-source\src\video_core\renderer_vulkan\vk_rasterizer.cpp:256:29'>(this=0x0000026c764f9368, is_indexed=<unavailable>, draw_func=0x00000025e28fdb80) at vk_rasterizer.cpp:244
    frame #19: 0x00007ff7e519d1db eden.exe`void Vulkan::RasterizerVulkan::Draw(this=<unavailable>, is_indexed=<unavailable>, instance_count=<unavailable>) at vk_rasterizer.cpp:256
    frame #20: 0x00007ff7e50d9025 eden.exe`void Tegra::HLE_DrawIndexedIndirect::Fallback(this=0x0000026f2af85118, maxwell3d=0x0000026ccbf9eb00, parameters=size=6) at macro.cpp:178
    frame #21: 0x00007ff7e50d8f21 eden.exe`void Tegra::HLE_DrawIndexedIndirect::Execute(this=0x0000026f2af85118, maxwell3d=0x0000026ccbf9eb00, parameters=size=6) at macro.cpp:129
    frame #22: 0x00007ff7e50db607 eden.exe`Tegra::MacroEngine::Execute::<lambda_0>::operator(this=0x00000025e28fde70, acm= Active Type = Tegra::HLE_DrawIndexedIndirect ) at macro.cpp:1362
    frame #23: 0x00007ff7e50dada0 eden.exe`void Tegra::MacroEngine::Execute(this=0x0000026ccbfaa020, maxwell3d=0x0000026ccbf9eb00, method=418, parameters=<unavailable>) at macro.cpp:1392
    frame #24: 0x00007ff7e4ef062b eden.exe`void Tegra::Engines::Maxwell3D::CallMacroMethod(this=0x0000026ccbf9eb00, method=<unavailable>, parameters=<unavailable>) at maxwell_3d.cpp:390
    frame #25: 0x00007ff7e4ef04be eden.exe`void Tegra::Engines::Maxwell3D::ProcessMacro(this=0x0000026ccbf9eb00, method=<unavailable>, base_start=<unavailable>, amount=5, is_last_call=<unavailable>) at maxwell_3d.cpp:223
    frame #26: 0x00007ff7e4ef1673 eden.exe`void Tegra::Engines::Maxwell3D::CallMultiMethod(this=<unavailable>, method=<unavailable>, base_start=0x000001ea398e22f4, amount=<unavailable>, methods_pending=5) at maxwell_3d.cpp:419
    frame #27: 0x00007ff7e4ef535a eden.exe`void Tegra::DmaPusher::CallMultiMethod(this=<unavailable>, base_start=0x000001ea398e22f4, num_methods=<unavailable>) const at dma_pusher.cpp:201
    frame #28: 0x00007ff7e4ef5023 eden.exe`void Tegra::DmaPusher::ProcessCommands(this=0x0000026ccbfba698, commands=size=36) at dma_pusher.cpp:120
    frame #29: 0x00007ff7e4ef4ced eden.exe`bool Tegra::DmaPusher::Step(this=0x0000026ccbfba698) at dma_pusher.cpp:88
    frame #30: 0x00007ff7e4ef49f8 eden.exe`void Tegra::DmaPusher::DispatchCalls(this=0x0000026ccbfba698) at dma_pusher.cpp:40
    frame #31: 0x00007ff7e4ede797 eden.exe`void Tegra::Control::Scheduler::Push(this=<unavailable>, channel=-493895072, entries=0x00000025e28fe2e0) at scheduler.cpp:31
    frame #32: 0x00007ff7e4eedc7b eden.exe`VideoCommon::GPUThread::ThreadManager::StartThread::<lambda_0>::operator(this=<unavailable>, stop_token=stop_token @ 0x00000025e28ffb40) at gpu_thread.cpp:42 [inlined]
    frame #33: 0x00007ff7e4eedae8 eden.exe`std::invoke(_Obj=<unavailable>, _Arg1=0x0000026ccb3edef0) at type_traits:1680 [inlined]
    frame #34: 0x00007ff7e4eedac1 eden.exe`std::thread::_Invoke<std::tuple<`lambda at D:\a\g\g\eden-source\src\video_core\gpu_thread.cpp:29:27',std::stop_token>,0,1>(_RawVals=0x0000026ccb3edef0) at thread:60
    frame #35: 0x00007ff8e87a37b0 ucrtbase.dll`wcsrchr + 336
    ```

Signed-off-by: lizzie <lizzie@eden-emu.dev>

Co-authored-by: CamilleLaVey <camillelavey99@gmail.com>
Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/4115
Reviewed-by: MaranBr <maranbr@eden-emu.dev>
Reviewed-by: CamilleLaVey <camillelavey99@gmail.com>
This commit is contained in:
lizzie 2026-06-30 04:33:30 +02:00 committed by crueter
parent 1b482fa99b
commit 2068b5d452
No known key found for this signature in database
GPG key ID: 425ACD2D4830EBC6
21 changed files with 223 additions and 161 deletions

View file

@ -245,16 +245,31 @@ ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
std::min<u32>(device.GetMaxUserClipDistances(), Maxwell::Regs::NumClipDistances),
},
host_info{
.support_float64 = true,
.support_float16 = false,
.support_int64 = device.HasShaderInt64(),
.needs_demote_reorder = device.IsAmd(),
.support_snorm_render_buffer = false,
.support_viewport_index_layer = device.HasVertexViewportLayer(),
.min_ssbo_alignment = static_cast<u32>(device.GetShaderStorageBufferAlignment()),
.support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(),
.support_conditional_barrier = device.SupportsConditionalBarriers(),
.min_ssbo_alignment = static_cast<u32>(device.GetShaderStorageBufferAlignment()),
.max_per_stage_descriptor_sampled_images =
Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
.max_per_stage_resources = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
.max_descriptor_set_samplers = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
.max_descriptor_set_uniform_buffers = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
.max_descriptor_set_uniform_buffers_dynamic =
Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
.max_descriptor_set_storage_buffers = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
.max_descriptor_set_storage_buffers_dynamic =
Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
.max_descriptor_set_sampled_images = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
.max_descriptor_set_storage_images = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
.max_descriptor_set_input_attachements =
Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
.support_float64 = true,
.support_float16 = false,
.support_int64 = device.HasShaderInt64(),
.needs_demote_reorder = device.IsAmd(),
.support_snorm_render_buffer = false,
.support_viewport_index_layer = device.HasVertexViewportLayer(),
.support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(),
.support_conditional_barrier = device.SupportsConditionalBarriers(),
} {
host_info.ApplyDescriptorLimitPolicy();
if (use_asynchronous_shaders) {
workers = CreateWorkers();
}

View file

@ -22,6 +22,15 @@ namespace Vulkan {
using Shader::Backend::SPIRV::NUM_TEXTURE_AND_IMAGE_SCALING_WORDS;
/// Returns the total number of descriptors declared by one shader's `info`.
/// The total is the sum of Shader::NumDescriptors() over the constant buffer, storage buffer,
/// texture buffer, image buffer, texture and image descriptor lists.
/// NOTE(review): the pipelines in this change pass this total to UpdateDescriptorQueue::Acquire()
/// as the number of entries to reserve; confirm every Add*() call made while configuring a
/// pipeline is covered by one of the six lists summed here.
[[nodiscard]] inline u32 NumDescriptorEntries(const Shader::Info& info) {
return Shader::NumDescriptors(info.constant_buffer_descriptors) +
Shader::NumDescriptors(info.storage_buffers_descriptors) +
Shader::NumDescriptors(info.texture_buffer_descriptors) +
Shader::NumDescriptors(info.image_buffer_descriptors) +
Shader::NumDescriptors(info.texture_descriptors) +
Shader::NumDescriptors(info.image_descriptors);
}
class DescriptorLayoutBuilder {
public:
DescriptorLayoutBuilder(const Device& device_) : device{&device_} {}

View file

@ -326,7 +326,7 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer
const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16));
const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
compute_pass_descriptor_queue.Acquire();
compute_pass_descriptor_queue.Acquire(scheduler, 2);
compute_pass_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
compute_pass_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()};
@ -384,7 +384,7 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
const std::size_t staging_size = num_tri_vertices * sizeof(u32);
const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
compute_pass_descriptor_queue.Acquire();
compute_pass_descriptor_queue.Acquire(scheduler, 2);
compute_pass_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
compute_pass_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()};
@ -429,7 +429,7 @@ void ConditionalRenderingResolvePass::Resolve(VkBuffer dst_buffer, VkBuffer src_
}
const size_t compare_size = compare_to_zero ? 8 : 24;
compute_pass_descriptor_queue.Acquire();
compute_pass_descriptor_queue.Acquire(scheduler, 2);
compute_pass_descriptor_queue.AddBuffer(src_buffer, src_offset, compare_size);
compute_pass_descriptor_queue.AddBuffer(dst_buffer, 0, sizeof(u32));
const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()};
@ -498,7 +498,7 @@ void QueriesPrefixScanPass::Run(VkBuffer accumulation_buffer, VkBuffer dst_buffe
static constexpr size_t DISPATCH_SIZE = 2048U;
size_t runs_to_do = std::min<size_t>(current_runs, DISPATCH_SIZE);
current_runs -= runs_to_do;
compute_pass_descriptor_queue.Acquire();
compute_pass_descriptor_queue.Acquire(scheduler, 3);
compute_pass_descriptor_queue.AddBuffer(src_buffer, 0, number_of_sums * sizeof(u64));
compute_pass_descriptor_queue.AddBuffer(dst_buffer, 0, number_of_sums * sizeof(u64));
compute_pass_descriptor_queue.AddBuffer(accumulation_buffer, 0, sizeof(u64));
@ -600,7 +600,7 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 8U);
const u32 num_dispatches_z = image.info.resources.layers;
compute_pass_descriptor_queue.Acquire();
compute_pass_descriptor_queue.Acquire(scheduler, 2);
compute_pass_descriptor_queue.AddBuffer(map.buffer, input_offset,
image.guest_size_bytes - swizzle.buffer_offset);
compute_pass_descriptor_queue.AddImage(image.StorageImageView(swizzle.level));
@ -821,7 +821,7 @@ void BlockLinearUnswizzle3DPass::UnswizzleChunk(
pc.blocks_dim[1] = blocks_y;
pc.blocks_dim[2] = z_count; // Only process the count
compute_pass_descriptor_queue.Acquire();
compute_pass_descriptor_queue.Acquire(scheduler, 3);
compute_pass_descriptor_queue.AddBuffer(*image.runtime->swizzle_table_buffer, 0,
image.runtime->swizzle_table_size);
compute_pass_descriptor_queue.AddBuffer(swizzled.buffer,
@ -989,7 +989,7 @@ void MSAACopyPass::CopyImage(Image& dst_image, Image& src_image,
ASSERT(copy.dst_subresource.base_layer == 0);
ASSERT(copy.dst_subresource.num_layers == 1);
compute_pass_descriptor_queue.Acquire();
compute_pass_descriptor_queue.Acquire(scheduler, 2);
compute_pass_descriptor_queue.AddImage(
src_image.StorageImageView(copy.src_subresource.base_level));
compute_pass_descriptor_queue.AddImage(

View file

@ -45,6 +45,7 @@ ComputePipeline::ComputePipeline(const Device& device_, Scheduler& scheduler, vk
}
std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(),
uniform_buffer_sizes.begin());
num_descriptor_entries = NumDescriptorEntries(info);
auto func{[this, &scheduler, &descriptor_pool, shader_notify, pipeline_statistics] {
DescriptorLayoutBuilder builder{device};
@ -113,7 +114,7 @@ ComputePipeline::ComputePipeline(const Device& device_, Scheduler& scheduler, vk
void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
Tegra::MemoryManager& gpu_memory, Scheduler& scheduler,
BufferCache& buffer_cache, TextureCache& texture_cache) {
guest_descriptor_queue.Acquire();
guest_descriptor_queue.Acquire(scheduler, num_descriptor_entries);
buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes);
buffer_cache.UnbindComputeStorageBuffers();

View file

@ -53,6 +53,7 @@ private:
vk::PipelineCache& pipeline_cache;
GuestDescriptorQueue& guest_descriptor_queue;
Shader::Info info;
u32 num_descriptor_entries{};
VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{};

View file

@ -268,6 +268,7 @@ GraphicsPipeline::GraphicsPipeline(
num_textures += Shader::NumDescriptors(info->texture_descriptors);
num_image_elements += Shader::NumDescriptors(info->texture_descriptors);
num_image_elements += Shader::NumDescriptors(info->image_descriptors);
num_descriptor_entries += NumDescriptorEntries(*info);
}
fragment_has_color0_output = stage_infos[NUM_STAGES - 1].stores_frag_color[0];
auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool, pipeline_statistics] {
@ -473,7 +474,7 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
buffer_cache.UpdateGraphicsBuffers(is_indexed);
buffer_cache.BindHostGeometryBuffers(is_indexed);
guest_descriptor_queue.Acquire();
guest_descriptor_queue.Acquire(scheduler, num_descriptor_entries);
RescalingPushConstant rescaling;
RenderAreaPushConstant render_area;

View file

@ -159,6 +159,7 @@ private:
std::array<Shader::Info, NUM_STAGES> stage_infos;
std::array<u32, 5> enabled_uniform_buffer_masks{};
VideoCommon::UniformBufferSizes uniform_buffer_sizes{};
u32 num_descriptor_entries{};
size_t num_image_elements{};
u32 num_textures{};
bool fragment_has_color0_output{};

View file

@ -439,10 +439,21 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
.has_broken_robust =
device.IsNvidia() && device.GetNvidiaArch() <= NvidiaArchitecture::Arch_Pascal,
.min_ssbo_alignment = device.GetStorageBufferAlignment(),
.max_user_clip_distances = device.GetMaxUserClipDistances(),
.max_user_clip_distances = device.GetMaxUserClipDistances()
};
host_info = Shader::HostTranslateInfo{
.min_ssbo_alignment = device.GetStorageBufferAlignment(),
.max_per_stage_descriptor_sampled_images = device.GetMaxPerStageDescriptorSampledImages(),
.max_per_stage_resources = device.GetMaxPerStageResources(),
.max_descriptor_set_samplers = device.GetMaxDescriptorSetSamplers(),
.max_descriptor_set_uniform_buffers = device.GetMaxDescriptorSetUniformBuffers(),
.max_descriptor_set_uniform_buffers_dynamic = device.GetMaxDescriptorSetUniformBuffersDynamic(),
.max_descriptor_set_storage_buffers = device.GetMaxDescriptorSetStorageBuffers(),
.max_descriptor_set_storage_buffers_dynamic = device.GetMaxDescriptorSetStorageBuffersDynamic(),
.max_descriptor_set_sampled_images = device.GetMaxDescriptorSetSampledImages(),
.max_descriptor_set_storage_images = device.GetMaxDescriptorSetStorageImages(),
.max_descriptor_set_input_attachements = device.GetMaxDescriptorSetInputAttachments(),
.support_float64 = device.IsFloat64Supported(),
.support_float16 = device.IsFloat16Supported(),
.support_int64 = device.IsShaderInt64Supported(),
@ -451,13 +462,10 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
driver_id == VK_DRIVER_ID_SAMSUNG_PROPRIETARY,
.support_snorm_render_buffer = true,
.support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(),
.min_ssbo_alignment = static_cast<u32>(device.GetStorageBufferAlignment()),
.max_per_stage_descriptor_sampled_images = device.GetMaxPerStageDescriptorSampledImages(),
.max_per_stage_resources = device.GetMaxPerStageResources(),
.max_descriptor_set_sampled_images = device.GetMaxDescriptorSetSampledImages(),
.support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(),
.support_conditional_barrier = device.SupportsConditionalBarriers(),
};
host_info.ApplyDescriptorLimitPolicy();
if (device.GetMaxVertexInputAttributes() < Maxwell::NumVertexAttributes) {
LOG_WARNING(Render_Vulkan, "maxVertexInputAttributes is too low: {} < {}",

View file

@ -203,7 +203,7 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
: gpu{gpu_}, device_memory{device_memory_}, device{device_},
memory_allocator{memory_allocator_}, state_tracker{state_tracker_}, scheduler{scheduler_},
staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler),
guest_descriptor_queue(device, scheduler), compute_pass_descriptor_queue(device, scheduler),
guest_descriptor_queue(device), compute_pass_descriptor_queue(device),
blit_image(device, scheduler, state_tracker, descriptor_pool), render_pass_cache(device),
texture_cache_runtime{
device, scheduler, memory_allocator, staging_pool,

View file

@ -155,15 +155,14 @@ void Scheduler::WaitWorker() {
}
void Scheduler::DispatchWork() {
if (chunk->Empty()) {
return;
if (chunk && !chunk->Empty()) {
{
std::scoped_lock ql{queue_mutex};
work_queue.push(std::move(chunk));
}
event_cv.notify_all();
AcquireNewChunk();
}
{
std::scoped_lock ql{queue_mutex};
work_queue.push(std::move(chunk));
}
event_cv.notify_all();
AcquireNewChunk();
}
void Scheduler::RequestRenderpass(const Framebuffer* framebuffer) {

View file

@ -7,6 +7,7 @@
#include <variant>
#include <boost/container/static_vector.hpp>
#include "common/assert.h"
#include "common/logging.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
@ -15,8 +16,9 @@
namespace Vulkan {
UpdateDescriptorQueue::UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_)
: device{device_}, scheduler{scheduler_} {
UpdateDescriptorQueue::UpdateDescriptorQueue(const Device& device_)
: device{device_}
{
payload_start = payload.data();
payload_cursor = payload.data();
}
@ -31,13 +33,15 @@ void UpdateDescriptorQueue::TickFrame() {
payload_cursor = payload_start;
}
void UpdateDescriptorQueue::Acquire() {
// Minimum number of entries required.
// This is the maximum number of entries a single draw call might use.
static constexpr size_t MIN_ENTRIES = 0x400;
if (std::distance(payload_start, payload_cursor) + MIN_ENTRIES >= FRAME_PAYLOAD_SIZE) {
LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");
void UpdateDescriptorQueue::Acquire(Scheduler& scheduler, size_t required_entries) {
static constexpr size_t DEFAULT_REQUIRED_ENTRIES = 0x400;
const size_t reserve = required_entries > 0 ? required_entries : DEFAULT_REQUIRED_ENTRIES;
ASSERT_MSG(reserve < FRAME_PAYLOAD_SIZE, "Descriptor reservation {} >= frame capacity {}",
reserve, FRAME_PAYLOAD_SIZE);
const size_t used = static_cast<size_t>(std::distance(payload_start, payload_cursor));
if (used + reserve >= FRAME_PAYLOAD_SIZE) {
LOG_WARNING(Render_Vulkan, "Payload overflow (used={}, reserve={}, capacity={})",
used, reserve, FRAME_PAYLOAD_SIZE);
scheduler.WaitWorker();
payload_cursor = payload_start;
}

View file

@ -34,12 +34,11 @@ class UpdateDescriptorQueue final {
static constexpr size_t PAYLOAD_SIZE = FRAME_PAYLOAD_SIZE * FRAMES_IN_FLIGHT;
public:
explicit UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_);
explicit UpdateDescriptorQueue(const Device& device_);
~UpdateDescriptorQueue();
void TickFrame();
void Acquire();
void Acquire(Scheduler& scheduler, size_t required_entries = 0);
const DescriptorUpdateEntry* UpdateData() const noexcept {
return upload_start;
@ -75,8 +74,6 @@ public:
private:
const Device& device;
Scheduler& scheduler;
size_t frame_index{0};
DescriptorUpdateEntry* payload_cursor = nullptr;
DescriptorUpdateEntry* payload_start = nullptr;

View file

@ -321,32 +321,23 @@ public:
return properties.properties.limits.maxPushConstantsSize;
}
/// Returns the maximum size for shared memory.
u32 GetMaxComputeSharedMemorySize() const {
return properties.properties.limits.maxComputeSharedMemorySize;
}
/// Returns the maximum number of dynamic storage buffer descriptors per set.
u32 GetMaxDescriptorSetStorageBuffersDynamic() const {
return properties.properties.limits.maxDescriptorSetStorageBuffersDynamic;
}
/// Returns the maximum number of dynamic uniform buffer descriptors per set.
u32 GetMaxDescriptorSetUniformBuffersDynamic() const {
return properties.properties.limits.maxDescriptorSetUniformBuffersDynamic;
}
u32 GetMaxPerStageDescriptorSampledImages() const {
return properties.properties.limits.maxPerStageDescriptorSampledImages;
}
u32 GetMaxPerStageResources() const {
return properties.properties.limits.maxPerStageResources;
}
u32 GetMaxDescriptorSetSampledImages() const {
return properties.properties.limits.maxDescriptorSetSampledImages;
}
/// Generates one `u32 GetMax<Name>() const` accessor per entry of FN_MAX_LIMIT_LIST.
/// Each accessor returns `properties.properties.limits.max<Name>`, built by token pasting, so an
/// entry must match the suffix of a `max*` member of the limits struct exactly.
/// NOTE(review): presumably `limits` is VkPhysicalDeviceLimits; verify new entries against it.
/// Both helper macros are #undef'd right after the list is expanded so they do not stay defined.
#define FN_MAX_LIMIT_LIST \
FN_MAX_LIMIT_ELEM(ComputeSharedMemorySize) \
FN_MAX_LIMIT_ELEM(PerStageDescriptorSampledImages) \
FN_MAX_LIMIT_ELEM(PerStageResources) \
FN_MAX_LIMIT_ELEM(DescriptorSetSamplers) \
FN_MAX_LIMIT_ELEM(DescriptorSetUniformBuffers) \
FN_MAX_LIMIT_ELEM(DescriptorSetUniformBuffersDynamic) \
FN_MAX_LIMIT_ELEM(DescriptorSetStorageBuffers) \
FN_MAX_LIMIT_ELEM(DescriptorSetStorageBuffersDynamic) \
FN_MAX_LIMIT_ELEM(DescriptorSetSampledImages) \
FN_MAX_LIMIT_ELEM(DescriptorSetStorageImages) \
FN_MAX_LIMIT_ELEM(DescriptorSetInputAttachments)
#define FN_MAX_LIMIT_ELEM(name) \
u32 GetMax##name() const { return properties.properties.limits.max##name; }
FN_MAX_LIMIT_LIST
#undef FN_MAX_LIMIT_ELEM
#undef FN_MAX_LIMIT_LIST
/// Returns float control properties of the device.
const VkPhysicalDeviceFloatControlsPropertiesKHR& FloatControlProperties() const {