mirror of
https://git.eden-emu.dev/eden-emu/eden
synced 2026-07-01 22:16:28 +02:00
[vk] Clamp dynamic descriptors based on device limits (#4115)
Should fix this specific crash ``` * thread #82, name = 'GPU', stop reason = Exception 0xc0000005 encountered at address 0x7ff7e5193e89: Access violation reading location 0x00000098 * frame #0: 0x00007ff7e5193e89 eden.exe`std::unique_ptr<Vulkan::Scheduler::CommandChunk,std::default_delete<Vulkan::Scheduler::CommandChunk> >::operator->(this=<unavailable>) at memory:3453 [inlined] frame #1: 0x00007ff7e5193e81 eden.exe`void Vulkan::Scheduler::DispatchWork(this=0x0000000000000000) at vk_scheduler.cpp:146 frame #2: 0x00007ff7e5193d8f eden.exe`void Vulkan::Scheduler::WaitWorker(this=<unavailable>) at vk_scheduler.cpp:133 frame #3: 0x00007ff7e54f472e eden.exe`void Vulkan::UpdateDescriptorQueue::Acquire(this=0x0000026c77cfb4b8) at vk_update_descriptor.cpp:41 frame #4: 0x00007ff7e5537a43 eden.exe`void Vulkan::ASTCDecoderPass::Assemble(this=0x0000026c794fba10, image=0x0000026cfb794850, map=0x00000025e28fcb30, swizzles=<unavailable>) at vk_compute_pass.cpp:603 frame #5: 0x00007ff7e55043e1 eden.exe`void Vulkan::TextureCacheRuntime::AccelerateImageUpload(this=<unavailable>, image=0x0000026cfb794850, map=0x00000025e28fcb30, swizzles=size=10, z_start=0, z_count=0) at vk_texture_cache.cpp:2482 frame #6: 0x00007ff7e5516592 eden.exe`void VideoCommon::TextureCache<struct Vulkan::TextureCacheParams>::UploadImageContents<struct Vulkan::StagingBufferRef>(this=<unavailable>, image=0x0000026cfb794850, staging=0x00000025e28fcb30) at texture_cache.h:1147 frame #7: 0x00007ff7e551523d eden.exe`void VideoCommon::TextureCache<struct Vulkan::TextureCacheParams>::RefreshContents(this=0x0000026c794fd800, image=0x0000026cfb794850, image_id=(index = 3801072224)) at texture_cache.h:1133 frame #8: 0x00007ff7e5517b50 eden.exe`struct Common::SlotId VideoCommon::TextureCache<struct Vulkan::TextureCacheParams>::JoinImages(this=0x0000026c794fd800, info=<unavailable>, gpu_addr=25374426112, cpu_addr=2254181376) at texture_cache.h:1644 frame #9: 0x00007ff7e5516ff6 eden.exe`struct Common::SlotId VideoCommon::TextureCache<struct Vulkan::TextureCacheParams>::InsertImage(this=0x0000026c794fd800, info=0x00000025e28fd270, gpu_addr=25374426112, options=0x0) at texture_cache.h:1513 frame #10: 0x00007ff7e5516a60 eden.exe`struct Common::SlotId VideoCommon::TextureCache<struct Vulkan::TextureCacheParams>::FindOrInsertImage(this=0x0000026c794fd800, info=0x00000025e28fd270, gpu_addr=25374426112, options=0x0) at texture_cache.h:1194 frame #11: 0x00007ff7e5515a13 eden.exe`struct Common::SlotId VideoCommon::TextureCache<struct Vulkan::TextureCacheParams>::CreateImageView(this=0x0000026c794fd800, config=0x00000025e28fd370) at texture_cache.h:1173 frame #12: 0x00007ff7e550cd64 eden.exe`struct Common::SlotId VideoCommon::TextureCache<struct Vulkan::TextureCacheParams>::VisitImageView(this=0x0000026c794fd800, index=4586, compute=<unavailable>) at texture_cache.h:554 frame #13: 0x00007ff7e550d181 eden.exe`void VideoCommon::TextureCache<struct Vulkan::TextureCacheParams>::FillImageViews(this=0x0000026c794fd800, views=size=5, compute=<unavailable>, blacklist=<unavailable>) at texture_cache.h:227 frame #14: 0x00007ff7e58896e4 eden.exe`Vulkan::GraphicsPipeline::ConfigureImpl<Vulkan::(anonymous namespace)::SimpleStorageSpec>(this=0x0000026f865c1a60, is_indexed=<unavailable>) at vk_graphics_pipeline.cpp:415 frame #15: 0x00007ff7e5889479 eden.exe`<lambda_1>::operator(pl=<unavailable>, is_indexed=<unavailable>) at vk_graphics_pipeline.h:123 [inlined] frame #16: 0x00007ff7e5889474 eden.exe`<lambda_1>::__invoke(pl=<unavailable>, is_indexed=<unavailable>) at vk_graphics_pipeline.h:123 frame #17: 0x00007ff7e519d2a8 eden.exe`Vulkan::GraphicsPipeline::Configure(this=0x0000026f865c1a60, is_indexed=<unavailable>) at vk_graphics_pipeline.h:105 [inlined] frame #18: 0x00007ff7e519d29d eden.exe`Vulkan::RasterizerVulkan::PrepareDraw<`lambda at D:\a\g\g\eden-source\src\video_core\renderer_vulkan\vk_rasterizer.cpp:256:29'>(this=0x0000026c764f9368, is_indexed=<unavailable>, draw_func=0x00000025e28fdb80) at vk_rasterizer.cpp:244 frame #19: 0x00007ff7e519d1db eden.exe`void Vulkan::RasterizerVulkan::Draw(this=<unavailable>, is_indexed=<unavailable>, instance_count=<unavailable>) at vk_rasterizer.cpp:256 frame #20: 0x00007ff7e50d9025 eden.exe`void Tegra::HLE_DrawIndexedIndirect::Fallback(this=0x0000026f2af85118, maxwell3d=0x0000026ccbf9eb00, parameters=size=6) at macro.cpp:178 frame #21: 0x00007ff7e50d8f21 eden.exe`void Tegra::HLE_DrawIndexedIndirect::Execute(this=0x0000026f2af85118, maxwell3d=0x0000026ccbf9eb00, parameters=size=6) at macro.cpp:129 frame #22: 0x00007ff7e50db607 eden.exe`Tegra::MacroEngine::Execute::<lambda_0>::operator(this=0x00000025e28fde70, acm= Active Type = Tegra::HLE_DrawIndexedIndirect ) at macro.cpp:1362 frame #23: 0x00007ff7e50dada0 eden.exe`void Tegra::MacroEngine::Execute(this=0x0000026ccbfaa020, maxwell3d=0x0000026ccbf9eb00, method=418, parameters=<unavailable>) at macro.cpp:1392 frame #24: 0x00007ff7e4ef062b eden.exe`void Tegra::Engines::Maxwell3D::CallMacroMethod(this=0x0000026ccbf9eb00, method=<unavailable>, parameters=<unavailable>) at maxwell_3d.cpp:390 frame #25: 0x00007ff7e4ef04be eden.exe`void Tegra::Engines::Maxwell3D::ProcessMacro(this=0x0000026ccbf9eb00, method=<unavailable>, base_start=<unavailable>, amount=5, is_last_call=<unavailable>) at maxwell_3d.cpp:223 frame #26: 0x00007ff7e4ef1673 eden.exe`void Tegra::Engines::Maxwell3D::CallMultiMethod(this=<unavailable>, method=<unavailable>, base_start=0x000001ea398e22f4, amount=<unavailable>, methods_pending=5) at maxwell_3d.cpp:419 frame #27: 0x00007ff7e4ef535a eden.exe`void Tegra::DmaPusher::CallMultiMethod(this=<unavailable>, base_start=0x000001ea398e22f4, num_methods=<unavailable>) const at dma_pusher.cpp:201 frame #28: 0x00007ff7e4ef5023 eden.exe`void Tegra::DmaPusher::ProcessCommands(this=0x0000026ccbfba698, commands=size=36) at dma_pusher.cpp:120 frame #29: 0x00007ff7e4ef4ced eden.exe`bool Tegra::DmaPusher::Step(this=0x0000026ccbfba698) at dma_pusher.cpp:88 frame #30: 0x00007ff7e4ef49f8 eden.exe`void Tegra::DmaPusher::DispatchCalls(this=0x0000026ccbfba698) at dma_pusher.cpp:40 frame #31: 0x00007ff7e4ede797 eden.exe`void Tegra::Control::Scheduler::Push(this=<unavailable>, channel=-493895072, entries=0x00000025e28fe2e0) at scheduler.cpp:31 frame #32: 0x00007ff7e4eedc7b eden.exe`VideoCommon::GPUThread::ThreadManager::StartThread::<lambda_0>::operator(this=<unavailable>, stop_token=stop_token @ 0x00000025e28ffb40) at gpu_thread.cpp:42 [inlined] frame #33: 0x00007ff7e4eedae8 eden.exe`std::invoke(_Obj=<unavailable>, _Arg1=0x0000026ccb3edef0) at type_traits:1680 [inlined] frame #34: 0x00007ff7e4eedac1 eden.exe`std:🧵:_Invoke<std::tuple<`lambda at D:\a\g\g\eden-source\src\video_core\gpu_thread.cpp:29:27',std::stop_token>,0,1>(_RawVals=0x0000026ccb3edef0) at thread:60 frame #35: 0x00007ff8e87a37b0 ucrtbase.dll`wcsrchr + 336 ``` Signed-off-by: lizzie <lizzie@eden-emu.dev> Co-authored-by: CamilleLaVey <camillelavey99@gmail.com> Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/4115 Reviewed-by: MaranBr <maranbr@eden-emu.dev> Reviewed-by: CamilleLaVey <camillelavey99@gmail.com>
This commit is contained in:
parent
1b482fa99b
commit
2068b5d452
21 changed files with 223 additions and 161 deletions
|
|
@ -245,16 +245,31 @@ ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
|
|||
std::min<u32>(device.GetMaxUserClipDistances(), Maxwell::Regs::NumClipDistances),
|
||||
},
|
||||
host_info{
|
||||
.support_float64 = true,
|
||||
.support_float16 = false,
|
||||
.support_int64 = device.HasShaderInt64(),
|
||||
.needs_demote_reorder = device.IsAmd(),
|
||||
.support_snorm_render_buffer = false,
|
||||
.support_viewport_index_layer = device.HasVertexViewportLayer(),
|
||||
.min_ssbo_alignment = static_cast<u32>(device.GetShaderStorageBufferAlignment()),
|
||||
.support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(),
|
||||
.support_conditional_barrier = device.SupportsConditionalBarriers(),
|
||||
.min_ssbo_alignment = static_cast<u32>(device.GetShaderStorageBufferAlignment()),
|
||||
.max_per_stage_descriptor_sampled_images =
|
||||
Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
|
||||
.max_per_stage_resources = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
|
||||
.max_descriptor_set_samplers = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
|
||||
.max_descriptor_set_uniform_buffers = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
|
||||
.max_descriptor_set_uniform_buffers_dynamic =
|
||||
Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
|
||||
.max_descriptor_set_storage_buffers = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
|
||||
.max_descriptor_set_storage_buffers_dynamic =
|
||||
Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
|
||||
.max_descriptor_set_sampled_images = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
|
||||
.max_descriptor_set_storage_images = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
|
||||
.max_descriptor_set_input_attachements =
|
||||
Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
|
||||
.support_float64 = true,
|
||||
.support_float16 = false,
|
||||
.support_int64 = device.HasShaderInt64(),
|
||||
.needs_demote_reorder = device.IsAmd(),
|
||||
.support_snorm_render_buffer = false,
|
||||
.support_viewport_index_layer = device.HasVertexViewportLayer(),
|
||||
.support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(),
|
||||
.support_conditional_barrier = device.SupportsConditionalBarriers(),
|
||||
} {
|
||||
host_info.ApplyDescriptorLimitPolicy();
|
||||
if (use_asynchronous_shaders) {
|
||||
workers = CreateWorkers();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,6 +22,15 @@ namespace Vulkan {
|
|||
|
||||
using Shader::Backend::SPIRV::NUM_TEXTURE_AND_IMAGE_SCALING_WORDS;
|
||||
|
||||
[[nodiscard]] inline u32 NumDescriptorEntries(const Shader::Info& info) {
|
||||
return Shader::NumDescriptors(info.constant_buffer_descriptors) +
|
||||
Shader::NumDescriptors(info.storage_buffers_descriptors) +
|
||||
Shader::NumDescriptors(info.texture_buffer_descriptors) +
|
||||
Shader::NumDescriptors(info.image_buffer_descriptors) +
|
||||
Shader::NumDescriptors(info.texture_descriptors) +
|
||||
Shader::NumDescriptors(info.image_descriptors);
|
||||
}
|
||||
|
||||
class DescriptorLayoutBuilder {
|
||||
public:
|
||||
DescriptorLayoutBuilder(const Device& device_) : device{&device_} {}
|
||||
|
|
|
|||
|
|
@ -326,7 +326,7 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer
|
|||
const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16));
|
||||
const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
|
||||
|
||||
compute_pass_descriptor_queue.Acquire();
|
||||
compute_pass_descriptor_queue.Acquire(scheduler, 2);
|
||||
compute_pass_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
|
||||
compute_pass_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
|
||||
const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()};
|
||||
|
|
@ -384,7 +384,7 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
|
|||
const std::size_t staging_size = num_tri_vertices * sizeof(u32);
|
||||
const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
|
||||
|
||||
compute_pass_descriptor_queue.Acquire();
|
||||
compute_pass_descriptor_queue.Acquire(scheduler, 2);
|
||||
compute_pass_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
|
||||
compute_pass_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
|
||||
const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()};
|
||||
|
|
@ -429,7 +429,7 @@ void ConditionalRenderingResolvePass::Resolve(VkBuffer dst_buffer, VkBuffer src_
|
|||
}
|
||||
const size_t compare_size = compare_to_zero ? 8 : 24;
|
||||
|
||||
compute_pass_descriptor_queue.Acquire();
|
||||
compute_pass_descriptor_queue.Acquire(scheduler, 2);
|
||||
compute_pass_descriptor_queue.AddBuffer(src_buffer, src_offset, compare_size);
|
||||
compute_pass_descriptor_queue.AddBuffer(dst_buffer, 0, sizeof(u32));
|
||||
const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()};
|
||||
|
|
@ -498,7 +498,7 @@ void QueriesPrefixScanPass::Run(VkBuffer accumulation_buffer, VkBuffer dst_buffe
|
|||
static constexpr size_t DISPATCH_SIZE = 2048U;
|
||||
size_t runs_to_do = std::min<size_t>(current_runs, DISPATCH_SIZE);
|
||||
current_runs -= runs_to_do;
|
||||
compute_pass_descriptor_queue.Acquire();
|
||||
compute_pass_descriptor_queue.Acquire(scheduler, 3);
|
||||
compute_pass_descriptor_queue.AddBuffer(src_buffer, 0, number_of_sums * sizeof(u64));
|
||||
compute_pass_descriptor_queue.AddBuffer(dst_buffer, 0, number_of_sums * sizeof(u64));
|
||||
compute_pass_descriptor_queue.AddBuffer(accumulation_buffer, 0, sizeof(u64));
|
||||
|
|
@ -600,7 +600,7 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
|
|||
const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 8U);
|
||||
const u32 num_dispatches_z = image.info.resources.layers;
|
||||
|
||||
compute_pass_descriptor_queue.Acquire();
|
||||
compute_pass_descriptor_queue.Acquire(scheduler, 2);
|
||||
compute_pass_descriptor_queue.AddBuffer(map.buffer, input_offset,
|
||||
image.guest_size_bytes - swizzle.buffer_offset);
|
||||
compute_pass_descriptor_queue.AddImage(image.StorageImageView(swizzle.level));
|
||||
|
|
@ -821,7 +821,7 @@ void BlockLinearUnswizzle3DPass::UnswizzleChunk(
|
|||
pc.blocks_dim[1] = blocks_y;
|
||||
pc.blocks_dim[2] = z_count; // Only process the count
|
||||
|
||||
compute_pass_descriptor_queue.Acquire();
|
||||
compute_pass_descriptor_queue.Acquire(scheduler, 3);
|
||||
compute_pass_descriptor_queue.AddBuffer(*image.runtime->swizzle_table_buffer, 0,
|
||||
image.runtime->swizzle_table_size);
|
||||
compute_pass_descriptor_queue.AddBuffer(swizzled.buffer,
|
||||
|
|
@ -989,7 +989,7 @@ void MSAACopyPass::CopyImage(Image& dst_image, Image& src_image,
|
|||
ASSERT(copy.dst_subresource.base_layer == 0);
|
||||
ASSERT(copy.dst_subresource.num_layers == 1);
|
||||
|
||||
compute_pass_descriptor_queue.Acquire();
|
||||
compute_pass_descriptor_queue.Acquire(scheduler, 2);
|
||||
compute_pass_descriptor_queue.AddImage(
|
||||
src_image.StorageImageView(copy.src_subresource.base_level));
|
||||
compute_pass_descriptor_queue.AddImage(
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@ ComputePipeline::ComputePipeline(const Device& device_, Scheduler& scheduler, vk
|
|||
}
|
||||
std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(),
|
||||
uniform_buffer_sizes.begin());
|
||||
num_descriptor_entries = NumDescriptorEntries(info);
|
||||
|
||||
auto func{[this, &scheduler, &descriptor_pool, shader_notify, pipeline_statistics] {
|
||||
DescriptorLayoutBuilder builder{device};
|
||||
|
|
@ -113,7 +114,7 @@ ComputePipeline::ComputePipeline(const Device& device_, Scheduler& scheduler, vk
|
|||
void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
|
||||
Tegra::MemoryManager& gpu_memory, Scheduler& scheduler,
|
||||
BufferCache& buffer_cache, TextureCache& texture_cache) {
|
||||
guest_descriptor_queue.Acquire();
|
||||
guest_descriptor_queue.Acquire(scheduler, num_descriptor_entries);
|
||||
|
||||
buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes);
|
||||
buffer_cache.UnbindComputeStorageBuffers();
|
||||
|
|
|
|||
|
|
@ -53,6 +53,7 @@ private:
|
|||
vk::PipelineCache& pipeline_cache;
|
||||
GuestDescriptorQueue& guest_descriptor_queue;
|
||||
Shader::Info info;
|
||||
u32 num_descriptor_entries{};
|
||||
|
||||
VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{};
|
||||
|
||||
|
|
|
|||
|
|
@ -268,6 +268,7 @@ GraphicsPipeline::GraphicsPipeline(
|
|||
num_textures += Shader::NumDescriptors(info->texture_descriptors);
|
||||
num_image_elements += Shader::NumDescriptors(info->texture_descriptors);
|
||||
num_image_elements += Shader::NumDescriptors(info->image_descriptors);
|
||||
num_descriptor_entries += NumDescriptorEntries(*info);
|
||||
}
|
||||
fragment_has_color0_output = stage_infos[NUM_STAGES - 1].stores_frag_color[0];
|
||||
auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool, pipeline_statistics] {
|
||||
|
|
@ -473,7 +474,7 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
|||
buffer_cache.UpdateGraphicsBuffers(is_indexed);
|
||||
buffer_cache.BindHostGeometryBuffers(is_indexed);
|
||||
|
||||
guest_descriptor_queue.Acquire();
|
||||
guest_descriptor_queue.Acquire(scheduler, num_descriptor_entries);
|
||||
|
||||
RescalingPushConstant rescaling;
|
||||
RenderAreaPushConstant render_area;
|
||||
|
|
|
|||
|
|
@ -159,6 +159,7 @@ private:
|
|||
std::array<Shader::Info, NUM_STAGES> stage_infos;
|
||||
std::array<u32, 5> enabled_uniform_buffer_masks{};
|
||||
VideoCommon::UniformBufferSizes uniform_buffer_sizes{};
|
||||
u32 num_descriptor_entries{};
|
||||
size_t num_image_elements{};
|
||||
u32 num_textures{};
|
||||
bool fragment_has_color0_output{};
|
||||
|
|
|
|||
|
|
@ -439,10 +439,21 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
|
|||
.has_broken_robust =
|
||||
device.IsNvidia() && device.GetNvidiaArch() <= NvidiaArchitecture::Arch_Pascal,
|
||||
.min_ssbo_alignment = device.GetStorageBufferAlignment(),
|
||||
.max_user_clip_distances = device.GetMaxUserClipDistances(),
|
||||
.max_user_clip_distances = device.GetMaxUserClipDistances()
|
||||
};
|
||||
|
||||
host_info = Shader::HostTranslateInfo{
|
||||
.min_ssbo_alignment = device.GetStorageBufferAlignment(),
|
||||
.max_per_stage_descriptor_sampled_images = device.GetMaxPerStageDescriptorSampledImages(),
|
||||
.max_per_stage_resources = device.GetMaxPerStageResources(),
|
||||
.max_descriptor_set_samplers = device.GetMaxDescriptorSetSamplers(),
|
||||
.max_descriptor_set_uniform_buffers = device.GetMaxDescriptorSetUniformBuffers(),
|
||||
.max_descriptor_set_uniform_buffers_dynamic = device.GetMaxDescriptorSetUniformBuffersDynamic(),
|
||||
.max_descriptor_set_storage_buffers = device.GetMaxDescriptorSetStorageBuffers(),
|
||||
.max_descriptor_set_storage_buffers_dynamic = device.GetMaxDescriptorSetStorageBuffersDynamic(),
|
||||
.max_descriptor_set_sampled_images = device.GetMaxDescriptorSetSampledImages(),
|
||||
.max_descriptor_set_storage_images = device.GetMaxDescriptorSetStorageImages(),
|
||||
.max_descriptor_set_input_attachements = device.GetMaxDescriptorSetInputAttachments(),
|
||||
.support_float64 = device.IsFloat64Supported(),
|
||||
.support_float16 = device.IsFloat16Supported(),
|
||||
.support_int64 = device.IsShaderInt64Supported(),
|
||||
|
|
@ -451,13 +462,10 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
|
|||
driver_id == VK_DRIVER_ID_SAMSUNG_PROPRIETARY,
|
||||
.support_snorm_render_buffer = true,
|
||||
.support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(),
|
||||
.min_ssbo_alignment = static_cast<u32>(device.GetStorageBufferAlignment()),
|
||||
.max_per_stage_descriptor_sampled_images = device.GetMaxPerStageDescriptorSampledImages(),
|
||||
.max_per_stage_resources = device.GetMaxPerStageResources(),
|
||||
.max_descriptor_set_sampled_images = device.GetMaxDescriptorSetSampledImages(),
|
||||
.support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(),
|
||||
.support_conditional_barrier = device.SupportsConditionalBarriers(),
|
||||
};
|
||||
host_info.ApplyDescriptorLimitPolicy();
|
||||
|
||||
if (device.GetMaxVertexInputAttributes() < Maxwell::NumVertexAttributes) {
|
||||
LOG_WARNING(Render_Vulkan, "maxVertexInputAttributes is too low: {} < {}",
|
||||
|
|
|
|||
|
|
@ -203,7 +203,7 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
|
|||
: gpu{gpu_}, device_memory{device_memory_}, device{device_},
|
||||
memory_allocator{memory_allocator_}, state_tracker{state_tracker_}, scheduler{scheduler_},
|
||||
staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler),
|
||||
guest_descriptor_queue(device, scheduler), compute_pass_descriptor_queue(device, scheduler),
|
||||
guest_descriptor_queue(device), compute_pass_descriptor_queue(device),
|
||||
blit_image(device, scheduler, state_tracker, descriptor_pool), render_pass_cache(device),
|
||||
texture_cache_runtime{
|
||||
device, scheduler, memory_allocator, staging_pool,
|
||||
|
|
|
|||
|
|
@ -155,15 +155,14 @@ void Scheduler::WaitWorker() {
|
|||
}
|
||||
|
||||
void Scheduler::DispatchWork() {
|
||||
if (chunk->Empty()) {
|
||||
return;
|
||||
if (chunk && !chunk->Empty()) {
|
||||
{
|
||||
std::scoped_lock ql{queue_mutex};
|
||||
work_queue.push(std::move(chunk));
|
||||
}
|
||||
event_cv.notify_all();
|
||||
AcquireNewChunk();
|
||||
}
|
||||
{
|
||||
std::scoped_lock ql{queue_mutex};
|
||||
work_queue.push(std::move(chunk));
|
||||
}
|
||||
event_cv.notify_all();
|
||||
AcquireNewChunk();
|
||||
}
|
||||
|
||||
void Scheduler::RequestRenderpass(const Framebuffer* framebuffer) {
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@
|
|||
#include <variant>
|
||||
#include <boost/container/static_vector.hpp>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/logging.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
|
|
@ -15,8 +16,9 @@
|
|||
|
||||
namespace Vulkan {
|
||||
|
||||
UpdateDescriptorQueue::UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_)
|
||||
: device{device_}, scheduler{scheduler_} {
|
||||
UpdateDescriptorQueue::UpdateDescriptorQueue(const Device& device_)
|
||||
: device{device_}
|
||||
{
|
||||
payload_start = payload.data();
|
||||
payload_cursor = payload.data();
|
||||
}
|
||||
|
|
@ -31,13 +33,15 @@ void UpdateDescriptorQueue::TickFrame() {
|
|||
payload_cursor = payload_start;
|
||||
}
|
||||
|
||||
void UpdateDescriptorQueue::Acquire() {
|
||||
// Minimum number of entries required.
|
||||
// This is the maximum number of entries a single draw call might use.
|
||||
static constexpr size_t MIN_ENTRIES = 0x400;
|
||||
|
||||
if (std::distance(payload_start, payload_cursor) + MIN_ENTRIES >= FRAME_PAYLOAD_SIZE) {
|
||||
LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");
|
||||
void UpdateDescriptorQueue::Acquire(Scheduler& scheduler, size_t required_entries) {
|
||||
static constexpr size_t DEFAULT_REQUIRED_ENTRIES = 0x400;
|
||||
const size_t reserve = required_entries > 0 ? required_entries : DEFAULT_REQUIRED_ENTRIES;
|
||||
ASSERT_MSG(reserve < FRAME_PAYLOAD_SIZE, "Descriptor reservation {} >= frame capacity {}",
|
||||
reserve, FRAME_PAYLOAD_SIZE);
|
||||
const size_t used = static_cast<size_t>(std::distance(payload_start, payload_cursor));
|
||||
if (used + reserve >= FRAME_PAYLOAD_SIZE) {
|
||||
LOG_WARNING(Render_Vulkan, "Payload overflow (used={}, reserve={}, capacity={})",
|
||||
used, reserve, FRAME_PAYLOAD_SIZE);
|
||||
scheduler.WaitWorker();
|
||||
payload_cursor = payload_start;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -34,12 +34,11 @@ class UpdateDescriptorQueue final {
|
|||
static constexpr size_t PAYLOAD_SIZE = FRAME_PAYLOAD_SIZE * FRAMES_IN_FLIGHT;
|
||||
|
||||
public:
|
||||
explicit UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_);
|
||||
explicit UpdateDescriptorQueue(const Device& device_);
|
||||
~UpdateDescriptorQueue();
|
||||
|
||||
void TickFrame();
|
||||
|
||||
void Acquire();
|
||||
void Acquire(Scheduler& scheduler, size_t required_entries = 0);
|
||||
|
||||
const DescriptorUpdateEntry* UpdateData() const noexcept {
|
||||
return upload_start;
|
||||
|
|
@ -75,8 +74,6 @@ public:
|
|||
|
||||
private:
|
||||
const Device& device;
|
||||
Scheduler& scheduler;
|
||||
|
||||
size_t frame_index{0};
|
||||
DescriptorUpdateEntry* payload_cursor = nullptr;
|
||||
DescriptorUpdateEntry* payload_start = nullptr;
|
||||
|
|
|
|||
|
|
@ -321,32 +321,23 @@ public:
|
|||
return properties.properties.limits.maxPushConstantsSize;
|
||||
}
|
||||
|
||||
/// Returns the maximum size for shared memory.
|
||||
u32 GetMaxComputeSharedMemorySize() const {
|
||||
return properties.properties.limits.maxComputeSharedMemorySize;
|
||||
}
|
||||
|
||||
/// Returns the maximum number of dynamic storage buffer descriptors per set.
|
||||
u32 GetMaxDescriptorSetStorageBuffersDynamic() const {
|
||||
return properties.properties.limits.maxDescriptorSetStorageBuffersDynamic;
|
||||
}
|
||||
|
||||
/// Returns the maximum number of dynamic uniform buffer descriptors per set.
|
||||
u32 GetMaxDescriptorSetUniformBuffersDynamic() const {
|
||||
return properties.properties.limits.maxDescriptorSetUniformBuffersDynamic;
|
||||
}
|
||||
|
||||
u32 GetMaxPerStageDescriptorSampledImages() const {
|
||||
return properties.properties.limits.maxPerStageDescriptorSampledImages;
|
||||
}
|
||||
|
||||
u32 GetMaxPerStageResources() const {
|
||||
return properties.properties.limits.maxPerStageResources;
|
||||
}
|
||||
|
||||
u32 GetMaxDescriptorSetSampledImages() const {
|
||||
return properties.properties.limits.maxDescriptorSetSampledImages;
|
||||
}
|
||||
#define FN_MAX_LIMIT_LIST \
|
||||
FN_MAX_LIMIT_ELEM(ComputeSharedMemorySize) \
|
||||
FN_MAX_LIMIT_ELEM(PerStageDescriptorSampledImages) \
|
||||
FN_MAX_LIMIT_ELEM(PerStageResources) \
|
||||
FN_MAX_LIMIT_ELEM(DescriptorSetSamplers) \
|
||||
FN_MAX_LIMIT_ELEM(DescriptorSetUniformBuffers) \
|
||||
FN_MAX_LIMIT_ELEM(DescriptorSetUniformBuffersDynamic) \
|
||||
FN_MAX_LIMIT_ELEM(DescriptorSetStorageBuffers) \
|
||||
FN_MAX_LIMIT_ELEM(DescriptorSetStorageBuffersDynamic) \
|
||||
FN_MAX_LIMIT_ELEM(DescriptorSetSampledImages) \
|
||||
FN_MAX_LIMIT_ELEM(DescriptorSetStorageImages) \
|
||||
FN_MAX_LIMIT_ELEM(DescriptorSetInputAttachments)
|
||||
#define FN_MAX_LIMIT_ELEM(name) \
|
||||
u32 GetMax##name() const { return properties.properties.limits.max##name; }
|
||||
FN_MAX_LIMIT_LIST
|
||||
#undef FN_MAX_LIMIT_ELEM
|
||||
#undef FN_MAX_LIMIT_LIST
|
||||
|
||||
/// Returns float control properties of the device.
|
||||
const VkPhysicalDeviceFloatControlsPropertiesKHR& FloatControlProperties() const {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue