[shader_recompiler] unify descriptor limit policy

This commit is contained in:
CamilleLaVey 2026-06-26 20:24:05 -04:00
parent 8037f1fbf6
commit fb53c4e135
7 changed files with 65 additions and 27 deletions

View file

@ -236,8 +236,11 @@ void LowerGeometryPassthrough(const IR::Program& program, const HostTranslateInf
IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) { Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) {
HostTranslateInfo normalized_host_info{host_info};
normalized_host_info.ApplyDescriptorLimitPolicy();
IR::Program program; IR::Program program;
program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg, host_info); program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg, normalized_host_info);
program.blocks = GenerateBlocks(program.syntax_list); program.blocks = GenerateBlocks(program.syntax_list);
program.post_order_blocks = PostOrder(program.syntax_list.front()); program.post_order_blocks = PostOrder(program.syntax_list.front());
program.stage = env.ShaderStage(); program.stage = env.ShaderStage();
@ -260,9 +263,9 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0; program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0;
} }
if (!host_info.support_geometry_shader_passthrough) { if (!normalized_host_info.support_geometry_shader_passthrough) {
program.output_vertices = GetOutputTopologyVertices(program.output_topology); program.output_vertices = GetOutputTopologyVertices(program.output_topology);
LowerGeometryPassthrough(program, host_info); LowerGeometryPassthrough(program, normalized_host_info);
} }
} }
break; break;
@ -277,16 +280,16 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
RemoveUnreachableBlocks(program); RemoveUnreachableBlocks(program);
// Replace instructions before the SSA rewrite // Replace instructions before the SSA rewrite
if (!host_info.support_float64) { if (!normalized_host_info.support_float64) {
Optimization::LowerFp64ToFp32(program); Optimization::LowerFp64ToFp32(program);
} }
if (!host_info.support_float16) { if (!normalized_host_info.support_float16) {
Optimization::LowerFp16ToFp32(program); Optimization::LowerFp16ToFp32(program);
} }
if (!host_info.support_int64) { if (!normalized_host_info.support_int64) {
Optimization::LowerInt64ToInt32(program); Optimization::LowerInt64ToInt32(program);
} }
if (!host_info.support_conditional_barrier) { if (!normalized_host_info.support_conditional_barrier) {
Optimization::ConditionalBarrierPass(program); Optimization::ConditionalBarrierPass(program);
} }
Optimization::SsaRewritePass(program); Optimization::SsaRewritePass(program);
@ -295,8 +298,8 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
Optimization::PositionPass(env, program); Optimization::PositionPass(env, program);
Optimization::GlobalMemoryToStorageBufferPass(program, host_info); Optimization::GlobalMemoryToStorageBufferPass(program, normalized_host_info);
Optimization::TexturePass(env, program, host_info); Optimization::TexturePass(env, program, normalized_host_info);
if (Settings::values.resolution_info.active || Settings::values.rescale_hack.GetValue()) { if (Settings::values.resolution_info.active || Settings::values.rescale_hack.GetValue()) {
Optimization::RescalingPass(program); Optimization::RescalingPass(program);
@ -306,7 +309,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
Optimization::VerificationPass(program); Optimization::VerificationPass(program);
} }
Optimization::CollectShaderInfoPass(env, program); Optimization::CollectShaderInfoPass(env, program);
Optimization::LayerPass(program, host_info); Optimization::LayerPass(program, normalized_host_info);
Optimization::VendorWorkaroundPass(program); Optimization::VendorWorkaroundPass(program);
CollectInterpolationInfo(env, program); CollectInterpolationInfo(env, program);

View file

@ -15,6 +15,8 @@ namespace Shader {
/// Misc information about the host /// Misc information about the host
struct HostTranslateInfo { struct HostTranslateInfo {
static constexpr u32 DEFAULT_DESCRIPTOR_LIMIT = 1024;
u64 min_ssbo_alignment{}; ///< Minimum alignment supported by the device for SSBOs u64 min_ssbo_alignment{}; ///< Minimum alignment supported by the device for SSBOs
u32 max_per_stage_descriptor_sampled_images{}; ///< maximum sampled descriptors per stage u32 max_per_stage_descriptor_sampled_images{}; ///< maximum sampled descriptors per stage
u32 max_per_stage_resources{}; ///< maximum resources per stage u32 max_per_stage_resources{}; ///< maximum resources per stage
@ -36,6 +38,29 @@ struct HostTranslateInfo {
///< passthrough shaders ///< passthrough shaders
bool support_conditional_barrier{}; ///< True when the device supports barriers in conditional bool support_conditional_barrier{}; ///< True when the device supports barriers in conditional
///< control flow ///< control flow
void ApplyDescriptorLimitPolicy() noexcept {
if (min_ssbo_alignment == 0) {
min_ssbo_alignment = 1;
}
ApplyDescriptorLimitFallback(max_per_stage_descriptor_sampled_images);
ApplyDescriptorLimitFallback(max_per_stage_resources);
ApplyDescriptorLimitFallback(max_descriptor_set_samplers);
ApplyDescriptorLimitFallback(max_descriptor_set_uniform_buffers);
ApplyDescriptorLimitFallback(max_descriptor_set_uniform_buffers_dynamic);
ApplyDescriptorLimitFallback(max_descriptor_set_storage_buffers);
ApplyDescriptorLimitFallback(max_descriptor_set_storage_buffers_dynamic);
ApplyDescriptorLimitFallback(max_descriptor_set_sampled_images);
ApplyDescriptorLimitFallback(max_descriptor_set_storage_images);
ApplyDescriptorLimitFallback(max_descriptor_set_input_attachements);
}
private:
/// Replaces a zero limit with DEFAULT_DESCRIPTOR_LIMIT; a non-zero limit is left unchanged.
/// @param limit Descriptor limit to normalize in place.
static void ApplyDescriptorLimitFallback(u32& limit) noexcept {
    if (limit != 0) {
        // Already set: keep the existing value.
        return;
    }
    limit = DEFAULT_DESCRIPTOR_LIMIT;
}
}; };
} // namespace Shader } // namespace Shader

View file

@ -47,7 +47,8 @@ u32 DynamicDescriptorSizeShift(const IR::U32& dynamic_offset) {
} }
u32 DynamicDescriptorCount(u32 base_offset, u32 size_shift, u32 max_descriptors) { u32 DynamicDescriptorCount(u32 base_offset, u32 size_shift, u32 max_descriptors) {
auto const max_cbuf_bytes = 16 * max_descriptors; auto const descriptor_limit = (std::max)(1U, max_descriptors);
auto const max_cbuf_bytes = 16 * descriptor_limit;
if (size_shift >= 31 || base_offset >= max_cbuf_bytes) if (size_shift >= 31 || base_offset >= max_cbuf_bytes)
return 1; return 1;
auto const stride = 1U << size_shift; auto const stride = 1U << size_shift;
@ -55,7 +56,7 @@ u32 DynamicDescriptorCount(u32 base_offset, u32 size_shift, u32 max_descriptors)
if (available < DESCRIPTOR_SIZE) if (available < DESCRIPTOR_SIZE)
return 1; return 1;
auto const available_count = 1U + (available - DESCRIPTOR_SIZE) / stride; auto const available_count = 1U + (available - DESCRIPTOR_SIZE) / stride;
return std::min(max_descriptors, available_count); return std::min(descriptor_limit, available_count);
} }
u32 SaturatingSub(u32 lhs, u32 rhs) { u32 SaturatingSub(u32 lhs, u32 rhs) {
@ -70,8 +71,9 @@ template <typename T>
} }
u32 DynamicSampledTextureCap(const Info& info, const HostTranslateInfo& host_info, u32 dynamic_arrays) { u32 DynamicSampledTextureCap(const Info& info, const HostTranslateInfo& host_info, u32 dynamic_arrays) {
auto const sampled_limit = std::min(host_info.max_per_stage_descriptor_sampled_images, host_info.max_descriptor_set_sampled_images); auto const sampled_limit = (std::max)(1U, std::min(host_info.max_per_stage_descriptor_sampled_images,
auto const resource_limit = host_info.max_per_stage_resources; host_info.max_descriptor_set_sampled_images));
auto const resource_limit = (std::max)(1U, host_info.max_per_stage_resources);
if (dynamic_arrays > 0) { if (dynamic_arrays > 0) {
auto const sampled_static_count = StaticDescriptorCount(info.texture_buffer_descriptors) + StaticDescriptorCount(info.texture_descriptors); auto const sampled_static_count = StaticDescriptorCount(info.texture_buffer_descriptors) + StaticDescriptorCount(info.texture_descriptors);
auto const resource_static_count = auto const resource_static_count =
@ -444,8 +446,9 @@ std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environme
return std::nullopt; return std::nullopt;
} }
auto const size_shift = DynamicDescriptorSizeShift(dynamic_offset); auto const size_shift = DynamicDescriptorSizeShift(dynamic_offset);
auto const sampled_limit = (std::min)(host_info.max_per_stage_descriptor_sampled_images, host_info.max_descriptor_set_sampled_images); auto const sampled_limit = (std::max)(1U, (std::min)(host_info.max_per_stage_descriptor_sampled_images,
auto const resource_limit = host_info.max_per_stage_resources; host_info.max_descriptor_set_sampled_images));
auto const resource_limit = (std::max)(1U, host_info.max_per_stage_resources);
return ConstBufferAddr{ return ConstBufferAddr{
.index = index.U32(), .index = index.U32(),
.offset = base_offset, .offset = base_offset,

View file

@ -244,19 +244,22 @@ ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
.max_user_clip_distances = .max_user_clip_distances =
std::min<u32>(device.GetMaxUserClipDistances(), Maxwell::Regs::NumClipDistances), std::min<u32>(device.GetMaxUserClipDistances(), Maxwell::Regs::NumClipDistances),
}, },
// TODO: proper limits?
host_info{ host_info{
.min_ssbo_alignment = static_cast<u32>(device.GetShaderStorageBufferAlignment()), .min_ssbo_alignment = static_cast<u32>(device.GetShaderStorageBufferAlignment()),
.max_per_stage_descriptor_sampled_images = 1024,//device.GetMaxPerStageDescriptorSampledImages(), .max_per_stage_descriptor_sampled_images =
.max_per_stage_resources = 1024,//device.GetMaxPerStageResources(), Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
.max_descriptor_set_samplers = 1024,//device.GetMaxDescriptorSetSamplers(), .max_per_stage_resources = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
.max_descriptor_set_uniform_buffers = 1024,//device.GetMaxDescriptorSetUniformBuffers(), .max_descriptor_set_samplers = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
.max_descriptor_set_uniform_buffers_dynamic = 1024,//device.GetMaxDescriptorSetUniformBuffersDynamic(), .max_descriptor_set_uniform_buffers = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
.max_descriptor_set_storage_buffers = 1024,//device.GetMaxDescriptorSetStorageBuffers(), .max_descriptor_set_uniform_buffers_dynamic =
.max_descriptor_set_storage_buffers_dynamic = 1024,//device.GetMaxDescriptorSetStorageBuffersDynamic(), Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
.max_descriptor_set_sampled_images = 1024,//device.GetMaxDescriptorSetSampledImages(), .max_descriptor_set_storage_buffers = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
.max_descriptor_set_storage_images = 1024,//device.GetMaxDescriptorSetStorageImages(), .max_descriptor_set_storage_buffers_dynamic =
.max_descriptor_set_input_attachements = 1024,//device.GetMaxDescriptorSetInputAttachments(), Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
.max_descriptor_set_sampled_images = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
.max_descriptor_set_storage_images = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
.max_descriptor_set_input_attachements =
Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
.support_float64 = true, .support_float64 = true,
.support_float16 = false, .support_float16 = false,
.support_int64 = device.HasShaderInt64(), .support_int64 = device.HasShaderInt64(),
@ -266,6 +269,7 @@ ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
.support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(), .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(),
.support_conditional_barrier = device.SupportsConditionalBarriers(), .support_conditional_barrier = device.SupportsConditionalBarriers(),
} { } {
host_info.ApplyDescriptorLimitPolicy();
if (use_asynchronous_shaders) { if (use_asynchronous_shaders) {
workers = CreateWorkers(); workers = CreateWorkers();
} }

View file

@ -465,6 +465,7 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
.support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(),
.support_conditional_barrier = device.SupportsConditionalBarriers(), .support_conditional_barrier = device.SupportsConditionalBarriers(),
}; };
host_info.ApplyDescriptorLimitPolicy();
if (device.GetMaxVertexInputAttributes() < Maxwell::NumVertexAttributes) { if (device.GetMaxVertexInputAttributes() < Maxwell::NumVertexAttributes) {
LOG_WARNING(Render_Vulkan, "maxVertexInputAttributes is too low: {} < {}", LOG_WARNING(Render_Vulkan, "maxVertexInputAttributes is too low: {} < {}",

View file

@ -44,6 +44,7 @@ int IrShaderRecompilerImpl(int argc, char *argv[]) {
host_info.support_geometry_shader_passthrough = true; host_info.support_geometry_shader_passthrough = true;
host_info.support_conditional_barrier = true; host_info.support_conditional_barrier = true;
host_info.min_ssbo_alignment = 0; host_info.min_ssbo_alignment = 0;
host_info.ApplyDescriptorLimitPolicy();
auto program = Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg, host_info); auto program = Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg, host_info);
auto const dumped_ir = Shader::IR::DumpProgram(program); auto const dumped_ir = Shader::IR::DumpProgram(program);
std::printf("%s\n", dumped_ir.c_str()); std::printf("%s\n", dumped_ir.c_str());

View file

@ -52,6 +52,7 @@ int SpirvShaderRecompilerImpl(int argc, char *argv[]) {
host_info.support_geometry_shader_passthrough = true; host_info.support_geometry_shader_passthrough = true;
host_info.support_conditional_barrier = true; host_info.support_conditional_barrier = true;
host_info.min_ssbo_alignment = 0; host_info.min_ssbo_alignment = 0;
host_info.ApplyDescriptorLimitPolicy();
auto program = Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg, host_info); auto program = Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg, host_info);
// IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, // IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,