mirror of
https://git.eden-emu.dev/eden-emu/eden
synced 2026-06-28 07:35:34 +02:00
[shader_recompiler] unify descriptor limit policy
This commit is contained in:
parent
8037f1fbf6
commit
fb53c4e135
7 changed files with 65 additions and 27 deletions
|
|
@ -236,8 +236,11 @@ void LowerGeometryPassthrough(const IR::Program& program, const HostTranslateInf
|
|||
|
||||
IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
|
||||
Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) {
|
||||
HostTranslateInfo normalized_host_info{host_info};
|
||||
normalized_host_info.ApplyDescriptorLimitPolicy();
|
||||
|
||||
IR::Program program;
|
||||
program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg, host_info);
|
||||
program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg, normalized_host_info);
|
||||
program.blocks = GenerateBlocks(program.syntax_list);
|
||||
program.post_order_blocks = PostOrder(program.syntax_list.front());
|
||||
program.stage = env.ShaderStage();
|
||||
|
|
@ -260,9 +263,9 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
|
|||
program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0;
|
||||
}
|
||||
|
||||
if (!host_info.support_geometry_shader_passthrough) {
|
||||
if (!normalized_host_info.support_geometry_shader_passthrough) {
|
||||
program.output_vertices = GetOutputTopologyVertices(program.output_topology);
|
||||
LowerGeometryPassthrough(program, host_info);
|
||||
LowerGeometryPassthrough(program, normalized_host_info);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
|
@ -277,16 +280,16 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
|
|||
RemoveUnreachableBlocks(program);
|
||||
|
||||
// Replace instructions before the SSA rewrite
|
||||
if (!host_info.support_float64) {
|
||||
if (!normalized_host_info.support_float64) {
|
||||
Optimization::LowerFp64ToFp32(program);
|
||||
}
|
||||
if (!host_info.support_float16) {
|
||||
if (!normalized_host_info.support_float16) {
|
||||
Optimization::LowerFp16ToFp32(program);
|
||||
}
|
||||
if (!host_info.support_int64) {
|
||||
if (!normalized_host_info.support_int64) {
|
||||
Optimization::LowerInt64ToInt32(program);
|
||||
}
|
||||
if (!host_info.support_conditional_barrier) {
|
||||
if (!normalized_host_info.support_conditional_barrier) {
|
||||
Optimization::ConditionalBarrierPass(program);
|
||||
}
|
||||
Optimization::SsaRewritePass(program);
|
||||
|
|
@ -295,8 +298,8 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
|
|||
|
||||
Optimization::PositionPass(env, program);
|
||||
|
||||
Optimization::GlobalMemoryToStorageBufferPass(program, host_info);
|
||||
Optimization::TexturePass(env, program, host_info);
|
||||
Optimization::GlobalMemoryToStorageBufferPass(program, normalized_host_info);
|
||||
Optimization::TexturePass(env, program, normalized_host_info);
|
||||
|
||||
if (Settings::values.resolution_info.active || Settings::values.rescale_hack.GetValue()) {
|
||||
Optimization::RescalingPass(program);
|
||||
|
|
@ -306,7 +309,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
|
|||
Optimization::VerificationPass(program);
|
||||
}
|
||||
Optimization::CollectShaderInfoPass(env, program);
|
||||
Optimization::LayerPass(program, host_info);
|
||||
Optimization::LayerPass(program, normalized_host_info);
|
||||
Optimization::VendorWorkaroundPass(program);
|
||||
|
||||
CollectInterpolationInfo(env, program);
|
||||
|
|
|
|||
|
|
@ -15,6 +15,8 @@ namespace Shader {
|
|||
|
||||
/// Misc information about the host
|
||||
struct HostTranslateInfo {
|
||||
static constexpr u32 DEFAULT_DESCRIPTOR_LIMIT = 1024;
|
||||
|
||||
u64 min_ssbo_alignment{}; ///< Minimum alignment supported by the device for SSBOs
|
||||
u32 max_per_stage_descriptor_sampled_images{}; ///< maximum sampled descriptors per stage
|
||||
u32 max_per_stage_resources{}; ///< maximum resources per stage
|
||||
|
|
@ -36,6 +38,29 @@ struct HostTranslateInfo {
|
|||
///< passthrough shaders
|
||||
bool support_conditional_barrier{}; ///< True when the device supports barriers in conditional
|
||||
///< control flow
|
||||
|
||||
void ApplyDescriptorLimitPolicy() noexcept {
|
||||
if (min_ssbo_alignment == 0) {
|
||||
min_ssbo_alignment = 1;
|
||||
}
|
||||
ApplyDescriptorLimitFallback(max_per_stage_descriptor_sampled_images);
|
||||
ApplyDescriptorLimitFallback(max_per_stage_resources);
|
||||
ApplyDescriptorLimitFallback(max_descriptor_set_samplers);
|
||||
ApplyDescriptorLimitFallback(max_descriptor_set_uniform_buffers);
|
||||
ApplyDescriptorLimitFallback(max_descriptor_set_uniform_buffers_dynamic);
|
||||
ApplyDescriptorLimitFallback(max_descriptor_set_storage_buffers);
|
||||
ApplyDescriptorLimitFallback(max_descriptor_set_storage_buffers_dynamic);
|
||||
ApplyDescriptorLimitFallback(max_descriptor_set_sampled_images);
|
||||
ApplyDescriptorLimitFallback(max_descriptor_set_storage_images);
|
||||
ApplyDescriptorLimitFallback(max_descriptor_set_input_attachements);
|
||||
}
|
||||
|
||||
private:
|
||||
/// Replaces a zero descriptor limit with `DEFAULT_DESCRIPTOR_LIMIT`.
///
/// @param limit Limit to normalize in place; any non-zero value is kept as is.
static void ApplyDescriptorLimitFallback(u32& limit) noexcept {
    if (limit != 0) {
        return;
    }
    limit = DEFAULT_DESCRIPTOR_LIMIT;
}
|
||||
};
|
||||
|
||||
} // namespace Shader
|
||||
|
|
|
|||
|
|
@ -47,7 +47,8 @@ u32 DynamicDescriptorSizeShift(const IR::U32& dynamic_offset) {
|
|||
}
|
||||
|
||||
u32 DynamicDescriptorCount(u32 base_offset, u32 size_shift, u32 max_descriptors) {
|
||||
auto const max_cbuf_bytes = 16 * max_descriptors;
|
||||
auto const descriptor_limit = (std::max)(1U, max_descriptors);
|
||||
auto const max_cbuf_bytes = 16 * descriptor_limit;
|
||||
if (size_shift >= 31 || base_offset >= max_cbuf_bytes)
|
||||
return 1;
|
||||
auto const stride = 1U << size_shift;
|
||||
|
|
@ -55,7 +56,7 @@ u32 DynamicDescriptorCount(u32 base_offset, u32 size_shift, u32 max_descriptors)
|
|||
if (available < DESCRIPTOR_SIZE)
|
||||
return 1;
|
||||
auto const available_count = 1U + (available - DESCRIPTOR_SIZE) / stride;
|
||||
return std::min(max_descriptors, available_count);
|
||||
return std::min(descriptor_limit, available_count);
|
||||
}
|
||||
|
||||
u32 SaturatingSub(u32 lhs, u32 rhs) {
|
||||
|
|
@ -70,8 +71,9 @@ template <typename T>
|
|||
}
|
||||
|
||||
u32 DynamicSampledTextureCap(const Info& info, const HostTranslateInfo& host_info, u32 dynamic_arrays) {
|
||||
auto const sampled_limit = std::min(host_info.max_per_stage_descriptor_sampled_images, host_info.max_descriptor_set_sampled_images);
|
||||
auto const resource_limit = host_info.max_per_stage_resources;
|
||||
auto const sampled_limit = (std::max)(1U, std::min(host_info.max_per_stage_descriptor_sampled_images,
|
||||
host_info.max_descriptor_set_sampled_images));
|
||||
auto const resource_limit = (std::max)(1U, host_info.max_per_stage_resources);
|
||||
if (dynamic_arrays > 0) {
|
||||
auto const sampled_static_count = StaticDescriptorCount(info.texture_buffer_descriptors) + StaticDescriptorCount(info.texture_descriptors);
|
||||
auto const resource_static_count =
|
||||
|
|
@ -444,8 +446,9 @@ std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environme
|
|||
return std::nullopt;
|
||||
}
|
||||
auto const size_shift = DynamicDescriptorSizeShift(dynamic_offset);
|
||||
auto const sampled_limit = (std::min)(host_info.max_per_stage_descriptor_sampled_images, host_info.max_descriptor_set_sampled_images);
|
||||
auto const resource_limit = host_info.max_per_stage_resources;
|
||||
auto const sampled_limit = (std::max)(1U, (std::min)(host_info.max_per_stage_descriptor_sampled_images,
|
||||
host_info.max_descriptor_set_sampled_images));
|
||||
auto const resource_limit = (std::max)(1U, host_info.max_per_stage_resources);
|
||||
return ConstBufferAddr{
|
||||
.index = index.U32(),
|
||||
.offset = base_offset,
|
||||
|
|
|
|||
|
|
@ -244,19 +244,22 @@ ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
|
|||
.max_user_clip_distances =
|
||||
std::min<u32>(device.GetMaxUserClipDistances(), Maxwell::Regs::NumClipDistances),
|
||||
},
|
||||
// TODO: proper limits?
|
||||
host_info{
|
||||
.min_ssbo_alignment = static_cast<u32>(device.GetShaderStorageBufferAlignment()),
|
||||
.max_per_stage_descriptor_sampled_images = 1024,//device.GetMaxPerStageDescriptorSampledImages(),
|
||||
.max_per_stage_resources = 1024,//device.GetMaxPerStageResources(),
|
||||
.max_descriptor_set_samplers = 1024,//device.GetMaxDescriptorSetSamplers(),
|
||||
.max_descriptor_set_uniform_buffers = 1024,//device.GetMaxDescriptorSetUniformBuffers(),
|
||||
.max_descriptor_set_uniform_buffers_dynamic = 1024,//device.GetMaxDescriptorSetUniformBuffersDynamic(),
|
||||
.max_descriptor_set_storage_buffers = 1024,//device.GetMaxDescriptorSetStorageBuffers(),
|
||||
.max_descriptor_set_storage_buffers_dynamic = 1024,//device.GetMaxDescriptorSetStorageBuffersDynamic(),
|
||||
.max_descriptor_set_sampled_images = 1024,//device.GetMaxDescriptorSetSampledImages(),
|
||||
.max_descriptor_set_storage_images = 1024,//device.GetMaxDescriptorSetStorageImages(),
|
||||
.max_descriptor_set_input_attachements = 1024,//device.GetMaxDescriptorSetInputAttachments(),
|
||||
.max_per_stage_descriptor_sampled_images =
|
||||
Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
|
||||
.max_per_stage_resources = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
|
||||
.max_descriptor_set_samplers = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
|
||||
.max_descriptor_set_uniform_buffers = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
|
||||
.max_descriptor_set_uniform_buffers_dynamic =
|
||||
Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
|
||||
.max_descriptor_set_storage_buffers = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
|
||||
.max_descriptor_set_storage_buffers_dynamic =
|
||||
Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
|
||||
.max_descriptor_set_sampled_images = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
|
||||
.max_descriptor_set_storage_images = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
|
||||
.max_descriptor_set_input_attachements =
|
||||
Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
|
||||
.support_float64 = true,
|
||||
.support_float16 = false,
|
||||
.support_int64 = device.HasShaderInt64(),
|
||||
|
|
@ -266,6 +269,7 @@ ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
|
|||
.support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(),
|
||||
.support_conditional_barrier = device.SupportsConditionalBarriers(),
|
||||
} {
|
||||
host_info.ApplyDescriptorLimitPolicy();
|
||||
if (use_asynchronous_shaders) {
|
||||
workers = CreateWorkers();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -465,6 +465,7 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
|
|||
.support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(),
|
||||
.support_conditional_barrier = device.SupportsConditionalBarriers(),
|
||||
};
|
||||
host_info.ApplyDescriptorLimitPolicy();
|
||||
|
||||
if (device.GetMaxVertexInputAttributes() < Maxwell::NumVertexAttributes) {
|
||||
LOG_WARNING(Render_Vulkan, "maxVertexInputAttributes is too low: {} < {}",
|
||||
|
|
|
|||
|
|
@ -44,6 +44,7 @@ int IrShaderRecompilerImpl(int argc, char *argv[]) {
|
|||
host_info.support_geometry_shader_passthrough = true;
|
||||
host_info.support_conditional_barrier = true;
|
||||
host_info.min_ssbo_alignment = 0;
|
||||
host_info.ApplyDescriptorLimitPolicy();
|
||||
auto program = Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg, host_info);
|
||||
auto const dumped_ir = Shader::IR::DumpProgram(program);
|
||||
std::printf("%s\n", dumped_ir.c_str());
|
||||
|
|
|
|||
|
|
@ -52,6 +52,7 @@ int SpirvShaderRecompilerImpl(int argc, char *argv[]) {
|
|||
host_info.support_geometry_shader_passthrough = true;
|
||||
host_info.support_conditional_barrier = true;
|
||||
host_info.min_ssbo_alignment = 0;
|
||||
host_info.ApplyDescriptorLimitPolicy();
|
||||
auto program = Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg, host_info);
|
||||
|
||||
// IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue