mirror of
https://git.eden-emu.dev/eden-emu/eden
synced 2026-06-28 09:45:45 +02:00
[shader_recompiler] unify descriptor limit policy
This commit is contained in:
parent
8037f1fbf6
commit
fb53c4e135
7 changed files with 65 additions and 27 deletions
|
|
@ -236,8 +236,11 @@ void LowerGeometryPassthrough(const IR::Program& program, const HostTranslateInf
|
||||||
|
|
||||||
IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
|
IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
|
||||||
Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) {
|
Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) {
|
||||||
|
HostTranslateInfo normalized_host_info{host_info};
|
||||||
|
normalized_host_info.ApplyDescriptorLimitPolicy();
|
||||||
|
|
||||||
IR::Program program;
|
IR::Program program;
|
||||||
program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg, host_info);
|
program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg, normalized_host_info);
|
||||||
program.blocks = GenerateBlocks(program.syntax_list);
|
program.blocks = GenerateBlocks(program.syntax_list);
|
||||||
program.post_order_blocks = PostOrder(program.syntax_list.front());
|
program.post_order_blocks = PostOrder(program.syntax_list.front());
|
||||||
program.stage = env.ShaderStage();
|
program.stage = env.ShaderStage();
|
||||||
|
|
@ -260,9 +263,9 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
|
||||||
program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0;
|
program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!host_info.support_geometry_shader_passthrough) {
|
if (!normalized_host_info.support_geometry_shader_passthrough) {
|
||||||
program.output_vertices = GetOutputTopologyVertices(program.output_topology);
|
program.output_vertices = GetOutputTopologyVertices(program.output_topology);
|
||||||
LowerGeometryPassthrough(program, host_info);
|
LowerGeometryPassthrough(program, normalized_host_info);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
@ -277,16 +280,16 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
|
||||||
RemoveUnreachableBlocks(program);
|
RemoveUnreachableBlocks(program);
|
||||||
|
|
||||||
// Replace instructions before the SSA rewrite
|
// Replace instructions before the SSA rewrite
|
||||||
if (!host_info.support_float64) {
|
if (!normalized_host_info.support_float64) {
|
||||||
Optimization::LowerFp64ToFp32(program);
|
Optimization::LowerFp64ToFp32(program);
|
||||||
}
|
}
|
||||||
if (!host_info.support_float16) {
|
if (!normalized_host_info.support_float16) {
|
||||||
Optimization::LowerFp16ToFp32(program);
|
Optimization::LowerFp16ToFp32(program);
|
||||||
}
|
}
|
||||||
if (!host_info.support_int64) {
|
if (!normalized_host_info.support_int64) {
|
||||||
Optimization::LowerInt64ToInt32(program);
|
Optimization::LowerInt64ToInt32(program);
|
||||||
}
|
}
|
||||||
if (!host_info.support_conditional_barrier) {
|
if (!normalized_host_info.support_conditional_barrier) {
|
||||||
Optimization::ConditionalBarrierPass(program);
|
Optimization::ConditionalBarrierPass(program);
|
||||||
}
|
}
|
||||||
Optimization::SsaRewritePass(program);
|
Optimization::SsaRewritePass(program);
|
||||||
|
|
@ -295,8 +298,8 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
|
||||||
|
|
||||||
Optimization::PositionPass(env, program);
|
Optimization::PositionPass(env, program);
|
||||||
|
|
||||||
Optimization::GlobalMemoryToStorageBufferPass(program, host_info);
|
Optimization::GlobalMemoryToStorageBufferPass(program, normalized_host_info);
|
||||||
Optimization::TexturePass(env, program, host_info);
|
Optimization::TexturePass(env, program, normalized_host_info);
|
||||||
|
|
||||||
if (Settings::values.resolution_info.active || Settings::values.rescale_hack.GetValue()) {
|
if (Settings::values.resolution_info.active || Settings::values.rescale_hack.GetValue()) {
|
||||||
Optimization::RescalingPass(program);
|
Optimization::RescalingPass(program);
|
||||||
|
|
@ -306,7 +309,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
|
||||||
Optimization::VerificationPass(program);
|
Optimization::VerificationPass(program);
|
||||||
}
|
}
|
||||||
Optimization::CollectShaderInfoPass(env, program);
|
Optimization::CollectShaderInfoPass(env, program);
|
||||||
Optimization::LayerPass(program, host_info);
|
Optimization::LayerPass(program, normalized_host_info);
|
||||||
Optimization::VendorWorkaroundPass(program);
|
Optimization::VendorWorkaroundPass(program);
|
||||||
|
|
||||||
CollectInterpolationInfo(env, program);
|
CollectInterpolationInfo(env, program);
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,8 @@ namespace Shader {
|
||||||
|
|
||||||
/// Misc information about the host
|
/// Misc information about the host
|
||||||
struct HostTranslateInfo {
|
struct HostTranslateInfo {
|
||||||
|
static constexpr u32 DEFAULT_DESCRIPTOR_LIMIT = 1024;
|
||||||
|
|
||||||
u64 min_ssbo_alignment{}; ///< Minimum alignment supported by the device for SSBOs
|
u64 min_ssbo_alignment{}; ///< Minimum alignment supported by the device for SSBOs
|
||||||
u32 max_per_stage_descriptor_sampled_images{}; ///< maximum sampled descriptors per stage
|
u32 max_per_stage_descriptor_sampled_images{}; ///< maximum sampled descriptors per stage
|
||||||
u32 max_per_stage_resources{}; ///< maximum resources per stage
|
u32 max_per_stage_resources{}; ///< maximum resources per stage
|
||||||
|
|
@ -36,6 +38,29 @@ struct HostTranslateInfo {
|
||||||
///< passthrough shaders
|
///< passthrough shaders
|
||||||
bool support_conditional_barrier{}; ///< True when the device supports barriers in conditional
|
bool support_conditional_barrier{}; ///< True when the device supports barriers in conditional
|
||||||
///< control flow
|
///< control flow
|
||||||
|
|
||||||
|
void ApplyDescriptorLimitPolicy() noexcept {
|
||||||
|
if (min_ssbo_alignment == 0) {
|
||||||
|
min_ssbo_alignment = 1;
|
||||||
|
}
|
||||||
|
ApplyDescriptorLimitFallback(max_per_stage_descriptor_sampled_images);
|
||||||
|
ApplyDescriptorLimitFallback(max_per_stage_resources);
|
||||||
|
ApplyDescriptorLimitFallback(max_descriptor_set_samplers);
|
||||||
|
ApplyDescriptorLimitFallback(max_descriptor_set_uniform_buffers);
|
||||||
|
ApplyDescriptorLimitFallback(max_descriptor_set_uniform_buffers_dynamic);
|
||||||
|
ApplyDescriptorLimitFallback(max_descriptor_set_storage_buffers);
|
||||||
|
ApplyDescriptorLimitFallback(max_descriptor_set_storage_buffers_dynamic);
|
||||||
|
ApplyDescriptorLimitFallback(max_descriptor_set_sampled_images);
|
||||||
|
ApplyDescriptorLimitFallback(max_descriptor_set_storage_images);
|
||||||
|
ApplyDescriptorLimitFallback(max_descriptor_set_input_attachements);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
static void ApplyDescriptorLimitFallback(u32& limit) noexcept {
|
||||||
|
if (limit == 0) {
|
||||||
|
limit = DEFAULT_DESCRIPTOR_LIMIT;
|
||||||
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Shader
|
} // namespace Shader
|
||||||
|
|
|
||||||
|
|
@ -47,7 +47,8 @@ u32 DynamicDescriptorSizeShift(const IR::U32& dynamic_offset) {
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 DynamicDescriptorCount(u32 base_offset, u32 size_shift, u32 max_descriptors) {
|
u32 DynamicDescriptorCount(u32 base_offset, u32 size_shift, u32 max_descriptors) {
|
||||||
auto const max_cbuf_bytes = 16 * max_descriptors;
|
auto const descriptor_limit = (std::max)(1U, max_descriptors);
|
||||||
|
auto const max_cbuf_bytes = 16 * descriptor_limit;
|
||||||
if (size_shift >= 31 || base_offset >= max_cbuf_bytes)
|
if (size_shift >= 31 || base_offset >= max_cbuf_bytes)
|
||||||
return 1;
|
return 1;
|
||||||
auto const stride = 1U << size_shift;
|
auto const stride = 1U << size_shift;
|
||||||
|
|
@ -55,7 +56,7 @@ u32 DynamicDescriptorCount(u32 base_offset, u32 size_shift, u32 max_descriptors)
|
||||||
if (available < DESCRIPTOR_SIZE)
|
if (available < DESCRIPTOR_SIZE)
|
||||||
return 1;
|
return 1;
|
||||||
auto const available_count = 1U + (available - DESCRIPTOR_SIZE) / stride;
|
auto const available_count = 1U + (available - DESCRIPTOR_SIZE) / stride;
|
||||||
return std::min(max_descriptors, available_count);
|
return std::min(descriptor_limit, available_count);
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 SaturatingSub(u32 lhs, u32 rhs) {
|
u32 SaturatingSub(u32 lhs, u32 rhs) {
|
||||||
|
|
@ -70,8 +71,9 @@ template <typename T>
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 DynamicSampledTextureCap(const Info& info, const HostTranslateInfo& host_info, u32 dynamic_arrays) {
|
u32 DynamicSampledTextureCap(const Info& info, const HostTranslateInfo& host_info, u32 dynamic_arrays) {
|
||||||
auto const sampled_limit = std::min(host_info.max_per_stage_descriptor_sampled_images, host_info.max_descriptor_set_sampled_images);
|
auto const sampled_limit = (std::max)(1U, std::min(host_info.max_per_stage_descriptor_sampled_images,
|
||||||
auto const resource_limit = host_info.max_per_stage_resources;
|
host_info.max_descriptor_set_sampled_images));
|
||||||
|
auto const resource_limit = (std::max)(1U, host_info.max_per_stage_resources);
|
||||||
if (dynamic_arrays > 0) {
|
if (dynamic_arrays > 0) {
|
||||||
auto const sampled_static_count = StaticDescriptorCount(info.texture_buffer_descriptors) + StaticDescriptorCount(info.texture_descriptors);
|
auto const sampled_static_count = StaticDescriptorCount(info.texture_buffer_descriptors) + StaticDescriptorCount(info.texture_descriptors);
|
||||||
auto const resource_static_count =
|
auto const resource_static_count =
|
||||||
|
|
@ -444,8 +446,9 @@ std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environme
|
||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
}
|
}
|
||||||
auto const size_shift = DynamicDescriptorSizeShift(dynamic_offset);
|
auto const size_shift = DynamicDescriptorSizeShift(dynamic_offset);
|
||||||
auto const sampled_limit = (std::min)(host_info.max_per_stage_descriptor_sampled_images, host_info.max_descriptor_set_sampled_images);
|
auto const sampled_limit = (std::max)(1U, (std::min)(host_info.max_per_stage_descriptor_sampled_images,
|
||||||
auto const resource_limit = host_info.max_per_stage_resources;
|
host_info.max_descriptor_set_sampled_images));
|
||||||
|
auto const resource_limit = (std::max)(1U, host_info.max_per_stage_resources);
|
||||||
return ConstBufferAddr{
|
return ConstBufferAddr{
|
||||||
.index = index.U32(),
|
.index = index.U32(),
|
||||||
.offset = base_offset,
|
.offset = base_offset,
|
||||||
|
|
|
||||||
|
|
@ -244,19 +244,22 @@ ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
|
||||||
.max_user_clip_distances =
|
.max_user_clip_distances =
|
||||||
std::min<u32>(device.GetMaxUserClipDistances(), Maxwell::Regs::NumClipDistances),
|
std::min<u32>(device.GetMaxUserClipDistances(), Maxwell::Regs::NumClipDistances),
|
||||||
},
|
},
|
||||||
// TODO: proper limits?
|
|
||||||
host_info{
|
host_info{
|
||||||
.min_ssbo_alignment = static_cast<u32>(device.GetShaderStorageBufferAlignment()),
|
.min_ssbo_alignment = static_cast<u32>(device.GetShaderStorageBufferAlignment()),
|
||||||
.max_per_stage_descriptor_sampled_images = 1024,//device.GetMaxPerStageDescriptorSampledImages(),
|
.max_per_stage_descriptor_sampled_images =
|
||||||
.max_per_stage_resources = 1024,//device.GetMaxPerStageResources(),
|
Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
|
||||||
.max_descriptor_set_samplers = 1024,//device.GetMaxDescriptorSetSamplers(),
|
.max_per_stage_resources = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
|
||||||
.max_descriptor_set_uniform_buffers = 1024,//device.GetMaxDescriptorSetUniformBuffers(),
|
.max_descriptor_set_samplers = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
|
||||||
.max_descriptor_set_uniform_buffers_dynamic = 1024,//device.GetMaxDescriptorSetUniformBuffersDynamic(),
|
.max_descriptor_set_uniform_buffers = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
|
||||||
.max_descriptor_set_storage_buffers = 1024,//device.GetMaxDescriptorSetStorageBuffers(),
|
.max_descriptor_set_uniform_buffers_dynamic =
|
||||||
.max_descriptor_set_storage_buffers_dynamic = 1024,//device.GetMaxDescriptorSetStorageBuffersDynamic(),
|
Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
|
||||||
.max_descriptor_set_sampled_images = 1024,//device.GetMaxDescriptorSetSampledImages(),
|
.max_descriptor_set_storage_buffers = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
|
||||||
.max_descriptor_set_storage_images = 1024,//device.GetMaxDescriptorSetStorageImages(),
|
.max_descriptor_set_storage_buffers_dynamic =
|
||||||
.max_descriptor_set_input_attachements = 1024,//device.GetMaxDescriptorSetInputAttachments(),
|
Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
|
||||||
|
.max_descriptor_set_sampled_images = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
|
||||||
|
.max_descriptor_set_storage_images = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
|
||||||
|
.max_descriptor_set_input_attachements =
|
||||||
|
Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
|
||||||
.support_float64 = true,
|
.support_float64 = true,
|
||||||
.support_float16 = false,
|
.support_float16 = false,
|
||||||
.support_int64 = device.HasShaderInt64(),
|
.support_int64 = device.HasShaderInt64(),
|
||||||
|
|
@ -266,6 +269,7 @@ ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
|
||||||
.support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(),
|
.support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(),
|
||||||
.support_conditional_barrier = device.SupportsConditionalBarriers(),
|
.support_conditional_barrier = device.SupportsConditionalBarriers(),
|
||||||
} {
|
} {
|
||||||
|
host_info.ApplyDescriptorLimitPolicy();
|
||||||
if (use_asynchronous_shaders) {
|
if (use_asynchronous_shaders) {
|
||||||
workers = CreateWorkers();
|
workers = CreateWorkers();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -465,6 +465,7 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
|
||||||
.support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(),
|
.support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(),
|
||||||
.support_conditional_barrier = device.SupportsConditionalBarriers(),
|
.support_conditional_barrier = device.SupportsConditionalBarriers(),
|
||||||
};
|
};
|
||||||
|
host_info.ApplyDescriptorLimitPolicy();
|
||||||
|
|
||||||
if (device.GetMaxVertexInputAttributes() < Maxwell::NumVertexAttributes) {
|
if (device.GetMaxVertexInputAttributes() < Maxwell::NumVertexAttributes) {
|
||||||
LOG_WARNING(Render_Vulkan, "maxVertexInputAttributes is too low: {} < {}",
|
LOG_WARNING(Render_Vulkan, "maxVertexInputAttributes is too low: {} < {}",
|
||||||
|
|
|
||||||
|
|
@ -44,6 +44,7 @@ int IrShaderRecompilerImpl(int argc, char *argv[]) {
|
||||||
host_info.support_geometry_shader_passthrough = true;
|
host_info.support_geometry_shader_passthrough = true;
|
||||||
host_info.support_conditional_barrier = true;
|
host_info.support_conditional_barrier = true;
|
||||||
host_info.min_ssbo_alignment = 0;
|
host_info.min_ssbo_alignment = 0;
|
||||||
|
host_info.ApplyDescriptorLimitPolicy();
|
||||||
auto program = Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg, host_info);
|
auto program = Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg, host_info);
|
||||||
auto const dumped_ir = Shader::IR::DumpProgram(program);
|
auto const dumped_ir = Shader::IR::DumpProgram(program);
|
||||||
std::printf("%s\n", dumped_ir.c_str());
|
std::printf("%s\n", dumped_ir.c_str());
|
||||||
|
|
|
||||||
|
|
@ -52,6 +52,7 @@ int SpirvShaderRecompilerImpl(int argc, char *argv[]) {
|
||||||
host_info.support_geometry_shader_passthrough = true;
|
host_info.support_geometry_shader_passthrough = true;
|
||||||
host_info.support_conditional_barrier = true;
|
host_info.support_conditional_barrier = true;
|
||||||
host_info.min_ssbo_alignment = 0;
|
host_info.min_ssbo_alignment = 0;
|
||||||
|
host_info.ApplyDescriptorLimitPolicy();
|
||||||
auto program = Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg, host_info);
|
auto program = Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg, host_info);
|
||||||
|
|
||||||
// IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
|
// IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue