mirror of
https://git.eden-emu.dev/eden-emu/eden
synced 2026-05-26 23:57:03 +02:00
[vk] tomodachi toggle for hotfix PR (#3974)
Mostly just the Tomodachi toggle, so there is no conflict. Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3974
This commit is contained in:
parent
62e188b560
commit
be5326e8e9
8 changed files with 206 additions and 21 deletions
|
|
@ -598,6 +598,8 @@ struct Values {
|
|||
SwitchableSetting<bool> gpu_unswizzle_enabled{linkage, false, "gpu_unswizzle_enabled",
|
||||
Category::RendererHacks};
|
||||
|
||||
// Renderer hack toggle, constructed with `true` (presumably the default value - confirm against
// SwitchableSetting). While it reads true, the texture pass keeps a fixed descriptor array count
// of 8 with the fixed descriptor size shift, and the SPIR-V Texture/TextureImage emitters index
// descriptor arrays without going through DescriptorIndex (no NonUniform decoration).
SwitchableSetting<bool> legacy_descriptor_indices{linkage, true, "legacy_descriptor_indices", Category::RendererHacks};
|
||||
|
||||
SwitchableSetting<ExtendedDynamicState> dyna_state{linkage,
|
||||
#if defined(ANDROID)
|
||||
ExtendedDynamicState::Disabled,
|
||||
|
|
|
|||
|
|
@ -14,6 +14,38 @@
|
|||
|
||||
namespace Shader::Backend::SPIRV {
|
||||
namespace {
|
||||
class DescriptorIndex {
|
||||
public:
|
||||
explicit DescriptorIndex(EmitContext& ctx, const IR::Value& index)
|
||||
: id{index.IsImmediate() ? ctx.Const(index.U32()) : ctx.Def(index)},
|
||||
is_non_uniform{ctx.profile.support_sampled_image_array_nonuniform_indexing &&
|
||||
!index.IsImmediate()} {
|
||||
if (!is_non_uniform) {
|
||||
return;
|
||||
}
|
||||
if (ctx.profile.supported_spirv < 0x00010400) {
|
||||
ctx.AddExtension("SPV_EXT_descriptor_indexing");
|
||||
}
|
||||
ctx.AddCapability(spv::Capability::ShaderNonUniform);
|
||||
ctx.AddCapability(spv::Capability::SampledImageArrayNonUniformIndexing);
|
||||
Decorate(ctx, id);
|
||||
}
|
||||
|
||||
Id Value() const {
|
||||
return id;
|
||||
}
|
||||
|
||||
void Decorate(EmitContext& ctx, Id object) const {
|
||||
if (is_non_uniform) {
|
||||
ctx.Decorate(object, spv::Decoration::NonUniform);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
Id id;
|
||||
bool is_non_uniform;
|
||||
};
|
||||
|
||||
class ImageOperands {
|
||||
public:
|
||||
[[maybe_unused]] static constexpr bool ImageSampleOffsetAllowed = false;
|
||||
|
|
@ -189,8 +221,17 @@ private:
|
|||
Id Texture(EmitContext& ctx, IR::TextureInstInfo info, [[maybe_unused]] const IR::Value& index) {
|
||||
const TextureDefinition& def{ctx.textures.at(info.descriptor_index)};
|
||||
if (def.count > 1) {
|
||||
const Id pointer{ctx.OpAccessChain(def.pointer_type, def.id, ctx.Def(index))};
|
||||
return ctx.OpLoad(def.sampled_type, pointer);
|
||||
if (Settings::values.legacy_descriptor_indices.GetValue()) {
|
||||
const Id pointer{ctx.OpAccessChain(def.pointer_type, def.id, ctx.Def(index))};
|
||||
return ctx.OpLoad(def.sampled_type, pointer);
|
||||
} else {
|
||||
const DescriptorIndex idx{ctx, index};
|
||||
const Id pointer{ctx.OpAccessChain(def.pointer_type, def.id, idx.Value())};
|
||||
idx.Decorate(ctx, pointer);
|
||||
const Id object{ctx.OpLoad(def.sampled_type, pointer)};
|
||||
idx.Decorate(ctx, object);
|
||||
return object;
|
||||
}
|
||||
} else {
|
||||
return ctx.OpLoad(def.sampled_type, def.id);
|
||||
}
|
||||
|
|
@ -208,9 +249,20 @@ Id TextureImage(EmitContext& ctx, IR::TextureInstInfo info, const IR::Value& ind
|
|||
} else {
|
||||
const TextureDefinition& def{ctx.textures.at(info.descriptor_index)};
|
||||
if (def.count > 1) {
|
||||
const Id idx{index.IsImmediate() ? ctx.Const(index.U32()) : ctx.Def(index)};
|
||||
const Id ptr{ctx.OpAccessChain(def.pointer_type, def.id, idx)};
|
||||
return ctx.OpImage(def.image_type, ctx.OpLoad(def.sampled_type, ptr));
|
||||
if (Settings::values.legacy_descriptor_indices.GetValue()) {
|
||||
const Id idx{index.IsImmediate() ? ctx.Const(index.U32()) : ctx.Def(index)};
|
||||
const Id ptr{ctx.OpAccessChain(def.pointer_type, def.id, idx)};
|
||||
return ctx.OpImage(def.image_type, ctx.OpLoad(def.sampled_type, ptr));
|
||||
} else {
|
||||
const DescriptorIndex idx{ctx, index};
|
||||
const Id ptr{ctx.OpAccessChain(def.pointer_type, def.id, idx.Value())};
|
||||
idx.Decorate(ctx, ptr);
|
||||
const Id object{ctx.OpLoad(def.sampled_type, ptr)};
|
||||
idx.Decorate(ctx, object);
|
||||
const Id image{ctx.OpImage(def.image_type, object)};
|
||||
idx.Decorate(ctx, image);
|
||||
return image;
|
||||
}
|
||||
}
|
||||
return ctx.OpImage(def.image_type, ctx.OpLoad(def.sampled_type, def.id));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -20,6 +20,9 @@ struct HostTranslateInfo {
|
|||
bool support_snorm_render_buffer{}; ///< True when the device supports SNORM render buffers
|
||||
bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS
|
||||
u32 min_ssbo_alignment{}; ///< Minimum alignment supported by the device for SSBOs
|
||||
u32 max_per_stage_descriptor_sampled_images{1024}; ///< maximum sampled descriptors per stage
|
||||
u32 max_per_stage_resources{4096}; ///< maximum resources per stage
|
||||
u32 max_descriptor_set_sampled_images{1024}; ///< maximum sampled descriptors per set
|
||||
bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry
|
||||
///< passthrough shaders
|
||||
bool support_conditional_barrier{}; ///< True when the device supports barriers in conditional
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@
|
|||
#include <limits>
|
||||
#include <boost/container/small_vector.hpp>
|
||||
|
||||
#include "common/settings.h"
|
||||
#include "shader_recompiler/environment.h"
|
||||
#include "shader_recompiler/frontend/ir/basic_block.h"
|
||||
#include "shader_recompiler/frontend/ir/breadth_first_search.h"
|
||||
|
|
@ -32,6 +33,71 @@ using TextureInstVector = boost::container::small_vector<TextureInst, 24>;
|
|||
|
||||
constexpr u32 DESCRIPTOR_SIZE = 8;
|
||||
constexpr u32 DESCRIPTOR_SIZE_SHIFT = static_cast<u32>(std::countr_zero(DESCRIPTOR_SIZE));
|
||||
// Byte span of a constant buffer assumed when sizing dynamically indexed descriptor arrays:
// DynamicDescriptorCount treats base offsets at or beyond it as out of range and only counts
// descriptors that fit in the bytes remaining below it.
constexpr u32 DYNAMIC_DESCRIPTOR_CBUF_BYTES = 16 * 1024;
|
||||
// Upper bound on the element count of a dynamically indexed descriptor array; both
// DynamicDescriptorCount and DynamicSampledTextureCap clamp their results to it.
constexpr u32 MAX_DYNAMIC_DESCRIPTOR_COUNT = 1024;
|
||||
|
||||
/// Derives the log2 stride between descriptors from the instruction producing `dynamic_offset`.
///
/// When the offset is produced by a 32-bit logical left shift with an immediate shift amount in
/// the range [DESCRIPTOR_SIZE_SHIFT, 31), that amount is returned. In every other case the
/// default DESCRIPTOR_SIZE_SHIFT is returned.
u32 DynamicDescriptorSizeShift(const IR::U32& dynamic_offset) {
    const IR::Inst* const producer{dynamic_offset.InstRecursive()};
    const bool is_left_shift{producer != nullptr &&
                             producer->GetOpcode() == IR::Opcode::ShiftLeftLogical32};
    if (is_left_shift) {
        const IR::Value amount{producer->Arg(1)};
        if (amount.IsImmediate()) {
            const u32 candidate{amount.U32()};
            // Only accept strides at least as large as one descriptor and small enough to shift.
            const bool in_range{candidate >= DESCRIPTOR_SIZE_SHIFT && candidate < 31};
            if (in_range) {
                return candidate;
            }
        }
    }
    return DESCRIPTOR_SIZE_SHIFT;
}
|
||||
|
||||
/// Computes how many descriptors spaced `1 << size_shift` bytes apart fit between `base_offset`
/// and DYNAMIC_DESCRIPTOR_CBUF_BYTES, clamped to MAX_DYNAMIC_DESCRIPTOR_COUNT.
///
/// Returns 1 when the shift is 31 or larger, when the base offset lies at or beyond the assumed
/// buffer size, or when fewer than DESCRIPTOR_SIZE bytes remain.
u32 DynamicDescriptorCount(u32 base_offset, u32 size_shift) {
    const bool shift_usable{size_shift < 31};
    const bool offset_usable{base_offset < DYNAMIC_DESCRIPTOR_CBUF_BYTES};
    if (!shift_usable || !offset_usable) {
        return 1;
    }
    const u32 remaining_bytes{DYNAMIC_DESCRIPTOR_CBUF_BYTES - base_offset};
    if (remaining_bytes < DESCRIPTOR_SIZE) {
        return 1;
    }
    // The first descriptor occupies DESCRIPTOR_SIZE bytes; every further one needs a full stride.
    // Dividing by the power-of-two stride is expressed as a right shift.
    const u32 bytes_after_first{remaining_bytes - DESCRIPTOR_SIZE};
    const u32 fitting{1U + (bytes_after_first >> size_shift)};
    return std::min(MAX_DYNAMIC_DESCRIPTOR_COUNT, fitting);
}
|
||||
|
||||
/// Subtracts `rhs` from `lhs`, yielding 0 instead of wrapping around when `rhs` is not smaller
/// than `lhs`.
u32 SaturatingSub(u32 lhs, u32 rhs) {
    if (lhs <= rhs) {
        return 0;
    }
    return lhs - rhs;
}
|
||||
|
||||
template <typename Descriptors>
|
||||
u32 StaticDescriptorCount(const Descriptors& descriptors) {
|
||||
u32 count{};
|
||||
for (const auto& desc : descriptors) {
|
||||
if (desc.count <= 1) {
|
||||
count += desc.count;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
/// Computes the largest element count each dynamically indexed sampled texture array may use so
/// that `dynamic_arrays` such arrays fit within the host limits.
///
/// The sampled-image budget is the smaller of the per-stage and per-set sampled image limits
/// minus the non-array texture and texture buffer descriptors. The resource budget is the
/// per-stage resource limit minus the constant buffer, storage buffer, non-array sampled, image
/// buffer and image descriptors. Each budget is split evenly across the arrays. The result is
/// clamped to [1, MAX_DYNAMIC_DESCRIPTOR_COUNT]; with no dynamic arrays the maximum is returned.
///
/// NOTE(review): the descriptor lists are read from `info` as they are at call time - confirm
/// they are already populated where this is called.
u32 DynamicSampledTextureCap(const Info& info, const HostTranslateInfo& host_info,
                             u32 dynamic_arrays) {
    if (dynamic_arrays == 0) {
        return MAX_DYNAMIC_DESCRIPTOR_COUNT;
    }

    // Descriptors that are not arrays and count against the sampled image limits.
    const u32 static_sampled{StaticDescriptorCount(info.texture_buffer_descriptors) +
                             StaticDescriptorCount(info.texture_descriptors)};

    // Everything already counted against the per-stage resource limit.
    u32 static_resources{NumDescriptors(info.constant_buffer_descriptors)};
    static_resources += NumDescriptors(info.storage_buffers_descriptors);
    static_resources += static_sampled;
    static_resources += NumDescriptors(info.image_buffer_descriptors);
    static_resources += NumDescriptors(info.image_descriptors);

    const u32 sampled_limit{std::min(host_info.max_per_stage_descriptor_sampled_images,
                                     host_info.max_descriptor_set_sampled_images)};
    const u32 sampled_budget{SaturatingSub(sampled_limit, static_sampled)};
    const u32 resource_budget{SaturatingSub(host_info.max_per_stage_resources, static_resources)};

    u32 cap{MAX_DYNAMIC_DESCRIPTOR_COUNT};
    cap = std::min(cap, sampled_budget / dynamic_arrays);
    cap = std::min(cap, resource_budget / dynamic_arrays);
    // Never report less than one element, even when a budget is exhausted.
    return std::max(1U, cap);
}
|
||||
|
||||
IR::Opcode IndexedInstruction(const IR::Inst& inst) {
|
||||
switch (inst.GetOpcode()) {
|
||||
|
|
@ -109,6 +175,39 @@ IR::Opcode IndexedInstruction(const IR::Inst& inst) {
|
|||
}
|
||||
}
|
||||
|
||||
/// Returns true when `opcode` is one of the image read, image write or 32-bit image atomic
/// opcodes listed below, and false for every other opcode.
bool IsStorageImageOpcode(IR::Opcode opcode) {
    static constexpr IR::Opcode storage_image_opcodes[]{
        IR::Opcode::ImageRead,
        IR::Opcode::ImageAtomicIAdd32,
        IR::Opcode::ImageAtomicSMin32,
        IR::Opcode::ImageAtomicUMin32,
        IR::Opcode::ImageAtomicSMax32,
        IR::Opcode::ImageAtomicUMax32,
        IR::Opcode::ImageAtomicInc32,
        IR::Opcode::ImageAtomicDec32,
        IR::Opcode::ImageAtomicAnd32,
        IR::Opcode::ImageAtomicOr32,
        IR::Opcode::ImageAtomicXor32,
        IR::Opcode::ImageAtomicExchange32,
        IR::Opcode::ImageWrite,
    };
    for (const IR::Opcode candidate : storage_image_opcodes) {
        if (candidate == opcode) {
            return true;
        }
    }
    return false;
}
|
||||
|
||||
/// Counts the entries of `to_replace` that are descriptor arrays (cbuf count above 1), whose
/// indexed opcode is not a storage image opcode, and whose texture type is not Buffer.
u32 DynamicSampledTextureArrayCount(const TextureInstVector& to_replace) {
    u32 arrays{0};
    for (const TextureInst& entry : to_replace) {
        const auto flags{entry.inst->Flags<IR::TextureInstInfo>()};
        if (entry.cbuf.count <= 1) {
            continue;
        }
        if (IsStorageImageOpcode(IndexedInstruction(*entry.inst))) {
            continue;
        }
        const bool is_not_buffer{flags.type != TextureType::Buffer};
        if (is_not_buffer) {
            ++arrays;
        }
    }
    return arrays;
}
|
||||
|
||||
bool IsBindless(const IR::Inst& inst) {
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::BindlessImageSampleImplicitLod:
|
||||
|
|
@ -354,6 +453,7 @@ std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environme
|
|||
} else {
|
||||
return std::nullopt;
|
||||
}
|
||||
const u32 size_shift{DynamicDescriptorSizeShift(dynamic_offset)};
|
||||
return ConstBufferAddr{
|
||||
.index = index.U32(),
|
||||
.offset = base_offset,
|
||||
|
|
@ -362,7 +462,7 @@ std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environme
|
|||
.secondary_offset = 0,
|
||||
.secondary_shift_left = 0,
|
||||
.dynamic_offset = dynamic_offset,
|
||||
.count = 8,
|
||||
.count = Settings::values.legacy_descriptor_indices.GetValue() ? 8 : DynamicDescriptorCount(base_offset, size_shift),
|
||||
.has_secondary = false,
|
||||
};
|
||||
}
|
||||
|
|
@ -589,6 +689,8 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
|
|||
program.info.texture_descriptors,
|
||||
program.info.image_descriptors,
|
||||
};
|
||||
const u32 sampled_dynamic_cap{
|
||||
DynamicSampledTextureCap(program.info, host_info, DynamicSampledTextureArrayCount(to_replace))};
|
||||
for (TextureInst& texture_inst : to_replace) {
|
||||
// TODO: Handle arrays
|
||||
IR::Inst* const inst{texture_inst.inst};
|
||||
|
|
@ -632,6 +734,10 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
|
|||
break;
|
||||
}
|
||||
u32 index;
|
||||
u32 size_shift = cbuf.count > 1 ? DynamicDescriptorSizeShift(cbuf.dynamic_offset) : DESCRIPTOR_SIZE_SHIFT;
|
||||
if (Settings::values.legacy_descriptor_indices.GetValue())
|
||||
size_shift = DESCRIPTOR_SIZE_SHIFT;
|
||||
u32 count = cbuf.count;
|
||||
switch (inst->GetOpcode()) {
|
||||
case IR::Opcode::ImageRead:
|
||||
case IR::Opcode::ImageAtomicIAdd32:
|
||||
|
|
@ -660,8 +766,8 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
|
|||
.is_integer = is_integer,
|
||||
.cbuf_index = cbuf.index,
|
||||
.cbuf_offset = cbuf.offset,
|
||||
.count = cbuf.count,
|
||||
.size_shift = DESCRIPTOR_SIZE_SHIFT,
|
||||
.count = count,
|
||||
.size_shift = size_shift,
|
||||
});
|
||||
} else {
|
||||
index = descriptors.Add(ImageDescriptor{
|
||||
|
|
@ -672,8 +778,8 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
|
|||
.is_integer = is_integer,
|
||||
.cbuf_index = cbuf.index,
|
||||
.cbuf_offset = cbuf.offset,
|
||||
.count = cbuf.count,
|
||||
.size_shift = DESCRIPTOR_SIZE_SHIFT,
|
||||
.count = count,
|
||||
.size_shift = size_shift,
|
||||
});
|
||||
}
|
||||
break;
|
||||
|
|
@ -688,10 +794,11 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
|
|||
.secondary_cbuf_index = cbuf.secondary_index,
|
||||
.secondary_cbuf_offset = cbuf.secondary_offset,
|
||||
.secondary_shift_left = cbuf.secondary_shift_left,
|
||||
.count = cbuf.count,
|
||||
.size_shift = DESCRIPTOR_SIZE_SHIFT,
|
||||
.count = count,
|
||||
.size_shift = size_shift,
|
||||
});
|
||||
} else {
|
||||
count = std::min(count, sampled_dynamic_cap);
|
||||
index = descriptors.Add(TextureDescriptor{
|
||||
.type = flags.type,
|
||||
.is_depth = flags.is_depth != 0,
|
||||
|
|
@ -703,8 +810,8 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
|
|||
.secondary_cbuf_index = cbuf.secondary_index,
|
||||
.secondary_cbuf_offset = cbuf.secondary_offset,
|
||||
.secondary_shift_left = cbuf.secondary_shift_left,
|
||||
.count = cbuf.count,
|
||||
.size_shift = DESCRIPTOR_SIZE_SHIFT,
|
||||
.count = count,
|
||||
.size_shift = size_shift,
|
||||
});
|
||||
}
|
||||
break;
|
||||
|
|
@ -712,12 +819,11 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
|
|||
flags.descriptor_index.Assign(index);
|
||||
inst->SetFlags(flags);
|
||||
|
||||
if (cbuf.count > 1) {
|
||||
if (count > 1) {
|
||||
const auto insert_point{IR::Block::InstructionList::s_iterator_to(*inst)};
|
||||
IR::IREmitter ir{*texture_inst.block, insert_point};
|
||||
const IR::U32 shift{ir.Imm32(DESCRIPTOR_SIZE_SHIFT)};
|
||||
inst->SetArg(0, ir.UMin(ir.ShiftRightLogical(cbuf.dynamic_offset, shift),
|
||||
ir.Imm32(DESCRIPTOR_SIZE - 1)));
|
||||
const IR::U32 shift{ir.Imm32(size_shift)};
|
||||
inst->SetArg(0, ir.UMin(ir.ShiftRightLogical(cbuf.dynamic_offset, shift), ir.Imm32(count - 1)));
|
||||
} else {
|
||||
inst->SetArg(0, IR::Value{});
|
||||
}
|
||||
|
|
|
|||
|
|
@ -48,6 +48,7 @@ struct Profile {
|
|||
bool support_scaled_attributes{};
|
||||
bool support_multi_viewport{};
|
||||
bool support_geometry_streams{};
|
||||
bool support_sampled_image_array_nonuniform_indexing{};
|
||||
|
||||
bool warp_size_potentially_larger_than_guest{};
|
||||
|
||||
|
|
|
|||
|
|
@ -127,9 +127,8 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
|
|||
|
||||
texture_cache.SynchronizeComputeDescriptors();
|
||||
|
||||
static constexpr size_t max_elements = 64;
|
||||
boost::container::static_vector<VideoCommon::ImageViewInOut, max_elements> views;
|
||||
boost::container::static_vector<VideoCommon::SamplerId, max_elements> samplers;
|
||||
boost::container::small_vector<VideoCommon::ImageViewInOut, 64> views;
|
||||
boost::container::small_vector<VideoCommon::SamplerId, 64> samplers;
|
||||
|
||||
const auto& qmd{kepler_compute.launch_description};
|
||||
const auto& cbufs{qmd.const_buffer_config};
|
||||
|
|
|
|||
|
|
@ -416,6 +416,8 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
|
|||
.support_scaled_attributes = !device.MustEmulateScaledFormats(),
|
||||
.support_multi_viewport = device.SupportsMultiViewport(),
|
||||
.support_geometry_streams = device.AreTransformFeedbackGeometryStreamsSupported(),
|
||||
.support_sampled_image_array_nonuniform_indexing =
|
||||
device.IsSampledImageArrayNonUniformIndexingSupported(),
|
||||
|
||||
.warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(),
|
||||
|
||||
|
|
@ -450,6 +452,9 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
|
|||
.support_snorm_render_buffer = true,
|
||||
.support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(),
|
||||
.min_ssbo_alignment = static_cast<u32>(device.GetStorageBufferAlignment()),
|
||||
.max_per_stage_descriptor_sampled_images = device.GetMaxPerStageDescriptorSampledImages(),
|
||||
.max_per_stage_resources = device.GetMaxPerStageResources(),
|
||||
.max_descriptor_set_sampled_images = device.GetMaxDescriptorSetSampledImages(),
|
||||
.support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(),
|
||||
.support_conditional_barrier = device.SupportsConditionalBarriers(),
|
||||
};
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ VK_DEFINE_HANDLE(VmaAllocator)
|
|||
FEATURE(KHR, VariablePointer, VARIABLE_POINTERS, variable_pointer)
|
||||
|
||||
#define FOR_EACH_VK_FEATURE_1_2(FEATURE) \
|
||||
FEATURE(EXT, DescriptorIndexing, DESCRIPTOR_INDEXING, descriptor_indexing) \
|
||||
FEATURE(EXT, HostQueryReset, HOST_QUERY_RESET, host_query_reset) \
|
||||
FEATURE(KHR, 8BitStorage, 8BIT_STORAGE, bit8_storage) \
|
||||
FEATURE(KHR, TimelineSemaphore, TIMELINE_SEMAPHORE, timeline_semaphore)
|
||||
|
|
@ -335,6 +336,18 @@ public:
|
|||
return properties.properties.limits.maxDescriptorSetUniformBuffersDynamic;
|
||||
}
|
||||
|
||||
/// Returns the `maxPerStageDescriptorSampledImages` limit from the stored device properties.
u32 GetMaxPerStageDescriptorSampledImages() const {
    const auto& device_limits = properties.properties.limits;
    return device_limits.maxPerStageDescriptorSampledImages;
}
|
||||
|
||||
/// Returns the `maxPerStageResources` limit from the stored device properties.
u32 GetMaxPerStageResources() const {
    const auto& device_limits = properties.properties.limits;
    return device_limits.maxPerStageResources;
}
|
||||
|
||||
/// Returns the `maxDescriptorSetSampledImages` limit from the stored device properties.
u32 GetMaxDescriptorSetSampledImages() const {
    const auto& device_limits = properties.properties.limits;
    return device_limits.maxDescriptorSetSampledImages;
}
|
||||
|
||||
/// Returns float control properties of the device.
|
||||
const VkPhysicalDeviceFloatControlsPropertiesKHR& FloatControlProperties() const {
|
||||
return properties.float_controls;
|
||||
|
|
@ -360,6 +373,10 @@ public:
|
|||
return GetDriverID() != VK_DRIVER_ID_QUALCOMM_PROPRIETARY;
|
||||
}
|
||||
|
||||
bool IsSampledImageArrayNonUniformIndexingSupported() const {
|
||||
return features.descriptor_indexing.shaderSampledImageArrayNonUniformIndexing;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports float64 natively.
|
||||
bool IsFloat64Supported() const {
|
||||
return features.features.shaderFloat64;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue