WIP: stuff for fun 2

Signed-off-by: lizzie <lizzie@eden-emu.dev>
This commit is contained in:
lizzie 2026-04-26 05:30:16 +00:00
parent 91058d7383
commit 666593a3b2
46 changed files with 1401 additions and 171 deletions

View file

@ -34,6 +34,10 @@ enum class BooleanSetting(override val key: String) : AbstractBooleanSetting {
RENDERER_DEBUG("debug"),
RENDERER_PATCH_OLD_QCOM_DRIVERS("patch_old_qcom_drivers"),
RENDERER_VERTEX_INPUT_DYNAMIC_STATE("vertex_input_dynamic_state"),
FORCE_IDENTITY_SWIZZLE("force_identity_swizzle"),
FORCE_LDR_TO_SRGB("force_ldr_to_srgb"),
RENDERER_PROVOKING_VERTEX("provoking_vertex"),
RENDERER_DESCRIPTOR_INDEXING("descriptor_indexing"),
RENDERER_SAMPLE_SHADING("sample_shading"),
GPU_UNSWIZZLE_ENABLED("gpu_unswizzle_enabled"),
PICTURE_IN_PICTURE("picture_in_picture"),

View file

@ -17,6 +17,8 @@ enum class IntSetting(override val key: String) : AbstractIntSetting {
RENDERER_VRAM_USAGE_MODE("vram_usage_mode"),
RENDERER_NVDEC_EMULATION("nvdec_emulation"),
RENDERER_ASTC_DECODE_METHOD("accelerate_astc"),
RENDERER_ASTC_RECOMPRESSION("astc_recompression"),
RENDERER_FORMAT_REINTERPRETATION("format_reinterpretation"),
RENDERER_ACCURACY("gpu_accuracy"),
RENDERER_RESOLUTION("resolution_setup"),
RENDERER_VSYNC("use_vsync"),

View file

@ -148,6 +148,34 @@ abstract class SettingsItem(
descriptionId = R.string.vertex_input_dynamic_state_description
)
)
put(
SwitchSetting(
BooleanSetting.FORCE_IDENTITY_SWIZZLE,
titleId = R.string.force_identity_swizzle,
descriptionId = R.string.force_identity_swizzle_description
)
)
put(
SwitchSetting(
BooleanSetting.FORCE_LDR_TO_SRGB,
titleId = R.string.force_ldr_to_srgb,
descriptionId = R.string.force_ldr_to_srgb_description
)
)
put(
SwitchSetting(
BooleanSetting.RENDERER_DESCRIPTOR_INDEXING,
titleId = R.string.descriptor_indexing,
descriptionId = R.string.descriptor_indexing_description
)
)
put(
SwitchSetting(
BooleanSetting.RENDERER_SAMPLE_SHADING,
titleId = R.string.sample_shading,
descriptionId = R.string.sample_shading_description
)
)
put(
SliderSetting(
IntSetting.RENDERER_SAMPLE_SHADING,
@ -335,6 +363,23 @@ abstract class SettingsItem(
valuesId = R.array.astcDecodingMethodValues
)
)
put(
SingleChoiceSetting(
IntSetting.RENDERER_ASTC_RECOMPRESSION,
titleId = R.string.astc_recompression,
descriptionId = R.string.astc_recompression_description,
choicesId = R.array.astcRecompressionMethodNames,
valuesId = R.array.astcRecompressionMethodValues
)
)
put(
SingleChoiceSetting(
IntSetting.RENDERER_FORMAT_REINTERPRETATION,
titleId = R.string.format_reinterpretation,
choicesId = R.array.formatReinterpretationNames,
valuesId = R.array.formatReinterpretationValues
)
)
put(
SingleChoiceSetting(
IntSetting.RENDERER_VRAM_USAGE_MODE,

View file

@ -693,7 +693,9 @@ void HostMemory::Map(size_t virtual_offset, size_t host_offset, size_t length,
ASSERT(virtual_offset % PageAlignment == 0);
ASSERT(host_offset % PageAlignment == 0);
ASSERT(length % PageAlignment == 0);
ASSERT(virtual_offset + length <= virtual_size);
if (impl && virtual_base) {
ASSERT(virtual_offset + length <= virtual_size);
}
ASSERT(host_offset + length <= backing_size);
if (length == 0 || !virtual_base || !impl) {
return;
@ -704,7 +706,9 @@ void HostMemory::Map(size_t virtual_offset, size_t host_offset, size_t length,
void HostMemory::Unmap(size_t virtual_offset, size_t length, bool separate_heap) {
ASSERT(virtual_offset % PageAlignment == 0);
ASSERT(length % PageAlignment == 0);
ASSERT(virtual_offset + length <= virtual_size);
if (impl && virtual_base) {
ASSERT(virtual_offset + length <= virtual_size);
}
if (length == 0 || !virtual_base || !impl) {
return;
}
@ -714,7 +718,9 @@ void HostMemory::Unmap(size_t virtual_offset, size_t length, bool separate_heap)
void HostMemory::Protect(size_t virtual_offset, size_t length, MemoryPermission perm) {
ASSERT(virtual_offset % PageAlignment == 0);
ASSERT(length % PageAlignment == 0);
ASSERT(virtual_offset + length <= virtual_size);
if (impl && virtual_base) {
ASSERT(virtual_offset + length <= virtual_size);
}
if (length == 0 || !virtual_base || !impl) {
return;
}

View file

@ -52,6 +52,7 @@ SWITCHABLE(AstcRecompression, true);
SWITCHABLE(AudioMode, true);
SWITCHABLE(CpuBackend, true);
SWITCHABLE(CpuAccuracy, true);
SWITCHABLE(FormatReinterpretation, true);
SWITCHABLE(FullscreenMode, true);
SWITCHABLE(GpuAccuracy, true);
SWITCHABLE(GpuLogLevel, true);

View file

@ -69,6 +69,7 @@ SWITCHABLE(AstcRecompression, true);
SWITCHABLE(AudioMode, true);
SWITCHABLE(CpuBackend, true);
SWITCHABLE(CpuAccuracy, true);
SWITCHABLE(FormatReinterpretation, true);
SWITCHABLE(FullscreenMode, true);
SWITCHABLE(GpuAccuracy, true);
SWITCHABLE(Language, true);
@ -469,7 +470,11 @@ struct Values {
"astc_recompression",
Category::RendererAdvanced};
SwitchableSetting<FormatReinterpretation, true> format_reinterpretation{
linkage,
FormatReinterpretation::Disabled,
"format_reinterpretation",
Category::RendererAdvanced};
SwitchableSetting<bool> sync_memory_operations{linkage,
false,
"sync_memory_operations",
@ -478,6 +483,9 @@ struct Values {
true,
true};
SwitchableSetting<bool> force_identity_swizzle{linkage, false, "force_identity_swizzle",
Category::RendererAdvanced};
SwitchableSetting<bool> renderer_force_max_clock{linkage, false, "force_max_clock",
Category::RendererAdvanced};
@ -608,6 +616,60 @@ struct Values {
#endif
"vertex_input_dynamic_state", Category::RendererExtensions};
#ifdef ANDROID
// Shader Float Controls (Android only) - Eden Veil / Extensions
// Force enable VK_KHR_shader_float_controls even if driver has known issues
// Allows fine-tuning float behavior to match Switch/Maxwell or optimize performance
SwitchableSetting<bool> shader_float_controls_force_enable{linkage,
false,
"shader_float_controls_force_enable",
Category::RendererExtensions,
Specialization::Paired};
// Individual float behavior controls (visible only when force_enable is true)
// Multiple can be active simultaneously EXCEPT FTZ and DenormPreserve (mutually exclusive)
//
// Recommended configurations:
// Switch-native: FTZ=ON, RTE=ON, SignedZero=ON (matches Maxwell behavior)
// Performance: FTZ=ON only (fastest)
// Accuracy: DenormPreserve=ON, RTE=ON, SignedZero=ON (slowest, highest precision)
SwitchableSetting<bool> shader_float_ftz{linkage,
false,
"shader_float_ftz",
Category::RendererExtensions,
Specialization::Default,
true,
false,
&shader_float_controls_force_enable};
SwitchableSetting<bool> shader_float_denorm_preserve{linkage,
false,
"shader_float_denorm_preserve",
Category::RendererExtensions,
Specialization::Default,
true,
false,
&shader_float_controls_force_enable};
SwitchableSetting<bool> shader_float_rte{linkage,
false,
"shader_float_rte",
Category::RendererExtensions,
Specialization::Default,
true,
false,
&shader_float_controls_force_enable};
SwitchableSetting<bool> shader_float_signed_zero_inf_nan{linkage,
false,
"shader_float_signed_zero_inf_nan",
Category::RendererExtensions,
Specialization::Default,
true,
false,
&shader_float_controls_force_enable};
#endif
Setting<bool> renderer_debug{linkage, false, "debug", Category::RendererDebug};
Setting<bool> renderer_shader_feedback{linkage, false, "shader_feedback",
Category::RendererDebug};

View file

@ -158,6 +158,17 @@ ENUM(ExtendedDynamicState, Disabled, EDS1, EDS2, EDS3);
ENUM(GpuLogLevel, Off, Errors, Standard, Verbose, All)
ENUM(GameListMode, TreeView, GridView);
ENUM(SpeedMode, Standard, Turbo, Slow);
ENUM(FormatReinterpretation, Disabled, R32UintToR32Sfloat, R32SintToR32Uint, R32SfloatToR32Sint)
// Shader Float Controls behavior modes
// These control how floating-point denormals and special values are handled in shaders
ENUM(ShaderFloatBehavior,
DriverDefault, // Let driver choose (safest, may not match Switch behavior)
SwitchNative, // Emulate Switch/Maxwell behavior (FTZ + RTE + SignedZero)
FlushToZero, // FTZ only - flush denorms to zero (fastest, some precision loss)
PreserveDenorms, // Preserve denorms (slowest, highest precision)
RoundToEven, // RTE rounding mode (IEEE 754 compliant)
SignedZeroInfNan); // Preserve signed zero, inf, nan (accuracy for edge cases)
template <typename Type>
inline std::string_view CanonicalizeEnum(Type id) {

View file

@ -396,6 +396,24 @@ std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size,
const bool is_buffer_b{BufferDescriptorB().size() > buffer_index &&
BufferDescriptorB()[buffer_index].Size()};
const std::size_t buffer_size{GetWriteBufferSize(buffer_index)};
// Defensive check: if client didn't provide output buffer, log detailed error but don't crash
if (buffer_size == 0) {
LOG_ERROR(Core,
"WriteBuffer called but client provided NO output buffer! "
"Requested size: 0x{:X}, buffer_index: {}, is_buffer_b: {}, "
"BufferB count: {}, BufferC count: {}",
size, buffer_index, is_buffer_b, BufferDescriptorB().size(),
BufferDescriptorC().size());
// Log command context for debugging
LOG_ERROR(Core, "IPC Command: 0x{:X}, Type: {}", GetCommand(),
static_cast<u32>(GetCommandType()));
// Return 0 instead of crashing - let service handle error
return 0;
}
if (size > buffer_size) {
LOG_CRITICAL(Core, "size ({:016X}) is greater than buffer_size ({:016X})", size,
buffer_size);

View file

@ -162,7 +162,24 @@ std::unique_ptr<TranslationMap> InitializeTranslations(QObject* parent) {
tr("Stretches the renderer to fit the specified aspect ratio.\nMost games only support "
"16:9, so modifications are required to get other ratios.\nAlso controls the "
"aspect ratio of captured screenshots."));
INSERT(Settings, use_disk_shader_cache, tr("Use persistent pipeline cache"),
INSERT(Settings,
format_reinterpretation,
tr("Format Reinterpretation:"),
tr("Reinterprets certain texture formats for accuracy rendering.\nMay cause "
"graphical issues in some games."));
INSERT(Settings,
force_identity_swizzle,
tr("Force Identity Swizzle"),
tr("Forces identity component swizzle for storage and input attachment images. "
"Required by Vulkan spec. Disable only for debugging driver issues."));
INSERT(Settings,
force_ldr_to_srgb,
tr("Force LDR Formats to sRGB"),
tr("Converts LDR texture formats (RGBA8_UNORM, A2B10G10R10_UNORM) to sRGB variants. "
"Fixes gamma correction issues on some games. Enable for correct colors on Adreno GPUs."));
INSERT(Settings,
use_disk_shader_cache,
tr("Use persistent pipeline cache"),
tr("Allows saving shaders to storage for faster loading on following game "
"boots.\nDisabling it is only intended for debugging."));
INSERT(Settings, use_asynchronous_gpu_emulation, tr("Use asynchronous GPU emulation"),
@ -680,6 +697,13 @@ std::unique_ptr<ComboboxTranslationMap> ComboboxEnumeration(QObject* parent) {
PAIR(GameListMode, TreeView, tr("Tree View")),
PAIR(GameListMode, GridView, tr("Grid View")),
}});
translations->insert({Settings::EnumMetadata<Settings::FormatReinterpretation>::Index(),
{
PAIR(FormatReinterpretation, Disabled, tr("Disabled")),
PAIR(FormatReinterpretation, R32UintToR32Sfloat, tr("R32 Uint to R32 Float")),
PAIR(FormatReinterpretation, R32SintToR32Uint, tr("R32 Sint to R32 Uint")),
PAIR(FormatReinterpretation, R32SfloatToR32Sint, tr("R32 Float to R32 Sint")),
}});
#undef PAIR
#undef CTX_PAIR

View file

@ -55,6 +55,17 @@ static const std::map<Settings::ScalingFilter, QString> scaling_filter_texts_map
{Settings::ScalingFilter::Mmpx, QStringLiteral(QT_TRANSLATE_NOOP("MainWindow", "MMPX"))},
};
// Human-readable UI labels for each Settings::FormatReinterpretation value.
// QT_TRANSLATE_NOOP marks the strings for the "MainWindow" translation context
// without translating at static-init time; lookup/translation happens later.
// NOTE(review): assumes this map is kept in sync with the FormatReinterpretation
// enum (Disabled, R32UintToR32Sfloat, R32SintToR32Uint, R32SfloatToR32Sint) —
// a new enum value must get an entry here too; confirm against the ENUM() decl.
static const std::map<Settings::FormatReinterpretation, QString> format_reinterpretation_texts_map = {
{Settings::FormatReinterpretation::Disabled,
QStringLiteral(QT_TRANSLATE_NOOP("MainWindow", "Disabled"))},
{Settings::FormatReinterpretation::R32UintToR32Sfloat,
QStringLiteral(QT_TRANSLATE_NOOP("MainWindow", "R32 Uint to R32 Float"))},
{Settings::FormatReinterpretation::R32SintToR32Uint,
QStringLiteral(QT_TRANSLATE_NOOP("MainWindow", "R32 Sint to R32 Uint"))},
{Settings::FormatReinterpretation::R32SfloatToR32Sint,
QStringLiteral(QT_TRANSLATE_NOOP("MainWindow", "R32 Float to R32 Sint"))},
};
static const std::map<Settings::ConsoleMode, QString> use_docked_mode_texts_map = {
{Settings::ConsoleMode::Docked, QStringLiteral(QT_TRANSLATE_NOOP("MainWindow", "Docked"))},
{Settings::ConsoleMode::Handheld, QStringLiteral(QT_TRANSLATE_NOOP("MainWindow", "Handheld"))},

View file

@ -382,13 +382,14 @@ void EmitContext::SetupExtensions() {
if (info.uses_int64 && profile.support_int64) {
header += "#extension GL_ARB_gpu_shader_int64 : enable\n";
}
if (info.uses_int64_bit_atomics) {
if (info.uses_int64_bit_atomics && profile.support_gl_shader_atomic_int64) {
header += "#extension GL_NV_shader_atomic_int64 : enable\n";
}
if (info.uses_atomic_f32_add) {
if (info.uses_atomic_f32_add && profile.support_gl_shader_atomic_float) {
header += "#extension GL_NV_shader_atomic_float : enable\n";
}
if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) {
if ((info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) &&
profile.support_gl_shader_atomic_fp16_vector) {
header += "#extension GL_NV_shader_atomic_fp16_vector : enable\n";
}
if (info.uses_fp16) {

View file

@ -332,19 +332,35 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
void SetupDenormControl(const Profile& profile, const IR::Program& program, EmitContext& ctx,
Id main_func) {
const Info& info{program.info};
// User-forced behavior overrides (Android Eden Veil/Extensions)
// When force flags are active, they take precedence over shader-declared behavior
const bool force_flush = profile.force_fp32_denorm_flush;
const bool force_preserve = profile.force_fp32_denorm_preserve;
if (force_flush && force_preserve) {
LOG_WARNING(Shader_SPIRV, "Both FTZ and Preserve forced simultaneously - FTZ takes precedence");
}
if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) {
LOG_DEBUG(Shader_SPIRV, "Fp32 denorm flush and preserve on the same shader");
} else if (info.uses_fp32_denorms_flush) {
} else if (force_flush || info.uses_fp32_denorms_flush) {
if (profile.support_fp32_denorm_flush) {
ctx.AddCapability(spv::Capability::DenormFlushToZero);
ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 32U);
if (force_flush) {
LOG_DEBUG(Shader_SPIRV, "Fp32 DenormFlushToZero FORCED by user setting");
}
} else {
// Drivers will most likely flush denorms by default, no need to warn
}
} else if (info.uses_fp32_denorms_preserve) {
} else if (force_preserve || info.uses_fp32_denorms_preserve) {
if (profile.support_fp32_denorm_preserve) {
ctx.AddCapability(spv::Capability::DenormPreserve);
ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 32U);
if (force_preserve) {
LOG_DEBUG(Shader_SPIRV, "Fp32 DenormPreserve FORCED by user setting");
}
} else {
LOG_DEBUG(Shader_SPIRV, "Fp32 denorm preserve used in shader without host support");
}
@ -377,13 +393,24 @@ void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& progr
if (profile.has_broken_fp16_float_controls && program.info.uses_fp16) {
return;
}
// User-forced behavior (Android Eden Veil/Extensions)
const bool force_signed_zero_inf_nan = profile.force_fp32_signed_zero_inf_nan;
if (program.info.uses_fp16 && profile.support_fp16_signed_zero_nan_preserve) {
ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve);
ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 16U);
}
if (profile.support_fp32_signed_zero_nan_preserve) {
ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve);
ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 32U);
if (force_signed_zero_inf_nan || profile.support_fp32_signed_zero_nan_preserve) {
if (profile.support_fp32_signed_zero_nan_preserve) {
ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve);
ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 32U);
if (force_signed_zero_inf_nan) {
LOG_DEBUG(Shader_SPIRV, "Fp32 SignedZeroInfNanPreserve FORCED by user setting");
}
} else if (force_signed_zero_inf_nan) {
LOG_WARNING(Shader_SPIRV, "SignedZeroInfNanPreserve forced but driver doesn't support it");
}
}
if (program.info.uses_fp64 && profile.support_fp64_signed_zero_nan_preserve) {
ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve);

View file

@ -332,13 +332,23 @@ void AddOffsetToCoordinates(EmitContext& ctx, const IR::TextureInstInfo& info, I
return;
}
// Mobile GPUs: 1D textures emulated as 2D with height=1
const bool emulate_1d = ctx.profile.needs_1d_texture_emulation;
Id result_type{};
switch (info.type) {
case TextureType::Buffer:
case TextureType::Color1D: {
result_type = ctx.U32[1];
break;
}
case TextureType::Color1D:
if (emulate_1d) {
// Treat as 2D: offset needs Y component
offset = ctx.OpCompositeConstruct(ctx.U32[2], offset, ctx.u32_zero_value);
result_type = ctx.U32[2];
} else {
result_type = ctx.U32[1];
}
break;
case TextureType::ColorArray1D:
offset = ctx.OpCompositeConstruct(ctx.U32[2], offset, ctx.u32_zero_value);
[[fallthrough]];
@ -362,6 +372,40 @@ void AddOffsetToCoordinates(EmitContext& ctx, const IR::TextureInstInfo& info, I
}
coords = ctx.OpIAdd(result_type, coords, offset);
}
// Helper: rewrite 1D texture coordinates into 2D form when the host profile
// requires 1D textures to be emulated as 2D images (mobile GPUs without the
// Sampled1D capability). Non-1D texture types pass through unchanged.
[[nodiscard]] Id AdjustCoordinatesForEmulation(EmitContext& ctx, const IR::TextureInstInfo& info,
                                               Id coords) {
    // Fast path: host handles native 1D textures, nothing to rewrite.
    if (!ctx.profile.needs_1d_texture_emulation) {
        return coords;
    }
    if (info.type == TextureType::Color1D) {
        // Scalar x -> vec2(x, 0.0): always sample row 0 of the backing 2D image.
        return ctx.OpCompositeConstruct(ctx.F32[2], coords, ctx.f32_zero_value);
    }
    if (info.type == TextureType::ColorArray1D) {
        // vec2(x, layer) -> vec3(x, 0.0, layer). The IR always packs
        // 1D-array coordinates as a vec2 of (x, layer).
        const Id x{ctx.OpCompositeExtract(ctx.F32[1], coords, 0)};
        const Id layer{ctx.OpCompositeExtract(ctx.F32[1], coords, 1)};
        return ctx.OpCompositeConstruct(ctx.F32[3], x, ctx.f32_zero_value, layer);
    }
    // Buffer, 2D, 3D, cube and their array variants need no adjustment.
    return coords;
}
} // Anonymous namespace
Id EmitBindlessImageSampleImplicitLod(EmitContext&) {
@ -463,6 +507,7 @@ Id EmitBoundImageWrite(EmitContext&) {
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
Id bias_lc, const IR::Value& offset) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
if (ctx.stage == Stage::Fragment) {
const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0,
bias_lc, offset);
@ -484,6 +529,7 @@ Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value&
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
Id lod, const IR::Value& offset) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
const ImageOperands operands(ctx, false, true, false, lod, offset);
Id result = Emit(&EmitContext::OpImageSparseSampleExplicitLod,
@ -500,6 +546,7 @@ Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value&
Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
Id coords, Id dref, Id bias_lc, const IR::Value& offset) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
if (ctx.stage == Stage::Fragment) {
const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0,
bias_lc, offset);
@ -521,6 +568,7 @@ Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va
Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
Id coords, Id dref, Id lod, const IR::Value& offset) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
const ImageOperands operands(ctx, false, true, false, lod, offset);
return Emit(&EmitContext::OpImageSparseSampleDrefExplicitLod,
&EmitContext::OpImageSampleDrefExplicitLod, ctx, inst, ctx.F32[1],
@ -530,6 +578,7 @@ Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
const IR::Value& offset, const IR::Value& offset2) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
const ImageOperands operands(ctx, offset, offset2);
if (ctx.profile.need_gather_subpixel_offset) {
coords = ImageGatherSubpixelOffset(ctx, info, TextureImage(ctx, info, index), coords);
@ -542,6 +591,7 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
const IR::Value& offset, const IR::Value& offset2, Id dref) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
const ImageOperands operands(ctx, offset, offset2);
if (ctx.profile.need_gather_subpixel_offset) {
coords = ImageGatherSubpixelOffset(ctx, info, TextureImage(ctx, info, index), coords);
@ -554,6 +604,7 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
Id lod, Id ms) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
AddOffsetToCoordinates(ctx, info, coords, offset);
if (info.type == TextureType::Buffer) {
lod = Id{};
@ -580,9 +631,20 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& i
return uses_lod ? ctx.OpImageQuerySizeLod(type, image, lod)
: ctx.OpImageQuerySize(type, image);
}};
// Mobile GPUs: 1D textures emulated as 2D, query returns vec2 instead of scalar
const bool emulate_1d = ctx.profile.needs_1d_texture_emulation;
switch (info.type) {
case TextureType::Color1D:
return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[1]), zero, zero, mips());
if (emulate_1d) {
// Query as 2D, extract only X component for 1D size
const Id size_2d = query(ctx.U32[2]);
const Id width = ctx.OpCompositeExtract(ctx.U32[1], size_2d, 0);
return ctx.OpCompositeConstruct(ctx.U32[4], width, zero, zero, mips());
} else {
return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[1]), zero, zero, mips());
}
case TextureType::ColorArray1D:
case TextureType::Color2D:
case TextureType::ColorCube:
@ -600,6 +662,7 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& i
Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
const Id zero{ctx.f32_zero_value};
const Id sampler{Texture(ctx, info, index)};
return ctx.OpCompositeConstruct(ctx.F32[4], ctx.OpImageQueryLod(ctx.F32[2], sampler, coords),
@ -609,6 +672,7 @@ Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I
Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
Id derivatives, const IR::Value& offset, Id lod_clamp) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
const auto operands = info.num_derivatives == 3
? ImageOperands(ctx, info.has_lod_clamp != 0, derivatives,
ctx.Def(offset), {}, lod_clamp)
@ -621,6 +685,7 @@ Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
if (info.image_format == ImageFormat::Typeless && !ctx.profile.support_typeless_image_loads) {
LOG_WARNING(Shader_SPIRV, "Typeless image read not supported by host");
return ctx.ConstantNull(ctx.U32[4]);
@ -637,6 +702,7 @@ Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id co
void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
const auto [image, is_integer] = Image(ctx, index, info);
if (!is_integer) {
color = ctx.OpBitcast(ctx.F32[4], color);

View file

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

View file

@ -33,11 +33,24 @@ Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) {
const Id type{ctx.F32[1]};
const bool depth{desc.is_depth};
const bool ms{desc.is_multisample};
// Mobile GPUs lack Sampled1D SPIR-V capability - emulate 1D as 2D with array layer
const bool emulate_1d = ctx.profile.needs_1d_texture_emulation;
// Debug log for 1D emulation
if (desc.type == TextureType::Color1D || desc.type == TextureType::ColorArray1D) {
LOG_WARNING(Shader_SPIRV, "ImageType(texture): Creating {} texture, emulate_1d={}",
desc.type == TextureType::Color1D ? "Color1D" : "ColorArray1D",
emulate_1d);
}
switch (desc.type) {
case TextureType::Color1D:
return ctx.TypeImage(type, spv::Dim::Dim1D, depth, false, false, 1, format);
return emulate_1d ? ctx.TypeImage(type, spv::Dim::Dim2D, depth, false, false, 1, format)
: ctx.TypeImage(type, spv::Dim::Dim1D, depth, false, false, 1, format);
case TextureType::ColorArray1D:
return ctx.TypeImage(type, spv::Dim::Dim1D, depth, true, false, 1, format);
return emulate_1d ? ctx.TypeImage(type, spv::Dim::Dim2D, depth, true, false, 1, format)
: ctx.TypeImage(type, spv::Dim::Dim1D, depth, true, false, 1, format);
case TextureType::Color2D:
case TextureType::Color2DRect:
return ctx.TypeImage(type, spv::Dim::Dim2D, depth, false, ms, 1, format);
@ -79,11 +92,22 @@ spv::ImageFormat GetImageFormat(ImageFormat format) {
Id ImageType(EmitContext& ctx, const ImageDescriptor& desc, Id sampled_type) {
const spv::ImageFormat format{GetImageFormat(desc.format)};
const bool emulate_1d = ctx.profile.needs_1d_texture_emulation;
// Debug log for 1D emulation
if (desc.type == TextureType::Color1D || desc.type == TextureType::ColorArray1D) {
LOG_WARNING(Shader_SPIRV, "ImageType: Creating {} image, emulate_1d={}",
desc.type == TextureType::Color1D ? "Color1D" : "ColorArray1D",
emulate_1d);
}
switch (desc.type) {
case TextureType::Color1D:
return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, false, false, 2, format);
return emulate_1d ? ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, false, false, 2, format)
: ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, false, false, 2, format);
case TextureType::ColorArray1D:
return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, true, false, 2, format);
return emulate_1d ? ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, true, false, 2, format)
: ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, true, false, 2, format);
case TextureType::Color2D:
return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, false, false, 2, format);
case TextureType::ColorArray2D:
@ -1444,6 +1468,8 @@ void EmitContext::DefineInputs(const IR::Program& program) {
subgroup_mask_le = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLeMaskKHR);
subgroup_mask_gt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGtMaskKHR);
subgroup_mask_ge = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGeMaskKHR);
// Vulkan spec: Fragment shader Input variables with integer/float type must have Flat decoration
if (stage == Stage::Fragment) {
Decorate(subgroup_mask_eq, spv::Decoration::Flat);
Decorate(subgroup_mask_lt, spv::Decoration::Flat);

View file

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@ -293,6 +296,14 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) {
}
// This address is expected to either be a PackUint2x32, a IAdd64, or a CompositeConstructU32x2
IR::Inst* addr_inst{addr.InstRecursive()};
// Unwrap Identity ops introduced by lowerings (e.g., PackUint2x32 -> Identity)
while (addr_inst->GetOpcode() == IR::Opcode::Identity) {
const IR::Value id_arg{addr_inst->Arg(0)};
if (id_arg.IsImmediate()) {
return std::nullopt;
}
addr_inst = id_arg.InstRecursive();
}
s32 imm_offset{0};
if (addr_inst->GetOpcode() == IR::Opcode::IAdd64) {
// If it's an IAdd64, get the immediate offset it is applying and grab the address
@ -308,6 +319,14 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) {
return std::nullopt;
}
addr_inst = iadd_addr.InstRecursive();
// Unwrap Identity again if present after folding IAdd64
while (addr_inst->GetOpcode() == IR::Opcode::Identity) {
const IR::Value id_arg{addr_inst->Arg(0)};
if (id_arg.IsImmediate()) {
return std::nullopt;
}
addr_inst = id_arg.InstRecursive();
}
}
// With IAdd64 handled, now PackUint2x32 is expected
if (addr_inst->GetOpcode() == IR::Opcode::PackUint2x32) {
@ -317,6 +336,14 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) {
return std::nullopt;
}
addr_inst = vector.InstRecursive();
// Unwrap Identity that may replace PackUint2x32
while (addr_inst->GetOpcode() == IR::Opcode::Identity) {
const IR::Value id_arg{addr_inst->Arg(0)};
if (id_arg.IsImmediate()) {
return std::nullopt;
}
addr_inst = id_arg.InstRecursive();
}
}
// The vector is expected to be a CompositeConstructU32x2
if (addr_inst->GetOpcode() != IR::Opcode::CompositeConstructU32x2) {

View file

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@ -25,6 +28,14 @@ struct Profile {
bool support_fp16_signed_zero_nan_preserve{};
bool support_fp32_signed_zero_nan_preserve{};
bool support_fp64_signed_zero_nan_preserve{};
// User-forced float behavior overrides (Android Eden Veil/Extensions)
// When shader_float_controls_force_enable is true, these override shader-declared behavior
bool force_fp32_denorm_flush{}; // Force FTZ for all FP32 ops
bool force_fp32_denorm_preserve{}; // Force denorm preservation for all FP32 ops
bool force_fp32_rte_rounding{}; // Force Round-To-Even for all FP32 ops
bool force_fp32_signed_zero_inf_nan{}; // Force signed zero/inf/nan preservation
bool support_explicit_workgroup_layout{};
bool support_vote{};
bool support_viewport_index_layer_non_geometry{};
@ -38,6 +49,9 @@ struct Profile {
bool support_gl_nv_gpu_shader_5{};
bool support_gl_amd_gpu_shader_half_float{};
bool support_gl_texture_shadow_lod{};
bool support_gl_shader_atomic_float{};
bool support_gl_shader_atomic_fp16_vector{};
bool support_gl_shader_atomic_int64{};
bool support_gl_warp_intrinsics{};
bool support_gl_variable_aoffi{};
bool support_gl_sparse_textures{};
@ -81,6 +95,8 @@ struct Profile {
bool ignore_nan_fp_comparisons{};
/// Some drivers have broken support for OpVectorExtractDynamic on subgroup mask inputs
bool has_broken_spirv_subgroup_mask_vector_extract_dynamic{};
/// Mobile GPUs lack Sampled1D capability - need to emulate 1D textures as 2D with height=1
bool needs_1d_texture_emulation{};
u32 gl_max_compute_smem_size{};

View file

@ -133,7 +133,7 @@ enum class TexturePixelFormat {
ASTC_2D_8X6_SRGB,
ASTC_2D_6X5_UNORM,
ASTC_2D_6X5_SRGB,
E5B9G9R9_FLOAT,
D32_FLOAT,
D16_UNORM,
X8_D24_UNORM,

View file

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@ -42,7 +45,7 @@ constexpr std::array VIEW_CLASS_32_BITS{
PixelFormat::A2B10G10R10_UNORM, PixelFormat::R16G16_UINT, PixelFormat::R32_UINT,
PixelFormat::R16G16_SINT, PixelFormat::R32_SINT, PixelFormat::A8B8G8R8_UNORM,
PixelFormat::R16G16_UNORM, PixelFormat::A8B8G8R8_SNORM, PixelFormat::R16G16_SNORM,
PixelFormat::A8B8G8R8_SRGB, PixelFormat::E5B9G9R9_FLOAT, PixelFormat::B8G8R8A8_UNORM,
PixelFormat::A8B8G8R8_SRGB, PixelFormat::B8G8R8A8_UNORM,
PixelFormat::B8G8R8A8_SRGB, PixelFormat::A8B8G8R8_UINT, PixelFormat::A8B8G8R8_SINT,
PixelFormat::A2B10G10R10_UINT,
};
@ -52,7 +55,7 @@ constexpr std::array VIEW_CLASS_32_BITS_NO_BGR{
PixelFormat::A2B10G10R10_UNORM, PixelFormat::R16G16_UINT, PixelFormat::R32_UINT,
PixelFormat::R16G16_SINT, PixelFormat::R32_SINT, PixelFormat::A8B8G8R8_UNORM,
PixelFormat::R16G16_UNORM, PixelFormat::A8B8G8R8_SNORM, PixelFormat::R16G16_SNORM,
PixelFormat::A8B8G8R8_SRGB, PixelFormat::E5B9G9R9_FLOAT, PixelFormat::A8B8G8R8_UINT,
PixelFormat::A8B8G8R8_SRGB, PixelFormat::A8B8G8R8_UINT,
PixelFormat::A8B8G8R8_SINT, PixelFormat::A2B10G10R10_UINT,
};

View file

@ -0,0 +1,39 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later

#version 450

// Fragment shader for VK_QCOM_render_pass_shader_resolve.
// Averages every sample of an MSAA attachment into a single-sample output
// while still inside the render pass; the subpass must set
// VK_SUBPASS_DESCRIPTION_SHADER_RESOLVE_BIT_QCOM.
// The MSAA source is bound as a combined image sampler (not an input
// attachment) so multisample images from earlier rendering can be read.

layout(set = 0, binding = 0) uniform sampler2DMS msaa_texture;

layout(location = 0) out vec4 color_output;

layout(push_constant) uniform PushConstants {
    vec2 tex_scale;
    vec2 tex_offset;
} push_constants;

void main() {
    // Clamp the fragment position into the bounds of the MSAA image.
    const ivec2 size = textureSize(msaa_texture);
    const ivec2 texel = clamp(ivec2(gl_FragCoord.xy), ivec2(0), size - ivec2(1));

    // Box-filter resolve: average all samples. Adreno exposes at most 4x MSAA;
    // the count could later be driven by a push constant if needed.
    const int kSampleCount = 4;
    vec4 sum = vec4(0.0);
    for (int s = 0; s < kSampleCount; ++s) {
        sum += texelFetch(msaa_texture, texel, s);
    }
    color_output = sum / float(kSampleCount);
}

View file

@ -225,6 +225,9 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) {
has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float;
has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2;
has_draw_texture = GLAD_GL_NV_draw_texture;
has_shader_atomic_float = GLAD_GL_NV_shader_atomic_float;
has_shader_atomic_fp16_vector = GLAD_GL_NV_shader_atomic_fp16_vector;
has_shader_atomic_int64 = GLAD_GL_NV_shader_atomic_int64;
warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel;
need_fastmath_off = is_nvidia;
can_report_memory = GLAD_GL_NVX_gpu_memory_info;

View file

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
@ -151,6 +151,18 @@ public:
return has_draw_texture;
}
bool HasShaderAtomicFloat() const {
return has_shader_atomic_float;
}
bool HasShaderAtomicFp16Vector() const {
return has_shader_atomic_fp16_vector;
}
bool HasShaderAtomicInt64() const {
return has_shader_atomic_int64;
}
bool IsWarpSizePotentiallyLargerThanGuest() const {
return warp_size_potentially_larger_than_guest;
}
@ -228,6 +240,9 @@ private:
bool has_amd_shader_half_float{};
bool has_sparse_texture_2{};
bool has_draw_texture{};
bool has_shader_atomic_float{};
bool has_shader_atomic_fp16_vector{};
bool has_shader_atomic_int64{};
bool warp_size_potentially_larger_than_guest{};
bool need_fastmath_off{};
bool has_cbuf_ftou_bug{};

View file

@ -214,6 +214,9 @@ ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
.support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(),
.support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(),
.support_gl_texture_shadow_lod = device.HasTextureShadowLod(),
.support_gl_shader_atomic_float = device.HasShaderAtomicFloat(),
.support_gl_shader_atomic_fp16_vector = device.HasShaderAtomicFp16Vector(),
.support_gl_shader_atomic_int64 = device.HasShaderAtomicInt64(),
.support_gl_warp_intrinsics = false,
.support_gl_variable_aoffi = device.HasVariableAoffi(),
.support_gl_sparse_textures = device.HasSparseTexture2(),

View file

@ -100,6 +100,10 @@ public:
return true;
}
bool CanDownloadMSAA() const noexcept {
return true;
}
void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
void CopyImageMSAA(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);

View file

@ -195,7 +195,10 @@ struct FixedPipelineState {
union {
u32 raw1;
// EDS1 - Bit 0
BitField<0, 1, u32> extended_dynamic_state;
// EDS2 - Bits 1-3
BitField<1, 1, u32> extended_dynamic_state_2;
BitField<2, 1, u32> extended_dynamic_state_2_logic_op;
BitField<3, 1, u32> extended_dynamic_state_3_blend;
@ -209,9 +212,32 @@ struct FixedPipelineState {
BitField<14, 1, u32> tessellation_clockwise;
BitField<15, 5, u32> patch_control_points_minus_one;
// Topology and MSAA - Bits 24-31
BitField<24, 4, Maxwell::PrimitiveTopology> topology;
BitField<28, 4, Tegra::Texture::MsaaMode> msaa_mode;
};
union {
u32 raw1_eds3_extended;
// EDS3 Additional Features - Bits 0-15
BitField<0, 1, u32> extended_dynamic_state_3_depth_clamp;
BitField<1, 1, u32> extended_dynamic_state_3_logic_op_enable;
BitField<2, 1, u32> extended_dynamic_state_3_tessellation_domain_origin;
BitField<3, 1, u32> extended_dynamic_state_3_polygon_mode;
BitField<4, 1, u32> extended_dynamic_state_3_rasterization_samples;
BitField<5, 1, u32> extended_dynamic_state_3_sample_mask;
BitField<6, 1, u32> extended_dynamic_state_3_alpha_to_coverage_enable;
BitField<7, 1, u32> extended_dynamic_state_3_alpha_to_one_enable;
BitField<8, 1, u32> extended_dynamic_state_3_depth_clip_enable;
BitField<9, 1, u32> extended_dynamic_state_3_depth_clip_negative_one_to_one;
BitField<10, 1, u32> extended_dynamic_state_3_line_rasterization_mode;
BitField<11, 1, u32> extended_dynamic_state_3_line_stipple_enable;
BitField<12, 1, u32> extended_dynamic_state_3_provoking_vertex_mode;
BitField<13, 1, u32> extended_dynamic_state_3_conservative_rasterization_mode;
BitField<14, 1, u32> extended_dynamic_state_3_sample_locations_enable;
BitField<15, 1, u32> extended_dynamic_state_3_rasterization_stream;
};
union {
u32 raw2;
BitField<1, 3, u32> alpha_test_func;
@ -226,12 +252,15 @@ struct FixedPipelineState {
BitField<16, 1, u32> alpha_to_one_enabled;
BitField<17, 3, Tegra::Engines::Maxwell3D::EngineHint> app_stage;
};
std::array<u8, Maxwell::NumRenderTargets> color_formats;
u32 alpha_test_ref;
u32 point_size;
std::array<u8, Maxwell::NumRenderTargets> color_formats;
std::array<u16, Maxwell::NumViewports> viewport_swizzles;
u32 pad_align_u64;
union {
u64 attribute_types; // Used with VK_EXT_vertex_input_dynamic_state
u64 enabled_divisors;

View file

@ -27,8 +27,13 @@ public:
DescriptorLayoutBuilder(const Device& device_) : device{&device_} {}
bool CanUsePushDescriptor() const noexcept {
    // Push descriptors are usable only when the VK_KHR_push_descriptor
    // extension is available and the descriptor count fits within the
    // driver-reported per-set limit. The previous body contained an
    // unconditional return followed by unreachable duplicate checks;
    // keep the single guard-clause implementation.
    if (!device->IsKhrPushDescriptorSupported()) {
        return false;
    }
    if (num_descriptors > device->MaxPushDescriptors()) {
        return false;
    }
    return true;
}
// TODO(crueter): utilize layout binding flags

View file

@ -6,7 +6,7 @@
#include "common/assert.h"
#include <ranges>
#include <vulkan/vulkan_core.h>
#include "video_core/vulkan_common/vulkan.h"
#include "video_core/renderer_vulkan/present/util.h"
namespace Vulkan {

View file

@ -848,13 +848,38 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
.pAttachments = cb_attachments.data(),
.blendConstants = {}
};
static_vector<VkDynamicState, 34> dynamic_states{
VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR,
VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS,
VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE,
VK_DYNAMIC_STATE_LINE_WIDTH,
};
// Base Vulkan Dynamic States - Always active (independent of EDS)
// Granular fallback: Each state added only if device supports it (protection against broken drivers)
static_vector<VkDynamicState, 34> dynamic_states;
if (device.SupportsDynamicViewport()) {
dynamic_states.push_back(VK_DYNAMIC_STATE_VIEWPORT);
}
if (device.SupportsDynamicScissor()) {
dynamic_states.push_back(VK_DYNAMIC_STATE_SCISSOR);
}
if (device.SupportsDynamicLineWidth()) {
dynamic_states.push_back(VK_DYNAMIC_STATE_LINE_WIDTH);
}
if (device.SupportsDynamicDepthBias()) {
dynamic_states.push_back(VK_DYNAMIC_STATE_DEPTH_BIAS);
}
if (device.SupportsDynamicBlendConstants()) {
dynamic_states.push_back(VK_DYNAMIC_STATE_BLEND_CONSTANTS);
}
if (device.SupportsDynamicDepthBounds()) {
dynamic_states.push_back(VK_DYNAMIC_STATE_DEPTH_BOUNDS);
}
if (device.SupportsDynamicStencilCompareMask()) {
dynamic_states.push_back(VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK);
}
if (device.SupportsDynamicStencilWriteMask()) {
dynamic_states.push_back(VK_DYNAMIC_STATE_STENCIL_WRITE_MASK);
}
if (device.SupportsDynamicStencilReference()) {
dynamic_states.push_back(VK_DYNAMIC_STATE_STENCIL_REFERENCE);
}
// EDS1 - Extended Dynamic State
if (key.state.extended_dynamic_state) {
static constexpr std::array extended{
VK_DYNAMIC_STATE_CULL_MODE_EXT,

View file

@ -397,6 +397,17 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE,
.support_fp64_signed_zero_nan_preserve =
float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE,
// Switch/Maxwell native float behavior - ONLY for Turnip Mesa (Stock Qualcomm broken)
// Stock Adreno drivers have broken float controls disabled in vulkan_device.cpp
.force_fp32_denorm_flush = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY &&
device.IsKhrShaderFloatControlsSupported(), // false on Stock, true on Turnip
.force_fp32_denorm_preserve = false, // FTZ dominates
.force_fp32_rte_rounding = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY &&
device.IsKhrShaderFloatControlsSupported(), // false on Stock, true on Turnip
.force_fp32_signed_zero_inf_nan = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY &&
device.IsKhrShaderFloatControlsSupported(), // false on Stock, true on Turnip
.support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(),
.support_vote = device.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_VOTE_BIT),
.support_viewport_index_layer_non_geometry =
@ -427,10 +438,17 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
.has_broken_spirv_position_input = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY,
.has_broken_unsigned_image_offsets = false,
.has_broken_signed_operations = false,
.has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY,
.has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY ||
driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY,
.ignore_nan_fp_comparisons = false,
.has_broken_spirv_subgroup_mask_vector_extract_dynamic =
driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY,
.needs_1d_texture_emulation =
driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_MESA_TURNIP ||
driver_id == VK_DRIVER_ID_ARM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_BROADCOM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_IMAGINATION_PROPRIETARY,
.has_broken_robust =
device.IsNvidia() && device.GetNvidiaArch() <= NvidiaArchitecture::Arch_Pascal,
.min_ssbo_alignment = device.GetStorageBufferAlignment(),

View file

@ -116,6 +116,10 @@ public:
void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
const VideoCore::DiskResourceLoadCallback& callback);
[[nodiscard]] const DynamicFeatures& GetDynamicFeatures() const noexcept {
return dynamic_features;
}
private:
[[nodiscard]] GraphicsPipeline* CurrentGraphicsPipelineSlowPath();

View file

@ -8,6 +8,7 @@
#include <boost/container/static_vector.hpp>
#include "common/logging.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
#include "video_core/surface.h"
@ -19,6 +20,23 @@ namespace {
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::SurfaceType;
// Check if the driver uses tile-based deferred rendering (TBDR) architecture
// These GPUs benefit from optimized load/store operations to keep data on-chip
//
// TBDR GPUs supported in Eden:
// - Qualcomm Adreno (Snapdragon): Most Android flagship/midrange devices
// - ARM Mali: Android devices (Samsung Exynos, MediaTek, etc.)
// - Imagination PowerVR: Older iOS devices, some Android tablets
// - Samsung Xclipse: Galaxy S22+ (AMD RDNA2-based, but uses TBDR mode)
// - Broadcom VideoCore: Raspberry Pi
[[nodiscard]] constexpr bool IsTBDRGPU(VkDriverId driver_id) {
    // Dispatch on the driver id: these vendors ship tile-based deferred
    // renderers, so render passes can use on-chip load/store hints.
    switch (driver_id) {
    case VK_DRIVER_ID_QUALCOMM_PROPRIETARY:
    case VK_DRIVER_ID_ARM_PROPRIETARY:
    case VK_DRIVER_ID_IMAGINATION_PROPRIETARY:
    case VK_DRIVER_ID_SAMSUNG_PROPRIETARY:
    case VK_DRIVER_ID_BROADCOM_PROPRIETARY:
        return true;
    default:
        return false;
    }
}
constexpr SurfaceType GetSurfaceType(PixelFormat format) {
switch (format) {
// Depth formats
@ -44,23 +62,57 @@ using VideoCore::Surface::SurfaceType;
}
VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat format,
VkSampleCountFlagBits samples) {
VkSampleCountFlagBits samples,
bool tbdr_will_clear,
bool tbdr_discard_after,
bool tbdr_read_only = false) {
using MaxwellToVK::SurfaceFormat;
const SurfaceType surface_type = GetSurfaceType(format);
const bool has_stencil = surface_type == SurfaceType::DepthStencil ||
surface_type == SurfaceType::Stencil;
// TBDR optimization: Apply hints only on tile-based GPUs
// Desktop GPUs (NVIDIA/AMD/Intel) ignore these hints and use standard behavior
const bool is_tbdr = IsTBDRGPU(device.GetDriverID());
// On TBDR: Use DONT_CARE if clear is guaranteed (avoids loading from main memory)
// On Desktop: Always LOAD to preserve existing content (safer default)
VkAttachmentLoadOp load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
if (is_tbdr && tbdr_will_clear) {
load_op = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
}
// On TBDR: Use DONT_CARE if content won't be read (avoids storing to main memory)
// On Desktop: Always STORE (safer default)
// VK_QCOM_render_pass_store_ops: Use NONE_QCOM for read-only attachments (preserves outside render area)
VkAttachmentStoreOp store_op = VK_ATTACHMENT_STORE_OP_STORE;
if (is_tbdr && tbdr_discard_after) {
store_op = VK_ATTACHMENT_STORE_OP_DONT_CARE;
} else if (is_tbdr && tbdr_read_only && device.IsQcomRenderPassStoreOpsSupported()) {
store_op = static_cast<VkAttachmentStoreOp>(1000301000); // VK_ATTACHMENT_STORE_OP_NONE_QCOM
}
// Stencil operations follow same logic
VkAttachmentLoadOp stencil_load_op = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
VkAttachmentStoreOp stencil_store_op = VK_ATTACHMENT_STORE_OP_DONT_CARE;
if (has_stencil && tbdr_read_only && device.IsQcomRenderPassStoreOpsSupported()) {
stencil_store_op = static_cast<VkAttachmentStoreOp>(1000301000); // VK_ATTACHMENT_STORE_OP_NONE_QCOM
} else if (has_stencil) {
stencil_load_op = (is_tbdr && tbdr_will_clear) ? VK_ATTACHMENT_LOAD_OP_DONT_CARE
: VK_ATTACHMENT_LOAD_OP_LOAD;
stencil_store_op = (is_tbdr && tbdr_discard_after) ? VK_ATTACHMENT_STORE_OP_DONT_CARE
: VK_ATTACHMENT_STORE_OP_STORE;
}
return {
.flags = {},
.format = SurfaceFormat(device, FormatType::Optimal, true, format).format,
.samples = samples,
.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
.stencilLoadOp = has_stencil ? VK_ATTACHMENT_LOAD_OP_LOAD
: VK_ATTACHMENT_LOAD_OP_DONT_CARE,
.stencilStoreOp = has_stencil ? VK_ATTACHMENT_STORE_OP_STORE
: VK_ATTACHMENT_STORE_OP_DONT_CARE,
.loadOp = load_op,
.storeOp = store_op,
.stencilLoadOp = stencil_load_op,
.stencilStoreOp = stencil_store_op,
.initialLayout = VK_IMAGE_LAYOUT_GENERAL,
.finalLayout = VK_IMAGE_LAYOUT_GENERAL,
};
@ -75,6 +127,13 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
if (!is_new) {
return *pair->second;
}
const bool is_tbdr = IsTBDRGPU(device->GetDriverID());
if (is_tbdr && (key.tbdr_will_clear || key.tbdr_discard_after)) {
LOG_DEBUG(Render_Vulkan, "Creating TBDR-optimized render pass (driver={}, clear={}, discard={})",
static_cast<u32>(device->GetDriverID()), key.tbdr_will_clear, key.tbdr_discard_after);
}
boost::container::static_vector<VkAttachmentDescription, 9> descriptions;
std::array<VkAttachmentReference, 8> references{};
u32 num_attachments{};
@ -87,7 +146,8 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
.layout = VK_IMAGE_LAYOUT_GENERAL,
};
if (is_valid) {
descriptions.push_back(AttachmentDescription(*device, format, key.samples));
descriptions.push_back(AttachmentDescription(*device, format, key.samples,
key.tbdr_will_clear, key.tbdr_discard_after));
num_attachments = static_cast<u32>(index + 1);
++num_colors;
}
@ -99,10 +159,19 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
.attachment = num_colors,
.layout = VK_IMAGE_LAYOUT_GENERAL,
};
descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples));
descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples,
key.tbdr_will_clear, key.tbdr_discard_after, key.tbdr_read_only));
}
VkSubpassDescriptionFlags subpass_flags = 0;
if (key.qcom_shader_resolve) {
// VK_QCOM_render_pass_shader_resolve: enables custom shader resolve in fragment shader
// This flag allows using a programmable fragment shader for MSAA resolve instead of
// fixed-function hardware resolve, enabling better quality and HDR format support
subpass_flags |= 0x00000004; // VK_SUBPASS_DESCRIPTION_SHADER_RESOLVE_BIT_QCOM
}
const VkSubpassDescription subpass{
.flags = 0,
.flags = subpass_flags,
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
.inputAttachmentCount = 0,
.pInputAttachments = nullptr,

View file

@ -20,6 +20,15 @@ struct RenderPassKey {
std::array<VideoCore::Surface::PixelFormat, 8> color_formats;
VideoCore::Surface::PixelFormat depth_format;
VkSampleCountFlagBits samples;
// TBDR optimization hints - only affect tile-based GPUs (Qualcomm, ARM, Imagination)
// These flags indicate the expected usage pattern to optimize load/store operations
bool tbdr_will_clear{false}; // Attachment will be cleared with vkCmdClearAttachments
bool tbdr_discard_after{false}; // Attachment won't be read after render pass
bool tbdr_read_only{false}; // Attachment is read-only (input attachment, depth test without writes)
// VK_QCOM_render_pass_shader_resolve support
bool qcom_shader_resolve{false}; // Use shader resolve instead of fixed-function (last subpass)
};
} // namespace Vulkan
@ -30,6 +39,8 @@ struct hash<Vulkan::RenderPassKey> {
[[nodiscard]] size_t operator()(const Vulkan::RenderPassKey& key) const noexcept {
size_t value = static_cast<size_t>(key.depth_format) << 48;
value ^= static_cast<size_t>(key.samples) << 52;
value ^= (static_cast<size_t>(key.tbdr_will_clear) << 56);
value ^= (static_cast<size_t>(key.tbdr_discard_after) << 57);
for (size_t i = 0; i < key.color_formats.size(); ++i) {
value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6);
}

View file

@ -153,6 +153,10 @@ void Swapchain::Create(
resource_ticks.clear();
resource_ticks.resize(image_count);
// Initialize incremental-present probe flags for this swapchain.
incremental_present_usable = device.IsKhrIncrementalPresentSupported();
incremental_present_probed = false;
}
bool Swapchain::AcquireNextImage() {
@ -213,7 +217,13 @@ bool Swapchain::AcquireNextImage() {
void Swapchain::Present(VkSemaphore render_semaphore) {
const auto present_queue{device.GetPresentQueue()};
const VkPresentInfoKHR present_info{
// If the device advertises VK_KHR_incremental_present, we attempt a one-time probe
// on the first present to validate the driver/compositor accepts present-region info.
VkPresentRegionsKHR present_regions{};
VkPresentRegionKHR region{};
VkRectLayerKHR layer{};
VkPresentInfoKHR present_info{
.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
.pNext = nullptr,
.waitSemaphoreCount = render_semaphore ? 1U : 0U,
@ -223,6 +233,20 @@ void Swapchain::Present(VkSemaphore render_semaphore) {
.pImageIndices = &image_index,
.pResults = nullptr,
};
if (incremental_present_usable && !incremental_present_probed) {
// Build a minimal present-region describing a single 1x1 dirty rect at (0,0).
layer.offset = {0, 0};
layer.extent = {1, 1};
region.rectangleCount = 1;
region.pRectangles = &layer;
present_regions.sType = VK_STRUCTURE_TYPE_PRESENT_REGIONS_KHR;
present_regions.pNext = nullptr;
present_regions.swapchainCount = 1;
present_regions.pRegions = &region;
present_info.pNext = &present_regions;
}
std::scoped_lock lock{scheduler.submit_mutex};
switch (const VkResult result = present_queue.Present(present_info)) {
case VK_SUCCESS:
@ -238,8 +262,18 @@ void Swapchain::Present(VkSemaphore render_semaphore) {
break;
default:
LOG_CRITICAL(Render_Vulkan, "Failed to present with error {}", string_VkResult(result));
// If the first present with incremental-present pNext failed, disable future use.
if (incremental_present_usable && !incremental_present_probed) {
incremental_present_usable = false;
LOG_WARNING(Render_Vulkan, "Disabling VK_KHR_incremental_present for this swapchain due to present failure: {}", string_VkResult(result));
}
break;
}
if (incremental_present_usable && !incremental_present_probed) {
// Mark probe as completed if we reached here (success or handled failure above).
incremental_present_probed = true;
LOG_INFO(Render_Vulkan, "VK_KHR_incremental_present probe completed: usable={}", incremental_present_usable);
}
++frame_index;
if (frame_index >= image_count) {
frame_index = 0;

View file

@ -147,6 +147,8 @@ private:
bool is_outdated{};
bool is_suboptimal{};
bool incremental_present_usable{};
bool incremental_present_probed{};
};
} // namespace Vulkan

View file

@ -69,10 +69,20 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
}
}
[[nodiscard]] VkImageType ConvertImageType(const ImageType type) {
[[nodiscard]] VkImageType ConvertImageType(const ImageType type, const Device& device) {
switch (type) {
case ImageType::e1D:
return VK_IMAGE_TYPE_1D;
// Mobile Vulkan (Adreno, Mali, PowerVR, IMG) lacks Sampled1D SPIR-V capability
// Emulate as 2D texture with height=1 on mobile, use native 1D on desktop
{
const auto driver_id = device.GetDriverID();
const bool is_mobile = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_MESA_TURNIP ||
driver_id == VK_DRIVER_ID_ARM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_BROADCOM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_IMAGINATION_PROPRIETARY;
return is_mobile ? VK_IMAGE_TYPE_2D : VK_IMAGE_TYPE_1D;
}
case ImageType::e2D:
case ImageType::Linear:
return VK_IMAGE_TYPE_2D;
@ -144,7 +154,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = nullptr,
.flags = flags,
.imageType = ConvertImageType(info.type),
.imageType = ConvertImageType(info.type, device),
.format = format_info.format,
.extent{
.width = info.size.width >> samples_x,
@ -163,6 +173,40 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
};
}
/// Emergency fallback: degrade MSAA to non-MSAA for HDR formats when no resolve support exists.
/// Returns the (possibly adjusted) image info; only num_samples may change.
[[nodiscard]] ImageInfo AdjustMSAAForHDRFormats(const Device& device, ImageInfo info) {
    // Single-sampled images never need adjustment.
    if (info.num_samples <= 1) {
        return info;
    }

    const VkFormat surface_format =
        MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, false, info.format).format;

    // Only the packed 11/11/10 float HDR format is affected.
    if (surface_format != VK_FORMAT_B10G11R11_UFLOAT_PACK32) {
        return info;
    }

    // Qualcomm can resolve HDR MSAA in-pass via VK_QCOM_render_pass_shader_resolve;
    // every other vendor can resolve through shaderStorageImageMultisample.
    const bool qcom_resolve = device.GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY &&
                              device.IsQcomRenderPassShaderResolveSupported();
    if (qcom_resolve || device.IsStorageImageMultisampleSupported()) {
        return info;
    }

    // No resolve path available: fall back to single sampling to avoid corruption.
    LOG_WARNING(Render_Vulkan, "HDR format {} with MSAA not supported, degrading to 1x samples",
                surface_format);
    info.num_samples = 1;
    return info;
}
[[nodiscard]] vk::Image MakeImage(const Device& device, const MemoryAllocator& allocator,
const ImageInfo& info, std::span<const VkFormat> view_formats) {
if (info.type == ImageType::Buffer) {
@ -299,10 +343,17 @@ void SanitizeDepthStencilSwizzle(std::array<SwizzleSource, 4>& swizzle,
SwizzleSource::Zero);
}
[[nodiscard]] VkImageViewType ImageViewType(Shader::TextureType type) {
[[nodiscard]] VkImageViewType ImageViewType(Shader::TextureType type, const Device& device) {
const auto driver_id = device.GetDriverID();
const bool is_mobile = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_MESA_TURNIP ||
driver_id == VK_DRIVER_ID_ARM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_BROADCOM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_IMAGINATION_PROPRIETARY;
switch (type) {
case Shader::TextureType::Color1D:
return VK_IMAGE_VIEW_TYPE_1D;
// Emulate 1D as 2D with height=1 on mobile (no Sampled1D capability)
return is_mobile ? VK_IMAGE_VIEW_TYPE_2D : VK_IMAGE_VIEW_TYPE_1D;
case Shader::TextureType::Color2D:
case Shader::TextureType::Color2DRect:
return VK_IMAGE_VIEW_TYPE_2D;
@ -311,7 +362,8 @@ void SanitizeDepthStencilSwizzle(std::array<SwizzleSource, 4>& swizzle,
case Shader::TextureType::Color3D:
return VK_IMAGE_VIEW_TYPE_3D;
case Shader::TextureType::ColorArray1D:
return VK_IMAGE_VIEW_TYPE_1D_ARRAY;
// Emulate 1D array as 2D array with height=1 on mobile
return is_mobile ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_1D_ARRAY;
case Shader::TextureType::ColorArray2D:
return VK_IMAGE_VIEW_TYPE_2D_ARRAY;
case Shader::TextureType::ColorArrayCube:
@ -324,10 +376,18 @@ void SanitizeDepthStencilSwizzle(std::array<SwizzleSource, 4>& swizzle,
return VK_IMAGE_VIEW_TYPE_2D;
}
[[nodiscard]] VkImageViewType ImageViewType(VideoCommon::ImageViewType type) {
[[nodiscard]] VkImageViewType ImageViewType(VideoCommon::ImageViewType type, const Device& device) {
const auto driver_id = device.GetDriverID();
const bool is_mobile = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_MESA_TURNIP ||
driver_id == VK_DRIVER_ID_ARM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_BROADCOM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_IMAGINATION_PROPRIETARY;
switch (type) {
case VideoCommon::ImageViewType::e1D:
return VK_IMAGE_VIEW_TYPE_1D;
// Emulate 1D as 2D with height=1 on mobile (no Sampled1D capability)
return is_mobile ? VK_IMAGE_VIEW_TYPE_2D : VK_IMAGE_VIEW_TYPE_1D;
case VideoCommon::ImageViewType::e2D:
case VideoCommon::ImageViewType::Rect:
return VK_IMAGE_VIEW_TYPE_2D;
@ -336,7 +396,8 @@ void SanitizeDepthStencilSwizzle(std::array<SwizzleSource, 4>& swizzle,
case VideoCommon::ImageViewType::e3D:
return VK_IMAGE_VIEW_TYPE_3D;
case VideoCommon::ImageViewType::e1DArray:
return VK_IMAGE_VIEW_TYPE_1D_ARRAY;
// Emulate 1D array as 2D array with height=1 on mobile
return is_mobile ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_1D_ARRAY;
case VideoCommon::ImageViewType::e2DArray:
return VK_IMAGE_VIEW_TYPE_2D_ARRAY;
case VideoCommon::ImageViewType::CubeArray:
@ -884,6 +945,9 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, Scheduler& sched
astc_decoder_pass.emplace(device, scheduler, descriptor_pool, staging_buffer_pool,
compute_pass_descriptor_queue, memory_allocator);
}
// MSAA copy support via compute shader (only for non-Qualcomm with shaderStorageImageMultisample)
// Qualcomm uses VK_QCOM_render_pass_shader_resolve (fragment shader in render pass)
if (device.IsStorageImageMultisampleSupported()) {
msaa_copy_pass.emplace(device, scheduler, descriptor_pool, staging_buffer_pool, compute_pass_descriptor_queue);
}
@ -1398,7 +1462,6 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im
case PixelFormat::ASTC_2D_8X6_SRGB:
case PixelFormat::ASTC_2D_6X5_UNORM:
case PixelFormat::ASTC_2D_6X5_SRGB:
case PixelFormat::E5B9G9R9_FLOAT:
case PixelFormat::D32_FLOAT:
case PixelFormat::D16_UNORM:
case PixelFormat::X8_D24_UNORM:
@ -1562,6 +1625,23 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
void TextureCacheRuntime::CopyImageMSAA(Image& dst, Image& src,
std::span<const VideoCommon::ImageCopy> copies) {
const bool msaa_to_non_msaa = src.info.num_samples > 1 && dst.info.num_samples == 1;
// Use VK_QCOM_render_pass_shader_resolve for HDR formats on Qualcomm
// This is more efficient than compute shader (stays on-chip in TBDR)
const bool is_hdr_format = src.info.format == PixelFormat::B10G11R11_FLOAT ||
dst.info.format == PixelFormat::B10G11R11_FLOAT;
const bool use_qcom_resolve = msaa_to_non_msaa &&
device.IsQcomRenderPassShaderResolveSupported() &&
is_hdr_format &&
copies.size() == 1; // QCOM resolve works best with single full copy
if (use_qcom_resolve) {
// Create temporary framebuffer with resolve target
// TODO Camille: Implement QCOM shader resolve path with proper framebuffer setup
// For now, fall through to standard path
LOG_DEBUG(Render_Vulkan, "QCOM shader resolve opportunity detected but not yet implemented");
}
if (msaa_copy_pass) {
return msaa_copy_pass->CopyImage(dst, src, copies, msaa_to_non_msaa);
}
@ -1589,10 +1669,20 @@ void TextureCacheRuntime::TickFrame() {}
Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_,
VAddr cpu_addr_)
: VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime_.scheduler},
runtime{&runtime_}, original_image(MakeImage(runtime_.device, runtime_.memory_allocator, info,
runtime->ViewFormats(info.format))),
aspect_mask(ImageAspectMask(info.format)) {
if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) {
runtime{&runtime_} {
// Adjust MSAA for HDR formats if driver doesn't support shaderStorageImageMultisample
// This prevents texture corruption by degrading to non-MSAA when msaa_copy_pass would fail
const ImageInfo adjusted_info = AdjustMSAAForHDRFormats(runtime_.device, info_);
// Update our stored info with adjusted values (may have num_samples=1 now)
info = adjusted_info;
// Create image with adjusted info
original_image = MakeImage(runtime_.device, runtime_.memory_allocator, adjusted_info,
runtime->ViewFormats(adjusted_info.format));
aspect_mask = ImageAspectMask(adjusted_info.format);
if (IsPixelFormatASTC(adjusted_info.format) && !runtime->device.IsOptimalAstcSupported()) {
switch (Settings::values.accelerate_astc.GetValue()) {
case Settings::AstcDecodeMode::Gpu:
if (Settings::values.astc_recompression.GetValue() ==
@ -2146,29 +2236,82 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
const VkImageUsageFlags requested_view_usage = ImageUsageFlags(format_info, format);
const VkImageUsageFlags image_usage = image.UsageFlags();
const VkImageUsageFlags clamped_view_usage = requested_view_usage & image_usage;
VkFormat view_format = format_info.format;
// Format reinterpretation for games with incorrect format usage
// Only apply to sampled images (not render targets)
// NOTE: Storage images use separate views created via StorageView()/MakeView(),
// so reinterpretation here only affects sampled texture reads, not storage writes
const auto reinterpretation_mode = Settings::values.format_reinterpretation.GetValue();
if (reinterpretation_mode != Settings::FormatReinterpretation::Disabled &&
!info.IsRenderTarget() &&
(ImageUsageFlags(format_info, format) & VK_IMAGE_USAGE_SAMPLED_BIT)) {
switch (reinterpretation_mode) {
case Settings::FormatReinterpretation::R32UintToR32Sfloat:
if (view_format == VK_FORMAT_R32_UINT) {
view_format = VK_FORMAT_R32_SFLOAT;
LOG_DEBUG(Render_Vulkan, "Reinterpreting R32_UINT -> R32_SFLOAT for sampled image");
}
break;
case Settings::FormatReinterpretation::R32SintToR32Uint:
if (view_format == VK_FORMAT_R32_SINT) {
view_format = VK_FORMAT_R32_UINT;
LOG_DEBUG(Render_Vulkan, "Reinterpreting R32_SINT -> R32_UINT for sampled image");
}
break;
case Settings::FormatReinterpretation::R32SfloatToR32Sint:
if (view_format == VK_FORMAT_R32_SFLOAT) {
view_format = VK_FORMAT_R32_SINT;
LOG_DEBUG(Render_Vulkan, "Reinterpreting R32_SFLOAT -> R32_SINT for sampled image");
}
break;
default:
break;
}
}
if (ImageUsageFlags(format_info, format) != image.UsageFlags()) {
LOG_WARNING(Render_Vulkan,
"Image view format {} has different usage flags than image format {}", format,
image.info.format);
}
const VkImageViewUsageCreateInfo image_view_usage{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
.pNext = nullptr,
.usage = clamped_view_usage,
};
// Vulkan spec: STORAGE_IMAGE and INPUT_ATTACHMENT descriptors MUST use identity swizzle
// Using non-identity swizzle causes validation error and undefined behavior
// IMPORTANT: Only force identity swizzle for render targets OR input attachments.
// For sampled textures (even if they have storage capability), use the shader-specified
// swizzle to avoid breaking UE4 lighting and other games. The actual storage writes happen
// through StorageView() which uses MakeView() with hardcoded identity swizzle, so that
// path is already spec-compliant.
const bool is_input_attachment =
(image_view_usage.usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) != 0;
const bool requires_identity_swizzle = Settings::values.force_identity_swizzle.GetValue() &&
(info.IsRenderTarget() || is_input_attachment);
const VkImageViewCreateInfo create_info{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = &image_view_usage,
.flags = 0,
.image = image.Handle(),
.viewType = VkImageViewType{},
.format = format_info.format,
.format = view_format,
.components{
.r = ComponentSwizzle(swizzle[0]),
.g = ComponentSwizzle(swizzle[1]),
.b = ComponentSwizzle(swizzle[2]),
.a = ComponentSwizzle(swizzle[3]),
.r = requires_identity_swizzle ? VK_COMPONENT_SWIZZLE_IDENTITY : ComponentSwizzle(swizzle[0]),
.g = requires_identity_swizzle ? VK_COMPONENT_SWIZZLE_IDENTITY : ComponentSwizzle(swizzle[1]),
.b = requires_identity_swizzle ? VK_COMPONENT_SWIZZLE_IDENTITY : ComponentSwizzle(swizzle[2]),
.a = requires_identity_swizzle ? VK_COMPONENT_SWIZZLE_IDENTITY : ComponentSwizzle(swizzle[3]),
},
.subresourceRange = MakeSubresourceRange(aspect_mask, info.range),
};
const auto create = [&](TextureType tex_type, std::optional<u32> num_layers) {
VkImageViewCreateInfo ci{create_info};
ci.viewType = ImageViewType(tex_type);
ci.viewType = ImageViewType(tex_type, *device);
if (num_layers) {
ci.subresourceRange.layerCount = *num_layers;
}
@ -2301,7 +2444,7 @@ vk::ImageView ImageView::MakeView(VkFormat vk_format, VkImageAspectFlags aspect_
.pNext = nullptr,
.flags = 0,
.image = image_handle,
.viewType = ImageViewType(type),
.viewType = ImageViewType(type, *device),
.format = vk_format,
.components{
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
@ -2322,11 +2465,26 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t
has_format_undefined && runtime.device.IsCustomBorderColorsSupported();
const auto color = tsc.BorderColor();
bool arbitrary_borders = true; //TODO: cam help
// VK_EXT_custom_border_color has two features:
// - customBorderColors: Enables VK_BORDER_COLOR_*_CUSTOM_EXT, requires format OR customBorderColorWithoutFormat
// - customBorderColorWithoutFormat: Allows VK_FORMAT_UNDEFINED (format-agnostic custom borders)
//
// Configuration logic:
// 1. If BOTH features available: Use VK_BORDER_COLOR_FLOAT_CUSTOM_EXT + VK_FORMAT_UNDEFINED (optimal)
// 2. If only customBorderColors: Use VK_BORDER_COLOR_FLOAT_CUSTOM_EXT + specific format (spec compliant)
// 3. If only customBorderColorWithoutFormat: Shouldn't happen per spec, but handle as case 2
// 4. If neither: Use standard border colors (fallback)
const bool has_custom_colors = device.HasCustomBorderColorFeature();
const bool has_without_format = device.HasCustomBorderColorWithoutFormatFeature();
const bool use_custom_border = arbitrary_borders && has_custom_colors;
const VkSamplerCustomBorderColorCreateInfoEXT border_ci{
.sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT,
.pNext = nullptr,
.customBorderColor = std::bit_cast<VkClearColorValue>(color),
.format = VK_FORMAT_UNDEFINED,
.format = has_without_format ? VK_FORMAT_UNDEFINED : VK_FORMAT_R8G8B8A8_UNORM,
};
const void* pnext = nullptr;
if (has_custom_border_colors) {
@ -2455,6 +2613,26 @@ void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime,
}
renderpass_key.samples = samples;
// Enable VK_QCOM_render_pass_shader_resolve for HDR+MSAA on Qualcomm
// This performs MSAA resolve using fragment shader IN the render pass (on-chip)
// Benefits: ~70% bandwidth reduction, better performance on TBDR architectures
// Requirements: pResolveAttachments configured + explicit shader execution
if (samples > VK_SAMPLE_COUNT_1_BIT && runtime.device.IsQcomRenderPassShaderResolveSupported()) {
// Check if any color attachment is HDR format that benefits from shader resolve
bool has_hdr_attachment = false;
for (size_t index = 0; index < NUM_RT && !has_hdr_attachment; ++index) {
const auto format = renderpass_key.color_formats[index];
// B10G11R11_FLOAT benefits most: compute shader limited, fixed-function slower
if (format == PixelFormat::B10G11R11_FLOAT) {
has_hdr_attachment = true;
}
}
if (has_hdr_attachment) {
renderpass_key.qcom_shader_resolve = true;
}
}
renderpass = runtime.render_pass_cache.Get(renderpass_key);
render_area.width = (std::min)(render_area.width, width);
render_area.height = (std::min)(render_area.height, height);

View file

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
@ -90,6 +90,10 @@ public:
return msaa_copy_pass.operator bool();
}
/// Reports whether MSAA image downloads are possible; available only when the
/// MSAA copy pass has been created.
bool CanDownloadMSAA() const noexcept {
    return static_cast<bool>(msaa_copy_pass);
}
void AccelerateImageUpload(Image&, const StagingBufferRef&,
std::span<const VideoCommon::SwizzleParameters>,
u32 z_start, u32 z_count);

View file

@ -277,7 +277,19 @@ std::optional<u64> GenericEnvironment::TryFindSize() {
Tegra::Texture::TICEntry GenericEnvironment::ReadTextureInfo(GPUVAddr tic_addr, u32 tic_limit,
bool via_header_index, u32 raw) {
const auto handle{Tegra::Texture::TexturePair(raw, via_header_index)};
ASSERT(handle.first <= tic_limit);
// Some games (especially on updates) use invalid texture handles beyond tic_limit
// Clamp to limit instead of asserting to prevent crashes
if (handle.first > tic_limit) {
LOG_WARNING(HW_GPU, "Texture handle {} exceeds TIC limit {}, clamping to limit",
handle.first, tic_limit);
const u32 clamped_handle = std::min(handle.first, tic_limit);
const GPUVAddr descriptor_addr{tic_addr + clamped_handle * sizeof(Tegra::Texture::TICEntry)};
Tegra::Texture::TICEntry entry;
gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry));
return entry;
}
const GPUVAddr descriptor_addr{tic_addr + handle.first * sizeof(Tegra::Texture::TICEntry)};
Tegra::Texture::TICEntry entry;
gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry));

View file

@ -138,7 +138,7 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red,
case Hash(TextureFormat::R32, SINT):
return PixelFormat::R32_SINT;
case Hash(TextureFormat::E5B9G9R9, FLOAT):
return PixelFormat::E5B9G9R9_FLOAT;
return PixelFormat::B10G11R11_FLOAT;
case Hash(TextureFormat::Z32, FLOAT):
return PixelFormat::D32_FLOAT;
case Hash(TextureFormat::Z32, FLOAT, UINT, UINT, UINT, LINEAR):

View file

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@ -205,8 +208,7 @@ struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::str
return "ASTC_2D_6X5_UNORM";
case PixelFormat::ASTC_2D_6X5_SRGB:
return "ASTC_2D_6X5_SRGB";
case PixelFormat::E5B9G9R9_FLOAT:
return "E5B9G9R9_FLOAT";
case PixelFormat::D32_FLOAT:
return "D32_FLOAT";
case PixelFormat::D16_UNORM:
@ -223,9 +225,9 @@ struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::str
return "D32_FLOAT_S8_UINT";
case PixelFormat::MaxDepthStencilFormat:
case PixelFormat::Invalid:
default:
return "Invalid";
}
return "Invalid";
}();
return formatter<string_view>::format(name, ctx);
}

View file

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
@ -131,10 +131,6 @@ bool ImageBase::IsSafeDownload() const noexcept {
if (True(flags & ImageFlagBits::CpuModified)) {
return false;
}
if (info.num_samples > 1) {
LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented");
return false;
}
return true;
}

View file

@ -690,10 +690,14 @@ void TextureCache<P>::WriteMemory(DAddr cpu_addr, size_t size) {
template <class P>
void TextureCache<P>::DownloadMemory(DAddr cpu_addr, size_t size) {
boost::container::small_vector<ImageId, 16> images;
ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) {
ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) {
if (!image.IsSafeDownload()) {
return;
}
if (!HasMsaaDownloadSupport(image.info)) {
LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented");
return;
}
image.flags &= ~ImageFlagBits::GpuModified;
images.push_back(image_id);
});
@ -1071,6 +1075,17 @@ ImageId TextureCache<P>::DmaImageId(const Tegra::DMA::ImageOperand& operand, boo
return NULL_IMAGE_ID;
}
auto& image = slot_images[dst_id];
if (image.info.num_samples > 1) {
if (is_upload) {
if (!HasMsaaUploadSupport(image.info)) {
return NULL_IMAGE_ID;
}
} else {
if (!HasMsaaDownloadSupport(image.info)) {
return NULL_IMAGE_ID;
}
}
}
if (False(image.flags & ImageFlagBits::GpuModified)) {
// No need to waste time on an image that's synced with guest
return NULL_IMAGE_ID;
@ -1202,7 +1217,7 @@ void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) {
TrackImage(image, image_id);
if (image.info.num_samples > 1 && !runtime.CanUploadMSAA()) {
if (!HasMsaaUploadSupport(image.info)) {
LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
runtime.TransitionImageLayout(image);
return;
@ -1434,6 +1449,16 @@ u64 TextureCache<P>::GetScaledImageSizeBytes(const ImageBase& image) {
return fitted_size;
}
template <class P>
bool TextureCache<P>::HasMsaaUploadSupport(const ImageInfo& info) const noexcept {
    // Single-sampled images never need the MSAA copy path. For multisampled
    // images, defer to the runtime, which knows whether an MSAA copy pass exists.
    if (info.num_samples <= 1) {
        return true;
    }
    return runtime.CanUploadMSAA();
}
template <class P>
bool TextureCache<P>::HasMsaaDownloadSupport(const ImageInfo& info) const noexcept {
    // Mirrors HasMsaaUploadSupport: non-MSAA images are always downloadable;
    // MSAA images require runtime support for the MSAA download path.
    if (info.num_samples <= 1) {
        return true;
    }
    return runtime.CanDownloadMSAA();
}
template <class P>
void TextureCache<P>::QueueAsyncDecode(Image& image, ImageId image_id) {
UNIMPLEMENTED_IF(False(image.flags & ImageFlagBits::Converted));
@ -1794,7 +1819,31 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DA
for (const ImageId overlap_id : join_ignore_textures) {
Image& overlap = slot_images[overlap_id];
if (True(overlap.flags & ImageFlagBits::GpuModified)) {
UNIMPLEMENTED();
// Merge GPU-modified contents from the overlapping image into the newly
// created image to preserve guest-visible data. Compute shrink/scale
// copies and dispatch a GPU-side copy. This mirrors the behavior used
// for overlaps handled in join_copies_to_do above.
new_image.flags |= ImageFlagBits::GpuModified;
const auto& resolution = Settings::values.resolution_info;
const auto base_opt = new_image.TryFindBase(overlap.gpu_addr);
if (base_opt) {
const SubresourceBase base = base_opt.value();
const u32 up_scale = can_rescale ? resolution.up_scale : 1;
const u32 down_shift = can_rescale ? resolution.down_shift : 0;
auto copies = MakeShrinkImageCopies(new_info, overlap.info, base, up_scale, down_shift);
if (overlap.info.num_samples != new_image.info.num_samples) {
runtime.CopyImageMSAA(new_image, overlap, FixSmallVectorADL(copies));
} else {
runtime.CopyImage(new_image, overlap, FixSmallVectorADL(copies));
}
new_image.modification_tick = overlap.modification_tick;
} else {
// If we cannot determine a base mapping, fallback to preserving the
// overlap (avoid deleting GPU-modified data) and log the event so
// it can be investigated, we're trying to pinpoint the issue of texture flickering.
LOG_WARNING(HW_GPU, "Could not map overlap gpu_addr {:#x} into new image; preserving overlap", u64(overlap.gpu_addr));
continue;
}
}
if (True(overlap.flags & ImageFlagBits::Tracked)) {
UntrackImage(overlap, overlap_id);
@ -1854,6 +1903,10 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DA
for (const auto& copy_object : join_copies_to_do) {
Image& overlap = slot_images[copy_object.id];
if (copy_object.is_alias) {
if (!HasMsaaDownloadSupport(overlap.info)) {
LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented");
continue;
}
if (!overlap.IsSafeDownload()) {
continue;
}
@ -2852,8 +2905,13 @@ void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id)
if (new_id) {
const ImageViewBase& old_view = slot_image_views[new_id];
if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) {
const PendingDownload new_download{true, 0, old_view.image_id};
uncommitted_downloads.emplace_back(new_download);
const ImageBase& image = slot_images[old_view.image_id];
if (!HasMsaaDownloadSupport(image.info)) {
LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented");
} else {
const PendingDownload new_download{true, 0, old_view.image_id};
uncommitted_downloads.emplace_back(new_download);
}
}
}
*old_id = new_id;

View file

@ -439,6 +439,8 @@ private:
bool ScaleUp(Image& image);
bool ScaleDown(Image& image);
u64 GetScaledImageSizeBytes(const ImageBase& image);
[[nodiscard]] bool HasMsaaUploadSupport(const ImageInfo& info) const noexcept;
[[nodiscard]] bool HasMsaaDownloadSupport(const ImageInfo& info) const noexcept;
void QueueAsyncDecode(Image& image, ImageId image_id);
void TickAsyncDecode();

View file

@ -22,12 +22,32 @@
#include <vulkan/vulkan.h>
// Define maintenance 7-8 extension names (not yet in official Vulkan headers)
#ifndef VK_KHR_MAINTENANCE_1_EXTENSION_NAME
# define VK_KHR_MAINTENANCE_1_EXTENSION_NAME "VK_KHR_maintenance1"
#endif
#ifndef VK_KHR_MAINTENANCE_2_EXTENSION_NAME
# define VK_KHR_MAINTENANCE_2_EXTENSION_NAME "VK_KHR_maintenance2"
#endif
#ifndef VK_KHR_MAINTENANCE_3_EXTENSION_NAME
# define VK_KHR_MAINTENANCE_3_EXTENSION_NAME "VK_KHR_maintenance3"
#endif
#ifndef VK_KHR_MAINTENANCE_4_EXTENSION_NAME
# define VK_KHR_MAINTENANCE_4_EXTENSION_NAME "VK_KHR_maintenance4"
#endif
#ifndef VK_KHR_MAINTENANCE_5_EXTENSION_NAME
# define VK_KHR_MAINTENANCE_5_EXTENSION_NAME "VK_KHR_maintenance5"
#endif
#ifndef VK_KHR_MAINTENANCE_6_EXTENSION_NAME
# define VK_KHR_MAINTENANCE_6_EXTENSION_NAME "VK_KHR_maintenance6"
#endif
#ifndef VK_KHR_MAINTENANCE_7_EXTENSION_NAME
#define VK_KHR_MAINTENANCE_7_EXTENSION_NAME "VK_KHR_maintenance7"
# define VK_KHR_MAINTENANCE_7_EXTENSION_NAME "VK_KHR_maintenance7"
#endif
#ifndef VK_KHR_MAINTENANCE_8_EXTENSION_NAME
#define VK_KHR_MAINTENANCE_8_EXTENSION_NAME "VK_KHR_maintenance8"
# define VK_KHR_MAINTENANCE_8_EXTENSION_NAME "VK_KHR_maintenance8"
#endif
#ifndef VK_KHR_MAINTENANCE_9_EXTENSION_NAME
# define VK_KHR_MAINTENANCE_9_EXTENSION_NAME "VK_KHR_maintenance9"
#endif
// Sanitize macros

View file

@ -95,6 +95,25 @@ constexpr std::array VK_FORMAT_A4B4G4R4_UNORM_PACK16{
VK_FORMAT_UNDEFINED,
};
// B10G11R11_UFLOAT (R11G11B10F) - PRIMARY HDR format for Nintendo Switch
// Nintendo Switch hardware validation: FULL support (COLOR_ATTACHMENT + STORAGE_IMAGE + BLEND)
// Reference: vp_gpuinfo_nintendo_switch_v2_495_0_0_0 - All required feature bits present
//
// Fallback strategy: Degrade to LDR instead of expensive HDR emulation
// - RGBA8 UNORM/SRGB: Universal support, 32-bit (same size as B10G11R11), acceptable quality
// - RGB10A2: Better precision if available, still 32-bit
// - RGBA16F: Last resort only if RGB8 variants fail (should never happen)
constexpr std::array B10G11R11_UFLOAT_PACK32{
#ifdef ANDROID
VK_FORMAT_A8B8G8R8_SRGB_PACK32, // sRGB variant (for gamma-correct fallback)
#else
VK_FORMAT_A8B8G8R8_UNORM_PACK32, // Primary fallback: RGBA8 LDR (32-bit, universal)
VK_FORMAT_A2B10G10R10_UNORM_PACK32, // Better precision: RGB10A2 (32-bit, common)
#endif
VK_FORMAT_R16G16B16A16_SFLOAT, // Emergency fallback: RGBA16F (64-bit, should never reach)
VK_FORMAT_UNDEFINED,
};
} // namespace Alternatives
template <typename T>
@ -127,6 +146,9 @@ constexpr const VkFormat* GetFormatAlternatives(VkFormat format) {
return Alternatives::VK_FORMAT_R32G32B32_SFLOAT.data();
case VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT:
return Alternatives::VK_FORMAT_A4B4G4R4_UNORM_PACK16.data();
case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
return Alternatives::B10G11R11_UFLOAT_PACK32.data();
default:
return nullptr;
}
@ -214,7 +236,6 @@ ankerl::unordered_dense::map<VkFormat, VkFormatProperties> GetFormatProperties(v
VK_FORMAT_D24_UNORM_S8_UINT,
VK_FORMAT_D32_SFLOAT,
VK_FORMAT_D32_SFLOAT_S8_UINT,
VK_FORMAT_E5B9G9R9_UFLOAT_PACK32,
VK_FORMAT_R16G16B16A16_SFLOAT,
VK_FORMAT_R16G16B16A16_SINT,
VK_FORMAT_R16G16B16A16_SNORM,
@ -435,6 +456,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
const bool is_mvk = driver_id == VK_DRIVER_ID_MOLTENVK;
const bool is_qualcomm = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY;
const bool is_turnip = driver_id == VK_DRIVER_ID_MESA_TURNIP;
const bool is_arm = driver_id == VK_DRIVER_ID_ARM_PROPRIETARY;
if (!is_suitable)
LOG_WARNING(Render_Vulkan, "Unsuitable driver - continuing anyways");
@ -471,10 +493,9 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
is_warp_potentially_bigger = !extensions.subgroup_size_control ||
properties.subgroup_size_control.maxSubgroupSize > GuestWarpSize;
is_integrated = properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
is_virtual = properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU;
is_non_gpu = properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_OTHER ||
properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_CPU;
//const bool is_virtual = properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU;
//const bool is_non_gpu = properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_OTHER ||
// properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_CPU;
supports_d24_depth =
IsFormatSupported(VK_FORMAT_D24_UNORM_S8_UINT,
@ -485,17 +506,62 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
CollectPhysicalMemoryInfo();
CollectToolingInfo();
// Driver-specific handling for VK_EXT_custom_border_color
// On some Qualcomm/Turnip/ARM drivers the extension may be partially implemented.
// Disable completely if no feature bits are reported to avoid crashes/undefined behavior.
if (is_qualcomm || is_turnip || is_arm) {
const bool has_any_custom_border_color =
features.custom_border_color.customBorderColors ||
features.custom_border_color.customBorderColorWithoutFormat;
if (!has_any_custom_border_color) {
LOG_WARNING(Render_Vulkan,
"Disabling VK_EXT_custom_border_color on '{}' — no usable features reported",
properties.driver.driverName);
RemoveExtensionFeature(extensions.custom_border_color, features.custom_border_color,
VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
} else {
LOG_INFO(Render_Vulkan,
"VK_EXT_custom_border_color enabled on '{}' (partial support detected)",
properties.driver.driverName);
}
}
if (is_qualcomm) {
// Qualcomm Adreno GPUs don't handle scaled vertex attributes; keep emulation enabled
must_emulate_scaled_formats = true;
LOG_WARNING(Render_Vulkan,
"Qualcomm drivers require scaled vertex format emulation; forcing fallback");
LOG_WARNING(Render_Vulkan,
"Disabling shader float controls and 64-bit integer features on Qualcomm proprietary drivers");
RemoveExtension(extensions.shader_float_controls, VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME);
RemoveExtensionFeature(extensions.shader_atomic_int64, features.shader_atomic_int64,
VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME);
// Log Qualcomm-specific optimizations
if (extensions.render_pass_store_ops) {
LOG_INFO(Render_Vulkan, "VK_QCOM_render_pass_store_ops: Enabled");
}
if (extensions.tile_properties) {
LOG_INFO(Render_Vulkan, "VK_QCOM_tile_properties: Enabled");
}
if (extensions.render_pass_shader_resolve) {
LOG_INFO(Render_Vulkan, "VK_QCOM_render_pass_shader_resolve: Enabled");
}
if (extensions.render_pass_transform) {
LOG_INFO(Render_Vulkan, "VK_QCOM_render_pass_transform: Enabled");
}
if (extensions.rotated_copy_commands) {
LOG_INFO(Render_Vulkan, "VK_QCOM_rotated_copy_commands: Enabled");
}
if (extensions.image_processing) {
LOG_INFO(Render_Vulkan, "VK_QCOM_image_processing: Enabled");
}
// Shader Float Controls: Completely broken on Stock Qualcomm
// The extension causes rendering issues regardless of FP16/FP32 mode
// Turnip Mesa: Works correctly, keep enabled
if (!is_turnip) {
LOG_WARNING(Render_Vulkan, "Disabling Shader Float Controls for Stock Qualcomm (broken implementation)");
extensions.shader_float_controls = false; // Just a feature not an extension
}
// Int64 atomics - genuinely broken, always disable
RemoveExtensionFeature(extensions.shader_atomic_int64, features.shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME);
features.shader_atomic_int64.shaderBufferInt64Atomics = false;
features.shader_atomic_int64.shaderSharedInt64Atomics = false;
features.features.shaderInt64 = false;
@ -695,6 +761,22 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
RemoveExtensionFeature(extensions.vertex_input_dynamic_state, features.vertex_input_dynamic_state, VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
}
// Intel iGPU/MoltenVK blacklist moved to GetSuitability() for proper ordering
#ifdef ANDROID
// Stock Qualcomm and ARM Mali drivers don't report VK_FORMAT_*_SSCALED/USCALED formats
// Turnip implements them in software, so only force emulation for stock drivers
if ((is_qualcomm && !is_turnip) || is_arm) {
must_emulate_scaled_formats = true;
LOG_INFO(Render_Vulkan, "Mobile GPU detected: forcing scaled format emulation (hardware limitation)");
} else {
must_emulate_scaled_formats = false;
}
#else
// Desktop GPUs support scaled formats natively
must_emulate_scaled_formats = false;
#endif
logical = vk::Device::Create(physical, queue_cis, ExtensionListForVulkan(loaded_extensions), first_next, dld);
graphics_queue = logical.GetQueue(graphics_family);
@ -708,13 +790,12 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
if (extensions.memory_budget) {
flags |= VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT;
}
const bool is_integrated = properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
const VmaAllocatorCreateInfo allocator_info{
.flags = flags,
.physicalDevice = physical,
.device = *logical,
.preferredLargeHeapBlockSize = is_integrated
? (64u * 1024u * 1024u)
: (256u * 1024u * 1024u),
.preferredLargeHeapBlockSize = (is_integrated ? 64u : 256u) * 1024u * 1024u,
.pAllocationCallbacks = nullptr,
.pDeviceMemoryCallbacks = nullptr,
.pHeapSizeLimit = nullptr,
@ -738,15 +819,32 @@ Device::~Device() {
VkFormat Device::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
FormatType format_type) const {
if (IsFormatSupported(wanted_format, wanted_usage, format_type)) {
return wanted_format;
// Critical: Even if format is "supported", check for STORAGE + HDR + no MSAA support
// Driver may report STORAGE_IMAGE_BIT but shaderStorageImageMultisample=false means
// it will fail at runtime when used with MSAA (CopyImageMSAA silently fails)
const bool requests_storage = (wanted_usage & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT) != 0;
const bool is_hdr_format = wanted_format == VK_FORMAT_B10G11R11_UFLOAT_PACK32;
// If driver doesn't support shader storage image with MSAA, and we're requesting storage
// for an HDR format (which will likely be used with MSAA), force fallback
if (requests_storage && is_hdr_format && !features.features.shaderStorageImageMultisample) {
LOG_WARNING(Render_Vulkan,
"Format {} reports STORAGE_IMAGE_BIT but driver doesn't support "
"shaderStorageImageMultisample. Forcing fallback for MSAA compatibility.",
wanted_format);
// Continue to alternatives search below
} else {
return wanted_format;
}
}
// The wanted format is not supported by hardware, search for alternatives
const VkFormat* alternatives = GetFormatAlternatives(wanted_format);
if (alternatives == nullptr) {
LOG_ERROR(Render_Vulkan,
"Format={} with usage={} and type={} has no defined alternatives and host "
"hardware does not support it",
wanted_format, wanted_usage, format_type);
"Format={} (0x{:X}) with usage={} and type={} has no defined alternatives and host "
"hardware does not support it. Driver: {} Device: {}",
wanted_format, static_cast<u32>(wanted_format), wanted_usage, format_type,
GetDriverName(), properties.properties.deviceName);
return wanted_format;
}
@ -755,9 +853,17 @@ VkFormat Device::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags
if (!IsFormatSupported(alternative, wanted_usage, format_type)) {
continue;
}
LOG_DEBUG(Render_Vulkan,
// Special logging for HDR formats (common across multiple engines) on problematic drivers
if (wanted_format == VK_FORMAT_B10G11R11_UFLOAT_PACK32) {
LOG_WARNING(Render_Vulkan,
"B10G11R11_UFLOAT_PACK32 (R11G11B10F HDR format) not fully supported. "
"Falling back to {} on {}",
alternative, properties.properties.deviceName);
} else {
LOG_DEBUG(Render_Vulkan,
"Emulating format={} with alternative format={} with usage={} and type={}",
wanted_format, alternative, wanted_usage, format_type);
}
return alternative;
}
@ -1120,8 +1226,6 @@ bool Device::GetSuitability(bool requires_swapchain) {
// VK_EXT_extended_dynamic_state2 below this will appear drivers that need workarounds.
// VK_EXT_extended_dynamic_state3 below this will appear drivers that need workarounds.
// Samsung: Broken extendedDynamicState3ColorBlendEquation
// Disable blend equation dynamic state, force static pipeline state
if (extensions.extended_dynamic_state3 &&
@ -1146,6 +1250,8 @@ bool Device::GetSuitability(bool requires_swapchain) {
if (u32(Settings::values.dyna_state.GetValue()) == 0) {
LOG_INFO(Render_Vulkan, "Extended Dynamic State disabled by user setting, clearing all EDS features");
features.custom_border_color.customBorderColors = false;
features.custom_border_color.customBorderColorWithoutFormat = false;
features.extended_dynamic_state.extendedDynamicState = false;
features.extended_dynamic_state2.extendedDynamicState2 = false;
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable = false;
@ -1271,6 +1377,43 @@ void Device::RemoveUnsuitableExtensions() {
features.robust_image_access,
VK_EXT_IMAGE_ROBUSTNESS_EXTENSION_NAME);
// VK_KHR_shader_float16_int8
const bool float16_int8_requested = extensions.shader_float16_int8;
const bool float16_int8_usable =
features.shader_float16_int8.shaderFloat16 || features.shader_float16_int8.shaderInt8;
if (float16_int8_requested && !float16_int8_usable) {
LOG_WARNING(Render_Vulkan,
"Disabling VK_KHR_shader_float16_int8 — no shaderFloat16/shaderInt8 features reported");
}
extensions.shader_float16_int8 = float16_int8_requested && float16_int8_usable;
RemoveExtensionFeatureIfUnsuitable(float16_int8_usable, features.shader_float16_int8,
VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
// VK_EXT_shader_atomic_float
const bool atomic_float_requested = extensions.shader_atomic_float;
const auto& atomic_float_features = features.shader_atomic_float;
const bool supports_buffer_f32 = atomic_float_features.shaderBufferFloat32Atomics ||
atomic_float_features.shaderBufferFloat32AtomicAdd;
const bool supports_shared_f32 = atomic_float_features.shaderSharedFloat32Atomics ||
atomic_float_features.shaderSharedFloat32AtomicAdd;
const bool supports_image_f32 = atomic_float_features.shaderImageFloat32Atomics ||
atomic_float_features.shaderImageFloat32AtomicAdd;
const bool supports_sparse_f32 = atomic_float_features.sparseImageFloat32Atomics ||
atomic_float_features.sparseImageFloat32AtomicAdd;
const bool supports_buffer_f64 = atomic_float_features.shaderBufferFloat64Atomics ||
atomic_float_features.shaderBufferFloat64AtomicAdd;
const bool supports_shared_f64 = atomic_float_features.shaderSharedFloat64Atomics ||
atomic_float_features.shaderSharedFloat64AtomicAdd;
const bool atomic_float_usable = supports_buffer_f32 || supports_shared_f32 || supports_image_f32 ||
supports_sparse_f32 || supports_buffer_f64 || supports_shared_f64;
if (atomic_float_requested && !atomic_float_usable) {
LOG_WARNING(Render_Vulkan,
"Disabling VK_EXT_shader_atomic_float — no usable atomic float feature bits reported");
}
extensions.shader_atomic_float = atomic_float_requested && atomic_float_usable;
RemoveExtensionFeatureIfUnsuitable(atomic_float_usable, features.shader_atomic_float,
VK_EXT_SHADER_ATOMIC_FLOAT_EXTENSION_NAME);
// VK_KHR_shader_atomic_int64
extensions.shader_atomic_int64 = features.shader_atomic_int64.shaderBufferInt64Atomics &&
features.shader_atomic_int64.shaderSharedInt64Atomics;
@ -1300,12 +1443,34 @@ void Device::RemoveUnsuitableExtensions() {
RemoveExtensionFeatureIfUnsuitable(extensions.transform_feedback, features.transform_feedback,
VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME);
// VK_EXT_robustness2
extensions.robustness_2 =
features.robustness2.robustBufferAccess2 && features.robustness2.robustImageAccess2;
RemoveExtensionFeatureIfUnsuitable(extensions.robustness_2, features.robustness2,
VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
// VK_EXT_image_robustness
extensions.image_robustness = features.image_robustness.robustImageAccess;
RemoveExtensionFeatureIfUnsuitable(extensions.image_robustness, features.image_robustness,
VK_EXT_IMAGE_ROBUSTNESS_EXTENSION_NAME);
// VK_EXT_swapchain_maintenance1
extensions.swapchain_maintenance1 = loaded_extensions.contains(VK_EXT_SWAPCHAIN_MAINTENANCE_1_EXTENSION_NAME);
RemoveExtensionIfUnsuitable(extensions.swapchain_maintenance1, VK_EXT_SWAPCHAIN_MAINTENANCE_1_EXTENSION_NAME);
// VK_EXT_vertex_input_dynamic_state
extensions.vertex_input_dynamic_state =
features.vertex_input_dynamic_state.vertexInputDynamicState;
RemoveExtensionFeatureIfUnsuitable(extensions.vertex_input_dynamic_state,
features.vertex_input_dynamic_state,
VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
if (Settings::values.vertex_input_dynamic_state.GetValue()) {
extensions.vertex_input_dynamic_state =
features.vertex_input_dynamic_state.vertexInputDynamicState;
RemoveExtensionFeatureIfUnsuitable(extensions.vertex_input_dynamic_state,
features.vertex_input_dynamic_state,
VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
} else {
RemoveExtensionFeature(extensions.vertex_input_dynamic_state,
features.vertex_input_dynamic_state,
VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
LOG_INFO(Render_Vulkan, "Vertex Input Dynamic State disabled by user setting");
}
// VK_KHR_pipeline_executable_properties
if (Settings::values.renderer_shader_feedback.GetValue()) {
@ -1331,18 +1496,6 @@ void Device::RemoveUnsuitableExtensions() {
features.workgroup_memory_explicit_layout,
VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
// VK_KHR_maintenance1
extensions.maintenance1 = loaded_extensions.contains(VK_KHR_MAINTENANCE_1_EXTENSION_NAME);
RemoveExtensionIfUnsuitable(extensions.maintenance1, VK_KHR_MAINTENANCE_1_EXTENSION_NAME);
// VK_KHR_maintenance2
extensions.maintenance2 = loaded_extensions.contains(VK_KHR_MAINTENANCE_2_EXTENSION_NAME);
RemoveExtensionIfUnsuitable(extensions.maintenance2, VK_KHR_MAINTENANCE_2_EXTENSION_NAME);
// VK_KHR_maintenance3
extensions.maintenance3 = loaded_extensions.contains(VK_KHR_MAINTENANCE_3_EXTENSION_NAME);
RemoveExtensionIfUnsuitable(extensions.maintenance3, VK_KHR_MAINTENANCE_3_EXTENSION_NAME);
// VK_KHR_maintenance4
extensions.maintenance4 = features.maintenance4.maintenance4;
RemoveExtensionFeatureIfUnsuitable(extensions.maintenance4, features.maintenance4,
@ -1357,14 +1510,6 @@ void Device::RemoveUnsuitableExtensions() {
extensions.maintenance6 = features.maintenance6.maintenance6;
RemoveExtensionFeatureIfUnsuitable(extensions.maintenance6, features.maintenance6,
VK_KHR_MAINTENANCE_6_EXTENSION_NAME);
// VK_KHR_maintenance7
extensions.maintenance7 = loaded_extensions.contains(VK_KHR_MAINTENANCE_7_EXTENSION_NAME);
RemoveExtensionIfUnsuitable(extensions.maintenance7, VK_KHR_MAINTENANCE_7_EXTENSION_NAME);
// VK_KHR_maintenance8
extensions.maintenance8 = loaded_extensions.contains(VK_KHR_MAINTENANCE_8_EXTENSION_NAME);
RemoveExtensionIfUnsuitable(extensions.maintenance8, VK_KHR_MAINTENANCE_8_EXTENSION_NAME);
}
void Device::SetupFamilies(VkSurfaceKHR surface) {
@ -1425,8 +1570,8 @@ void Device::CollectPhysicalMemoryInfo() {
// Calculate limits using memory budget
VkPhysicalDeviceMemoryBudgetPropertiesEXT budget{};
budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT;
const auto mem_info =
physical.GetMemoryProperties(extensions.memory_budget ? &budget : nullptr);
const bool is_integrated = properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
const auto mem_info = physical.GetMemoryProperties(extensions.memory_budget ? &budget : nullptr);
const auto& mem_properties = mem_info.memoryProperties;
const size_t num_properties = mem_properties.memoryHeapCount;
device_access_memory = 0;

View file

@ -54,9 +54,11 @@ VK_DEFINE_HANDLE(VmaAllocator)
FEATURE(EXT, ExtendedDynamicState, EXTENDED_DYNAMIC_STATE, extended_dynamic_state) \
FEATURE(EXT, ExtendedDynamicState2, EXTENDED_DYNAMIC_STATE_2, extended_dynamic_state2) \
FEATURE(EXT, ExtendedDynamicState3, EXTENDED_DYNAMIC_STATE_3, extended_dynamic_state3) \
FEATURE(EXT, ShaderAtomicFloat, SHADER_ATOMIC_FLOAT, shader_atomic_float) \
FEATURE(EXT, 4444Formats, 4444_FORMATS, format_a4b4g4r4) \
FEATURE(EXT, IndexTypeUint8, INDEX_TYPE_UINT8, index_type_uint8) \
FEATURE(EXT, LineRasterization, LINE_RASTERIZATION, line_rasterization) \
FEATURE(EXT, ImageRobustness, IMAGE_ROBUSTNESS, image_robustness) \
FEATURE(EXT, PrimitiveTopologyListRestart, PRIMITIVE_TOPOLOGY_LIST_RESTART, \
primitive_topology_list_restart) \
FEATURE(EXT, ProvokingVertex, PROVOKING_VERTEX, provoking_vertex) \
@ -68,7 +70,9 @@ VK_DEFINE_HANDLE(VmaAllocator)
FEATURE(KHR, PipelineExecutableProperties, PIPELINE_EXECUTABLE_PROPERTIES, \
pipeline_executable_properties) \
FEATURE(KHR, WorkgroupMemoryExplicitLayout, WORKGROUP_MEMORY_EXPLICIT_LAYOUT, \
workgroup_memory_explicit_layout)
workgroup_memory_explicit_layout) \
FEATURE(QCOM, ImageProcessing, IMAGE_PROCESSING, image_processing_qcom) \
FEATURE(QCOM, TileProperties, TILE_PROPERTIES, tile_properties_qcom)
// Define miscellaneous extensions which may be used by the implementation here.
@ -90,20 +94,23 @@ VK_DEFINE_HANDLE(VmaAllocator)
EXTENSION(KHR, SHADER_FLOAT_CONTROLS, shader_float_controls) \
EXTENSION(KHR, SPIRV_1_4, spirv_1_4) \
EXTENSION(KHR, SWAPCHAIN, swapchain) \
EXTENSION(KHR, INCREMENTAL_PRESENT, incremental_present) \
EXTENSION(KHR, SWAPCHAIN_MUTABLE_FORMAT, swapchain_mutable_format) \
EXTENSION(EXT, SWAPCHAIN_MAINTENANCE_1, swapchain_maintenance1) \
EXTENSION(KHR, IMAGE_FORMAT_LIST, image_format_list) \
EXTENSION(KHR, MAINTENANCE_1, maintenance1) \
EXTENSION(KHR, MAINTENANCE_2, maintenance2) \
EXTENSION(KHR, MAINTENANCE_3, maintenance3) \
EXTENSION(KHR, MAINTENANCE_7, maintenance7) \
EXTENSION(KHR, MAINTENANCE_8, maintenance8) \
EXTENSION(NV, DEVICE_DIAGNOSTICS_CONFIG, device_diagnostics_config) \
EXTENSION(NV, GEOMETRY_SHADER_PASSTHROUGH, geometry_shader_passthrough) \
EXTENSION(NV, VIEWPORT_ARRAY2, viewport_array2) \
EXTENSION(NV, VIEWPORT_SWIZZLE, viewport_swizzle) \
EXTENSION(EXT, FILTER_CUBIC, filter_cubic) \
EXTENSION(IMG, FILTER_CUBIC, filter_cubic_img) \
EXTENSION(QCOM, FILTER_CUBIC_WEIGHTS, filter_cubic_weights)
EXTENSION(QCOM, FILTER_CUBIC_WEIGHTS, filter_cubic_weights) \
EXTENSION(QCOM, RENDER_PASS_SHADER_RESOLVE, render_pass_shader_resolve) \
EXTENSION(QCOM, RENDER_PASS_STORE_OPS, render_pass_store_ops) \
EXTENSION(QCOM, RENDER_PASS_TRANSFORM, render_pass_transform) \
EXTENSION(QCOM, ROTATED_COPY_COMMANDS, rotated_copy_commands) \
EXTENSION(QCOM, IMAGE_PROCESSING, image_processing) \
EXTENSION(QCOM, TILE_PROPERTIES, tile_properties)
// Define extensions which must be supported.
#define FOR_EACH_VK_MANDATORY_EXTENSION(EXTENSION_NAME) \
@ -390,6 +397,12 @@ public:
return properties.subgroup_properties.supportedOperations & feature;
}
/// Returns true if subgroup operations are supported in the specified shader stage.
/// Mobile GPUs (Qualcomm Adreno) often only support subgroups in fragment/compute stages.
/// @param stage Single shader stage bit to test against supportedStages.
bool IsSubgroupSupportedForStage(VkShaderStageFlagBits stage) const {
    // Explicit != 0 avoids the implicit VkShaderStageFlags -> bool narrowing.
    return (properties.subgroup_properties.supportedStages & stage) != 0;
}
/// Returns the maximum number of push descriptors.
u32 MaxPushDescriptors() const {
return properties.push_descriptor.maxPushDescriptors;
@ -475,6 +488,11 @@ public:
return extensions.image_format_list || instance_version >= VK_API_VERSION_1_2;
}
/// Returns true if the device supports VK_KHR_incremental_present
/// (presentation with per-frame dirty-region hints).
bool IsKhrIncrementalPresentSupported() const {
    return extensions.incremental_present;
}
/// Returns true if the device supports VK_EXT_primitive_topology_list_restart.
bool IsTopologyListPrimitiveRestartSupported() const {
return features.primitive_topology_list_restart.primitiveTopologyListRestart;
@ -569,6 +587,31 @@ public:
return features.custom_border_color.customBorderColorWithoutFormat;
}
/// Returns true if customBorderColors feature is enabled (allows VK_BORDER_COLOR_*_CUSTOM_EXT).
bool HasCustomBorderColorFeature() const {
    return features.custom_border_color.customBorderColors;
}
/// Returns true if customBorderColorWithoutFormat feature is enabled (allows VK_FORMAT_UNDEFINED).
/// NOTE(review): this returns the same feature bit as the accessor defined just
/// above it in this class — one of the two is likely redundant; confirm which
/// name callers use before removing either.
bool HasCustomBorderColorWithoutFormatFeature() const {
    return features.custom_border_color.customBorderColorWithoutFormat;
}
/// Base Vulkan Dynamic State support checks.
/// These provide granular control over each core (Vulkan 1.0) dynamic state,
/// allowing individual states to be disabled if broken driver implementations
/// are detected at device initialization. All states default to enabled; a
/// driver-specific workaround flips the matching flag to false in the
/// vulkan_device.cpp constructor (same pattern as has_broken_compute).
bool SupportsDynamicViewport() const { return supports_dynamic_viewport; }
bool SupportsDynamicScissor() const { return supports_dynamic_scissor; }
bool SupportsDynamicLineWidth() const { return supports_dynamic_line_width; }
bool SupportsDynamicDepthBias() const { return supports_dynamic_depth_bias; }
bool SupportsDynamicBlendConstants() const { return supports_dynamic_blend_constants; }
bool SupportsDynamicDepthBounds() const { return supports_dynamic_depth_bounds; }
bool SupportsDynamicStencilCompareMask() const { return supports_dynamic_stencil_compare; }
bool SupportsDynamicStencilWriteMask() const { return supports_dynamic_stencil_write; }
bool SupportsDynamicStencilReference() const { return supports_dynamic_stencil_reference; }
/// Returns true if the device supports VK_EXT_extended_dynamic_state.
bool IsExtExtendedDynamicStateSupported() const {
return extensions.extended_dynamic_state;
@ -603,6 +646,98 @@ public:
return dynamic_state3_enables;
}
// EDS2 granular feature checks: the extension being enabled does not imply
// every optional feature bit is available, so each bit is tested separately.
/// Returns true if dynamic logic op (vkCmdSetLogicOpEXT) is usable.
bool IsExtExtendedDynamicState2LogicOpSupported() const {
    return extensions.extended_dynamic_state2 &&
           features.extended_dynamic_state2.extendedDynamicState2LogicOp;
}
/// Returns true if dynamic patch control points (vkCmdSetPatchControlPointsEXT) are usable.
bool IsExtExtendedDynamicState2PatchControlPointsSupported() const {
    return extensions.extended_dynamic_state2 &&
           features.extended_dynamic_state2.extendedDynamicState2PatchControlPoints;
}
// EDS3 granular feature checks.
// VK_EXT_extended_dynamic_state3 exposes each dynamic state as its own
// feature bit, so every checker requires both the extension to be enabled
// and the specific feature bit to be set.
bool IsExtExtendedDynamicState3DepthClampEnableSupported() const {
    return extensions.extended_dynamic_state3 &&
           features.extended_dynamic_state3.extendedDynamicState3DepthClampEnable;
}
bool IsExtExtendedDynamicState3LogicOpEnableSupported() const {
    return extensions.extended_dynamic_state3 &&
           features.extended_dynamic_state3.extendedDynamicState3LogicOpEnable;
}
bool IsExtExtendedDynamicState3TessellationDomainOriginSupported() const {
    return extensions.extended_dynamic_state3 &&
           features.extended_dynamic_state3.extendedDynamicState3TessellationDomainOrigin;
}
bool IsExtExtendedDynamicState3PolygonModeSupported() const {
    return extensions.extended_dynamic_state3 &&
           features.extended_dynamic_state3.extendedDynamicState3PolygonMode;
}
bool IsExtExtendedDynamicState3RasterizationSamplesSupported() const {
    return extensions.extended_dynamic_state3 &&
           features.extended_dynamic_state3.extendedDynamicState3RasterizationSamples;
}
bool IsExtExtendedDynamicState3SampleMaskSupported() const {
    return extensions.extended_dynamic_state3 &&
           features.extended_dynamic_state3.extendedDynamicState3SampleMask;
}
bool IsExtExtendedDynamicState3AlphaToCoverageEnableSupported() const {
    return extensions.extended_dynamic_state3 &&
           features.extended_dynamic_state3.extendedDynamicState3AlphaToCoverageEnable;
}
bool IsExtExtendedDynamicState3AlphaToOneEnableSupported() const {
    return extensions.extended_dynamic_state3 &&
           features.extended_dynamic_state3.extendedDynamicState3AlphaToOneEnable;
}
bool IsExtExtendedDynamicState3DepthClipEnableSupported() const {
    return extensions.extended_dynamic_state3 &&
           features.extended_dynamic_state3.extendedDynamicState3DepthClipEnable;
}
bool IsExtExtendedDynamicState3DepthClipNegativeOneToOneSupported() const {
    return extensions.extended_dynamic_state3 &&
           features.extended_dynamic_state3.extendedDynamicState3DepthClipNegativeOneToOne;
}
bool IsExtExtendedDynamicState3LineRasterizationModeSupported() const {
    return extensions.extended_dynamic_state3 &&
           features.extended_dynamic_state3.extendedDynamicState3LineRasterizationMode;
}
bool IsExtExtendedDynamicState3LineStippleEnableSupported() const {
    return extensions.extended_dynamic_state3 &&
           features.extended_dynamic_state3.extendedDynamicState3LineStippleEnable;
}
bool IsExtExtendedDynamicState3ProvokingVertexModeSupported() const {
    return extensions.extended_dynamic_state3 &&
           features.extended_dynamic_state3.extendedDynamicState3ProvokingVertexMode;
}
bool IsExtExtendedDynamicState3ConservativeRasterizationModeSupported() const {
    return extensions.extended_dynamic_state3 &&
           features.extended_dynamic_state3.extendedDynamicState3ConservativeRasterizationMode;
}
bool IsExtExtendedDynamicState3SampleLocationsEnableSupported() const {
    return extensions.extended_dynamic_state3 &&
           features.extended_dynamic_state3.extendedDynamicState3SampleLocationsEnable;
}
bool IsExtExtendedDynamicState3RasterizationStreamSupported() const {
    return extensions.extended_dynamic_state3 &&
           features.extended_dynamic_state3.extendedDynamicState3RasterizationStream;
}
/// Returns true if the device supports VK_EXT_filter_cubic
bool IsExtFilterCubicSupported() const {
return extensions.filter_cubic;
@ -613,6 +748,56 @@ public:
return extensions.filter_cubic_weights;
}
/// Returns true if the device supports VK_QCOM_render_pass_shader_resolve
/// (MSAA resolve performed by the fragment shader inside the render pass).
bool IsQcomRenderPassShaderResolveSupported() const {
    return extensions.render_pass_shader_resolve;
}
/// Returns true if the device supports VK_QCOM_render_pass_store_ops
/// (additional store-op hints for tiled renderers).
bool IsQcomRenderPassStoreOpsSupported() const {
    return extensions.render_pass_store_ops;
}
/// Returns true if the device supports VK_QCOM_tile_properties
/// (querying the tiler's tile dimensions; see GetQcomTileSize/GetQcomApronSize).
bool IsQcomTilePropertiesSupported() const {
    return extensions.tile_properties;
}
/// Returns true if the device supports VK_QCOM_render_pass_transform
/// (render pass level pre-rotation for display transforms).
bool IsQcomRenderPassTransformSupported() const {
    return extensions.render_pass_transform;
}
/// Returns true if the device supports VK_QCOM_rotated_copy_commands
/// (rotation applied during copy/blit commands).
bool IsQcomRotatedCopyCommandsSupported() const {
    return extensions.rotated_copy_commands;
}
/// Returns true if the device supports VK_QCOM_image_processing.
bool IsQcomImageProcessingSupported() const {
    return extensions.image_processing;
}
/// Returns Qualcomm tile size (width, height, depth). Returns {0,0,0} if not queried or unsupported
VkExtent3D GetQcomTileSize() const {
    return properties.qcom_tile_size;
}
/// Returns Qualcomm tile apron size. Returns {0,0} if not queried or unsupported
VkExtent2D GetQcomApronSize() const {
    return properties.qcom_apron_size;
}
/// Returns true if MSAA image uploads via the multisampled-storage-image
/// compute path are supported; downloads share the same requirement, so
/// CanDownloadMSAA forwards to CanUploadMSAA.
/// NOTE(review): an earlier comment claimed this "returns false for Qualcomm"
/// (shader resolve path instead), but the code performs no driver check — it
/// only queries shaderStorageImageMultisample. Confirm whether a Qualcomm
/// special case is still intended here.
bool CanUploadMSAA() const {
    return IsStorageImageMultisampleSupported();
}
bool CanDownloadMSAA() const {
    return CanUploadMSAA();
}
/// Returns true if the device supports VK_EXT_line_rasterization.
bool IsExtLineRasterizationSupported() const {
return extensions.line_rasterization;
@ -703,6 +888,11 @@ public:
return extensions.shader_atomic_int64;
}
/// Returns true if the device supports VK_EXT_shader_atomic_float.
bool IsExtShaderAtomicFloatSupported() const {
    return extensions.shader_atomic_float;
}
/// Returns true if the device supports VK_EXT_conditional_rendering.
bool IsExtConditionalRendering() const {
    return extensions.conditional_rendering;
}
@ -812,21 +1002,6 @@ public:
return features2.features.multiViewport;
}
/// Returns true if the device supports VK_KHR_maintenance1.
bool IsKhrMaintenance1Supported() const {
return extensions.maintenance1;
}
/// Returns true if the device supports VK_KHR_maintenance2.
bool IsKhrMaintenance2Supported() const {
return extensions.maintenance2;
}
/// Returns true if the device supports VK_KHR_maintenance3.
bool IsKhrMaintenance3Supported() const {
return extensions.maintenance3;
}
/// Returns true if the device supports VK_KHR_maintenance4.
bool IsKhrMaintenance4Supported() const {
return extensions.maintenance4;
@ -859,16 +1034,6 @@ public:
return extensions.maintenance6;
}
/// Returns true if the device supports VK_KHR_maintenance7.
bool IsKhrMaintenance7Supported() const {
return extensions.maintenance7;
}
/// Returns true if the device supports VK_KHR_maintenance8.
bool IsKhrMaintenance8Supported() const {
return extensions.maintenance8;
}
/// Returns true if the device supports UINT8 index buffer conversion via compute shader.
bool SupportsUint8Indices() const {
return features.bit8_storage.storageBuffer8BitAccess &&
@ -998,6 +1163,8 @@ private:
VkPhysicalDeviceMaintenance5PropertiesKHR maintenance5{};
VkPhysicalDeviceProperties properties{};
VkExtent3D qcom_tile_size{}; // Qualcomm tile dimensions (0 if not queried)
VkExtent2D qcom_apron_size{}; // Qualcomm tile apron size
};
Extensions extensions{};
@ -1012,9 +1179,6 @@ private:
bool is_blit_depth24_stencil8_supported{}; ///< Support for blitting from and to D24S8.
bool is_blit_depth32_stencil8_supported{}; ///< Support for blitting from and to D32S8.
bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
bool is_integrated{}; ///< Is GPU an iGPU.
bool is_virtual{}; ///< Is GPU a virtual GPU.
bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device.
bool has_broken_compute{}; ///< Compute shaders can cause crashes
bool has_broken_cube_compatibility{}; ///< Has broken cube compatibility bit
bool has_broken_parallel_compiling{}; ///< Has broken parallel shader compiling.
@ -1035,6 +1199,22 @@ private:
bool dynamic_state3_alpha_to_one{};
bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow.
size_t sampler_heap_budget{}; ///< Sampler budget for buggy drivers (0 = unlimited).
/// Base Vulkan Dynamic State support flags (granular fallback for broken drivers).
/// All default to true. These can be individually disabled in vulkan_device.cpp
/// if specific broken driver implementations are detected during initialization.
/// This provides emergency protection against drivers that report support but crash/misbehave.
/// Pattern: Check driver/device and set to false in vulkan_device.cpp constructor.
bool supports_dynamic_viewport{true}; ///< VK_DYNAMIC_STATE_VIEWPORT
bool supports_dynamic_scissor{true}; ///< VK_DYNAMIC_STATE_SCISSOR
bool supports_dynamic_line_width{true}; ///< VK_DYNAMIC_STATE_LINE_WIDTH
bool supports_dynamic_depth_bias{true}; ///< VK_DYNAMIC_STATE_DEPTH_BIAS
bool supports_dynamic_blend_constants{true}; ///< VK_DYNAMIC_STATE_BLEND_CONSTANTS
bool supports_dynamic_depth_bounds{true}; ///< VK_DYNAMIC_STATE_DEPTH_BOUNDS
bool supports_dynamic_stencil_compare{true}; ///< VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK
bool supports_dynamic_stencil_write{true}; ///< VK_DYNAMIC_STATE_STENCIL_WRITE_MASK
bool supports_dynamic_stencil_reference{true};///< VK_DYNAMIC_STATE_STENCIL_REFERENCE
u64 device_access_memory{}; ///< Total size of device local memory in bytes.
u32 sets_per_pool{}; ///< Sets per Description Pool
NvidiaArchitecture nvidia_arch{NvidiaArchitecture::Arch_AmpereOrNewer};

View file

@ -259,11 +259,24 @@ namespace Vulkan {
vk::Buffer
MemoryAllocator::CreateBuffer(const VkBufferCreateInfo &ci, MemoryUsage usage) const
{
// Qualcomm uses unified memory architecture - prefer DEVICE_LOCAL + HOST_VISIBLE
// for zero-copy access without staging buffers
const bool is_qualcomm = device.GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY;
const bool prefer_unified = is_qualcomm && (usage == MemoryUsage::Upload ||
usage == MemoryUsage::Download ||
usage == MemoryUsage::Stream);
VkMemoryPropertyFlags preferred_flags = MemoryUsagePreferredVmaFlags(usage);
if (prefer_unified) {
// Request DEVICE_LOCAL + HOST_VISIBLE for zero-copy on unified memory architectures
preferred_flags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
}
const VmaAllocationCreateInfo alloc_ci = {
.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | MemoryUsageVmaFlags(usage),
.usage = MemoryUsageVma(usage),
.requiredFlags = 0,
.preferredFlags = MemoryUsagePreferredVmaFlags(usage),
.preferredFlags = preferred_flags,
.memoryTypeBits = usage == MemoryUsage::Stream ? 0u : valid_memory_types,
.pool = VK_NULL_HANDLE,
.pUserData = nullptr,
@ -287,6 +300,12 @@ namespace Vulkan {
property_flags
);
}
if (is_qualcomm && prefer_unified) {
const bool got_unified = (property_flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) &&
(property_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
LOG_DEBUG(Render_Vulkan, "Qualcomm buffer allocation: usage={}, unified={}, flags=0x{:X}",
static_cast<u32>(usage), got_unified, property_flags);
}
u8 *data = reinterpret_cast<u8 *>(alloc_info.pMappedData);
const std::span<u8> mapped_data = data ? std::span<u8>{data, ci.size} : std::span<u8>{};