mirror of
https://git.eden-emu.dev/eden-emu/eden
synced 2026-05-01 15:08:57 +02:00
[spv, qcom] Implement warp intrinsics support
This commit is contained in:
parent
a793a132fc
commit
458302b3f5
9 changed files with 204 additions and 14 deletions
|
|
@ -11,6 +11,7 @@
|
|||
#include <vector>
|
||||
#include <spirv-tools/optimizer.hpp>
|
||||
|
||||
#include "common/logging/log.h"
|
||||
#include "common/settings.h"
|
||||
#include "shader_recompiler/backend/spirv/emit_spirv.h"
|
||||
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
|
||||
|
|
@ -439,15 +440,23 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct
|
|||
ctx.AddExtension("SPV_KHR_shader_draw_parameters");
|
||||
ctx.AddCapability(spv::Capability::DrawParameters);
|
||||
}
|
||||
if ((info.uses_subgroup_vote || info.uses_subgroup_invocation_id ||
|
||||
info.uses_subgroup_shuffles) &&
|
||||
profile.support_vote) {
|
||||
const bool stage_supports_warp = profile.SupportsWarpIntrinsics(ctx.stage);
|
||||
const bool needs_warp_intrinsics = info.uses_subgroup_vote ||
|
||||
info.uses_subgroup_invocation_id ||
|
||||
info.uses_subgroup_shuffles;
|
||||
|
||||
if (needs_warp_intrinsics && profile.support_vote && stage_supports_warp) {
|
||||
ctx.AddCapability(spv::Capability::GroupNonUniformBallot);
|
||||
ctx.AddCapability(spv::Capability::GroupNonUniformShuffle);
|
||||
if (!profile.warp_size_potentially_larger_than_guest) {
|
||||
// vote ops are only used when not taking the long path
|
||||
ctx.AddCapability(spv::Capability::GroupNonUniformVote);
|
||||
}
|
||||
} else if (needs_warp_intrinsics && !stage_supports_warp) {
|
||||
LOG_WARNING(Shader,
|
||||
"Warp intrinsics requested in stage {} but the device does not report subgroup "
|
||||
"support; falling back to scalar approximations",
|
||||
static_cast<u32>(ctx.stage));
|
||||
}
|
||||
if (info.uses_int64_bit_atomics && profile.support_int64_atomics) {
|
||||
ctx.AddCapability(spv::Capability::Int64Atomics);
|
||||
|
|
|
|||
|
|
@ -1,3 +1,6 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
|
|
@ -78,9 +81,25 @@ Id AddPartitionBase(EmitContext& ctx, Id thread_id) {
|
|||
const Id partition_base{ctx.OpShiftLeftLogical(ctx.U32[1], partition_idx, ctx.Const(5u))};
|
||||
return ctx.OpIAdd(ctx.U32[1], thread_id, partition_base);
|
||||
}
|
||||
|
||||
bool SupportsWarpIntrinsics(const EmitContext& ctx) {
|
||||
return ctx.profile.SupportsWarpIntrinsics(ctx.stage);
|
||||
}
|
||||
|
||||
void SetAlwaysInBounds(EmitContext& ctx, IR::Inst* inst) {
|
||||
SetInBoundsFlag(inst, ctx.true_value);
|
||||
}
|
||||
|
||||
Id FallbackBallotMask(EmitContext& ctx, Id pred) {
|
||||
const Id full_mask{ctx.Const(0xFFFFFFFFu)};
|
||||
return ctx.OpSelect(ctx.U32[1], pred, full_mask, ctx.u32_zero_value);
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
Id EmitLaneId(EmitContext& ctx) {
|
||||
if (!SupportsWarpIntrinsics(ctx)) {
|
||||
return ctx.u32_zero_value;
|
||||
}
|
||||
const Id id{GetThreadId(ctx)};
|
||||
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||
return id;
|
||||
|
|
@ -89,6 +108,9 @@ Id EmitLaneId(EmitContext& ctx) {
|
|||
}
|
||||
|
||||
Id EmitVoteAll(EmitContext& ctx, Id pred) {
|
||||
if (!SupportsWarpIntrinsics(ctx)) {
|
||||
return pred;
|
||||
}
|
||||
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||
return ctx.OpGroupNonUniformAll(ctx.U1, SubgroupScope(ctx), pred);
|
||||
}
|
||||
|
|
@ -102,6 +124,9 @@ Id EmitVoteAll(EmitContext& ctx, Id pred) {
|
|||
}
|
||||
|
||||
Id EmitVoteAny(EmitContext& ctx, Id pred) {
|
||||
if (!SupportsWarpIntrinsics(ctx)) {
|
||||
return pred;
|
||||
}
|
||||
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||
return ctx.OpGroupNonUniformAny(ctx.U1, SubgroupScope(ctx), pred);
|
||||
}
|
||||
|
|
@ -115,6 +140,9 @@ Id EmitVoteAny(EmitContext& ctx, Id pred) {
|
|||
}
|
||||
|
||||
Id EmitVoteEqual(EmitContext& ctx, Id pred) {
|
||||
if (!SupportsWarpIntrinsics(ctx)) {
|
||||
return pred;
|
||||
}
|
||||
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||
return ctx.OpGroupNonUniformAllEqual(ctx.U1, SubgroupScope(ctx), pred);
|
||||
}
|
||||
|
|
@ -129,6 +157,9 @@ Id EmitVoteEqual(EmitContext& ctx, Id pred) {
|
|||
}
|
||||
|
||||
Id EmitSubgroupBallot(EmitContext& ctx, Id pred) {
|
||||
if (!SupportsWarpIntrinsics(ctx)) {
|
||||
return FallbackBallotMask(ctx, pred);
|
||||
}
|
||||
const Id ballot{ctx.OpGroupNonUniformBallot(ctx.U32[4], SubgroupScope(ctx), pred)};
|
||||
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||
return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0U);
|
||||
|
|
@ -137,27 +168,46 @@ Id EmitSubgroupBallot(EmitContext& ctx, Id pred) {
|
|||
}
|
||||
|
||||
Id EmitSubgroupEqMask(EmitContext& ctx) {
|
||||
if (!SupportsWarpIntrinsics(ctx)) {
|
||||
return ctx.u32_zero_value;
|
||||
}
|
||||
return LoadMask(ctx, ctx.subgroup_mask_eq);
|
||||
}
|
||||
|
||||
Id EmitSubgroupLtMask(EmitContext& ctx) {
|
||||
if (!SupportsWarpIntrinsics(ctx)) {
|
||||
return ctx.u32_zero_value;
|
||||
}
|
||||
return LoadMask(ctx, ctx.subgroup_mask_lt);
|
||||
}
|
||||
|
||||
Id EmitSubgroupLeMask(EmitContext& ctx) {
|
||||
if (!SupportsWarpIntrinsics(ctx)) {
|
||||
return ctx.u32_zero_value;
|
||||
}
|
||||
return LoadMask(ctx, ctx.subgroup_mask_le);
|
||||
}
|
||||
|
||||
Id EmitSubgroupGtMask(EmitContext& ctx) {
|
||||
if (!SupportsWarpIntrinsics(ctx)) {
|
||||
return ctx.u32_zero_value;
|
||||
}
|
||||
return LoadMask(ctx, ctx.subgroup_mask_gt);
|
||||
}
|
||||
|
||||
Id EmitSubgroupGeMask(EmitContext& ctx) {
|
||||
if (!SupportsWarpIntrinsics(ctx)) {
|
||||
return ctx.u32_zero_value;
|
||||
}
|
||||
return LoadMask(ctx, ctx.subgroup_mask_ge);
|
||||
}
|
||||
|
||||
Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
|
||||
Id segmentation_mask) {
|
||||
if (!SupportsWarpIntrinsics(ctx)) {
|
||||
SetAlwaysInBounds(ctx, inst);
|
||||
return value;
|
||||
}
|
||||
const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)};
|
||||
const Id thread_id{EmitLaneId(ctx)};
|
||||
const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)};
|
||||
|
|
@ -177,6 +227,10 @@ Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id cla
|
|||
|
||||
Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
|
||||
Id segmentation_mask) {
|
||||
if (!SupportsWarpIntrinsics(ctx)) {
|
||||
SetAlwaysInBounds(ctx, inst);
|
||||
return value;
|
||||
}
|
||||
const Id thread_id{EmitLaneId(ctx)};
|
||||
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
|
||||
Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)};
|
||||
|
|
@ -192,6 +246,10 @@ Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
|
|||
|
||||
Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
|
||||
Id segmentation_mask) {
|
||||
if (!SupportsWarpIntrinsics(ctx)) {
|
||||
SetAlwaysInBounds(ctx, inst);
|
||||
return value;
|
||||
}
|
||||
const Id thread_id{EmitLaneId(ctx)};
|
||||
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
|
||||
Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)};
|
||||
|
|
@ -207,6 +265,10 @@ Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clam
|
|||
|
||||
Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
|
||||
Id segmentation_mask) {
|
||||
if (!SupportsWarpIntrinsics(ctx)) {
|
||||
SetAlwaysInBounds(ctx, inst);
|
||||
return value;
|
||||
}
|
||||
const Id thread_id{EmitLaneId(ctx)};
|
||||
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
|
||||
Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)};
|
||||
|
|
|
|||
|
|
@ -1419,6 +1419,7 @@ void EmitContext::DefineImages(const Info& info, u32& binding, u32& scaling_inde
|
|||
void EmitContext::DefineInputs(const IR::Program& program) {
|
||||
const Info& info{program.info};
|
||||
const VaryingState loads{info.loads.mask | info.passthrough.mask};
|
||||
const bool stage_supports_warp = profile.SupportsWarpIntrinsics(stage);
|
||||
|
||||
if (info.uses_workgroup_id) {
|
||||
workgroup_id = DefineInput(*this, U32[3], false, spv::BuiltIn::WorkgroupId);
|
||||
|
|
@ -1442,7 +1443,7 @@ void EmitContext::DefineInputs(const IR::Program& program) {
|
|||
if (info.uses_is_helper_invocation) {
|
||||
is_helper_invocation = DefineInput(*this, U1, false, spv::BuiltIn::HelperInvocation);
|
||||
}
|
||||
if (info.uses_subgroup_mask) {
|
||||
if (info.uses_subgroup_mask && stage_supports_warp) {
|
||||
subgroup_mask_eq = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupEqMaskKHR);
|
||||
subgroup_mask_lt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLtMaskKHR);
|
||||
subgroup_mask_le = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLeMaskKHR);
|
||||
|
|
@ -1456,9 +1457,10 @@ void EmitContext::DefineInputs(const IR::Program& program) {
|
|||
Decorate(subgroup_mask_ge, spv::Decoration::Flat);
|
||||
}
|
||||
}
|
||||
if (info.uses_fswzadd || info.uses_subgroup_invocation_id || info.uses_subgroup_shuffles ||
|
||||
(profile.warp_size_potentially_larger_than_guest &&
|
||||
(info.uses_subgroup_vote || info.uses_subgroup_mask))) {
|
||||
if (stage_supports_warp &&
|
||||
(info.uses_fswzadd || info.uses_subgroup_invocation_id || info.uses_subgroup_shuffles ||
|
||||
(profile.warp_size_potentially_larger_than_guest &&
|
||||
(info.uses_subgroup_vote || info.uses_subgroup_mask)))) {
|
||||
AddCapability(spv::Capability::GroupNonUniform);
|
||||
subgroup_local_invocation_id =
|
||||
DefineInput(*this, U32[1], false, spv::BuiltIn::SubgroupLocalInvocationId);
|
||||
|
|
|
|||
|
|
@ -1,9 +1,15 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <limits>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/stage.h"
|
||||
|
||||
namespace Shader {
|
||||
|
||||
|
|
@ -46,6 +52,8 @@ struct Profile {
|
|||
bool support_multi_viewport{};
|
||||
bool support_geometry_streams{};
|
||||
|
||||
u32 warp_stage_support_mask{std::numeric_limits<u32>::max()};
|
||||
|
||||
bool warp_size_potentially_larger_than_guest{};
|
||||
|
||||
bool lower_left_origin_mode{};
|
||||
|
|
@ -90,6 +98,11 @@ struct Profile {
|
|||
u64 min_ssbo_alignment{};
|
||||
|
||||
u32 max_user_clip_distances{};
|
||||
|
||||
bool SupportsWarpIntrinsics(Stage stage) const {
|
||||
const u32 bit = 1u << static_cast<u32>(stage);
|
||||
return (warp_stage_support_mask & bit) != 0;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Shader
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue