mirror of
https://git.eden-emu.dev/eden-emu/eden
synced 2026-05-01 12:58:59 +02:00
[vk, qcom] Adjusting unused features
This commit is contained in:
parent
6aa09b6b9e
commit
eec090f76b
6 changed files with 131 additions and 61 deletions
|
|
@ -1,3 +1,6 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
||||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
|
@ -12,10 +15,17 @@ Id Pointer(EmitContext& ctx, Id pointer_type, Id array, Id offset, u32 shift) {
|
||||||
return ctx.OpAccessChain(pointer_type, array, ctx.u32_zero_value, index);
|
return ctx.OpAccessChain(pointer_type, array, ctx.u32_zero_value, index);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Id WorkgroupWordPointer(EmitContext& ctx, Id index) {
|
||||||
|
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||||
|
return ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index);
|
||||||
|
}
|
||||||
|
return ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index);
|
||||||
|
}
|
||||||
|
|
||||||
Id Word(EmitContext& ctx, Id offset) {
|
Id Word(EmitContext& ctx, Id offset) {
|
||||||
const Id shift_id{ctx.Const(2U)};
|
const Id shift_id{ctx.Const(2U)};
|
||||||
const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
|
const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
|
||||||
const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
|
const Id pointer{WorkgroupWordPointer(ctx, index)};
|
||||||
return ctx.OpLoad(ctx.U32[1], pointer);
|
return ctx.OpLoad(ctx.U32[1], pointer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -28,7 +38,9 @@ std::pair<Id, Id> ExtractArgs(EmitContext& ctx, Id offset, u32 mask, u32 count)
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
Id EmitLoadSharedU8(EmitContext& ctx, Id offset) {
|
Id EmitLoadSharedU8(EmitContext& ctx, Id offset) {
|
||||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
const bool use_explicit = ctx.profile.support_explicit_workgroup_layout &&
|
||||||
|
ctx.profile.support_explicit_workgroup_layout_u8;
|
||||||
|
if (use_explicit) {
|
||||||
const Id pointer{
|
const Id pointer{
|
||||||
ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
|
ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
|
||||||
return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
|
return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
|
||||||
|
|
@ -39,7 +51,9 @@ Id EmitLoadSharedU8(EmitContext& ctx, Id offset) {
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitLoadSharedS8(EmitContext& ctx, Id offset) {
|
Id EmitLoadSharedS8(EmitContext& ctx, Id offset) {
|
||||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
const bool use_explicit = ctx.profile.support_explicit_workgroup_layout &&
|
||||||
|
ctx.profile.support_explicit_workgroup_layout_u8;
|
||||||
|
if (use_explicit) {
|
||||||
const Id pointer{
|
const Id pointer{
|
||||||
ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
|
ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
|
||||||
return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
|
return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
|
||||||
|
|
@ -50,7 +64,9 @@ Id EmitLoadSharedS8(EmitContext& ctx, Id offset) {
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitLoadSharedU16(EmitContext& ctx, Id offset) {
|
Id EmitLoadSharedU16(EmitContext& ctx, Id offset) {
|
||||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
const bool use_explicit = ctx.profile.support_explicit_workgroup_layout &&
|
||||||
|
ctx.profile.support_explicit_workgroup_layout_u16;
|
||||||
|
if (use_explicit) {
|
||||||
const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
|
const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
|
||||||
return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
|
return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -60,7 +76,9 @@ Id EmitLoadSharedU16(EmitContext& ctx, Id offset) {
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitLoadSharedS16(EmitContext& ctx, Id offset) {
|
Id EmitLoadSharedS16(EmitContext& ctx, Id offset) {
|
||||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
const bool use_explicit = ctx.profile.support_explicit_workgroup_layout &&
|
||||||
|
ctx.profile.support_explicit_workgroup_layout_u16;
|
||||||
|
if (use_explicit) {
|
||||||
const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
|
const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
|
||||||
return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
|
return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -86,8 +104,8 @@ Id EmitLoadSharedU64(EmitContext& ctx, Id offset) {
|
||||||
const Id shift_id{ctx.Const(2U)};
|
const Id shift_id{ctx.Const(2U)};
|
||||||
const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
|
const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
|
||||||
const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(1U))};
|
const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(1U))};
|
||||||
const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)};
|
const Id lhs_pointer{WorkgroupWordPointer(ctx, base_index)};
|
||||||
const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)};
|
const Id rhs_pointer{WorkgroupWordPointer(ctx, next_index)};
|
||||||
return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer),
|
return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer),
|
||||||
ctx.OpLoad(ctx.U32[1], rhs_pointer));
|
ctx.OpLoad(ctx.U32[1], rhs_pointer));
|
||||||
}
|
}
|
||||||
|
|
@ -103,14 +121,16 @@ Id EmitLoadSharedU128(EmitContext& ctx, Id offset) {
|
||||||
std::array<Id, 4> values{};
|
std::array<Id, 4> values{};
|
||||||
for (u32 i = 0; i < 4; ++i) {
|
for (u32 i = 0; i < 4; ++i) {
|
||||||
const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(i))};
|
const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(i))};
|
||||||
const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
|
const Id pointer{WorkgroupWordPointer(ctx, index)};
|
||||||
values[i] = ctx.OpLoad(ctx.U32[1], pointer);
|
values[i] = ctx.OpLoad(ctx.U32[1], pointer);
|
||||||
}
|
}
|
||||||
return ctx.OpCompositeConstruct(ctx.U32[4], values);
|
return ctx.OpCompositeConstruct(ctx.U32[4], values);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) {
|
void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) {
|
||||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
const bool use_explicit = ctx.profile.support_explicit_workgroup_layout &&
|
||||||
|
ctx.profile.support_explicit_workgroup_layout_u8;
|
||||||
|
if (use_explicit) {
|
||||||
const Id pointer{
|
const Id pointer{
|
||||||
ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
|
ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
|
||||||
ctx.OpStore(pointer, ctx.OpUConvert(ctx.U8, value));
|
ctx.OpStore(pointer, ctx.OpUConvert(ctx.U8, value));
|
||||||
|
|
@ -120,7 +140,9 @@ void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) {
|
void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) {
|
||||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
const bool use_explicit = ctx.profile.support_explicit_workgroup_layout &&
|
||||||
|
ctx.profile.support_explicit_workgroup_layout_u16;
|
||||||
|
if (use_explicit) {
|
||||||
const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
|
const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
|
||||||
ctx.OpStore(pointer, ctx.OpUConvert(ctx.U16, value));
|
ctx.OpStore(pointer, ctx.OpUConvert(ctx.U16, value));
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -135,7 +157,7 @@ void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) {
|
||||||
} else {
|
} else {
|
||||||
const Id shift{ctx.Const(2U)};
|
const Id shift{ctx.Const(2U)};
|
||||||
const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
|
const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
|
||||||
pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset);
|
pointer = WorkgroupWordPointer(ctx, word_offset);
|
||||||
}
|
}
|
||||||
ctx.OpStore(pointer, value);
|
ctx.OpStore(pointer, value);
|
||||||
}
|
}
|
||||||
|
|
@ -149,8 +171,8 @@ void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) {
|
||||||
const Id shift{ctx.Const(2U)};
|
const Id shift{ctx.Const(2U)};
|
||||||
const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
|
const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
|
||||||
const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.Const(1U))};
|
const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.Const(1U))};
|
||||||
const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)};
|
const Id lhs_pointer{WorkgroupWordPointer(ctx, word_offset)};
|
||||||
const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)};
|
const Id rhs_pointer{WorkgroupWordPointer(ctx, next_offset)};
|
||||||
ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U));
|
ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U));
|
||||||
ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U));
|
ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U));
|
||||||
}
|
}
|
||||||
|
|
@ -165,7 +187,7 @@ void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) {
|
||||||
const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
|
const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
|
||||||
for (u32 i = 0; i < 4; ++i) {
|
for (u32 i = 0; i < 4; ++i) {
|
||||||
const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(i))};
|
const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(i))};
|
||||||
const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
|
const Id pointer{WorkgroupWordPointer(ctx, index)};
|
||||||
ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, i));
|
ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, i));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -635,14 +635,66 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) {
|
||||||
|
|
||||||
return std::make_tuple(variable, element_pointer, pointer);
|
return std::make_tuple(variable, element_pointer, pointer);
|
||||||
}};
|
}};
|
||||||
|
const auto define_bitfield_stores{[&](bool define_u8, bool define_u16) {
|
||||||
|
if (!define_u8 && !define_u16) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const Id func_type{TypeFunction(void_id, U32[1], U32[1])};
|
||||||
|
const auto make_function{[&](u32 mask, u32 size) {
|
||||||
|
const Id loop_header{OpLabel()};
|
||||||
|
const Id continue_block{OpLabel()};
|
||||||
|
const Id merge_block{OpLabel()};
|
||||||
|
|
||||||
|
const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type)};
|
||||||
|
const Id offset{OpFunctionParameter(U32[1])};
|
||||||
|
const Id insert_value{OpFunctionParameter(U32[1])};
|
||||||
|
AddLabel();
|
||||||
|
OpBranch(loop_header);
|
||||||
|
|
||||||
|
AddLabel(loop_header);
|
||||||
|
const Id word_offset{OpShiftRightArithmetic(U32[1], offset, Const(2U))};
|
||||||
|
const Id shift_offset{OpShiftLeftLogical(U32[1], offset, Const(3U))};
|
||||||
|
const Id bit_offset{OpBitwiseAnd(U32[1], shift_offset, Const(mask))};
|
||||||
|
const Id count{Const(size)};
|
||||||
|
OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone);
|
||||||
|
OpBranch(continue_block);
|
||||||
|
|
||||||
|
AddLabel(continue_block);
|
||||||
|
const Id word_pointer{profile.support_explicit_workgroup_layout
|
||||||
|
? OpAccessChain(shared_u32, shared_memory_u32,
|
||||||
|
u32_zero_value, word_offset)
|
||||||
|
: OpAccessChain(shared_u32, shared_memory_u32, word_offset)};
|
||||||
|
const Id old_value{OpLoad(U32[1], word_pointer)};
|
||||||
|
const Id new_value{OpBitFieldInsert(U32[1], old_value, insert_value, bit_offset,
|
||||||
|
count)};
|
||||||
|
const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, Const(1U),
|
||||||
|
u32_zero_value, u32_zero_value, new_value,
|
||||||
|
old_value)};
|
||||||
|
const Id success{OpIEqual(U1, atomic_res, old_value)};
|
||||||
|
OpBranchConditional(success, merge_block, loop_header);
|
||||||
|
|
||||||
|
AddLabel(merge_block);
|
||||||
|
OpReturn();
|
||||||
|
OpFunctionEnd();
|
||||||
|
return func;
|
||||||
|
}};
|
||||||
|
if (define_u8) {
|
||||||
|
shared_store_u8_func = make_function(24, 8);
|
||||||
|
}
|
||||||
|
if (define_u16) {
|
||||||
|
shared_store_u16_func = make_function(16, 16);
|
||||||
|
}
|
||||||
|
}};
|
||||||
|
const bool uses_int8 = program.info.uses_int8;
|
||||||
|
const bool uses_int16 = program.info.uses_int16;
|
||||||
if (profile.support_explicit_workgroup_layout) {
|
if (profile.support_explicit_workgroup_layout) {
|
||||||
AddExtension("SPV_KHR_workgroup_memory_explicit_layout");
|
AddExtension("SPV_KHR_workgroup_memory_explicit_layout");
|
||||||
AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR);
|
AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR);
|
||||||
if (program.info.uses_int8) {
|
if (uses_int8 && profile.support_explicit_workgroup_layout_u8) {
|
||||||
AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR);
|
AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR);
|
||||||
std::tie(shared_memory_u8, shared_u8, std::ignore) = make(U8, 1);
|
std::tie(shared_memory_u8, shared_u8, std::ignore) = make(U8, 1);
|
||||||
}
|
}
|
||||||
if (program.info.uses_int16) {
|
if (uses_int16 && profile.support_explicit_workgroup_layout_u16) {
|
||||||
AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR);
|
AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR);
|
||||||
std::tie(shared_memory_u16, shared_u16, std::ignore) = make(U16, 2);
|
std::tie(shared_memory_u16, shared_u16, std::ignore) = make(U16, 2);
|
||||||
}
|
}
|
||||||
|
|
@ -652,6 +704,9 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) {
|
||||||
std::tie(shared_memory_u32, shared_u32, shared_memory_u32_type) = make(U32[1], 4);
|
std::tie(shared_memory_u32, shared_u32, shared_memory_u32_type) = make(U32[1], 4);
|
||||||
std::tie(shared_memory_u32x2, shared_u32x2, std::ignore) = make(U32[2], 8);
|
std::tie(shared_memory_u32x2, shared_u32x2, std::ignore) = make(U32[2], 8);
|
||||||
std::tie(shared_memory_u32x4, shared_u32x4, std::ignore) = make(U32[4], 16);
|
std::tie(shared_memory_u32x4, shared_u32x4, std::ignore) = make(U32[4], 16);
|
||||||
|
const bool need_u8_fallback = uses_int8 && !profile.support_explicit_workgroup_layout_u8;
|
||||||
|
const bool need_u16_fallback = uses_int16 && !profile.support_explicit_workgroup_layout_u16;
|
||||||
|
define_bitfield_stores(need_u8_fallback, need_u16_fallback);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const u32 num_elements{Common::DivCeil(program.shared_memory_size, 4U)};
|
const u32 num_elements{Common::DivCeil(program.shared_memory_size, 4U)};
|
||||||
|
|
@ -661,47 +716,7 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) {
|
||||||
shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]);
|
shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]);
|
||||||
shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup);
|
shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup);
|
||||||
interfaces.push_back(shared_memory_u32);
|
interfaces.push_back(shared_memory_u32);
|
||||||
|
define_bitfield_stores(uses_int8, uses_int16);
|
||||||
const Id func_type{TypeFunction(void_id, U32[1], U32[1])};
|
|
||||||
const auto make_function{[&](u32 mask, u32 size) {
|
|
||||||
const Id loop_header{OpLabel()};
|
|
||||||
const Id continue_block{OpLabel()};
|
|
||||||
const Id merge_block{OpLabel()};
|
|
||||||
|
|
||||||
const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type)};
|
|
||||||
const Id offset{OpFunctionParameter(U32[1])};
|
|
||||||
const Id insert_value{OpFunctionParameter(U32[1])};
|
|
||||||
AddLabel();
|
|
||||||
OpBranch(loop_header);
|
|
||||||
|
|
||||||
AddLabel(loop_header);
|
|
||||||
const Id word_offset{OpShiftRightArithmetic(U32[1], offset, Const(2U))};
|
|
||||||
const Id shift_offset{OpShiftLeftLogical(U32[1], offset, Const(3U))};
|
|
||||||
const Id bit_offset{OpBitwiseAnd(U32[1], shift_offset, Const(mask))};
|
|
||||||
const Id count{Const(size)};
|
|
||||||
OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone);
|
|
||||||
OpBranch(continue_block);
|
|
||||||
|
|
||||||
AddLabel(continue_block);
|
|
||||||
const Id word_pointer{OpAccessChain(shared_u32, shared_memory_u32, word_offset)};
|
|
||||||
const Id old_value{OpLoad(U32[1], word_pointer)};
|
|
||||||
const Id new_value{OpBitFieldInsert(U32[1], old_value, insert_value, bit_offset, count)};
|
|
||||||
const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, Const(1U), u32_zero_value,
|
|
||||||
u32_zero_value, new_value, old_value)};
|
|
||||||
const Id success{OpIEqual(U1, atomic_res, old_value)};
|
|
||||||
OpBranchConditional(success, merge_block, loop_header);
|
|
||||||
|
|
||||||
AddLabel(merge_block);
|
|
||||||
OpReturn();
|
|
||||||
OpFunctionEnd();
|
|
||||||
return func;
|
|
||||||
}};
|
|
||||||
if (program.info.uses_int8) {
|
|
||||||
shared_store_u8_func = make_function(24, 8);
|
|
||||||
}
|
|
||||||
if (program.info.uses_int16) {
|
|
||||||
shared_store_u16_func = make_function(16, 16);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitContext::DefineSharedMemoryFunctions(const IR::Program& program) {
|
void EmitContext::DefineSharedMemoryFunctions(const IR::Program& program) {
|
||||||
|
|
|
||||||
|
|
@ -32,6 +32,8 @@ struct Profile {
|
||||||
bool support_fp32_signed_zero_nan_preserve{};
|
bool support_fp32_signed_zero_nan_preserve{};
|
||||||
bool support_fp64_signed_zero_nan_preserve{};
|
bool support_fp64_signed_zero_nan_preserve{};
|
||||||
bool support_explicit_workgroup_layout{};
|
bool support_explicit_workgroup_layout{};
|
||||||
|
bool support_explicit_workgroup_layout_u8{};
|
||||||
|
bool support_explicit_workgroup_layout_u16{};
|
||||||
bool support_vote{};
|
bool support_vote{};
|
||||||
bool support_viewport_index_layer_non_geometry{};
|
bool support_viewport_index_layer_non_geometry{};
|
||||||
bool support_viewport_mask{};
|
bool support_viewport_mask{};
|
||||||
|
|
|
||||||
|
|
@ -438,6 +438,10 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
|
||||||
.support_fp64_signed_zero_nan_preserve = float_controls_supported &&
|
.support_fp64_signed_zero_nan_preserve = float_controls_supported &&
|
||||||
float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE,
|
float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE,
|
||||||
.support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(),
|
.support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(),
|
||||||
|
.support_explicit_workgroup_layout_u8 =
|
||||||
|
device.SupportsWorkgroupExplicitLayout8Bit(),
|
||||||
|
.support_explicit_workgroup_layout_u16 =
|
||||||
|
device.SupportsWorkgroupExplicitLayout16Bit(),
|
||||||
.support_vote = device.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_VOTE_BIT),
|
.support_vote = device.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_VOTE_BIT),
|
||||||
.support_viewport_index_layer_non_geometry =
|
.support_viewport_index_layer_non_geometry =
|
||||||
device.IsExtShaderViewportIndexLayerSupported(),
|
device.IsExtShaderViewportIndexLayerSupported(),
|
||||||
|
|
|
||||||
|
|
@ -1449,12 +1449,22 @@ void Device::RemoveUnsuitableExtensions() {
|
||||||
}
|
}
|
||||||
|
|
||||||
// VK_KHR_workgroup_memory_explicit_layout
|
// VK_KHR_workgroup_memory_explicit_layout
|
||||||
extensions.workgroup_memory_explicit_layout =
|
workgroup_memory_explicit_layout_caps.supports_8bit =
|
||||||
features.features.shaderInt16 &&
|
features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout8BitAccess;
|
||||||
|
workgroup_memory_explicit_layout_caps.supports_16bit =
|
||||||
|
features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout16BitAccess;
|
||||||
|
const bool has_workgroup_base =
|
||||||
features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout &&
|
features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout &&
|
||||||
features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout8BitAccess &&
|
|
||||||
features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout16BitAccess &&
|
|
||||||
features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayoutScalarBlockLayout;
|
features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayoutScalarBlockLayout;
|
||||||
|
if (is_qualcomm) {
|
||||||
|
extensions.workgroup_memory_explicit_layout =
|
||||||
|
features.features.shaderInt16 && has_workgroup_base;
|
||||||
|
} else {
|
||||||
|
extensions.workgroup_memory_explicit_layout =
|
||||||
|
features.features.shaderInt16 && has_workgroup_base &&
|
||||||
|
workgroup_memory_explicit_layout_caps.supports_8bit &&
|
||||||
|
workgroup_memory_explicit_layout_caps.supports_16bit;
|
||||||
|
}
|
||||||
RemoveExtensionFeatureIfUnsuitable(extensions.workgroup_memory_explicit_layout,
|
RemoveExtensionFeatureIfUnsuitable(extensions.workgroup_memory_explicit_layout,
|
||||||
features.workgroup_memory_explicit_layout,
|
features.workgroup_memory_explicit_layout,
|
||||||
VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
|
VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
|
||||||
|
|
|
||||||
|
|
@ -499,6 +499,16 @@ public:
|
||||||
return extensions.workgroup_memory_explicit_layout;
|
return extensions.workgroup_memory_explicit_layout;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true if explicit workgroup layout supports 8-bit access.
|
||||||
|
bool SupportsWorkgroupExplicitLayout8Bit() const {
|
||||||
|
return workgroup_memory_explicit_layout_caps.supports_8bit;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true if explicit workgroup layout supports 16-bit access.
|
||||||
|
bool SupportsWorkgroupExplicitLayout16Bit() const {
|
||||||
|
return workgroup_memory_explicit_layout_caps.supports_16bit;
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns true if the device supports VK_KHR_image_format_list.
|
/// Returns true if the device supports VK_KHR_image_format_list.
|
||||||
bool IsKhrImageFormatListSupported() const {
|
bool IsKhrImageFormatListSupported() const {
|
||||||
return extensions.image_format_list || instance_version >= VK_API_VERSION_1_2;
|
return extensions.image_format_list || instance_version >= VK_API_VERSION_1_2;
|
||||||
|
|
@ -1062,6 +1072,13 @@ private:
|
||||||
Features features{};
|
Features features{};
|
||||||
Properties properties{};
|
Properties properties{};
|
||||||
|
|
||||||
|
struct WorkgroupExplicitLayoutCaps {
|
||||||
|
bool supports_8bit{};
|
||||||
|
bool supports_16bit{};
|
||||||
|
};
|
||||||
|
|
||||||
|
WorkgroupExplicitLayoutCaps workgroup_memory_explicit_layout_caps{};
|
||||||
|
|
||||||
VkPhysicalDeviceFeatures2 features2{};
|
VkPhysicalDeviceFeatures2 features2{};
|
||||||
VkPhysicalDeviceProperties2 properties2{};
|
VkPhysicalDeviceProperties2 properties2{};
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue