fix with bigger batch sizes

This commit is contained in:
lizzie 2026-03-17 23:28:24 +00:00 committed by crueter
parent 585b3dcde1
commit 2f0192665e
No known key found for this signature in database
GPG key ID: 425ACD2D4830EBC6
20 changed files with 164 additions and 152 deletions

View file

@ -77,7 +77,7 @@ CPUCaps::Manufacturer CPUCaps::ParseManufacturer(std::string_view brand_string)
} }
// Detects the various CPU features // Detects the various CPU features
static CPUCaps Detect() { static CPUCaps DetectCPUCapabilities() {
CPUCaps caps = {}; CPUCaps caps = {};
// Assumes the CPU supports the CPUID instruction. Those that don't would likely not support // Assumes the CPU supports the CPUID instruction. Those that don't would likely not support
@ -208,7 +208,7 @@ static CPUCaps Detect() {
} }
const CPUCaps& GetCPUCaps() { const CPUCaps& GetCPUCaps() {
static CPUCaps caps = Detect(); static CPUCaps caps = DetectCPUCapabilities();
return caps; return caps;
} }

View file

@ -31,7 +31,7 @@ using namespace oaknut::util;
namespace { namespace {
bool IsOrdered(IR::AccType acctype) { [[nodiscard]] inline bool IsOrdered(IR::AccType acctype) {
return acctype == IR::AccType::ORDERED || acctype == IR::AccType::ORDEREDRW || acctype == IR::AccType::LIMITEDORDERED; return acctype == IR::AccType::ORDERED || acctype == IR::AccType::ORDEREDRW || acctype == IR::AccType::LIMITEDORDERED;
} }

View file

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
@ -45,11 +48,6 @@ void GetCbuf(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU
} }
} }
bool IsInputArray(Stage stage) {
return stage == Stage::Geometry || stage == Stage::TessellationControl ||
stage == Stage::TessellationEval;
}
std::string VertexIndex(EmitContext& ctx, ScalarU32 vertex) { std::string VertexIndex(EmitContext& ctx, ScalarU32 vertex) {
return IsInputArray(ctx.stage) ? fmt::format("[{}]", vertex) : ""; return IsInputArray(ctx.stage) ? fmt::format("[{}]", vertex) : "";
} }

View file

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project // SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later // SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
@ -18,6 +18,11 @@ class Value;
namespace Shader::Backend::GLASM { namespace Shader::Backend::GLASM {
[[nodiscard]] inline bool IsInputArray(Stage stage) {
return stage == Stage::Geometry || stage == Stage::TessellationControl
|| stage == Stage::TessellationEval;
}
class EmitContext; class EmitContext;
// Microinstruction emitters // Microinstruction emitters

View file

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
@ -7,94 +10,92 @@
namespace Shader::Backend::GLASM { namespace Shader::Backend::GLASM {
#define NotImplemented() throw NotImplementedException("GLASM instruction {}", __LINE__)
void EmitGetRegister(EmitContext& ctx) { void EmitGetRegister(EmitContext& ctx) {
NotImplemented(); throw NotImplementedException("GLASM instruction {}", __LINE__);
} }
void EmitSetRegister(EmitContext& ctx) { void EmitSetRegister(EmitContext& ctx) {
NotImplemented(); throw NotImplementedException("GLASM instruction {}", __LINE__);
} }
void EmitGetPred(EmitContext& ctx) { void EmitGetPred(EmitContext& ctx) {
NotImplemented(); throw NotImplementedException("GLASM instruction {}", __LINE__);
} }
void EmitSetPred(EmitContext& ctx) { void EmitSetPred(EmitContext& ctx) {
NotImplemented(); throw NotImplementedException("GLASM instruction {}", __LINE__);
} }
void EmitSetGotoVariable(EmitContext& ctx) { void EmitSetGotoVariable(EmitContext& ctx) {
NotImplemented(); throw NotImplementedException("GLASM instruction {}", __LINE__);
} }
void EmitGetGotoVariable(EmitContext& ctx) { void EmitGetGotoVariable(EmitContext& ctx) {
NotImplemented(); throw NotImplementedException("GLASM instruction {}", __LINE__);
} }
void EmitSetIndirectBranchVariable(EmitContext& ctx) { void EmitSetIndirectBranchVariable(EmitContext& ctx) {
NotImplemented(); throw NotImplementedException("GLASM instruction {}", __LINE__);
} }
void EmitGetIndirectBranchVariable(EmitContext& ctx) { void EmitGetIndirectBranchVariable(EmitContext& ctx) {
NotImplemented(); throw NotImplementedException("GLASM instruction {}", __LINE__);
} }
void EmitGetZFlag(EmitContext& ctx) { void EmitGetZFlag(EmitContext& ctx) {
NotImplemented(); throw NotImplementedException("GLASM instruction {}", __LINE__);
} }
void EmitGetSFlag(EmitContext& ctx) { void EmitGetSFlag(EmitContext& ctx) {
NotImplemented(); throw NotImplementedException("GLASM instruction {}", __LINE__);
} }
void EmitGetCFlag(EmitContext& ctx) { void EmitGetCFlag(EmitContext& ctx) {
NotImplemented(); throw NotImplementedException("GLASM instruction {}", __LINE__);
} }
void EmitGetOFlag(EmitContext& ctx) { void EmitGetOFlag(EmitContext& ctx) {
NotImplemented(); throw NotImplementedException("GLASM instruction {}", __LINE__);
} }
void EmitSetZFlag(EmitContext& ctx) { void EmitSetZFlag(EmitContext& ctx) {
NotImplemented(); throw NotImplementedException("GLASM instruction {}", __LINE__);
} }
void EmitSetSFlag(EmitContext& ctx) { void EmitSetSFlag(EmitContext& ctx) {
NotImplemented(); throw NotImplementedException("GLASM instruction {}", __LINE__);
} }
void EmitSetCFlag(EmitContext& ctx) { void EmitSetCFlag(EmitContext& ctx) {
NotImplemented(); throw NotImplementedException("GLASM instruction {}", __LINE__);
} }
void EmitSetOFlag(EmitContext& ctx) { void EmitSetOFlag(EmitContext& ctx) {
NotImplemented(); throw NotImplementedException("GLASM instruction {}", __LINE__);
} }
void EmitGetZeroFromOp(EmitContext& ctx) { void EmitGetZeroFromOp(EmitContext& ctx) {
NotImplemented(); throw NotImplementedException("GLASM instruction {}", __LINE__);
} }
void EmitGetSignFromOp(EmitContext& ctx) { void EmitGetSignFromOp(EmitContext& ctx) {
NotImplemented(); throw NotImplementedException("GLASM instruction {}", __LINE__);
} }
void EmitGetCarryFromOp(EmitContext& ctx) { void EmitGetCarryFromOp(EmitContext& ctx) {
NotImplemented(); throw NotImplementedException("GLASM instruction {}", __LINE__);
} }
void EmitGetOverflowFromOp(EmitContext& ctx) { void EmitGetOverflowFromOp(EmitContext& ctx) {
NotImplemented(); throw NotImplementedException("GLASM instruction {}", __LINE__);
} }
void EmitGetSparseFromOp(EmitContext& ctx) { void EmitGetSparseFromOp(EmitContext& ctx) {
NotImplemented(); throw NotImplementedException("GLASM instruction {}", __LINE__);
} }
void EmitGetInBoundsFromOp(EmitContext& ctx) { void EmitGetInBoundsFromOp(EmitContext& ctx) {
NotImplemented(); throw NotImplementedException("GLASM instruction {}", __LINE__);
} }
} // namespace Shader::Backend::GLASM } // namespace Shader::Backend::GLASM

View file

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
@ -21,11 +24,6 @@ std::string_view InterpDecorator(Interpolation interp) {
} }
throw InvalidArgument("Invalid interpolation {}", interp); throw InvalidArgument("Invalid interpolation {}", interp);
} }
bool IsInputArray(Stage stage) {
return stage == Stage::Geometry || stage == Stage::TessellationControl ||
stage == Stage::TessellationEval;
}
} // Anonymous namespace } // Anonymous namespace
EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_,

View file

@ -32,10 +32,6 @@ std::string InputVertexIndex(EmitContext& ctx, std::string_view vertex) {
return IsInputArray(ctx.stage) ? fmt::format("[{}]", vertex) : ""; return IsInputArray(ctx.stage) ? fmt::format("[{}]", vertex) : "";
} }
std::string_view OutputVertexIndex(EmitContext& ctx) {
return ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : "";
}
std::string ChooseCbuf(EmitContext& ctx, const IR::Value& binding, std::string_view index) { std::string ChooseCbuf(EmitContext& ctx, const IR::Value& binding, std::string_view index) {
if (binding.IsImmediate()) { if (binding.IsImmediate()) {
return fmt::format("{}_cbuf{}[{}]", ctx.stage_name, binding.U32(), index); return fmt::format("{}_cbuf{}[{}]", ctx.stage_name, binding.U32(), index);
@ -281,7 +277,7 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val
const u32 index{IR::GenericAttributeIndex(attr)}; const u32 index{IR::GenericAttributeIndex(attr)};
const u32 attr_element{IR::GenericAttributeElement(attr)}; const u32 attr_element{IR::GenericAttributeElement(attr)};
const GenericElementInfo& info{ctx.output_generics.at(index).at(attr_element)}; const GenericElementInfo& info{ctx.output_generics.at(index).at(attr_element)};
const auto output_decorator{OutputVertexIndex(ctx)}; const auto output_decorator = ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : "";
if (info.num_components == 1) { if (info.num_components == 1) {
ctx.Add("{}{}={};", info.name, output_decorator, value); ctx.Add("{}{}={};", info.name, output_decorator, value);
} else { } else {

View file

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project // SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later // SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project

View file

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
@ -9,9 +12,6 @@
namespace Shader::Backend::GLSL { namespace Shader::Backend::GLSL {
namespace { namespace {
std::string_view OutputVertexIndex(EmitContext& ctx) {
return ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : "";
}
void InitializeOutputVaryings(EmitContext& ctx) { void InitializeOutputVaryings(EmitContext& ctx) {
if (ctx.uses_geometry_passthrough) { if (ctx.uses_geometry_passthrough) {
@ -25,7 +25,7 @@ void InitializeOutputVaryings(EmitContext& ctx) {
continue; continue;
} }
const auto& info_array{ctx.output_generics.at(index)}; const auto& info_array{ctx.output_generics.at(index)};
const auto output_decorator{OutputVertexIndex(ctx)}; const auto output_decorator = ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : "";
size_t element{}; size_t element{};
while (element < info_array.size()) { while (element < info_array.size()) {
const auto& info{info_array.at(element)}; const auto& info{info_array.at(element)};

View file

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
@ -17,14 +20,13 @@ Id Image(EmitContext& ctx, IR::TextureInstInfo info) {
} }
} }
std::pair<Id, Id> AtomicArgs(EmitContext& ctx) { std::pair<Id, Id> AtomicImageArgs(EmitContext& ctx) {
const Id scope{ctx.Const(static_cast<u32>(spv::Scope::Device))}; const Id scope{ctx.Const(static_cast<u32>(spv::Scope::Device))};
const Id semantics{ctx.u32_zero_value}; const Id semantics{ctx.u32_zero_value};
return {scope, semantics}; return {scope, semantics};
} }
Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id value, Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id value, Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
if (!index.IsImmediate() || index.U32() != 0) { if (!index.IsImmediate() || index.U32() != 0) {
// TODO: handle layers // TODO: handle layers
throw NotImplementedException("Image indexing"); throw NotImplementedException("Image indexing");
@ -32,7 +34,7 @@ Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id c
const auto info{inst->Flags<IR::TextureInstInfo>()}; const auto info{inst->Flags<IR::TextureInstInfo>()};
const Id image{Image(ctx, info)}; const Id image{Image(ctx, info)};
const Id pointer{ctx.OpImageTexelPointer(ctx.image_u32, image, coords, ctx.Const(0U))}; const Id pointer{ctx.OpImageTexelPointer(ctx.image_u32, image, coords, ctx.Const(0U))};
const auto [scope, semantics]{AtomicArgs(ctx)}; const auto [scope, semantics] = AtomicImageArgs(ctx);
return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value); return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
} }
} // Anonymous namespace } // Anonymous namespace

View file

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
@ -6,7 +9,7 @@
namespace Shader::Maxwell { namespace Shader::Maxwell {
namespace { namespace {
enum class FloatFormat : u64 { enum class FloatConversionFormat : u64 {
F16 = 1, F16 = 1,
F32 = 2, F32 = 2,
F64 = 3, F64 = 3,
@ -21,13 +24,13 @@ enum class RoundingOp : u64 {
Trunc = 11, Trunc = 11,
}; };
[[nodiscard]] u32 WidthSize(FloatFormat width) { [[nodiscard]] u32 WidthSize(FloatConversionFormat width) {
switch (width) { switch (width) {
case FloatFormat::F16: case FloatConversionFormat::F16:
return 16; return 16;
case FloatFormat::F32: case FloatConversionFormat::F32:
return 32; return 32;
case FloatFormat::F64: case FloatConversionFormat::F64:
return 64; return 64;
default: default:
throw NotImplementedException("Invalid width {}", width); throw NotImplementedException("Invalid width {}", width);
@ -44,8 +47,8 @@ void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) {
BitField<50, 1, u64> sat; BitField<50, 1, u64> sat;
BitField<39, 4, u64> rounding_op; BitField<39, 4, u64> rounding_op;
BitField<39, 2, FpRounding> rounding; BitField<39, 2, FpRounding> rounding;
BitField<10, 2, FloatFormat> src_size; BitField<10, 2, FloatConversionFormat> src_size;
BitField<8, 2, FloatFormat> dst_size; BitField<8, 2, FloatConversionFormat> dst_size;
[[nodiscard]] RoundingOp RoundingOperation() const { [[nodiscard]] RoundingOp RoundingOperation() const {
constexpr u64 rounding_mask = 0x0B; constexpr u64 rounding_mask = 0x0B;
@ -59,7 +62,7 @@ void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) {
IR::F16F32F64 input{v.ir.FPAbsNeg(src_a, abs, f2f.neg != 0)}; IR::F16F32F64 input{v.ir.FPAbsNeg(src_a, abs, f2f.neg != 0)};
const bool any_fp64{f2f.src_size == FloatFormat::F64 || f2f.dst_size == FloatFormat::F64}; const bool any_fp64{f2f.src_size == FloatConversionFormat::F64 || f2f.dst_size == FloatConversionFormat::F64};
IR::FpControl fp_control{ IR::FpControl fp_control{
.no_contraction = false, .no_contraction = false,
.rounding = IR::FpRounding::DontCare, .rounding = IR::FpRounding::DontCare,
@ -74,13 +77,13 @@ void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) {
case RoundingOp::Pass: case RoundingOp::Pass:
// Make sure NANs are handled properly // Make sure NANs are handled properly
switch (f2f.src_size) { switch (f2f.src_size) {
case FloatFormat::F16: case FloatConversionFormat::F16:
input = v.ir.FPAdd(input, v.ir.FPConvert(16, v.ir.Imm32(0.0f)), fp_control); input = v.ir.FPAdd(input, v.ir.FPConvert(16, v.ir.Imm32(0.0f)), fp_control);
break; break;
case FloatFormat::F32: case FloatConversionFormat::F32:
input = v.ir.FPAdd(input, v.ir.Imm32(0.0f), fp_control); input = v.ir.FPAdd(input, v.ir.Imm32(0.0f), fp_control);
break; break;
case FloatFormat::F64: case FloatConversionFormat::F64:
input = v.ir.FPAdd(input, v.ir.Imm64(0.0), fp_control); input = v.ir.FPAdd(input, v.ir.Imm64(0.0), fp_control);
break; break;
} }
@ -106,15 +109,15 @@ void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) {
} }
switch (f2f.dst_size) { switch (f2f.dst_size) {
case FloatFormat::F16: { case FloatConversionFormat::F16: {
const IR::F16 imm{v.ir.FPConvert(16, v.ir.Imm32(0.0f))}; const IR::F16 imm{v.ir.FPConvert(16, v.ir.Imm32(0.0f))};
v.X(f2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(input, imm))); v.X(f2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(input, imm)));
break; break;
} }
case FloatFormat::F32: case FloatConversionFormat::F32:
v.F(f2f.dest_reg, input); v.F(f2f.dest_reg, input);
break; break;
case FloatFormat::F64: case FloatConversionFormat::F64:
v.D(f2f.dest_reg, input); v.D(f2f.dest_reg, input);
break; break;
default: default:
@ -127,21 +130,21 @@ void TranslatorVisitor::F2F_reg(u64 insn) {
union { union {
u64 insn; u64 insn;
BitField<49, 1, u64> abs; BitField<49, 1, u64> abs;
BitField<10, 2, FloatFormat> src_size; BitField<10, 2, FloatConversionFormat> src_size;
BitField<41, 1, u64> selector; BitField<41, 1, u64> selector;
} const f2f{insn}; } const f2f{insn};
IR::F16F32F64 src_a; IR::F16F32F64 src_a;
switch (f2f.src_size) { switch (f2f.src_size) {
case FloatFormat::F16: { case FloatConversionFormat::F16: {
auto [lhs_a, rhs_a]{Extract(ir, GetReg20(insn), Swizzle::H1_H0)}; auto [lhs_a, rhs_a]{Extract(ir, GetReg20(insn), Swizzle::H1_H0)};
src_a = f2f.selector != 0 ? rhs_a : lhs_a; src_a = f2f.selector != 0 ? rhs_a : lhs_a;
break; break;
} }
case FloatFormat::F32: case FloatConversionFormat::F32:
src_a = GetFloatReg20(insn); src_a = GetFloatReg20(insn);
break; break;
case FloatFormat::F64: case FloatConversionFormat::F64:
src_a = GetDoubleReg20(insn); src_a = GetDoubleReg20(insn);
break; break;
default: default:
@ -154,21 +157,21 @@ void TranslatorVisitor::F2F_cbuf(u64 insn) {
union { union {
u64 insn; u64 insn;
BitField<49, 1, u64> abs; BitField<49, 1, u64> abs;
BitField<10, 2, FloatFormat> src_size; BitField<10, 2, FloatConversionFormat> src_size;
BitField<41, 1, u64> selector; BitField<41, 1, u64> selector;
} const f2f{insn}; } const f2f{insn};
IR::F16F32F64 src_a; IR::F16F32F64 src_a;
switch (f2f.src_size) { switch (f2f.src_size) {
case FloatFormat::F16: { case FloatConversionFormat::F16: {
auto [lhs_a, rhs_a]{Extract(ir, GetCbuf(insn), Swizzle::H1_H0)}; auto [lhs_a, rhs_a]{Extract(ir, GetCbuf(insn), Swizzle::H1_H0)};
src_a = f2f.selector != 0 ? rhs_a : lhs_a; src_a = f2f.selector != 0 ? rhs_a : lhs_a;
break; break;
} }
case FloatFormat::F32: case FloatConversionFormat::F32:
src_a = GetFloatCbuf(insn); src_a = GetFloatCbuf(insn);
break; break;
case FloatFormat::F64: case FloatConversionFormat::F64:
src_a = GetDoubleCbuf(insn); src_a = GetDoubleCbuf(insn);
break; break;
default: default:
@ -181,7 +184,7 @@ void TranslatorVisitor::F2F_imm([[maybe_unused]] u64 insn) {
union { union {
u64 insn; u64 insn;
BitField<49, 1, u64> abs; BitField<49, 1, u64> abs;
BitField<10, 2, FloatFormat> src_size; BitField<10, 2, FloatConversionFormat> src_size;
BitField<41, 1, u64> selector; BitField<41, 1, u64> selector;
BitField<20, 19, u64> imm; BitField<20, 19, u64> imm;
BitField<56, 1, u64> imm_neg; BitField<56, 1, u64> imm_neg;
@ -189,7 +192,7 @@ void TranslatorVisitor::F2F_imm([[maybe_unused]] u64 insn) {
IR::F16F32F64 src_a; IR::F16F32F64 src_a;
switch (f2f.src_size) { switch (f2f.src_size) {
case FloatFormat::F16: { case FloatConversionFormat::F16: {
const u32 imm{static_cast<u32>(f2f.imm & 0x0000ffff)}; const u32 imm{static_cast<u32>(f2f.imm & 0x0000ffff)};
const IR::Value vector{ir.UnpackFloat2x16(ir.Imm32(imm | (imm << 16)))}; const IR::Value vector{ir.UnpackFloat2x16(ir.Imm32(imm | (imm << 16)))};
src_a = IR::F16{ir.CompositeExtract(vector, f2f.selector != 0 ? 0 : 1)}; src_a = IR::F16{ir.CompositeExtract(vector, f2f.selector != 0 ? 0 : 1)};
@ -198,10 +201,10 @@ void TranslatorVisitor::F2F_imm([[maybe_unused]] u64 insn) {
} }
break; break;
} }
case FloatFormat::F32: case FloatConversionFormat::F32:
src_a = GetFloatImm20(insn); src_a = GetFloatImm20(insn);
break; break;
case FloatFormat::F64: case FloatConversionFormat::F64:
src_a = GetDoubleImm20(insn); src_a = GetDoubleImm20(insn);
break; break;
default: default:

View file

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
@ -7,48 +10,48 @@
namespace Shader::Maxwell { namespace Shader::Maxwell {
namespace { namespace {
enum class Shift : u64 { enum class IADD3Shift : u64 {
None, None,
Right, Right,
Left, Left,
}; };
enum class Half : u64 { enum class IADD3Half : u64 {
All, All,
Lower, Lower,
Upper, Upper,
}; };
[[nodiscard]] IR::U32 IntegerHalf(IR::IREmitter& ir, const IR::U32& value, Half half) { [[nodiscard]] IR::U32 IntegerHalf(IR::IREmitter& ir, const IR::U32& value, IADD3Half half) {
constexpr bool is_signed{false}; constexpr bool is_signed{false};
switch (half) { switch (half) {
case Half::All: case IADD3Half::All:
return value; return value;
case Half::Lower: case IADD3Half::Lower:
return ir.BitFieldExtract(value, ir.Imm32(0), ir.Imm32(16), is_signed); return ir.BitFieldExtract(value, ir.Imm32(0), ir.Imm32(16), is_signed);
case Half::Upper: case IADD3Half::Upper:
return ir.BitFieldExtract(value, ir.Imm32(16), ir.Imm32(16), is_signed); return ir.BitFieldExtract(value, ir.Imm32(16), ir.Imm32(16), is_signed);
} }
throw NotImplementedException("Invalid half"); throw NotImplementedException("Invalid half");
} }
[[nodiscard]] IR::U32 IntegerShift(IR::IREmitter& ir, const IR::U32& value, Shift shift) { [[nodiscard]] IR::U32 IntegerShift(IR::IREmitter& ir, const IR::U32& value, IADD3Shift shift) {
switch (shift) { switch (shift) {
case Shift::None: case IADD3Shift::None:
return value; return value;
case Shift::Right: { case IADD3Shift::Right: {
// 33-bit RS IADD3 edge case // 33-bit RS IADD3 edge case
const IR::U1 edge_case{ir.GetCarryFromOp(value)}; const IR::U1 edge_case{ir.GetCarryFromOp(value)};
const IR::U32 shifted{ir.ShiftRightLogical(value, ir.Imm32(16))}; const IR::U32 shifted{ir.ShiftRightLogical(value, ir.Imm32(16))};
return IR::U32{ir.Select(edge_case, ir.IAdd(shifted, ir.Imm32(0x10000)), shifted)}; return IR::U32{ir.Select(edge_case, ir.IAdd(shifted, ir.Imm32(0x10000)), shifted)};
} }
case Shift::Left: case IADD3Shift::Left:
return ir.ShiftLeftLogical(value, ir.Imm32(16)); return ir.ShiftLeftLogical(value, ir.Imm32(16));
} }
throw NotImplementedException("Invalid shift"); throw NotImplementedException("Invalid shift");
} }
void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 op_c, void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 op_c,
Shift shift = Shift::None) { IADD3Shift shift = IADD3Shift::None) {
union { union {
u64 insn; u64 insn;
BitField<0, 8, IR::Reg> dest_reg; BitField<0, 8, IR::Reg> dest_reg;
@ -71,7 +74,7 @@ void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 o
IR::U32 lhs_1{v.ir.IAdd(op_a, op_b)}; IR::U32 lhs_1{v.ir.IAdd(op_a, op_b)};
if (iadd3.x != 0) { if (iadd3.x != 0) {
// TODO: How does RS behave when X is set? // TODO: How does RS behave when X is set?
if (shift == Shift::Right) { if (shift == IADD3Shift::Right) {
throw NotImplementedException("IADD3 X+RS"); throw NotImplementedException("IADD3 X+RS");
} }
const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))}; const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))};
@ -98,10 +101,10 @@ void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 o
void TranslatorVisitor::IADD3_reg(u64 insn) { void TranslatorVisitor::IADD3_reg(u64 insn) {
union { union {
u64 insn; u64 insn;
BitField<37, 2, Shift> shift; BitField<37, 2, IADD3Shift> shift;
BitField<35, 2, Half> half_a; BitField<35, 2, IADD3Half> half_a;
BitField<33, 2, Half> half_b; BitField<33, 2, IADD3Half> half_b;
BitField<31, 2, Half> half_c; BitField<31, 2, IADD3Half> half_c;
} const iadd3{insn}; } const iadd3{insn};
const auto op_a{IntegerHalf(ir, GetReg8(insn), iadd3.half_a)}; const auto op_a{IntegerHalf(ir, GetReg8(insn), iadd3.half_a)};

View file

@ -11,7 +11,7 @@
namespace Shader::Maxwell { namespace Shader::Maxwell {
namespace { namespace {
enum class FloatFormat : u64 { enum class IntegerToFloatFormat : u64 {
F16 = 1, F16 = 1,
F32 = 2, F32 = 2,
F64 = 3, F64 = 3,
@ -27,7 +27,7 @@ enum class IntFormat : u64 {
union EncodingIFPC { union EncodingIFPC {
u64 raw; u64 raw;
BitField<0, 8, IR::Reg> dest_reg; BitField<0, 8, IR::Reg> dest_reg;
BitField<8, 2, FloatFormat> float_format; BitField<8, 2, IntegerToFloatFormat> float_format;
BitField<10, 2, IntFormat> int_format; BitField<10, 2, IntFormat> int_format;
BitField<13, 1, u64> is_signed; BitField<13, 1, u64> is_signed;
BitField<39, 2, FpRounding> fp_rounding; BitField<39, 2, FpRounding> fp_rounding;
@ -41,13 +41,13 @@ bool Is64(u64 insn) {
return EncodingIFPC{insn}.int_format == IntFormat::U64; return EncodingIFPC{insn}.int_format == IntFormat::U64;
} }
int BitSize(FloatFormat format) { int BitSize(IntegerToFloatFormat format) {
switch (format) { switch (format) {
case FloatFormat::F16: case IntegerToFloatFormat::F16:
return 16; return 16;
case FloatFormat::F32: case IntegerToFloatFormat::F32:
return 32; return 32;
case FloatFormat::F64: case IntegerToFloatFormat::F64:
return 64; return 64;
} }
throw NotImplementedException("Invalid float format {}", format); throw NotImplementedException("Invalid float format {}", format);
@ -119,15 +119,15 @@ void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) {
} }
} }
switch (i2f.float_format) { switch (i2f.float_format) {
case FloatFormat::F16: { case IntegerToFloatFormat::F16: {
const IR::F16 zero{v.ir.FPConvert(16, v.ir.Imm32(0.0f))}; const IR::F16 zero{v.ir.FPConvert(16, v.ir.Imm32(0.0f))};
v.X(i2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(value, zero))); v.X(i2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(value, zero)));
break; break;
} }
case FloatFormat::F32: case IntegerToFloatFormat::F32:
v.F(i2f.dest_reg, value); v.F(i2f.dest_reg, value);
break; break;
case FloatFormat::F64: { case IntegerToFloatFormat::F64: {
if (!IR::IsAligned(i2f.dest_reg, 2)) { if (!IR::IsAligned(i2f.dest_reg, 2)) {
throw NotImplementedException("Unaligned destination {}", i2f.dest_reg.Value()); throw NotImplementedException("Unaligned destination {}", i2f.dest_reg.Value());
} }

View file

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
@ -15,18 +18,18 @@ enum class SelectMode : u64 {
CBCC, CBCC,
}; };
enum class Half : u64 { enum class IMADHalf : u64 {
H0, // Least-significant bits (15:0) H0, // Least-significant bits (15:0)
H1, // Most-significant bits (31:16) H1, // Most-significant bits (31:16)
}; };
IR::U32 ExtractHalf(TranslatorVisitor& v, const IR::U32& src, Half half, bool is_signed) { IR::U32 ExtractHalf(TranslatorVisitor& v, const IR::U32& src, IMADHalf half, bool is_signed) {
const IR::U32 offset{v.ir.Imm32(half == Half::H1 ? 16 : 0)}; const IR::U32 offset{v.ir.Imm32(half == IMADHalf::H1 ? 16 : 0)};
return v.ir.BitFieldExtract(src, offset, v.ir.Imm32(16), is_signed); return v.ir.BitFieldExtract(src, offset, v.ir.Imm32(16), is_signed);
} }
void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c, void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c,
SelectMode select_mode, Half half_b, bool psl, bool mrg, bool x) { SelectMode select_mode, IMADHalf half_b, bool psl, bool mrg, bool x) {
union { union {
u64 raw; u64 raw;
BitField<0, 8, IR::Reg> dest_reg; BitField<0, 8, IR::Reg> dest_reg;
@ -34,7 +37,7 @@ void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& s
BitField<47, 1, u64> cc; BitField<47, 1, u64> cc;
BitField<48, 1, u64> is_a_signed; BitField<48, 1, u64> is_a_signed;
BitField<49, 1, u64> is_b_signed; BitField<49, 1, u64> is_b_signed;
BitField<53, 1, Half> half_a; BitField<53, 1, IMADHalf> half_a;
} const xmad{insn}; } const xmad{insn};
if (x) { if (x) {
@ -53,9 +56,9 @@ void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& s
case SelectMode::Default: case SelectMode::Default:
return src_c; return src_c;
case SelectMode::CLO: case SelectMode::CLO:
return ExtractHalf(v, src_c, Half::H0, false); return ExtractHalf(v, src_c, IMADHalf::H0, false);
case SelectMode::CHI: case SelectMode::CHI:
return ExtractHalf(v, src_c, Half::H1, false); return ExtractHalf(v, src_c, IMADHalf::H1, false);
case SelectMode::CBCC: case SelectMode::CBCC:
return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_c); return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_c);
case SelectMode::CSFU: case SelectMode::CSFU:
@ -66,7 +69,7 @@ void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& s
IR::U32 result{v.ir.IAdd(product, op_c)}; IR::U32 result{v.ir.IAdd(product, op_c)};
if (mrg) { if (mrg) {
// .MRG inserts src_b [15:0] into result's [31:16]. // .MRG inserts src_b [15:0] into result's [31:16].
const IR::U32 lsb_b{ExtractHalf(v, src_b, Half::H0, false)}; const IR::U32 lsb_b{ExtractHalf(v, src_b, IMADHalf::H0, false)};
result = v.ir.BitFieldInsert(result, lsb_b, v.ir.Imm32(16), v.ir.Imm32(16)); result = v.ir.BitFieldInsert(result, lsb_b, v.ir.Imm32(16), v.ir.Imm32(16));
} }
if (xmad.cc) { if (xmad.cc) {
@ -80,7 +83,7 @@ void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& s
void TranslatorVisitor::XMAD_reg(u64 insn) { void TranslatorVisitor::XMAD_reg(u64 insn) {
union { union {
u64 raw; u64 raw;
BitField<35, 1, Half> half_b; BitField<35, 1, IMADHalf> half_b;
BitField<36, 1, u64> psl; BitField<36, 1, u64> psl;
BitField<37, 1, u64> mrg; BitField<37, 1, u64> mrg;
BitField<38, 1, u64> x; BitField<38, 1, u64> x;
@ -95,7 +98,7 @@ void TranslatorVisitor::XMAD_rc(u64 insn) {
union { union {
u64 raw; u64 raw;
BitField<50, 2, SelectMode> select_mode; BitField<50, 2, SelectMode> select_mode;
BitField<52, 1, Half> half_b; BitField<52, 1, IMADHalf> half_b;
BitField<54, 1, u64> x; BitField<54, 1, u64> x;
} const xmad{insn}; } const xmad{insn};
@ -107,7 +110,7 @@ void TranslatorVisitor::XMAD_cr(u64 insn) {
union { union {
u64 raw; u64 raw;
BitField<50, 2, SelectMode> select_mode; BitField<50, 2, SelectMode> select_mode;
BitField<52, 1, Half> half_b; BitField<52, 1, IMADHalf> half_b;
BitField<54, 1, u64> x; BitField<54, 1, u64> x;
BitField<55, 1, u64> psl; BitField<55, 1, u64> psl;
BitField<56, 1, u64> mrg; BitField<56, 1, u64> mrg;
@ -128,7 +131,7 @@ void TranslatorVisitor::XMAD_imm(u64 insn) {
} const xmad{insn}; } const xmad{insn};
XMAD(*this, insn, ir.Imm32(static_cast<u32>(xmad.src_b)), GetReg39(insn), xmad.select_mode, XMAD(*this, insn, ir.Imm32(static_cast<u32>(xmad.src_b)), GetReg39(insn), xmad.select_mode,
Half::H0, xmad.psl != 0, xmad.mrg != 0, xmad.x != 0); IMADHalf::H0, xmad.psl != 0, xmad.mrg != 0, xmad.x != 0);
} }
} // namespace Shader::Maxwell } // namespace Shader::Maxwell

View file

@ -24,17 +24,17 @@ enum class SZ : u64 {
F32 F32
}; };
// Selector for how an ISBERD index is scaled; ISBERD decodes it from a 2-bit
// instruction field and scaleIndex() maps each value to a left shift.
enum class Shift : u64 { enum class ISBERDShift : u64 {
Default, Default,
U16, U16,
B32, B32,
}; };
// Scales `index` according to `shift`:
//   Default -> returned unchanged
//   U16     -> shifted left by 1 (index * 2)
//   B32     -> shifted left by 2 (index * 4)
// Any other enumerator value reaches UNREACHABLE().
IR::U32 scaleIndex(IR::IREmitter& ir, IR::U32 index, Shift shift) { IR::U32 scaleIndex(IR::IREmitter& ir, IR::U32 index, ISBERDShift shift) {
switch (shift) { switch (shift) {
case Shift::Default: return index; case ISBERDShift::Default: return index;
case Shift::U16: return ir.ShiftLeftLogical(index, ir.Imm32(1)); case ISBERDShift::U16: return ir.ShiftLeftLogical(index, ir.Imm32(1));
case Shift::B32: return ir.ShiftLeftLogical(index, ir.Imm32(2)); case ISBERDShift::B32: return ir.ShiftLeftLogical(index, ir.Imm32(2));
default: UNREACHABLE(); default: UNREACHABLE();
} }
} }
@ -65,7 +65,7 @@ void TranslatorVisitor::ISBERD(u64 insn) {
BitField<32, 1, u64> o; BitField<32, 1, u64> o;
BitField<33, 2, ISBERDMode> mode; BitField<33, 2, ISBERDMode> mode;
BitField<36, 4, SZ> sz; BitField<36, 4, SZ> sz;
BitField<47, 2, Shift> shift; BitField<47, 2, ISBERDShift> shift;
} const isberd{insn}; } const isberd{insn};
IR::U32 index{}; IR::U32 index{};

View file

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
@ -30,7 +33,7 @@ enum class StoreSize : u64 {
}; };
// See Table 27 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html // See Table 27 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
enum class LoadCache : u64 { enum class XMEMLoadCache : u64 {
CA, // Cache at all levels, likely to be accessed again CA, // Cache at all levels, likely to be accessed again
CG, // Cache at global level (cache in L2 and below, not L1) CG, // Cache at global level (cache in L2 and below, not L1)
CI, // ??? CI, // ???
@ -38,7 +41,7 @@ enum class LoadCache : u64 {
}; };
// See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html // See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
enum class StoreCache : u64 { enum class XMEMStoreCache : u64 {
WB, // Cache write-back all coherent levels WB, // Cache write-back all coherent levels
CG, // Cache at global level CG, // Cache at global level
CS, // Cache streaming, likely to be accessed once CS, // Cache streaming, likely to be accessed once
@ -83,7 +86,7 @@ void TranslatorVisitor::LDG(u64 insn) {
union { union {
u64 raw; u64 raw;
BitField<0, 8, IR::Reg> dest_reg; BitField<0, 8, IR::Reg> dest_reg;
BitField<46, 2, LoadCache> cache; BitField<46, 2, XMEMLoadCache> cache;
BitField<48, 3, LoadSize> size; BitField<48, 3, LoadSize> size;
} const ldg{insn}; } const ldg{insn};
@ -137,7 +140,7 @@ void TranslatorVisitor::STG(u64 insn) {
union { union {
u64 raw; u64 raw;
BitField<0, 8, IR::Reg> data_reg; BitField<0, 8, IR::Reg> data_reg;
BitField<46, 2, StoreCache> cache; BitField<46, 2, XMEMStoreCache> cache;
BitField<48, 3, StoreSize> size; BitField<48, 3, StoreSize> size;
} const stg{insn}; } const stg{insn};

View file

@ -64,14 +64,14 @@ enum class SurfaceLoadStoreClamp : u64 {
}; };
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#cache-operators // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#cache-operators
// Cache operator read from the 2-bit `cache` field of SULD. SULD accepts only
// CA and CG and throws NotImplementedException for the other values.
enum class LoadCache : u64 { enum class SURFLoadCache : u64 {
CA, // Cache at all levels, likely to be accessed again CA, // Cache at all levels, likely to be accessed again
CG, // Cache at global level (L2 and below, not L1) CG, // Cache at global level (L2 and below, not L1)
CI, // ??? CI, // ???
CV, // Don't cache and fetch again (volatile) CV, // Don't cache and fetch again (volatile)
}; };
enum class StoreCache : u64 { enum class SURFStoreCache : u64 {
WB, // Cache write-back all coherent levels WB, // Cache write-back all coherent levels
CG, // Cache at global level (L2 and below, not L1) CG, // Cache at global level (L2 and below, not L1)
CS, // Cache streaming, likely to be accessed once CS, // Cache streaming, likely to be accessed once
@ -178,7 +178,7 @@ void TranslatorVisitor::SULD(u64 insn) {
BitField<52, 1, u64> d; BitField<52, 1, u64> d;
BitField<23, 1, u64> ba; BitField<23, 1, u64> ba;
BitField<33, 3, SurfaceLoadStoreType> type; BitField<33, 3, SurfaceLoadStoreType> type;
BitField<24, 2, LoadCache> cache; BitField<24, 2, SURFLoadCache> cache;
BitField<20, 3, SurfaceLoadStoreSize> size; // .D BitField<20, 3, SurfaceLoadStoreSize> size; // .D
BitField<20, 4, u64> swizzle; // .P BitField<20, 4, u64> swizzle; // .P
BitField<49, 2, SurfaceLoadStoreClamp> clamp; BitField<49, 2, SurfaceLoadStoreClamp> clamp;
@ -191,7 +191,7 @@ void TranslatorVisitor::SULD(u64 insn) {
if (suld.clamp != SurfaceLoadStoreClamp::IGN) { if (suld.clamp != SurfaceLoadStoreClamp::IGN) {
throw NotImplementedException("SurfaceLoadStoreClamp {}", suld.clamp.Value()); throw NotImplementedException("SurfaceLoadStoreClamp {}", suld.clamp.Value());
} }
if (suld.cache != LoadCache::CA && suld.cache != LoadCache::CG) { if (suld.cache != SURFLoadCache::CA && suld.cache != SURFLoadCache::CG) {
throw NotImplementedException("Cache {}", suld.cache.Value()); throw NotImplementedException("Cache {}", suld.cache.Value());
} }
const bool is_typed{suld.d != 0}; const bool is_typed{suld.d != 0};
@ -238,7 +238,7 @@ void TranslatorVisitor::SUST(u64 insn) {
BitField<52, 1, u64> d; BitField<52, 1, u64> d;
BitField<23, 1, u64> ba; BitField<23, 1, u64> ba;
BitField<33, 3, SurfaceLoadStoreType> type; BitField<33, 3, SurfaceLoadStoreType> type;
BitField<24, 2, StoreCache> cache; BitField<24, 2, SURFStoreCache> cache;
BitField<20, 3, SurfaceLoadStoreSize> size; // .D BitField<20, 3, SurfaceLoadStoreSize> size; // .D
BitField<20, 4, u64> swizzle; // .P BitField<20, 4, u64> swizzle; // .P
BitField<49, 2, SurfaceLoadStoreClamp> clamp; BitField<49, 2, SurfaceLoadStoreClamp> clamp;
@ -251,7 +251,7 @@ void TranslatorVisitor::SUST(u64 insn) {
if (sust.clamp != SurfaceLoadStoreClamp::IGN) { if (sust.clamp != SurfaceLoadStoreClamp::IGN) {
throw NotImplementedException("SurfaceLoadStoreClamp {}", sust.clamp.Value()); throw NotImplementedException("SurfaceLoadStoreClamp {}", sust.clamp.Value());
} }
if (sust.cache != StoreCache::WB && sust.cache != StoreCache::CG) { if (sust.cache != SURFStoreCache::WB && sust.cache != SURFStoreCache::CG) {
throw NotImplementedException("Cache {}", sust.cache.Value()); throw NotImplementedException("Cache {}", sust.cache.Value());
} }
const bool is_typed{sust.d != 0}; const bool is_typed{sust.d != 0};

View file

@ -134,7 +134,7 @@ IR::Value SampleTFS(TranslatorVisitor& v, u64 insn) {
} }
} }
unsigned Swizzle(u64 insn) { unsigned FetchSwizzle(u64 insn) {
#define R 1 #define R 1
#define G 2 #define G 2
#define B 4 #define B 4
@ -173,7 +173,7 @@ unsigned Swizzle(u64 insn) {
} }
} }
IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) { IR::F32 FetchExtract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) {
const bool is_shadow{sample.Type() == IR::Type::F32}; const bool is_shadow{sample.Type() == IR::Type::F32};
if (is_shadow) { if (is_shadow) {
const bool is_alpha{component == 3}; const bool is_alpha{component == 3};
@ -183,7 +183,7 @@ IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned componen
} }
} }
IR::Reg RegStoreComponent32(u64 insn, unsigned index) { IR::Reg FetchRegStoreComponent32(u64 insn, unsigned index) {
const EncodinTFS texs{insn}; const EncodinTFS texs{insn};
switch (index) { switch (index) {
case 0: case 0:
@ -201,14 +201,14 @@ IR::Reg RegStoreComponent32(u64 insn, unsigned index) {
} }
// Stores the selected components of `sample` as 32-bit floats.
// The swizzle value computed from `insn` is treated as a 4-bit mask: bit N set
// means component N is written. Each written component goes to the register
// returned by the register-lookup helper for the next store slot, so store
// slots are packed (skipped components do not consume a slot).
void Store32TFS(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { void Store32TFS(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
const unsigned swizzle{Swizzle(insn)}; const unsigned swizzle{FetchSwizzle(insn)};
unsigned store_index{0}; unsigned store_index{0};
for (unsigned component = 0; component < 4; ++component) { for (unsigned component = 0; component < 4; ++component) {
// Mask bit clear: this component is not stored.
if (((swizzle >> component) & 1) == 0) { if (((swizzle >> component) & 1) == 0) {
continue; continue;
} }
const IR::Reg dest{RegStoreComponent32(insn, store_index)}; const IR::Reg dest{FetchRegStoreComponent32(insn, store_index)};
v.F(dest, Extract(v, sample, component)); v.F(dest, FetchExtract(v, sample, component));
++store_index; ++store_index;
} }
} }
@ -218,14 +218,14 @@ IR::U32 PackTFS(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
} }
void Store16TFS(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { void Store16TFS(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
const unsigned swizzle{Swizzle(insn)}; const unsigned swizzle{FetchSwizzle(insn)};
unsigned store_index{0}; unsigned store_index{0};
std::array<IR::F32, 4> swizzled; std::array<IR::F32, 4> swizzled;
for (unsigned component = 0; component < 4; ++component) { for (unsigned component = 0; component < 4; ++component) {
if (((swizzle >> component) & 1) == 0) { if (((swizzle >> component) & 1) == 0) {
continue; continue;
} }
swizzled[store_index] = Extract(v, sample, component); swizzled[store_index] = FetchExtract(v, sample, component);
++store_index; ++store_index;
} }
const IR::F32 zero{v.ir.Imm32(0.0f)}; const IR::F32 zero{v.ir.Imm32(0.0f)};

View file

@ -43,7 +43,7 @@ void CheckAlignmentTGS(IR::Reg reg, size_t alignment) {
} }
} }
IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) { IR::Value MakeGatherOffset(TranslatorVisitor& v, IR::Reg reg) {
const IR::U32 value{v.X(reg)}; const IR::U32 value{v.X(reg)};
return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true),
v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true)); v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true));
@ -65,7 +65,7 @@ IR::Value SampleTGS(TranslatorVisitor& v, u64 insn) {
if (tld4s.aoffi != 0) { if (tld4s.aoffi != 0) {
CheckAlignmentTGS(reg_a, 2); CheckAlignmentTGS(reg_a, 2);
coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1)); coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1));
IR::Value offset = MakeOffset(v, reg_b); IR::Value offset = MakeGatherOffset(v, reg_b);
if (tld4s.dc != 0) { if (tld4s.dc != 0) {
CheckAlignmentTGS(reg_b, 2); CheckAlignmentTGS(reg_b, 2);
IR::F32 dref = v.F(reg_b + 1); IR::F32 dref = v.F(reg_b + 1);

View file

@ -40,7 +40,7 @@ void CheckAlignmentTLS(IR::Reg reg, size_t alignment) {
} }
} }
IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) { IR::Value MakeLoadOffset(TranslatorVisitor& v, IR::Reg reg) {
const IR::U32 value{v.X(reg)}; const IR::U32 value{v.X(reg)};
return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true)); v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true));
@ -74,7 +74,7 @@ IR::Value SampleTLS(TranslatorVisitor& v, u64 insn) {
CheckAlignmentTLS(reg_a, 2); CheckAlignmentTLS(reg_a, 2);
texture_type = Shader::TextureType::Color2D; texture_type = Shader::TextureType::Color2D;
coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
offsets = MakeOffset(v, reg_b); offsets = MakeLoadOffset(v, reg_b);
break; break;
case 5: case 5:
CheckAlignmentTLS(reg_a, 2); CheckAlignmentTLS(reg_a, 2);
@ -106,7 +106,7 @@ IR::Value SampleTLS(TranslatorVisitor& v, u64 insn) {
texture_type = Shader::TextureType::Color2D; texture_type = Shader::TextureType::Color2D;
coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
lod = v.X(reg_b); lod = v.X(reg_b);
offsets = MakeOffset(v, reg_b + 1); offsets = MakeLoadOffset(v, reg_b + 1);
break; break;
default: default:
throw NotImplementedException("Illegal encoding {}", tlds.encoding.Value()); throw NotImplementedException("Illegal encoding {}", tlds.encoding.Value());
@ -119,7 +119,7 @@ IR::Value SampleTLS(TranslatorVisitor& v, u64 insn) {
return v.ir.ImageFetch(handle, coords, offsets, lod, multisample, info); return v.ir.ImageFetch(handle, coords, offsets, lod, multisample, info);
} }
unsigned Swizzle(u64 insn) { unsigned LoadSwizzle(u64 insn) {
#define R 1 #define R 1
#define G 2 #define G 2
#define B 4 #define B 4
@ -160,11 +160,11 @@ unsigned Swizzle(u64 insn) {
} }
} }
// Returns element `component` of the composite value `sample`, obtained with
// CompositeExtract and wrapped as an IR::F32.
IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) { IR::F32 LoadExtract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) {
return IR::F32{v.ir.CompositeExtract(sample, component)}; return IR::F32{v.ir.CompositeExtract(sample, component)};
} }
IR::Reg RegStoreComponent32(u64 insn, unsigned index) { IR::Reg LoadRegStoreComponent32(u64 insn, unsigned index) {
const EncodinTLS tlds{insn}; const EncodinTLS tlds{insn};
switch (index) { switch (index) {
case 0: case 0:
@ -182,14 +182,14 @@ IR::Reg RegStoreComponent32(u64 insn, unsigned index) {
} }
// Stores the selected components of `sample` as 32-bit floats.
// The swizzle value computed from `insn` is treated as a 4-bit mask: bit N set
// means component N is written. Each written component goes to the register
// returned by the register-lookup helper for the next store slot, so store
// slots are packed (skipped components do not consume a slot).
void Store32TLS(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { void Store32TLS(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
const unsigned swizzle{Swizzle(insn)}; const unsigned swizzle{LoadSwizzle(insn)};
unsigned store_index{0}; unsigned store_index{0};
for (unsigned component = 0; component < 4; ++component) { for (unsigned component = 0; component < 4; ++component) {
// Mask bit clear: this component is not stored.
if (((swizzle >> component) & 1) == 0) { if (((swizzle >> component) & 1) == 0) {
continue; continue;
} }
const IR::Reg dest{RegStoreComponent32(insn, store_index)}; const IR::Reg dest{LoadRegStoreComponent32(insn, store_index)};
v.F(dest, Extract(v, sample, component)); v.F(dest, LoadExtract(v, sample, component));
++store_index; ++store_index;
} }
} }
@ -199,14 +199,14 @@ IR::U32 PackTLS(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
} }
void Store16TLS(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { void Store16TLS(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
const unsigned swizzle{Swizzle(insn)}; const unsigned swizzle{LoadSwizzle(insn)};
unsigned store_index{0}; unsigned store_index{0};
std::array<IR::F32, 4> swizzled; std::array<IR::F32, 4> swizzled;
for (unsigned component = 0; component < 4; ++component) { for (unsigned component = 0; component < 4; ++component) {
if (((swizzle >> component) & 1) == 0) { if (((swizzle >> component) & 1) == 0) {
continue; continue;
} }
swizzled[store_index] = Extract(v, sample, component); swizzled[store_index] = LoadExtract(v, sample, component);
++store_index; ++store_index;
} }
const IR::F32 zero{v.ir.Imm32(0.0f)}; const IR::F32 zero{v.ir.Imm32(0.0f)};