mirror of
https://git.eden-emu.dev/eden-emu/eden
synced 2026-04-10 05:28:56 +02:00
fix with bigger batch sizes
This commit is contained in:
parent
585b3dcde1
commit
2f0192665e
20 changed files with 164 additions and 152 deletions
|
|
@ -77,7 +77,7 @@ CPUCaps::Manufacturer CPUCaps::ParseManufacturer(std::string_view brand_string)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Detects the various CPU features
|
// Detects the various CPU features
|
||||||
static CPUCaps Detect() {
|
static CPUCaps DetectCPUCapabilities() {
|
||||||
CPUCaps caps = {};
|
CPUCaps caps = {};
|
||||||
|
|
||||||
// Assumes the CPU supports the CPUID instruction. Those that don't would likely not support
|
// Assumes the CPU supports the CPUID instruction. Those that don't would likely not support
|
||||||
|
|
@ -208,7 +208,7 @@ static CPUCaps Detect() {
|
||||||
}
|
}
|
||||||
|
|
||||||
const CPUCaps& GetCPUCaps() {
|
const CPUCaps& GetCPUCaps() {
|
||||||
static CPUCaps caps = Detect();
|
static CPUCaps caps = DetectCPUCapabilities();
|
||||||
return caps;
|
return caps;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -31,7 +31,7 @@ using namespace oaknut::util;
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
bool IsOrdered(IR::AccType acctype) {
|
[[nodiscard]] inline bool IsOrdered(IR::AccType acctype) {
|
||||||
return acctype == IR::AccType::ORDERED || acctype == IR::AccType::ORDEREDRW || acctype == IR::AccType::LIMITEDORDERED;
|
return acctype == IR::AccType::ORDERED || acctype == IR::AccType::ORDEREDRW || acctype == IR::AccType::LIMITEDORDERED;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,6 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
||||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
|
@ -45,11 +48,6 @@ void GetCbuf(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool IsInputArray(Stage stage) {
|
|
||||||
return stage == Stage::Geometry || stage == Stage::TessellationControl ||
|
|
||||||
stage == Stage::TessellationEval;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string VertexIndex(EmitContext& ctx, ScalarU32 vertex) {
|
std::string VertexIndex(EmitContext& ctx, ScalarU32 vertex) {
|
||||||
return IsInputArray(ctx.stage) ? fmt::format("[{}]", vertex) : "";
|
return IsInputArray(ctx.stage) ? fmt::format("[{}]", vertex) : "";
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
||||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||||
|
|
@ -18,6 +18,11 @@ class Value;
|
||||||
|
|
||||||
namespace Shader::Backend::GLASM {
|
namespace Shader::Backend::GLASM {
|
||||||
|
|
||||||
|
[[nodiscard]] inline bool IsInputArray(Stage stage) {
|
||||||
|
return stage == Stage::Geometry || stage == Stage::TessellationControl
|
||||||
|
|| stage == Stage::TessellationEval;
|
||||||
|
}
|
||||||
|
|
||||||
class EmitContext;
|
class EmitContext;
|
||||||
|
|
||||||
// Microinstruction emitters
|
// Microinstruction emitters
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,6 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
||||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
|
@ -7,94 +10,92 @@
|
||||||
|
|
||||||
namespace Shader::Backend::GLASM {
|
namespace Shader::Backend::GLASM {
|
||||||
|
|
||||||
#define NotImplemented() throw NotImplementedException("GLASM instruction {}", __LINE__)
|
|
||||||
|
|
||||||
void EmitGetRegister(EmitContext& ctx) {
|
void EmitGetRegister(EmitContext& ctx) {
|
||||||
NotImplemented();
|
throw NotImplementedException("GLASM instruction {}", __LINE__);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitSetRegister(EmitContext& ctx) {
|
void EmitSetRegister(EmitContext& ctx) {
|
||||||
NotImplemented();
|
throw NotImplementedException("GLASM instruction {}", __LINE__);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitGetPred(EmitContext& ctx) {
|
void EmitGetPred(EmitContext& ctx) {
|
||||||
NotImplemented();
|
throw NotImplementedException("GLASM instruction {}", __LINE__);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitSetPred(EmitContext& ctx) {
|
void EmitSetPred(EmitContext& ctx) {
|
||||||
NotImplemented();
|
throw NotImplementedException("GLASM instruction {}", __LINE__);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitSetGotoVariable(EmitContext& ctx) {
|
void EmitSetGotoVariable(EmitContext& ctx) {
|
||||||
NotImplemented();
|
throw NotImplementedException("GLASM instruction {}", __LINE__);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitGetGotoVariable(EmitContext& ctx) {
|
void EmitGetGotoVariable(EmitContext& ctx) {
|
||||||
NotImplemented();
|
throw NotImplementedException("GLASM instruction {}", __LINE__);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitSetIndirectBranchVariable(EmitContext& ctx) {
|
void EmitSetIndirectBranchVariable(EmitContext& ctx) {
|
||||||
NotImplemented();
|
throw NotImplementedException("GLASM instruction {}", __LINE__);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitGetIndirectBranchVariable(EmitContext& ctx) {
|
void EmitGetIndirectBranchVariable(EmitContext& ctx) {
|
||||||
NotImplemented();
|
throw NotImplementedException("GLASM instruction {}", __LINE__);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitGetZFlag(EmitContext& ctx) {
|
void EmitGetZFlag(EmitContext& ctx) {
|
||||||
NotImplemented();
|
throw NotImplementedException("GLASM instruction {}", __LINE__);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitGetSFlag(EmitContext& ctx) {
|
void EmitGetSFlag(EmitContext& ctx) {
|
||||||
NotImplemented();
|
throw NotImplementedException("GLASM instruction {}", __LINE__);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitGetCFlag(EmitContext& ctx) {
|
void EmitGetCFlag(EmitContext& ctx) {
|
||||||
NotImplemented();
|
throw NotImplementedException("GLASM instruction {}", __LINE__);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitGetOFlag(EmitContext& ctx) {
|
void EmitGetOFlag(EmitContext& ctx) {
|
||||||
NotImplemented();
|
throw NotImplementedException("GLASM instruction {}", __LINE__);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitSetZFlag(EmitContext& ctx) {
|
void EmitSetZFlag(EmitContext& ctx) {
|
||||||
NotImplemented();
|
throw NotImplementedException("GLASM instruction {}", __LINE__);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitSetSFlag(EmitContext& ctx) {
|
void EmitSetSFlag(EmitContext& ctx) {
|
||||||
NotImplemented();
|
throw NotImplementedException("GLASM instruction {}", __LINE__);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitSetCFlag(EmitContext& ctx) {
|
void EmitSetCFlag(EmitContext& ctx) {
|
||||||
NotImplemented();
|
throw NotImplementedException("GLASM instruction {}", __LINE__);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitSetOFlag(EmitContext& ctx) {
|
void EmitSetOFlag(EmitContext& ctx) {
|
||||||
NotImplemented();
|
throw NotImplementedException("GLASM instruction {}", __LINE__);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitGetZeroFromOp(EmitContext& ctx) {
|
void EmitGetZeroFromOp(EmitContext& ctx) {
|
||||||
NotImplemented();
|
throw NotImplementedException("GLASM instruction {}", __LINE__);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitGetSignFromOp(EmitContext& ctx) {
|
void EmitGetSignFromOp(EmitContext& ctx) {
|
||||||
NotImplemented();
|
throw NotImplementedException("GLASM instruction {}", __LINE__);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitGetCarryFromOp(EmitContext& ctx) {
|
void EmitGetCarryFromOp(EmitContext& ctx) {
|
||||||
NotImplemented();
|
throw NotImplementedException("GLASM instruction {}", __LINE__);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitGetOverflowFromOp(EmitContext& ctx) {
|
void EmitGetOverflowFromOp(EmitContext& ctx) {
|
||||||
NotImplemented();
|
throw NotImplementedException("GLASM instruction {}", __LINE__);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitGetSparseFromOp(EmitContext& ctx) {
|
void EmitGetSparseFromOp(EmitContext& ctx) {
|
||||||
NotImplemented();
|
throw NotImplementedException("GLASM instruction {}", __LINE__);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitGetInBoundsFromOp(EmitContext& ctx) {
|
void EmitGetInBoundsFromOp(EmitContext& ctx) {
|
||||||
NotImplemented();
|
throw NotImplementedException("GLASM instruction {}", __LINE__);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Shader::Backend::GLASM
|
} // namespace Shader::Backend::GLASM
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,6 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
||||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
|
@ -21,11 +24,6 @@ std::string_view InterpDecorator(Interpolation interp) {
|
||||||
}
|
}
|
||||||
throw InvalidArgument("Invalid interpolation {}", interp);
|
throw InvalidArgument("Invalid interpolation {}", interp);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool IsInputArray(Stage stage) {
|
|
||||||
return stage == Stage::Geometry || stage == Stage::TessellationControl ||
|
|
||||||
stage == Stage::TessellationEval;
|
|
||||||
}
|
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_,
|
EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_,
|
||||||
|
|
|
||||||
|
|
@ -32,10 +32,6 @@ std::string InputVertexIndex(EmitContext& ctx, std::string_view vertex) {
|
||||||
return IsInputArray(ctx.stage) ? fmt::format("[{}]", vertex) : "";
|
return IsInputArray(ctx.stage) ? fmt::format("[{}]", vertex) : "";
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string_view OutputVertexIndex(EmitContext& ctx) {
|
|
||||||
return ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : "";
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string ChooseCbuf(EmitContext& ctx, const IR::Value& binding, std::string_view index) {
|
std::string ChooseCbuf(EmitContext& ctx, const IR::Value& binding, std::string_view index) {
|
||||||
if (binding.IsImmediate()) {
|
if (binding.IsImmediate()) {
|
||||||
return fmt::format("{}_cbuf{}[{}]", ctx.stage_name, binding.U32(), index);
|
return fmt::format("{}_cbuf{}[{}]", ctx.stage_name, binding.U32(), index);
|
||||||
|
|
@ -281,7 +277,7 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val
|
||||||
const u32 index{IR::GenericAttributeIndex(attr)};
|
const u32 index{IR::GenericAttributeIndex(attr)};
|
||||||
const u32 attr_element{IR::GenericAttributeElement(attr)};
|
const u32 attr_element{IR::GenericAttributeElement(attr)};
|
||||||
const GenericElementInfo& info{ctx.output_generics.at(index).at(attr_element)};
|
const GenericElementInfo& info{ctx.output_generics.at(index).at(attr_element)};
|
||||||
const auto output_decorator{OutputVertexIndex(ctx)};
|
const auto output_decorator = ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : "";
|
||||||
if (info.num_components == 1) {
|
if (info.num_components == 1) {
|
||||||
ctx.Add("{}{}={};", info.name, output_decorator, value);
|
ctx.Add("{}{}={};", info.name, output_decorator, value);
|
||||||
} else {
|
} else {
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
||||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,6 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
||||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
|
@ -9,9 +12,6 @@
|
||||||
|
|
||||||
namespace Shader::Backend::GLSL {
|
namespace Shader::Backend::GLSL {
|
||||||
namespace {
|
namespace {
|
||||||
std::string_view OutputVertexIndex(EmitContext& ctx) {
|
|
||||||
return ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : "";
|
|
||||||
}
|
|
||||||
|
|
||||||
void InitializeOutputVaryings(EmitContext& ctx) {
|
void InitializeOutputVaryings(EmitContext& ctx) {
|
||||||
if (ctx.uses_geometry_passthrough) {
|
if (ctx.uses_geometry_passthrough) {
|
||||||
|
|
@ -25,7 +25,7 @@ void InitializeOutputVaryings(EmitContext& ctx) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const auto& info_array{ctx.output_generics.at(index)};
|
const auto& info_array{ctx.output_generics.at(index)};
|
||||||
const auto output_decorator{OutputVertexIndex(ctx)};
|
const auto output_decorator = ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : "";
|
||||||
size_t element{};
|
size_t element{};
|
||||||
while (element < info_array.size()) {
|
while (element < info_array.size()) {
|
||||||
const auto& info{info_array.at(element)};
|
const auto& info{info_array.at(element)};
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,6 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
||||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
|
@ -17,14 +20,13 @@ Id Image(EmitContext& ctx, IR::TextureInstInfo info) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<Id, Id> AtomicArgs(EmitContext& ctx) {
|
std::pair<Id, Id> AtomicImageArgs(EmitContext& ctx) {
|
||||||
const Id scope{ctx.Const(static_cast<u32>(spv::Scope::Device))};
|
const Id scope{ctx.Const(static_cast<u32>(spv::Scope::Device))};
|
||||||
const Id semantics{ctx.u32_zero_value};
|
const Id semantics{ctx.u32_zero_value};
|
||||||
return {scope, semantics};
|
return {scope, semantics};
|
||||||
}
|
}
|
||||||
|
|
||||||
Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id value,
|
Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id value, Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
|
||||||
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
|
|
||||||
if (!index.IsImmediate() || index.U32() != 0) {
|
if (!index.IsImmediate() || index.U32() != 0) {
|
||||||
// TODO: handle layers
|
// TODO: handle layers
|
||||||
throw NotImplementedException("Image indexing");
|
throw NotImplementedException("Image indexing");
|
||||||
|
|
@ -32,7 +34,7 @@ Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id c
|
||||||
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
||||||
const Id image{Image(ctx, info)};
|
const Id image{Image(ctx, info)};
|
||||||
const Id pointer{ctx.OpImageTexelPointer(ctx.image_u32, image, coords, ctx.Const(0U))};
|
const Id pointer{ctx.OpImageTexelPointer(ctx.image_u32, image, coords, ctx.Const(0U))};
|
||||||
const auto [scope, semantics]{AtomicArgs(ctx)};
|
const auto [scope, semantics] = AtomicImageArgs(ctx);
|
||||||
return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
|
return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
|
||||||
}
|
}
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,6 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
||||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
|
@ -6,7 +9,7 @@
|
||||||
|
|
||||||
namespace Shader::Maxwell {
|
namespace Shader::Maxwell {
|
||||||
namespace {
|
namespace {
|
||||||
enum class FloatFormat : u64 {
|
enum class FloatConversionFormat : u64 {
|
||||||
F16 = 1,
|
F16 = 1,
|
||||||
F32 = 2,
|
F32 = 2,
|
||||||
F64 = 3,
|
F64 = 3,
|
||||||
|
|
@ -21,13 +24,13 @@ enum class RoundingOp : u64 {
|
||||||
Trunc = 11,
|
Trunc = 11,
|
||||||
};
|
};
|
||||||
|
|
||||||
[[nodiscard]] u32 WidthSize(FloatFormat width) {
|
[[nodiscard]] u32 WidthSize(FloatConversionFormat width) {
|
||||||
switch (width) {
|
switch (width) {
|
||||||
case FloatFormat::F16:
|
case FloatConversionFormat::F16:
|
||||||
return 16;
|
return 16;
|
||||||
case FloatFormat::F32:
|
case FloatConversionFormat::F32:
|
||||||
return 32;
|
return 32;
|
||||||
case FloatFormat::F64:
|
case FloatConversionFormat::F64:
|
||||||
return 64;
|
return 64;
|
||||||
default:
|
default:
|
||||||
throw NotImplementedException("Invalid width {}", width);
|
throw NotImplementedException("Invalid width {}", width);
|
||||||
|
|
@ -44,8 +47,8 @@ void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) {
|
||||||
BitField<50, 1, u64> sat;
|
BitField<50, 1, u64> sat;
|
||||||
BitField<39, 4, u64> rounding_op;
|
BitField<39, 4, u64> rounding_op;
|
||||||
BitField<39, 2, FpRounding> rounding;
|
BitField<39, 2, FpRounding> rounding;
|
||||||
BitField<10, 2, FloatFormat> src_size;
|
BitField<10, 2, FloatConversionFormat> src_size;
|
||||||
BitField<8, 2, FloatFormat> dst_size;
|
BitField<8, 2, FloatConversionFormat> dst_size;
|
||||||
|
|
||||||
[[nodiscard]] RoundingOp RoundingOperation() const {
|
[[nodiscard]] RoundingOp RoundingOperation() const {
|
||||||
constexpr u64 rounding_mask = 0x0B;
|
constexpr u64 rounding_mask = 0x0B;
|
||||||
|
|
@ -59,7 +62,7 @@ void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) {
|
||||||
|
|
||||||
IR::F16F32F64 input{v.ir.FPAbsNeg(src_a, abs, f2f.neg != 0)};
|
IR::F16F32F64 input{v.ir.FPAbsNeg(src_a, abs, f2f.neg != 0)};
|
||||||
|
|
||||||
const bool any_fp64{f2f.src_size == FloatFormat::F64 || f2f.dst_size == FloatFormat::F64};
|
const bool any_fp64{f2f.src_size == FloatConversionFormat::F64 || f2f.dst_size == FloatConversionFormat::F64};
|
||||||
IR::FpControl fp_control{
|
IR::FpControl fp_control{
|
||||||
.no_contraction = false,
|
.no_contraction = false,
|
||||||
.rounding = IR::FpRounding::DontCare,
|
.rounding = IR::FpRounding::DontCare,
|
||||||
|
|
@ -74,13 +77,13 @@ void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) {
|
||||||
case RoundingOp::Pass:
|
case RoundingOp::Pass:
|
||||||
// Make sure NANs are handled properly
|
// Make sure NANs are handled properly
|
||||||
switch (f2f.src_size) {
|
switch (f2f.src_size) {
|
||||||
case FloatFormat::F16:
|
case FloatConversionFormat::F16:
|
||||||
input = v.ir.FPAdd(input, v.ir.FPConvert(16, v.ir.Imm32(0.0f)), fp_control);
|
input = v.ir.FPAdd(input, v.ir.FPConvert(16, v.ir.Imm32(0.0f)), fp_control);
|
||||||
break;
|
break;
|
||||||
case FloatFormat::F32:
|
case FloatConversionFormat::F32:
|
||||||
input = v.ir.FPAdd(input, v.ir.Imm32(0.0f), fp_control);
|
input = v.ir.FPAdd(input, v.ir.Imm32(0.0f), fp_control);
|
||||||
break;
|
break;
|
||||||
case FloatFormat::F64:
|
case FloatConversionFormat::F64:
|
||||||
input = v.ir.FPAdd(input, v.ir.Imm64(0.0), fp_control);
|
input = v.ir.FPAdd(input, v.ir.Imm64(0.0), fp_control);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
@ -106,15 +109,15 @@ void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) {
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (f2f.dst_size) {
|
switch (f2f.dst_size) {
|
||||||
case FloatFormat::F16: {
|
case FloatConversionFormat::F16: {
|
||||||
const IR::F16 imm{v.ir.FPConvert(16, v.ir.Imm32(0.0f))};
|
const IR::F16 imm{v.ir.FPConvert(16, v.ir.Imm32(0.0f))};
|
||||||
v.X(f2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(input, imm)));
|
v.X(f2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(input, imm)));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case FloatFormat::F32:
|
case FloatConversionFormat::F32:
|
||||||
v.F(f2f.dest_reg, input);
|
v.F(f2f.dest_reg, input);
|
||||||
break;
|
break;
|
||||||
case FloatFormat::F64:
|
case FloatConversionFormat::F64:
|
||||||
v.D(f2f.dest_reg, input);
|
v.D(f2f.dest_reg, input);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
|
@ -127,21 +130,21 @@ void TranslatorVisitor::F2F_reg(u64 insn) {
|
||||||
union {
|
union {
|
||||||
u64 insn;
|
u64 insn;
|
||||||
BitField<49, 1, u64> abs;
|
BitField<49, 1, u64> abs;
|
||||||
BitField<10, 2, FloatFormat> src_size;
|
BitField<10, 2, FloatConversionFormat> src_size;
|
||||||
BitField<41, 1, u64> selector;
|
BitField<41, 1, u64> selector;
|
||||||
} const f2f{insn};
|
} const f2f{insn};
|
||||||
|
|
||||||
IR::F16F32F64 src_a;
|
IR::F16F32F64 src_a;
|
||||||
switch (f2f.src_size) {
|
switch (f2f.src_size) {
|
||||||
case FloatFormat::F16: {
|
case FloatConversionFormat::F16: {
|
||||||
auto [lhs_a, rhs_a]{Extract(ir, GetReg20(insn), Swizzle::H1_H0)};
|
auto [lhs_a, rhs_a]{Extract(ir, GetReg20(insn), Swizzle::H1_H0)};
|
||||||
src_a = f2f.selector != 0 ? rhs_a : lhs_a;
|
src_a = f2f.selector != 0 ? rhs_a : lhs_a;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case FloatFormat::F32:
|
case FloatConversionFormat::F32:
|
||||||
src_a = GetFloatReg20(insn);
|
src_a = GetFloatReg20(insn);
|
||||||
break;
|
break;
|
||||||
case FloatFormat::F64:
|
case FloatConversionFormat::F64:
|
||||||
src_a = GetDoubleReg20(insn);
|
src_a = GetDoubleReg20(insn);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
|
@ -154,21 +157,21 @@ void TranslatorVisitor::F2F_cbuf(u64 insn) {
|
||||||
union {
|
union {
|
||||||
u64 insn;
|
u64 insn;
|
||||||
BitField<49, 1, u64> abs;
|
BitField<49, 1, u64> abs;
|
||||||
BitField<10, 2, FloatFormat> src_size;
|
BitField<10, 2, FloatConversionFormat> src_size;
|
||||||
BitField<41, 1, u64> selector;
|
BitField<41, 1, u64> selector;
|
||||||
} const f2f{insn};
|
} const f2f{insn};
|
||||||
|
|
||||||
IR::F16F32F64 src_a;
|
IR::F16F32F64 src_a;
|
||||||
switch (f2f.src_size) {
|
switch (f2f.src_size) {
|
||||||
case FloatFormat::F16: {
|
case FloatConversionFormat::F16: {
|
||||||
auto [lhs_a, rhs_a]{Extract(ir, GetCbuf(insn), Swizzle::H1_H0)};
|
auto [lhs_a, rhs_a]{Extract(ir, GetCbuf(insn), Swizzle::H1_H0)};
|
||||||
src_a = f2f.selector != 0 ? rhs_a : lhs_a;
|
src_a = f2f.selector != 0 ? rhs_a : lhs_a;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case FloatFormat::F32:
|
case FloatConversionFormat::F32:
|
||||||
src_a = GetFloatCbuf(insn);
|
src_a = GetFloatCbuf(insn);
|
||||||
break;
|
break;
|
||||||
case FloatFormat::F64:
|
case FloatConversionFormat::F64:
|
||||||
src_a = GetDoubleCbuf(insn);
|
src_a = GetDoubleCbuf(insn);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
|
@ -181,7 +184,7 @@ void TranslatorVisitor::F2F_imm([[maybe_unused]] u64 insn) {
|
||||||
union {
|
union {
|
||||||
u64 insn;
|
u64 insn;
|
||||||
BitField<49, 1, u64> abs;
|
BitField<49, 1, u64> abs;
|
||||||
BitField<10, 2, FloatFormat> src_size;
|
BitField<10, 2, FloatConversionFormat> src_size;
|
||||||
BitField<41, 1, u64> selector;
|
BitField<41, 1, u64> selector;
|
||||||
BitField<20, 19, u64> imm;
|
BitField<20, 19, u64> imm;
|
||||||
BitField<56, 1, u64> imm_neg;
|
BitField<56, 1, u64> imm_neg;
|
||||||
|
|
@ -189,7 +192,7 @@ void TranslatorVisitor::F2F_imm([[maybe_unused]] u64 insn) {
|
||||||
|
|
||||||
IR::F16F32F64 src_a;
|
IR::F16F32F64 src_a;
|
||||||
switch (f2f.src_size) {
|
switch (f2f.src_size) {
|
||||||
case FloatFormat::F16: {
|
case FloatConversionFormat::F16: {
|
||||||
const u32 imm{static_cast<u32>(f2f.imm & 0x0000ffff)};
|
const u32 imm{static_cast<u32>(f2f.imm & 0x0000ffff)};
|
||||||
const IR::Value vector{ir.UnpackFloat2x16(ir.Imm32(imm | (imm << 16)))};
|
const IR::Value vector{ir.UnpackFloat2x16(ir.Imm32(imm | (imm << 16)))};
|
||||||
src_a = IR::F16{ir.CompositeExtract(vector, f2f.selector != 0 ? 0 : 1)};
|
src_a = IR::F16{ir.CompositeExtract(vector, f2f.selector != 0 ? 0 : 1)};
|
||||||
|
|
@ -198,10 +201,10 @@ void TranslatorVisitor::F2F_imm([[maybe_unused]] u64 insn) {
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case FloatFormat::F32:
|
case FloatConversionFormat::F32:
|
||||||
src_a = GetFloatImm20(insn);
|
src_a = GetFloatImm20(insn);
|
||||||
break;
|
break;
|
||||||
case FloatFormat::F64:
|
case FloatConversionFormat::F64:
|
||||||
src_a = GetDoubleImm20(insn);
|
src_a = GetDoubleImm20(insn);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,6 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
||||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
|
@ -7,48 +10,48 @@
|
||||||
|
|
||||||
namespace Shader::Maxwell {
|
namespace Shader::Maxwell {
|
||||||
namespace {
|
namespace {
|
||||||
enum class Shift : u64 {
|
enum class IADD3Shift : u64 {
|
||||||
None,
|
None,
|
||||||
Right,
|
Right,
|
||||||
Left,
|
Left,
|
||||||
};
|
};
|
||||||
enum class Half : u64 {
|
enum class IADD3Half : u64 {
|
||||||
All,
|
All,
|
||||||
Lower,
|
Lower,
|
||||||
Upper,
|
Upper,
|
||||||
};
|
};
|
||||||
|
|
||||||
[[nodiscard]] IR::U32 IntegerHalf(IR::IREmitter& ir, const IR::U32& value, Half half) {
|
[[nodiscard]] IR::U32 IntegerHalf(IR::IREmitter& ir, const IR::U32& value, IADD3Half half) {
|
||||||
constexpr bool is_signed{false};
|
constexpr bool is_signed{false};
|
||||||
switch (half) {
|
switch (half) {
|
||||||
case Half::All:
|
case IADD3Half::All:
|
||||||
return value;
|
return value;
|
||||||
case Half::Lower:
|
case IADD3Half::Lower:
|
||||||
return ir.BitFieldExtract(value, ir.Imm32(0), ir.Imm32(16), is_signed);
|
return ir.BitFieldExtract(value, ir.Imm32(0), ir.Imm32(16), is_signed);
|
||||||
case Half::Upper:
|
case IADD3Half::Upper:
|
||||||
return ir.BitFieldExtract(value, ir.Imm32(16), ir.Imm32(16), is_signed);
|
return ir.BitFieldExtract(value, ir.Imm32(16), ir.Imm32(16), is_signed);
|
||||||
}
|
}
|
||||||
throw NotImplementedException("Invalid half");
|
throw NotImplementedException("Invalid half");
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] IR::U32 IntegerShift(IR::IREmitter& ir, const IR::U32& value, Shift shift) {
|
[[nodiscard]] IR::U32 IntegerShift(IR::IREmitter& ir, const IR::U32& value, IADD3Shift shift) {
|
||||||
switch (shift) {
|
switch (shift) {
|
||||||
case Shift::None:
|
case IADD3Shift::None:
|
||||||
return value;
|
return value;
|
||||||
case Shift::Right: {
|
case IADD3Shift::Right: {
|
||||||
// 33-bit RS IADD3 edge case
|
// 33-bit RS IADD3 edge case
|
||||||
const IR::U1 edge_case{ir.GetCarryFromOp(value)};
|
const IR::U1 edge_case{ir.GetCarryFromOp(value)};
|
||||||
const IR::U32 shifted{ir.ShiftRightLogical(value, ir.Imm32(16))};
|
const IR::U32 shifted{ir.ShiftRightLogical(value, ir.Imm32(16))};
|
||||||
return IR::U32{ir.Select(edge_case, ir.IAdd(shifted, ir.Imm32(0x10000)), shifted)};
|
return IR::U32{ir.Select(edge_case, ir.IAdd(shifted, ir.Imm32(0x10000)), shifted)};
|
||||||
}
|
}
|
||||||
case Shift::Left:
|
case IADD3Shift::Left:
|
||||||
return ir.ShiftLeftLogical(value, ir.Imm32(16));
|
return ir.ShiftLeftLogical(value, ir.Imm32(16));
|
||||||
}
|
}
|
||||||
throw NotImplementedException("Invalid shift");
|
throw NotImplementedException("Invalid shift");
|
||||||
}
|
}
|
||||||
|
|
||||||
void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 op_c,
|
void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 op_c,
|
||||||
Shift shift = Shift::None) {
|
IADD3Shift shift = IADD3Shift::None) {
|
||||||
union {
|
union {
|
||||||
u64 insn;
|
u64 insn;
|
||||||
BitField<0, 8, IR::Reg> dest_reg;
|
BitField<0, 8, IR::Reg> dest_reg;
|
||||||
|
|
@ -71,7 +74,7 @@ void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 o
|
||||||
IR::U32 lhs_1{v.ir.IAdd(op_a, op_b)};
|
IR::U32 lhs_1{v.ir.IAdd(op_a, op_b)};
|
||||||
if (iadd3.x != 0) {
|
if (iadd3.x != 0) {
|
||||||
// TODO: How does RS behave when X is set?
|
// TODO: How does RS behave when X is set?
|
||||||
if (shift == Shift::Right) {
|
if (shift == IADD3Shift::Right) {
|
||||||
throw NotImplementedException("IADD3 X+RS");
|
throw NotImplementedException("IADD3 X+RS");
|
||||||
}
|
}
|
||||||
const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))};
|
const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))};
|
||||||
|
|
@ -98,10 +101,10 @@ void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 o
|
||||||
void TranslatorVisitor::IADD3_reg(u64 insn) {
|
void TranslatorVisitor::IADD3_reg(u64 insn) {
|
||||||
union {
|
union {
|
||||||
u64 insn;
|
u64 insn;
|
||||||
BitField<37, 2, Shift> shift;
|
BitField<37, 2, IADD3Shift> shift;
|
||||||
BitField<35, 2, Half> half_a;
|
BitField<35, 2, IADD3Half> half_a;
|
||||||
BitField<33, 2, Half> half_b;
|
BitField<33, 2, IADD3Half> half_b;
|
||||||
BitField<31, 2, Half> half_c;
|
BitField<31, 2, IADD3Half> half_c;
|
||||||
} const iadd3{insn};
|
} const iadd3{insn};
|
||||||
|
|
||||||
const auto op_a{IntegerHalf(ir, GetReg8(insn), iadd3.half_a)};
|
const auto op_a{IntegerHalf(ir, GetReg8(insn), iadd3.half_a)};
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@
|
||||||
|
|
||||||
namespace Shader::Maxwell {
|
namespace Shader::Maxwell {
|
||||||
namespace {
|
namespace {
|
||||||
enum class FloatFormat : u64 {
|
enum class IntegerToFloatFormat : u64 {
|
||||||
F16 = 1,
|
F16 = 1,
|
||||||
F32 = 2,
|
F32 = 2,
|
||||||
F64 = 3,
|
F64 = 3,
|
||||||
|
|
@ -27,7 +27,7 @@ enum class IntFormat : u64 {
|
||||||
union EncodingIFPC {
|
union EncodingIFPC {
|
||||||
u64 raw;
|
u64 raw;
|
||||||
BitField<0, 8, IR::Reg> dest_reg;
|
BitField<0, 8, IR::Reg> dest_reg;
|
||||||
BitField<8, 2, FloatFormat> float_format;
|
BitField<8, 2, IntegerToFloatFormat> float_format;
|
||||||
BitField<10, 2, IntFormat> int_format;
|
BitField<10, 2, IntFormat> int_format;
|
||||||
BitField<13, 1, u64> is_signed;
|
BitField<13, 1, u64> is_signed;
|
||||||
BitField<39, 2, FpRounding> fp_rounding;
|
BitField<39, 2, FpRounding> fp_rounding;
|
||||||
|
|
@ -41,13 +41,13 @@ bool Is64(u64 insn) {
|
||||||
return EncodingIFPC{insn}.int_format == IntFormat::U64;
|
return EncodingIFPC{insn}.int_format == IntFormat::U64;
|
||||||
}
|
}
|
||||||
|
|
||||||
int BitSize(FloatFormat format) {
|
int BitSize(IntegerToFloatFormat format) {
|
||||||
switch (format) {
|
switch (format) {
|
||||||
case FloatFormat::F16:
|
case IntegerToFloatFormat::F16:
|
||||||
return 16;
|
return 16;
|
||||||
case FloatFormat::F32:
|
case IntegerToFloatFormat::F32:
|
||||||
return 32;
|
return 32;
|
||||||
case FloatFormat::F64:
|
case IntegerToFloatFormat::F64:
|
||||||
return 64;
|
return 64;
|
||||||
}
|
}
|
||||||
throw NotImplementedException("Invalid float format {}", format);
|
throw NotImplementedException("Invalid float format {}", format);
|
||||||
|
|
@ -119,15 +119,15 @@ void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
switch (i2f.float_format) {
|
switch (i2f.float_format) {
|
||||||
case FloatFormat::F16: {
|
case IntegerToFloatFormat::F16: {
|
||||||
const IR::F16 zero{v.ir.FPConvert(16, v.ir.Imm32(0.0f))};
|
const IR::F16 zero{v.ir.FPConvert(16, v.ir.Imm32(0.0f))};
|
||||||
v.X(i2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(value, zero)));
|
v.X(i2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(value, zero)));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case FloatFormat::F32:
|
case IntegerToFloatFormat::F32:
|
||||||
v.F(i2f.dest_reg, value);
|
v.F(i2f.dest_reg, value);
|
||||||
break;
|
break;
|
||||||
case FloatFormat::F64: {
|
case IntegerToFloatFormat::F64: {
|
||||||
if (!IR::IsAligned(i2f.dest_reg, 2)) {
|
if (!IR::IsAligned(i2f.dest_reg, 2)) {
|
||||||
throw NotImplementedException("Unaligned destination {}", i2f.dest_reg.Value());
|
throw NotImplementedException("Unaligned destination {}", i2f.dest_reg.Value());
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,6 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
||||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
|
@ -15,18 +18,18 @@ enum class SelectMode : u64 {
|
||||||
CBCC,
|
CBCC,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum class Half : u64 {
|
enum class IMADHalf : u64 {
|
||||||
H0, // Least-significant bits (15:0)
|
H0, // Least-significant bits (15:0)
|
||||||
H1, // Most-significant bits (31:16)
|
H1, // Most-significant bits (31:16)
|
||||||
};
|
};
|
||||||
|
|
||||||
IR::U32 ExtractHalf(TranslatorVisitor& v, const IR::U32& src, Half half, bool is_signed) {
|
IR::U32 ExtractHalf(TranslatorVisitor& v, const IR::U32& src, IMADHalf half, bool is_signed) {
|
||||||
const IR::U32 offset{v.ir.Imm32(half == Half::H1 ? 16 : 0)};
|
const IR::U32 offset{v.ir.Imm32(half == IMADHalf::H1 ? 16 : 0)};
|
||||||
return v.ir.BitFieldExtract(src, offset, v.ir.Imm32(16), is_signed);
|
return v.ir.BitFieldExtract(src, offset, v.ir.Imm32(16), is_signed);
|
||||||
}
|
}
|
||||||
|
|
||||||
void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c,
|
void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c,
|
||||||
SelectMode select_mode, Half half_b, bool psl, bool mrg, bool x) {
|
SelectMode select_mode, IMADHalf half_b, bool psl, bool mrg, bool x) {
|
||||||
union {
|
union {
|
||||||
u64 raw;
|
u64 raw;
|
||||||
BitField<0, 8, IR::Reg> dest_reg;
|
BitField<0, 8, IR::Reg> dest_reg;
|
||||||
|
|
@ -34,7 +37,7 @@ void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& s
|
||||||
BitField<47, 1, u64> cc;
|
BitField<47, 1, u64> cc;
|
||||||
BitField<48, 1, u64> is_a_signed;
|
BitField<48, 1, u64> is_a_signed;
|
||||||
BitField<49, 1, u64> is_b_signed;
|
BitField<49, 1, u64> is_b_signed;
|
||||||
BitField<53, 1, Half> half_a;
|
BitField<53, 1, IMADHalf> half_a;
|
||||||
} const xmad{insn};
|
} const xmad{insn};
|
||||||
|
|
||||||
if (x) {
|
if (x) {
|
||||||
|
|
@ -53,9 +56,9 @@ void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& s
|
||||||
case SelectMode::Default:
|
case SelectMode::Default:
|
||||||
return src_c;
|
return src_c;
|
||||||
case SelectMode::CLO:
|
case SelectMode::CLO:
|
||||||
return ExtractHalf(v, src_c, Half::H0, false);
|
return ExtractHalf(v, src_c, IMADHalf::H0, false);
|
||||||
case SelectMode::CHI:
|
case SelectMode::CHI:
|
||||||
return ExtractHalf(v, src_c, Half::H1, false);
|
return ExtractHalf(v, src_c, IMADHalf::H1, false);
|
||||||
case SelectMode::CBCC:
|
case SelectMode::CBCC:
|
||||||
return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_c);
|
return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_c);
|
||||||
case SelectMode::CSFU:
|
case SelectMode::CSFU:
|
||||||
|
|
@ -66,7 +69,7 @@ void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& s
|
||||||
IR::U32 result{v.ir.IAdd(product, op_c)};
|
IR::U32 result{v.ir.IAdd(product, op_c)};
|
||||||
if (mrg) {
|
if (mrg) {
|
||||||
// .MRG inserts src_b [15:0] into result's [31:16].
|
// .MRG inserts src_b [15:0] into result's [31:16].
|
||||||
const IR::U32 lsb_b{ExtractHalf(v, src_b, Half::H0, false)};
|
const IR::U32 lsb_b{ExtractHalf(v, src_b, IMADHalf::H0, false)};
|
||||||
result = v.ir.BitFieldInsert(result, lsb_b, v.ir.Imm32(16), v.ir.Imm32(16));
|
result = v.ir.BitFieldInsert(result, lsb_b, v.ir.Imm32(16), v.ir.Imm32(16));
|
||||||
}
|
}
|
||||||
if (xmad.cc) {
|
if (xmad.cc) {
|
||||||
|
|
@ -80,7 +83,7 @@ void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& s
|
||||||
void TranslatorVisitor::XMAD_reg(u64 insn) {
|
void TranslatorVisitor::XMAD_reg(u64 insn) {
|
||||||
union {
|
union {
|
||||||
u64 raw;
|
u64 raw;
|
||||||
BitField<35, 1, Half> half_b;
|
BitField<35, 1, IMADHalf> half_b;
|
||||||
BitField<36, 1, u64> psl;
|
BitField<36, 1, u64> psl;
|
||||||
BitField<37, 1, u64> mrg;
|
BitField<37, 1, u64> mrg;
|
||||||
BitField<38, 1, u64> x;
|
BitField<38, 1, u64> x;
|
||||||
|
|
@ -95,7 +98,7 @@ void TranslatorVisitor::XMAD_rc(u64 insn) {
|
||||||
union {
|
union {
|
||||||
u64 raw;
|
u64 raw;
|
||||||
BitField<50, 2, SelectMode> select_mode;
|
BitField<50, 2, SelectMode> select_mode;
|
||||||
BitField<52, 1, Half> half_b;
|
BitField<52, 1, IMADHalf> half_b;
|
||||||
BitField<54, 1, u64> x;
|
BitField<54, 1, u64> x;
|
||||||
} const xmad{insn};
|
} const xmad{insn};
|
||||||
|
|
||||||
|
|
@ -107,7 +110,7 @@ void TranslatorVisitor::XMAD_cr(u64 insn) {
|
||||||
union {
|
union {
|
||||||
u64 raw;
|
u64 raw;
|
||||||
BitField<50, 2, SelectMode> select_mode;
|
BitField<50, 2, SelectMode> select_mode;
|
||||||
BitField<52, 1, Half> half_b;
|
BitField<52, 1, IMADHalf> half_b;
|
||||||
BitField<54, 1, u64> x;
|
BitField<54, 1, u64> x;
|
||||||
BitField<55, 1, u64> psl;
|
BitField<55, 1, u64> psl;
|
||||||
BitField<56, 1, u64> mrg;
|
BitField<56, 1, u64> mrg;
|
||||||
|
|
@ -128,7 +131,7 @@ void TranslatorVisitor::XMAD_imm(u64 insn) {
|
||||||
} const xmad{insn};
|
} const xmad{insn};
|
||||||
|
|
||||||
XMAD(*this, insn, ir.Imm32(static_cast<u32>(xmad.src_b)), GetReg39(insn), xmad.select_mode,
|
XMAD(*this, insn, ir.Imm32(static_cast<u32>(xmad.src_b)), GetReg39(insn), xmad.select_mode,
|
||||||
Half::H0, xmad.psl != 0, xmad.mrg != 0, xmad.x != 0);
|
IMADHalf::H0, xmad.psl != 0, xmad.mrg != 0, xmad.x != 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Shader::Maxwell
|
} // namespace Shader::Maxwell
|
||||||
|
|
|
||||||
|
|
@ -24,17 +24,17 @@ enum class SZ : u64 {
|
||||||
F32
|
F32
|
||||||
};
|
};
|
||||||
|
|
||||||
enum class Shift : u64 {
|
enum class ISBERDShift : u64 {
|
||||||
Default,
|
Default,
|
||||||
U16,
|
U16,
|
||||||
B32,
|
B32,
|
||||||
};
|
};
|
||||||
|
|
||||||
IR::U32 scaleIndex(IR::IREmitter& ir, IR::U32 index, Shift shift) {
|
IR::U32 scaleIndex(IR::IREmitter& ir, IR::U32 index, ISBERDShift shift) {
|
||||||
switch (shift) {
|
switch (shift) {
|
||||||
case Shift::Default: return index;
|
case ISBERDShift::Default: return index;
|
||||||
case Shift::U16: return ir.ShiftLeftLogical(index, ir.Imm32(1));
|
case ISBERDShift::U16: return ir.ShiftLeftLogical(index, ir.Imm32(1));
|
||||||
case Shift::B32: return ir.ShiftLeftLogical(index, ir.Imm32(2));
|
case ISBERDShift::B32: return ir.ShiftLeftLogical(index, ir.Imm32(2));
|
||||||
default: UNREACHABLE();
|
default: UNREACHABLE();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -65,7 +65,7 @@ void TranslatorVisitor::ISBERD(u64 insn) {
|
||||||
BitField<32, 1, u64> o;
|
BitField<32, 1, u64> o;
|
||||||
BitField<33, 2, ISBERDMode> mode;
|
BitField<33, 2, ISBERDMode> mode;
|
||||||
BitField<36, 4, SZ> sz;
|
BitField<36, 4, SZ> sz;
|
||||||
BitField<47, 2, Shift> shift;
|
BitField<47, 2, ISBERDShift> shift;
|
||||||
} const isberd{insn};
|
} const isberd{insn};
|
||||||
|
|
||||||
IR::U32 index{};
|
IR::U32 index{};
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,6 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
||||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
|
@ -30,7 +33,7 @@ enum class StoreSize : u64 {
|
||||||
};
|
};
|
||||||
|
|
||||||
// See Table 27 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
|
// See Table 27 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
|
||||||
enum class LoadCache : u64 {
|
enum class XMEMLoadCache : u64 {
|
||||||
CA, // Cache at all levels, likely to be accessed again
|
CA, // Cache at all levels, likely to be accessed again
|
||||||
CG, // Cache at global level (cache in L2 and below, not L1)
|
CG, // Cache at global level (cache in L2 and below, not L1)
|
||||||
CI, // ???
|
CI, // ???
|
||||||
|
|
@ -38,7 +41,7 @@ enum class LoadCache : u64 {
|
||||||
};
|
};
|
||||||
|
|
||||||
// See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
|
// See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
|
||||||
enum class StoreCache : u64 {
|
enum class XMEMStoreCache : u64 {
|
||||||
WB, // Cache write-back all coherent levels
|
WB, // Cache write-back all coherent levels
|
||||||
CG, // Cache at global level
|
CG, // Cache at global level
|
||||||
CS, // Cache streaming, likely to be accessed once
|
CS, // Cache streaming, likely to be accessed once
|
||||||
|
|
@ -83,7 +86,7 @@ void TranslatorVisitor::LDG(u64 insn) {
|
||||||
union {
|
union {
|
||||||
u64 raw;
|
u64 raw;
|
||||||
BitField<0, 8, IR::Reg> dest_reg;
|
BitField<0, 8, IR::Reg> dest_reg;
|
||||||
BitField<46, 2, LoadCache> cache;
|
BitField<46, 2, XMEMLoadCache> cache;
|
||||||
BitField<48, 3, LoadSize> size;
|
BitField<48, 3, LoadSize> size;
|
||||||
} const ldg{insn};
|
} const ldg{insn};
|
||||||
|
|
||||||
|
|
@ -137,7 +140,7 @@ void TranslatorVisitor::STG(u64 insn) {
|
||||||
union {
|
union {
|
||||||
u64 raw;
|
u64 raw;
|
||||||
BitField<0, 8, IR::Reg> data_reg;
|
BitField<0, 8, IR::Reg> data_reg;
|
||||||
BitField<46, 2, StoreCache> cache;
|
BitField<46, 2, XMEMStoreCache> cache;
|
||||||
BitField<48, 3, StoreSize> size;
|
BitField<48, 3, StoreSize> size;
|
||||||
} const stg{insn};
|
} const stg{insn};
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -64,14 +64,14 @@ enum class SurfaceLoadStoreClamp : u64 {
|
||||||
};
|
};
|
||||||
|
|
||||||
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#cache-operators
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#cache-operators
|
||||||
enum class LoadCache : u64 {
|
enum class SURFLoadCache : u64 {
|
||||||
CA, // Cache at all levels, likely to be accessed again
|
CA, // Cache at all levels, likely to be accessed again
|
||||||
CG, // Cache at global level (L2 and below, not L1)
|
CG, // Cache at global level (L2 and below, not L1)
|
||||||
CI, // ???
|
CI, // ???
|
||||||
CV, // Don't cache and fetch again (volatile)
|
CV, // Don't cache and fetch again (volatile)
|
||||||
};
|
};
|
||||||
|
|
||||||
enum class StoreCache : u64 {
|
enum class SURFStoreCache : u64 {
|
||||||
WB, // Cache write-back all coherent levels
|
WB, // Cache write-back all coherent levels
|
||||||
CG, // Cache at global level (L2 and below, not L1)
|
CG, // Cache at global level (L2 and below, not L1)
|
||||||
CS, // Cache streaming, likely to be accessed once
|
CS, // Cache streaming, likely to be accessed once
|
||||||
|
|
@ -178,7 +178,7 @@ void TranslatorVisitor::SULD(u64 insn) {
|
||||||
BitField<52, 1, u64> d;
|
BitField<52, 1, u64> d;
|
||||||
BitField<23, 1, u64> ba;
|
BitField<23, 1, u64> ba;
|
||||||
BitField<33, 3, SurfaceLoadStoreType> type;
|
BitField<33, 3, SurfaceLoadStoreType> type;
|
||||||
BitField<24, 2, LoadCache> cache;
|
BitField<24, 2, SURFLoadCache> cache;
|
||||||
BitField<20, 3, SurfaceLoadStoreSize> size; // .D
|
BitField<20, 3, SurfaceLoadStoreSize> size; // .D
|
||||||
BitField<20, 4, u64> swizzle; // .P
|
BitField<20, 4, u64> swizzle; // .P
|
||||||
BitField<49, 2, SurfaceLoadStoreClamp> clamp;
|
BitField<49, 2, SurfaceLoadStoreClamp> clamp;
|
||||||
|
|
@ -191,7 +191,7 @@ void TranslatorVisitor::SULD(u64 insn) {
|
||||||
if (suld.clamp != SurfaceLoadStoreClamp::IGN) {
|
if (suld.clamp != SurfaceLoadStoreClamp::IGN) {
|
||||||
throw NotImplementedException("SurfaceLoadStoreClamp {}", suld.clamp.Value());
|
throw NotImplementedException("SurfaceLoadStoreClamp {}", suld.clamp.Value());
|
||||||
}
|
}
|
||||||
if (suld.cache != LoadCache::CA && suld.cache != LoadCache::CG) {
|
if (suld.cache != SURFLoadCache::CA && suld.cache != SURFLoadCache::CG) {
|
||||||
throw NotImplementedException("Cache {}", suld.cache.Value());
|
throw NotImplementedException("Cache {}", suld.cache.Value());
|
||||||
}
|
}
|
||||||
const bool is_typed{suld.d != 0};
|
const bool is_typed{suld.d != 0};
|
||||||
|
|
@ -238,7 +238,7 @@ void TranslatorVisitor::SUST(u64 insn) {
|
||||||
BitField<52, 1, u64> d;
|
BitField<52, 1, u64> d;
|
||||||
BitField<23, 1, u64> ba;
|
BitField<23, 1, u64> ba;
|
||||||
BitField<33, 3, SurfaceLoadStoreType> type;
|
BitField<33, 3, SurfaceLoadStoreType> type;
|
||||||
BitField<24, 2, StoreCache> cache;
|
BitField<24, 2, SURFStoreCache> cache;
|
||||||
BitField<20, 3, SurfaceLoadStoreSize> size; // .D
|
BitField<20, 3, SurfaceLoadStoreSize> size; // .D
|
||||||
BitField<20, 4, u64> swizzle; // .P
|
BitField<20, 4, u64> swizzle; // .P
|
||||||
BitField<49, 2, SurfaceLoadStoreClamp> clamp;
|
BitField<49, 2, SurfaceLoadStoreClamp> clamp;
|
||||||
|
|
@ -251,7 +251,7 @@ void TranslatorVisitor::SUST(u64 insn) {
|
||||||
if (sust.clamp != SurfaceLoadStoreClamp::IGN) {
|
if (sust.clamp != SurfaceLoadStoreClamp::IGN) {
|
||||||
throw NotImplementedException("SurfaceLoadStoreClamp {}", sust.clamp.Value());
|
throw NotImplementedException("SurfaceLoadStoreClamp {}", sust.clamp.Value());
|
||||||
}
|
}
|
||||||
if (sust.cache != StoreCache::WB && sust.cache != StoreCache::CG) {
|
if (sust.cache != SURFStoreCache::WB && sust.cache != SURFStoreCache::CG) {
|
||||||
throw NotImplementedException("Cache {}", sust.cache.Value());
|
throw NotImplementedException("Cache {}", sust.cache.Value());
|
||||||
}
|
}
|
||||||
const bool is_typed{sust.d != 0};
|
const bool is_typed{sust.d != 0};
|
||||||
|
|
|
||||||
|
|
@ -134,7 +134,7 @@ IR::Value SampleTFS(TranslatorVisitor& v, u64 insn) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned Swizzle(u64 insn) {
|
unsigned FetchSwizzle(u64 insn) {
|
||||||
#define R 1
|
#define R 1
|
||||||
#define G 2
|
#define G 2
|
||||||
#define B 4
|
#define B 4
|
||||||
|
|
@ -173,7 +173,7 @@ unsigned Swizzle(u64 insn) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) {
|
IR::F32 FetchExtract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) {
|
||||||
const bool is_shadow{sample.Type() == IR::Type::F32};
|
const bool is_shadow{sample.Type() == IR::Type::F32};
|
||||||
if (is_shadow) {
|
if (is_shadow) {
|
||||||
const bool is_alpha{component == 3};
|
const bool is_alpha{component == 3};
|
||||||
|
|
@ -183,7 +183,7 @@ IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned componen
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
IR::Reg RegStoreComponent32(u64 insn, unsigned index) {
|
IR::Reg FetchRegStoreComponent32(u64 insn, unsigned index) {
|
||||||
const EncodinTFS texs{insn};
|
const EncodinTFS texs{insn};
|
||||||
switch (index) {
|
switch (index) {
|
||||||
case 0:
|
case 0:
|
||||||
|
|
@ -201,14 +201,14 @@ IR::Reg RegStoreComponent32(u64 insn, unsigned index) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void Store32TFS(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
|
void Store32TFS(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
|
||||||
const unsigned swizzle{Swizzle(insn)};
|
const unsigned swizzle{FetchSwizzle(insn)};
|
||||||
unsigned store_index{0};
|
unsigned store_index{0};
|
||||||
for (unsigned component = 0; component < 4; ++component) {
|
for (unsigned component = 0; component < 4; ++component) {
|
||||||
if (((swizzle >> component) & 1) == 0) {
|
if (((swizzle >> component) & 1) == 0) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const IR::Reg dest{RegStoreComponent32(insn, store_index)};
|
const IR::Reg dest{FetchRegStoreComponent32(insn, store_index)};
|
||||||
v.F(dest, Extract(v, sample, component));
|
v.F(dest, FetchExtract(v, sample, component));
|
||||||
++store_index;
|
++store_index;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -218,14 +218,14 @@ IR::U32 PackTFS(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void Store16TFS(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
|
void Store16TFS(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
|
||||||
const unsigned swizzle{Swizzle(insn)};
|
const unsigned swizzle{FetchSwizzle(insn)};
|
||||||
unsigned store_index{0};
|
unsigned store_index{0};
|
||||||
std::array<IR::F32, 4> swizzled;
|
std::array<IR::F32, 4> swizzled;
|
||||||
for (unsigned component = 0; component < 4; ++component) {
|
for (unsigned component = 0; component < 4; ++component) {
|
||||||
if (((swizzle >> component) & 1) == 0) {
|
if (((swizzle >> component) & 1) == 0) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
swizzled[store_index] = Extract(v, sample, component);
|
swizzled[store_index] = FetchExtract(v, sample, component);
|
||||||
++store_index;
|
++store_index;
|
||||||
}
|
}
|
||||||
const IR::F32 zero{v.ir.Imm32(0.0f)};
|
const IR::F32 zero{v.ir.Imm32(0.0f)};
|
||||||
|
|
|
||||||
|
|
@ -43,7 +43,7 @@ void CheckAlignmentTGS(IR::Reg reg, size_t alignment) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) {
|
IR::Value MakeGatherOffset(TranslatorVisitor& v, IR::Reg reg) {
|
||||||
const IR::U32 value{v.X(reg)};
|
const IR::U32 value{v.X(reg)};
|
||||||
return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true),
|
return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true),
|
||||||
v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true));
|
v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true));
|
||||||
|
|
@ -65,7 +65,7 @@ IR::Value SampleTGS(TranslatorVisitor& v, u64 insn) {
|
||||||
if (tld4s.aoffi != 0) {
|
if (tld4s.aoffi != 0) {
|
||||||
CheckAlignmentTGS(reg_a, 2);
|
CheckAlignmentTGS(reg_a, 2);
|
||||||
coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1));
|
coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1));
|
||||||
IR::Value offset = MakeOffset(v, reg_b);
|
IR::Value offset = MakeGatherOffset(v, reg_b);
|
||||||
if (tld4s.dc != 0) {
|
if (tld4s.dc != 0) {
|
||||||
CheckAlignmentTGS(reg_b, 2);
|
CheckAlignmentTGS(reg_b, 2);
|
||||||
IR::F32 dref = v.F(reg_b + 1);
|
IR::F32 dref = v.F(reg_b + 1);
|
||||||
|
|
|
||||||
|
|
@ -40,7 +40,7 @@ void CheckAlignmentTLS(IR::Reg reg, size_t alignment) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) {
|
IR::Value MakeLoadOffset(TranslatorVisitor& v, IR::Reg reg) {
|
||||||
const IR::U32 value{v.X(reg)};
|
const IR::U32 value{v.X(reg)};
|
||||||
return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
|
return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
|
||||||
v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true));
|
v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true));
|
||||||
|
|
@ -74,7 +74,7 @@ IR::Value SampleTLS(TranslatorVisitor& v, u64 insn) {
|
||||||
CheckAlignmentTLS(reg_a, 2);
|
CheckAlignmentTLS(reg_a, 2);
|
||||||
texture_type = Shader::TextureType::Color2D;
|
texture_type = Shader::TextureType::Color2D;
|
||||||
coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
|
coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
|
||||||
offsets = MakeOffset(v, reg_b);
|
offsets = MakeLoadOffset(v, reg_b);
|
||||||
break;
|
break;
|
||||||
case 5:
|
case 5:
|
||||||
CheckAlignmentTLS(reg_a, 2);
|
CheckAlignmentTLS(reg_a, 2);
|
||||||
|
|
@ -106,7 +106,7 @@ IR::Value SampleTLS(TranslatorVisitor& v, u64 insn) {
|
||||||
texture_type = Shader::TextureType::Color2D;
|
texture_type = Shader::TextureType::Color2D;
|
||||||
coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
|
coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
|
||||||
lod = v.X(reg_b);
|
lod = v.X(reg_b);
|
||||||
offsets = MakeOffset(v, reg_b + 1);
|
offsets = MakeLoadOffset(v, reg_b + 1);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
throw NotImplementedException("Illegal encoding {}", tlds.encoding.Value());
|
throw NotImplementedException("Illegal encoding {}", tlds.encoding.Value());
|
||||||
|
|
@ -119,7 +119,7 @@ IR::Value SampleTLS(TranslatorVisitor& v, u64 insn) {
|
||||||
return v.ir.ImageFetch(handle, coords, offsets, lod, multisample, info);
|
return v.ir.ImageFetch(handle, coords, offsets, lod, multisample, info);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned Swizzle(u64 insn) {
|
unsigned LoadSwizzle(u64 insn) {
|
||||||
#define R 1
|
#define R 1
|
||||||
#define G 2
|
#define G 2
|
||||||
#define B 4
|
#define B 4
|
||||||
|
|
@ -160,11 +160,11 @@ unsigned Swizzle(u64 insn) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) {
|
IR::F32 LoadExtract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) {
|
||||||
return IR::F32{v.ir.CompositeExtract(sample, component)};
|
return IR::F32{v.ir.CompositeExtract(sample, component)};
|
||||||
}
|
}
|
||||||
|
|
||||||
IR::Reg RegStoreComponent32(u64 insn, unsigned index) {
|
IR::Reg LoadRegStoreComponent32(u64 insn, unsigned index) {
|
||||||
const EncodinTLS tlds{insn};
|
const EncodinTLS tlds{insn};
|
||||||
switch (index) {
|
switch (index) {
|
||||||
case 0:
|
case 0:
|
||||||
|
|
@ -182,14 +182,14 @@ IR::Reg RegStoreComponent32(u64 insn, unsigned index) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void Store32TLS(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
|
void Store32TLS(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
|
||||||
const unsigned swizzle{Swizzle(insn)};
|
const unsigned swizzle{LoadSwizzle(insn)};
|
||||||
unsigned store_index{0};
|
unsigned store_index{0};
|
||||||
for (unsigned component = 0; component < 4; ++component) {
|
for (unsigned component = 0; component < 4; ++component) {
|
||||||
if (((swizzle >> component) & 1) == 0) {
|
if (((swizzle >> component) & 1) == 0) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const IR::Reg dest{RegStoreComponent32(insn, store_index)};
|
const IR::Reg dest{LoadRegStoreComponent32(insn, store_index)};
|
||||||
v.F(dest, Extract(v, sample, component));
|
v.F(dest, LoadExtract(v, sample, component));
|
||||||
++store_index;
|
++store_index;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -199,14 +199,14 @@ IR::U32 PackTLS(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void Store16TLS(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
|
void Store16TLS(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
|
||||||
const unsigned swizzle{Swizzle(insn)};
|
const unsigned swizzle{LoadSwizzle(insn)};
|
||||||
unsigned store_index{0};
|
unsigned store_index{0};
|
||||||
std::array<IR::F32, 4> swizzled;
|
std::array<IR::F32, 4> swizzled;
|
||||||
for (unsigned component = 0; component < 4; ++component) {
|
for (unsigned component = 0; component < 4; ++component) {
|
||||||
if (((swizzle >> component) & 1) == 0) {
|
if (((swizzle >> component) & 1) == 0) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
swizzled[store_index] = Extract(v, sample, component);
|
swizzled[store_index] = LoadExtract(v, sample, component);
|
||||||
++store_index;
|
++store_index;
|
||||||
}
|
}
|
||||||
const IR::F32 zero{v.ir.Imm32(0.0f)};
|
const IR::F32 zero{v.ir.Imm32(0.0f)};
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue