mirror of
https://git.eden-emu.dev/eden-emu/eden
synced 2026-05-26 19:37:01 +02:00
[dynarmic] VEX encode movd/movq
Signed-off-by: lizzie <lizzie@eden-emu.dev>
This commit is contained in:
parent
0d736d49d6
commit
21bb1b8210
5 changed files with 95 additions and 54 deletions
|
|
@ -348,7 +348,11 @@ void A32EmitX64::EmitA32SetRegister(A32EmitContext& ctx, IR::Inst* inst) {
|
|||
code.mov(MJitStateReg(reg), args[1].GetImmediateU32());
|
||||
} else if (args[1].IsInXmm(ctx.reg_alloc)) {
|
||||
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[1]);
|
||||
code.movd(MJitStateReg(reg), to_store);
|
||||
if (code.HasHostFeature(HostFeature::AVX)) {
|
||||
code.vmovd(MJitStateReg(reg), to_store);
|
||||
} else {
|
||||
code.movd(MJitStateReg(reg), to_store);
|
||||
}
|
||||
} else {
|
||||
const Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(code, args[1]).cvt32();
|
||||
code.mov(MJitStateReg(reg), to_store);
|
||||
|
|
@ -641,7 +645,11 @@ void A32EmitX64::EmitA32OrQFlag(A32EmitContext& ctx, IR::Inst* inst) {
|
|||
|
||||
void A32EmitX64::EmitA32GetGEFlags(A32EmitContext& ctx, IR::Inst* inst) {
|
||||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
|
||||
code.movd(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)]);
|
||||
if (code.HasHostFeature(HostFeature::AVX)) {
|
||||
code.vmovd(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)]);
|
||||
} else {
|
||||
code.movd(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)]);
|
||||
}
|
||||
ctx.reg_alloc.DefineValue(code, inst, result);
|
||||
}
|
||||
|
||||
|
|
@ -651,7 +659,11 @@ void A32EmitX64::EmitA32SetGEFlags(A32EmitContext& ctx, IR::Inst* inst) {
|
|||
|
||||
if (args[0].IsInXmm(ctx.reg_alloc)) {
|
||||
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[0]);
|
||||
code.movd(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], to_store);
|
||||
if (code.HasHostFeature(HostFeature::AVX)) {
|
||||
code.vmovd(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], to_store);
|
||||
} else {
|
||||
code.movd(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], to_store);
|
||||
}
|
||||
} else {
|
||||
const Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(code, args[0]).cvt32();
|
||||
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], to_store);
|
||||
|
|
|
|||
|
|
@ -339,7 +339,11 @@ void A64EmitX64::EmitA64GetS(A64EmitContext& ctx, IR::Inst* inst) {
|
|||
const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
|
||||
|
||||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
|
||||
code.movd(result, addr);
|
||||
if (code.HasHostFeature(HostFeature::AVX)) {
|
||||
code.vmovd(result, addr);
|
||||
} else {
|
||||
code.movd(result, addr);
|
||||
}
|
||||
ctx.reg_alloc.DefineValue(code, inst, result);
|
||||
}
|
||||
|
||||
|
|
@ -348,7 +352,11 @@ void A64EmitX64::EmitA64GetD(A64EmitContext& ctx, IR::Inst* inst) {
|
|||
const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
|
||||
|
||||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
|
||||
code.movq(result, addr);
|
||||
if (code.HasHostFeature(HostFeature::AVX)) {
|
||||
code.vmovq(result, addr);
|
||||
} else {
|
||||
code.movq(result, addr);
|
||||
}
|
||||
ctx.reg_alloc.DefineValue(code, inst, result);
|
||||
}
|
||||
|
||||
|
|
@ -357,7 +365,11 @@ void A64EmitX64::EmitA64GetQ(A64EmitContext& ctx, IR::Inst* inst) {
|
|||
const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
|
||||
|
||||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
|
||||
code.movaps(result, addr);
|
||||
if (code.HasHostFeature(HostFeature::AVX)) {
|
||||
code.vmovaps(result, addr);
|
||||
} else {
|
||||
code.movaps(result, addr);
|
||||
}
|
||||
ctx.reg_alloc.DefineValue(code, inst, result);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -226,13 +226,22 @@ void EmitPostProcessNaNs(BlockOfCode& code, Xbyak::Xmm result, Xbyak::Xmm op1, X
|
|||
// op1 == Inf && op2 == QNaN
|
||||
// op1 == QNaN && op2 == SNaN <<< The problematic case
|
||||
// op1 == QNaN && op2 == Inf
|
||||
|
||||
if constexpr (fsize == 32) {
|
||||
code.movd(tmp.cvt32(), op2);
|
||||
code.shl(tmp.cvt32(), 32 - mantissa_msb_bit);
|
||||
if (code.HasHostFeature(HostFeature::AVX)) {
|
||||
if constexpr (fsize == 32) {
|
||||
code.movd(tmp.cvt32(), op2);
|
||||
code.shl(tmp.cvt32(), 32 - mantissa_msb_bit);
|
||||
} else {
|
||||
code.movq(tmp, op2);
|
||||
code.shl(tmp, 64 - mantissa_msb_bit);
|
||||
}
|
||||
} else {
|
||||
code.movq(tmp, op2);
|
||||
code.shl(tmp, 64 - mantissa_msb_bit);
|
||||
if constexpr (fsize == 32) {
|
||||
code.vmovd(tmp.cvt32(), op2);
|
||||
code.shl(tmp.cvt32(), 32 - mantissa_msb_bit);
|
||||
} else {
|
||||
code.vmovq(tmp, op2);
|
||||
code.shl(tmp, 64 - mantissa_msb_bit);
|
||||
}
|
||||
}
|
||||
// If op2 is a SNaN, CF = 0 and ZF = 0.
|
||||
code.jna(end, code.T_NEAR);
|
||||
|
|
@ -477,10 +486,18 @@ static inline void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::
|
|||
tmp.setBit(fsize);
|
||||
|
||||
const auto move_to_tmp = [=, &code](const Xbyak::Xmm& xmm) {
|
||||
if constexpr (fsize == 32) {
|
||||
code.movd(tmp.cvt32(), xmm);
|
||||
if (code.HasHostFeature(HostFeature::AVX)) {
|
||||
if constexpr (fsize == 32) {
|
||||
code.vmovd(tmp.cvt32(), xmm);
|
||||
} else {
|
||||
code.vmovq(tmp.cvt64(), xmm);
|
||||
}
|
||||
} else {
|
||||
code.movq(tmp.cvt64(), xmm);
|
||||
if constexpr (fsize == 32) {
|
||||
code.movd(tmp.cvt32(), xmm);
|
||||
} else {
|
||||
code.movq(tmp.cvt64(), xmm);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -1156,7 +1173,11 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
|
|||
|
||||
code.L(*bad_values);
|
||||
if constexpr (fsize == 32) {
|
||||
code.movd(tmp, operand);
|
||||
if (code.HasHostFeature(HostFeature::AVX)) {
|
||||
code.vmovd(tmp, operand);
|
||||
} else {
|
||||
code.movd(tmp, operand);
|
||||
}
|
||||
|
||||
if (!ctx.FPCR().FZ()) {
|
||||
if (ctx.FPCR().DN()) {
|
||||
|
|
@ -1186,7 +1207,12 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
|
|||
}
|
||||
|
||||
code.L(default_nan);
|
||||
code.movd(result, code.Const(xword, 0x7FC00000));
|
||||
|
||||
if (code.HasHostFeature(HostFeature::AVX)) {
|
||||
code.vmovd(result, code.Const(xword, 0x7FC00000));
|
||||
} else {
|
||||
code.movd(result, code.Const(xword, 0x7FC00000));
|
||||
}
|
||||
code.jmp(*end, code.T_NEAR);
|
||||
} else {
|
||||
Xbyak::Label nan, zero;
|
||||
|
|
|
|||
|
|
@ -227,7 +227,11 @@ void EmitX64::EmitVectorGetElement32(EmitContext& ctx, IR::Inst* inst) {
|
|||
} else {
|
||||
auto const source = ctx.reg_alloc.UseScratchXmm(code, args[0]);
|
||||
code.pshufd(source, source, index);
|
||||
code.movd(dest, source);
|
||||
if (code.HasHostFeature(HostFeature::AVX)) {
|
||||
code.vmovd(dest, source);
|
||||
} else {
|
||||
code.movd(dest, source);
|
||||
}
|
||||
}
|
||||
|
||||
ctx.reg_alloc.DefineValue(code, inst, dest);
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@
|
|||
#include <tuple>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
#include <smmintrin.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "dynarmic/mcl/function_info.hpp"
|
||||
|
|
@ -1652,25 +1653,23 @@ static void EmitFPVectorRoundIntThunk(VectorArray<FPT>& output, const VectorArra
|
|||
|
||||
template<size_t fsize>
|
||||
void EmitFPVectorRoundInt(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
//auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
const auto rounding = FP::RoundingMode(inst->GetArg(1).GetU8());
|
||||
const bool exact = inst->GetArg(2).GetU1();
|
||||
|
||||
if constexpr (fsize != 16) {
|
||||
if (code.HasHostFeature(HostFeature::SSE41) && rounding != FP::RoundingMode::ToNearest_TieAwayFromZero && !exact) {
|
||||
const u8 round_imm = [&]() -> u8 {
|
||||
const u8 round_imm = [rounding]() -> u8 {
|
||||
switch (rounding) {
|
||||
case FP::RoundingMode::ToNearest_TieEven: return 0b00;
|
||||
case FP::RoundingMode::TowardsPlusInfinity: return 0b10;
|
||||
case FP::RoundingMode::TowardsMinusInfinity: return 0b01;
|
||||
case FP::RoundingMode::TowardsZero: return 0b11;
|
||||
case FP::RoundingMode::ToNearest_TieEven: return _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
|
||||
case FP::RoundingMode::TowardsPlusInfinity: return _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC;
|
||||
case FP::RoundingMode::TowardsMinusInfinity: return _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC;
|
||||
case FP::RoundingMode::TowardsZero: return _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC;
|
||||
default: UNREACHABLE();
|
||||
}
|
||||
}();
|
||||
|
||||
EmitTwoOpVectorOperation<fsize, DefaultIndexer, 3>(code, ctx, inst, [&](const Xbyak::Xmm& result, const Xbyak::Xmm& xmm_a) {
|
||||
EmitTwoOpVectorOperation<fsize, DefaultIndexer, 3>(code, ctx, inst, [&code, round_imm](const Xbyak::Xmm result, const Xbyak::Xmm xmm_a) {
|
||||
FCODE(roundp)(result, xmm_a, round_imm);
|
||||
});
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
|
@ -1678,33 +1677,21 @@ void EmitFPVectorRoundInt(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
|||
// Do not make a LUT out of this, let the compiler do its thing
|
||||
using FPT = mcl::unsigned_integer_of_size<fsize>;
|
||||
switch (rounding) {
|
||||
case FP::RoundingMode::ToNearest_TieEven:
|
||||
exact
|
||||
? EmitTwoOpFallback<3>(code, ctx, inst, EmitFPVectorRoundIntThunk<FPT, FP::RoundingMode::ToNearest_TieEven, true>)
|
||||
: EmitTwoOpFallback<3>(code, ctx, inst, EmitFPVectorRoundIntThunk<FPT, FP::RoundingMode::ToNearest_TieEven, false>);
|
||||
break;
|
||||
case FP::RoundingMode::TowardsPlusInfinity:
|
||||
exact
|
||||
? EmitTwoOpFallback<3>(code, ctx, inst, EmitFPVectorRoundIntThunk<FPT, FP::RoundingMode::TowardsPlusInfinity, true>)
|
||||
: EmitTwoOpFallback<3>(code, ctx, inst, EmitFPVectorRoundIntThunk<FPT, FP::RoundingMode::TowardsPlusInfinity, false>);
|
||||
break;
|
||||
case FP::RoundingMode::TowardsMinusInfinity:
|
||||
exact
|
||||
? EmitTwoOpFallback<3>(code, ctx, inst, EmitFPVectorRoundIntThunk<FPT, FP::RoundingMode::TowardsMinusInfinity, true>)
|
||||
: EmitTwoOpFallback<3>(code, ctx, inst, EmitFPVectorRoundIntThunk<FPT, FP::RoundingMode::TowardsMinusInfinity, false>);
|
||||
break;
|
||||
case FP::RoundingMode::TowardsZero:
|
||||
exact
|
||||
? EmitTwoOpFallback<3>(code, ctx, inst, EmitFPVectorRoundIntThunk<FPT, FP::RoundingMode::TowardsZero, true>)
|
||||
: EmitTwoOpFallback<3>(code, ctx, inst, EmitFPVectorRoundIntThunk<FPT, FP::RoundingMode::TowardsZero, false>);
|
||||
break;
|
||||
case FP::RoundingMode::ToNearest_TieAwayFromZero:
|
||||
exact
|
||||
? EmitTwoOpFallback<3>(code, ctx, inst, EmitFPVectorRoundIntThunk<FPT, FP::RoundingMode::ToNearest_TieAwayFromZero, true>)
|
||||
: EmitTwoOpFallback<3>(code, ctx, inst, EmitFPVectorRoundIntThunk<FPT, FP::RoundingMode::ToNearest_TieAwayFromZero, false>);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
#define ROUND_LIST \
|
||||
ROUND_ELEM(ToNearest_TieEven) \
|
||||
ROUND_ELEM(TowardsPlusInfinity) \
|
||||
ROUND_ELEM(TowardsMinusInfinity) \
|
||||
ROUND_ELEM(TowardsZero) \
|
||||
ROUND_ELEM(ToNearest_TieAwayFromZero)
|
||||
#define ROUND_ELEM(name) \
|
||||
case FP::RoundingMode::name: \
|
||||
return exact \
|
||||
? EmitTwoOpFallback<3>(code, ctx, inst, EmitFPVectorRoundIntThunk<FPT, FP::RoundingMode::name, true>) \
|
||||
: EmitTwoOpFallback<3>(code, ctx, inst, EmitFPVectorRoundIntThunk<FPT, FP::RoundingMode::name, false>);
|
||||
ROUND_LIST
|
||||
#undef ROUND_ELEM
|
||||
#undef ROUND_LIST
|
||||
default: UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue