[dynarmic] VEX encode movd/movq

Signed-off-by: lizzie <lizzie@eden-emu.dev>
This commit is contained in:
lizzie 2026-05-24 18:47:11 +00:00
parent 0d736d49d6
commit 21bb1b8210
5 changed files with 95 additions and 54 deletions

View file

@ -348,7 +348,11 @@ void A32EmitX64::EmitA32SetRegister(A32EmitContext& ctx, IR::Inst* inst) {
code.mov(MJitStateReg(reg), args[1].GetImmediateU32());
} else if (args[1].IsInXmm(ctx.reg_alloc)) {
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[1]);
code.movd(MJitStateReg(reg), to_store);
if (code.HasHostFeature(HostFeature::AVX)) {
code.vmovd(MJitStateReg(reg), to_store);
} else {
code.movd(MJitStateReg(reg), to_store);
}
} else {
const Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(code, args[1]).cvt32();
code.mov(MJitStateReg(reg), to_store);
@ -641,7 +645,11 @@ void A32EmitX64::EmitA32OrQFlag(A32EmitContext& ctx, IR::Inst* inst) {
void A32EmitX64::EmitA32GetGEFlags(A32EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
code.movd(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)]);
if (code.HasHostFeature(HostFeature::AVX)) {
code.vmovd(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)]);
} else {
code.movd(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)]);
}
ctx.reg_alloc.DefineValue(code, inst, result);
}
@ -651,7 +659,11 @@ void A32EmitX64::EmitA32SetGEFlags(A32EmitContext& ctx, IR::Inst* inst) {
if (args[0].IsInXmm(ctx.reg_alloc)) {
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[0]);
code.movd(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], to_store);
if (code.HasHostFeature(HostFeature::AVX)) {
code.vmovd(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], to_store);
} else {
code.movd(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], to_store);
}
} else {
const Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(code, args[0]).cvt32();
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], to_store);

View file

@ -339,7 +339,11 @@ void A64EmitX64::EmitA64GetS(A64EmitContext& ctx, IR::Inst* inst) {
const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
code.movd(result, addr);
if (code.HasHostFeature(HostFeature::AVX)) {
code.vmovd(result, addr);
} else {
code.movd(result, addr);
}
ctx.reg_alloc.DefineValue(code, inst, result);
}
@ -348,7 +352,11 @@ void A64EmitX64::EmitA64GetD(A64EmitContext& ctx, IR::Inst* inst) {
const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
code.movq(result, addr);
if (code.HasHostFeature(HostFeature::AVX)) {
code.vmovq(result, addr);
} else {
code.movq(result, addr);
}
ctx.reg_alloc.DefineValue(code, inst, result);
}
@ -357,7 +365,11 @@ void A64EmitX64::EmitA64GetQ(A64EmitContext& ctx, IR::Inst* inst) {
const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
code.movaps(result, addr);
if (code.HasHostFeature(HostFeature::AVX)) {
code.vmovaps(result, addr);
} else {
code.movaps(result, addr);
}
ctx.reg_alloc.DefineValue(code, inst, result);
}

View file

@ -226,13 +226,22 @@ void EmitPostProcessNaNs(BlockOfCode& code, Xbyak::Xmm result, Xbyak::Xmm op1, X
// op1 == Inf && op2 == QNaN
// op1 == QNaN && op2 == SNaN <<< The problematic case
// op1 == QNaN && op2 == Inf
if constexpr (fsize == 32) {
code.movd(tmp.cvt32(), op2);
code.shl(tmp.cvt32(), 32 - mantissa_msb_bit);
// VEX-encoded vmovd/vmovq may only be emitted when the host reports AVX;
// hosts without AVX must get the legacy SSE2 movd/movq encodings.
if (code.HasHostFeature(HostFeature::AVX)) {
if constexpr (fsize == 32) {
code.vmovd(tmp.cvt32(), op2);
code.shl(tmp.cvt32(), 32 - mantissa_msb_bit);
} else {
code.vmovq(tmp, op2);
code.shl(tmp, 64 - mantissa_msb_bit);
}
} else {
code.movq(tmp, op2);
code.shl(tmp, 64 - mantissa_msb_bit);
if constexpr (fsize == 32) {
code.movd(tmp.cvt32(), op2);
code.shl(tmp.cvt32(), 32 - mantissa_msb_bit);
} else {
code.movq(tmp, op2);
code.shl(tmp, 64 - mantissa_msb_bit);
}
}
// If op2 is a SNaN, CF = 0 and ZF = 0.
code.jna(end, code.T_NEAR);
@ -477,10 +486,18 @@ static inline void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::
tmp.setBit(fsize);
const auto move_to_tmp = [=, &code](const Xbyak::Xmm& xmm) {
if constexpr (fsize == 32) {
code.movd(tmp.cvt32(), xmm);
if (code.HasHostFeature(HostFeature::AVX)) {
if constexpr (fsize == 32) {
code.vmovd(tmp.cvt32(), xmm);
} else {
code.vmovq(tmp.cvt64(), xmm);
}
} else {
code.movq(tmp.cvt64(), xmm);
if constexpr (fsize == 32) {
code.movd(tmp.cvt32(), xmm);
} else {
code.movq(tmp.cvt64(), xmm);
}
}
};
@ -1156,7 +1173,11 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
code.L(*bad_values);
if constexpr (fsize == 32) {
code.movd(tmp, operand);
if (code.HasHostFeature(HostFeature::AVX)) {
code.vmovd(tmp, operand);
} else {
code.movd(tmp, operand);
}
if (!ctx.FPCR().FZ()) {
if (ctx.FPCR().DN()) {
@ -1186,7 +1207,12 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
}
code.L(default_nan);
code.movd(result, code.Const(xword, 0x7FC00000));
if (code.HasHostFeature(HostFeature::AVX)) {
code.vmovd(result, code.Const(xword, 0x7FC00000));
} else {
code.movd(result, code.Const(xword, 0x7FC00000));
}
code.jmp(*end, code.T_NEAR);
} else {
Xbyak::Label nan, zero;

View file

@ -227,7 +227,11 @@ void EmitX64::EmitVectorGetElement32(EmitContext& ctx, IR::Inst* inst) {
} else {
auto const source = ctx.reg_alloc.UseScratchXmm(code, args[0]);
code.pshufd(source, source, index);
code.movd(dest, source);
if (code.HasHostFeature(HostFeature::AVX)) {
code.vmovd(dest, source);
} else {
code.movd(dest, source);
}
}
ctx.reg_alloc.DefineValue(code, inst, dest);

View file

@ -11,6 +11,7 @@
#include <tuple>
#include <type_traits>
#include <utility>
#include <smmintrin.h>
#include "common/assert.h"
#include "dynarmic/mcl/function_info.hpp"
@ -1652,25 +1653,23 @@ static void EmitFPVectorRoundIntThunk(VectorArray<FPT>& output, const VectorArra
template<size_t fsize>
void EmitFPVectorRoundInt(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
//auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto rounding = FP::RoundingMode(inst->GetArg(1).GetU8());
const bool exact = inst->GetArg(2).GetU1();
if constexpr (fsize != 16) {
if (code.HasHostFeature(HostFeature::SSE41) && rounding != FP::RoundingMode::ToNearest_TieAwayFromZero && !exact) {
const u8 round_imm = [&]() -> u8 {
const u8 round_imm = [rounding]() -> u8 {
switch (rounding) {
case FP::RoundingMode::ToNearest_TieEven: return 0b00;
case FP::RoundingMode::TowardsPlusInfinity: return 0b10;
case FP::RoundingMode::TowardsMinusInfinity: return 0b01;
case FP::RoundingMode::TowardsZero: return 0b11;
case FP::RoundingMode::ToNearest_TieEven: return _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
case FP::RoundingMode::TowardsPlusInfinity: return _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC;
case FP::RoundingMode::TowardsMinusInfinity: return _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC;
case FP::RoundingMode::TowardsZero: return _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC;
default: UNREACHABLE();
}
}();
EmitTwoOpVectorOperation<fsize, DefaultIndexer, 3>(code, ctx, inst, [&](const Xbyak::Xmm& result, const Xbyak::Xmm& xmm_a) {
EmitTwoOpVectorOperation<fsize, DefaultIndexer, 3>(code, ctx, inst, [&code, round_imm](const Xbyak::Xmm result, const Xbyak::Xmm xmm_a) {
FCODE(roundp)(result, xmm_a, round_imm);
});
return;
}
}
@ -1678,33 +1677,21 @@ void EmitFPVectorRoundInt(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
// Do not make a LUT out of this, let the compiler do its thing
using FPT = mcl::unsigned_integer_of_size<fsize>;
switch (rounding) {
case FP::RoundingMode::ToNearest_TieEven:
exact
? EmitTwoOpFallback<3>(code, ctx, inst, EmitFPVectorRoundIntThunk<FPT, FP::RoundingMode::ToNearest_TieEven, true>)
: EmitTwoOpFallback<3>(code, ctx, inst, EmitFPVectorRoundIntThunk<FPT, FP::RoundingMode::ToNearest_TieEven, false>);
break;
case FP::RoundingMode::TowardsPlusInfinity:
exact
? EmitTwoOpFallback<3>(code, ctx, inst, EmitFPVectorRoundIntThunk<FPT, FP::RoundingMode::TowardsPlusInfinity, true>)
: EmitTwoOpFallback<3>(code, ctx, inst, EmitFPVectorRoundIntThunk<FPT, FP::RoundingMode::TowardsPlusInfinity, false>);
break;
case FP::RoundingMode::TowardsMinusInfinity:
exact
? EmitTwoOpFallback<3>(code, ctx, inst, EmitFPVectorRoundIntThunk<FPT, FP::RoundingMode::TowardsMinusInfinity, true>)
: EmitTwoOpFallback<3>(code, ctx, inst, EmitFPVectorRoundIntThunk<FPT, FP::RoundingMode::TowardsMinusInfinity, false>);
break;
case FP::RoundingMode::TowardsZero:
exact
? EmitTwoOpFallback<3>(code, ctx, inst, EmitFPVectorRoundIntThunk<FPT, FP::RoundingMode::TowardsZero, true>)
: EmitTwoOpFallback<3>(code, ctx, inst, EmitFPVectorRoundIntThunk<FPT, FP::RoundingMode::TowardsZero, false>);
break;
case FP::RoundingMode::ToNearest_TieAwayFromZero:
exact
? EmitTwoOpFallback<3>(code, ctx, inst, EmitFPVectorRoundIntThunk<FPT, FP::RoundingMode::ToNearest_TieAwayFromZero, true>)
: EmitTwoOpFallback<3>(code, ctx, inst, EmitFPVectorRoundIntThunk<FPT, FP::RoundingMode::ToNearest_TieAwayFromZero, false>);
break;
default:
UNREACHABLE();
#define ROUND_LIST \
ROUND_ELEM(ToNearest_TieEven) \
ROUND_ELEM(TowardsPlusInfinity) \
ROUND_ELEM(TowardsMinusInfinity) \
ROUND_ELEM(TowardsZero) \
ROUND_ELEM(ToNearest_TieAwayFromZero)
#define ROUND_ELEM(name) \
case FP::RoundingMode::name: \
return exact \
? EmitTwoOpFallback<3>(code, ctx, inst, EmitFPVectorRoundIntThunk<FPT, FP::RoundingMode::name, true>) \
: EmitTwoOpFallback<3>(code, ctx, inst, EmitFPVectorRoundIntThunk<FPT, FP::RoundingMode::name, false>);
ROUND_LIST
#undef ROUND_ELEM
#undef ROUND_LIST
default: UNREACHABLE();
}
}