mirror of
https://git.eden-emu.dev/eden-emu/eden
synced 2026-04-10 05:28:56 +02:00
[dynarmic] fix ODR violations
Signed-off-by: lizzie <lizzie@eden-emu.dev>
This commit is contained in:
parent
79181c9fc5
commit
c3c0f72126
15 changed files with 92 additions and 96 deletions
|
|
@ -642,7 +642,7 @@ void EmitIR<IR::Opcode::ArithmeticShiftRight64>(oaknut::CodeGenerator& code, Emi
|
|||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::RotateRight32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
void EmitIR<IR::Opcode::BitRotateRight32>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
const auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
|
@ -708,7 +708,7 @@ void EmitIR<IR::Opcode::RotateRight32>(oaknut::CodeGenerator& code, EmitContext&
|
|||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::RotateRight64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
void EmitIR<IR::Opcode::BitRotateRight64>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto& operand_arg = args[0];
|
||||
auto& shift_arg = args[1];
|
||||
|
|
|
|||
|
|
@ -209,9 +209,9 @@ void CallbackOnlyEmitExclusiveWriteMemory(oaknut::CodeGenerator& code, EmitConte
|
|||
ctx.reg_alloc.DefineAsRegister(inst, X0);
|
||||
}
|
||||
|
||||
constexpr size_t page_bits = 12;
|
||||
constexpr size_t page_size = 1 << page_bits;
|
||||
constexpr size_t page_mask = (1 << page_bits) - 1;
|
||||
constexpr size_t page_table_const_bits = 12;
|
||||
constexpr size_t page_table_const_size = 1 << page_table_const_bits;
|
||||
constexpr size_t page_table_const_mask = (1 << page_table_const_bits) - 1;
|
||||
|
||||
// This function may use Xscratch0 as a scratch register
|
||||
// Trashes NZCV
|
||||
|
|
@ -242,28 +242,28 @@ void EmitDetectMisalignedVAddr(oaknut::CodeGenerator& code, EmitContext& ctx, oa
|
|||
code.TST(Xaddr, align_mask);
|
||||
code.B(NE, *fallback);
|
||||
} else {
|
||||
// If (addr & page_mask) > page_size - byte_size, use fallback.
|
||||
code.AND(Xscratch0, Xaddr, page_mask);
|
||||
code.CMP(Xscratch0, page_size - bitsize / 8);
|
||||
// If (addr & page_table_const_mask) > page_table_const_size - byte_size, use fallback.
|
||||
code.AND(Xscratch0, Xaddr, page_table_const_mask);
|
||||
code.CMP(Xscratch0, page_table_const_size - bitsize / 8);
|
||||
code.B(HI, *fallback);
|
||||
}
|
||||
}
|
||||
|
||||
// Outputs Xscratch0 = page_table[addr >> page_bits]
|
||||
// Outputs Xscratch0 = page_table[addr >> page_table_const_bits]
|
||||
// May use Xscratch1 as scratch register
|
||||
// Address to read/write = [ret0 + ret1], ret0 is always Xscratch0 and ret1 is either Xaddr or Xscratch1
|
||||
// Trashes NZCV
|
||||
template<size_t bitsize>
|
||||
std::pair<oaknut::XReg, oaknut::XReg> InlinePageTableEmitVAddrLookup(oaknut::CodeGenerator& code, EmitContext& ctx, oaknut::XReg Xaddr, const SharedLabel& fallback) {
|
||||
const size_t valid_page_index_bits = ctx.conf.page_table_address_space_bits - page_bits;
|
||||
const size_t valid_page_index_bits = ctx.conf.page_table_address_space_bits - page_table_const_bits;
|
||||
const size_t unused_top_bits = 64 - ctx.conf.page_table_address_space_bits;
|
||||
|
||||
EmitDetectMisalignedVAddr<bitsize>(code, ctx, Xaddr, fallback);
|
||||
|
||||
if (ctx.conf.silently_mirror_page_table || unused_top_bits == 0) {
|
||||
code.UBFX(Xscratch0, Xaddr, page_bits, valid_page_index_bits);
|
||||
code.UBFX(Xscratch0, Xaddr, page_table_const_bits, valid_page_index_bits);
|
||||
} else {
|
||||
code.LSR(Xscratch0, Xaddr, page_bits);
|
||||
code.LSR(Xscratch0, Xaddr, page_table_const_bits);
|
||||
code.TST(Xscratch0, u64(~u64(0)) << valid_page_index_bits);
|
||||
code.B(NE, *fallback);
|
||||
}
|
||||
|
|
@ -283,7 +283,7 @@ std::pair<oaknut::XReg, oaknut::XReg> InlinePageTableEmitVAddrLookup(oaknut::Cod
|
|||
if (ctx.conf.absolute_offset_page_table) {
|
||||
return std::make_pair(Xscratch0, Xaddr);
|
||||
}
|
||||
code.AND(Xscratch1, Xaddr, page_mask);
|
||||
code.AND(Xscratch1, Xaddr, page_table_const_mask);
|
||||
return std::make_pair(Xscratch0, Xscratch1);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -164,12 +164,12 @@ void EmitIR<IR::Opcode::ArithmeticShiftRight64>(biscuit::Assembler&, EmitContext
|
|||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::RotateRight32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
void EmitIR<IR::Opcode::BitRotateRight32>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
template<>
|
||||
void EmitIR<IR::Opcode::RotateRight64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
void EmitIR<IR::Opcode::BitRotateRight64>(biscuit::Assembler&, EmitContext&, IR::Inst*) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -663,7 +663,7 @@ void EmitX64::EmitArithmeticShiftRight64(EmitContext& ctx, IR::Inst* inst) {
|
|||
}
|
||||
}
|
||||
|
||||
void EmitX64::EmitRotateRight32(EmitContext& ctx, IR::Inst* inst) {
|
||||
void EmitX64::EmitBitRotateRight32(EmitContext& ctx, IR::Inst* inst) {
|
||||
const auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
|
@ -736,7 +736,7 @@ void EmitX64::EmitRotateRight32(EmitContext& ctx, IR::Inst* inst) {
|
|||
}
|
||||
}
|
||||
|
||||
void EmitX64::EmitRotateRight64(EmitContext& ctx, IR::Inst* inst) {
|
||||
void EmitX64::EmitBitRotateRight64(EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto& operand_arg = args[0];
|
||||
auto& shift_arg = args[1];
|
||||
|
|
|
|||
|
|
@ -6,6 +6,8 @@
|
|||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <bit>
|
||||
#include "dynarmic/backend/x64/xbyak.h"
|
||||
|
||||
|
|
@ -22,9 +24,9 @@ namespace {
|
|||
|
||||
using namespace Xbyak::util;
|
||||
|
||||
constexpr size_t page_bits = 12;
|
||||
constexpr size_t page_size = 1 << page_bits;
|
||||
constexpr size_t page_mask = (1 << page_bits) - 1;
|
||||
constexpr size_t page_table_const_bits = 12;
|
||||
constexpr size_t page_table_const_size = 1 << page_table_const_bits;
|
||||
constexpr size_t page_table_const_mask = (1 << page_table_const_bits) - 1;
|
||||
|
||||
template<typename EmitContext>
|
||||
void EmitDetectMisalignedVAddr(BlockOfCode& code, EmitContext& ctx, size_t bitsize, Xbyak::Label& abort, Xbyak::Reg64 vaddr, Xbyak::Reg64 tmp) {
|
||||
|
|
@ -50,7 +52,7 @@ void EmitDetectMisalignedVAddr(BlockOfCode& code, EmitContext& ctx, size_t bitsi
|
|||
code.test(vaddr, align_mask);
|
||||
|
||||
if (ctx.conf.only_detect_misalignment_via_page_table_on_page_boundary) {
|
||||
const u32 page_align_mask = static_cast<u32>(page_size - 1) & ~align_mask;
|
||||
const u32 page_align_mask = static_cast<u32>(page_table_const_size - 1) & ~align_mask;
|
||||
|
||||
SharedLabel detect_boundary = ctx.GenSharedLabel(), resume = ctx.GenSharedLabel();
|
||||
|
||||
|
|
@ -83,7 +85,7 @@ template<>
|
|||
// TODO: This code assumes vaddr has been zext from 32-bits to 64-bits.
|
||||
|
||||
code.mov(tmp, vaddr.cvt32());
|
||||
code.shr(tmp, int(page_bits));
|
||||
code.shr(tmp, int(page_table_const_bits));
|
||||
code.shl(tmp, int(ctx.conf.page_table_log2_stride));
|
||||
code.mov(page, qword[r14 + tmp.cvt64()]);
|
||||
if (ctx.conf.page_table_pointer_mask_bits == 0) {
|
||||
|
|
@ -96,13 +98,13 @@ template<>
|
|||
return page + vaddr;
|
||||
}
|
||||
code.mov(tmp, vaddr.cvt32());
|
||||
code.and_(tmp, static_cast<u32>(page_mask));
|
||||
code.and_(tmp, static_cast<u32>(page_table_const_mask));
|
||||
return page + tmp.cvt64();
|
||||
}
|
||||
|
||||
template<>
|
||||
[[maybe_unused]] Xbyak::RegExp EmitVAddrLookup<A64EmitContext>(BlockOfCode& code, A64EmitContext& ctx, size_t bitsize, Xbyak::Label& abort, Xbyak::Reg64 vaddr) {
|
||||
const size_t valid_page_index_bits = ctx.conf.page_table_address_space_bits - page_bits;
|
||||
const size_t valid_page_index_bits = ctx.conf.page_table_address_space_bits - page_table_const_bits;
|
||||
const size_t unused_top_bits = 64 - ctx.conf.page_table_address_space_bits;
|
||||
|
||||
const Xbyak::Reg64 page = ctx.reg_alloc.ScratchGpr(code);
|
||||
|
|
@ -112,29 +114,29 @@ template<>
|
|||
|
||||
if (unused_top_bits == 0) {
|
||||
code.mov(tmp, vaddr);
|
||||
code.shr(tmp, int(page_bits));
|
||||
code.shr(tmp, int(page_table_const_bits));
|
||||
} else if (ctx.conf.silently_mirror_page_table) {
|
||||
if (valid_page_index_bits >= 32) {
|
||||
if (code.HasHostFeature(HostFeature::BMI2)) {
|
||||
const Xbyak::Reg64 bit_count = ctx.reg_alloc.ScratchGpr(code);
|
||||
code.mov(bit_count, unused_top_bits);
|
||||
code.bzhi(tmp, vaddr, bit_count);
|
||||
code.shr(tmp, int(page_bits));
|
||||
code.shr(tmp, int(page_table_const_bits));
|
||||
ctx.reg_alloc.Release(bit_count);
|
||||
} else {
|
||||
code.mov(tmp, vaddr);
|
||||
code.shl(tmp, int(unused_top_bits));
|
||||
code.shr(tmp, int(unused_top_bits + page_bits));
|
||||
code.shr(tmp, int(unused_top_bits + page_table_const_bits));
|
||||
}
|
||||
} else {
|
||||
code.mov(tmp, vaddr);
|
||||
code.shr(tmp, int(page_bits));
|
||||
code.shr(tmp, int(page_table_const_bits));
|
||||
code.and_(tmp, u32((1 << valid_page_index_bits) - 1));
|
||||
}
|
||||
} else {
|
||||
ASSERT(valid_page_index_bits < 32);
|
||||
code.mov(tmp, vaddr);
|
||||
code.shr(tmp, int(page_bits));
|
||||
code.shr(tmp, int(page_table_const_bits));
|
||||
code.test(tmp, u32(-(1 << valid_page_index_bits)));
|
||||
code.jnz(abort, code.T_NEAR);
|
||||
}
|
||||
|
|
@ -151,7 +153,7 @@ template<>
|
|||
return page + vaddr;
|
||||
}
|
||||
code.mov(tmp, vaddr);
|
||||
code.and_(tmp, static_cast<u32>(page_mask));
|
||||
code.and_(tmp, static_cast<u32>(page_table_const_mask));
|
||||
return page + tmp;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -25,12 +25,12 @@ using namespace Xbyak::util;
|
|||
|
||||
namespace {
|
||||
|
||||
enum class Op {
|
||||
enum class SaturationOp {
|
||||
Add,
|
||||
Sub,
|
||||
};
|
||||
|
||||
template<Op op, size_t size, bool has_overflow_inst = false>
|
||||
template<SaturationOp op, size_t size, bool has_overflow_inst = false>
|
||||
void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
|
|
@ -51,7 +51,7 @@ void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst)
|
|||
|
||||
// overflow now contains 0x7F... if a was positive, or 0x80... if a was negative
|
||||
|
||||
if constexpr (op == Op::Add) {
|
||||
if constexpr (op == SaturationOp::Add) {
|
||||
code.add(result, addend);
|
||||
} else {
|
||||
code.sub(result, addend);
|
||||
|
|
@ -75,16 +75,16 @@ void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst)
|
|||
ctx.reg_alloc.DefineValue(code, inst, result);
|
||||
}
|
||||
|
||||
template<Op op, size_t size>
|
||||
template<SaturationOp op, size_t size>
|
||||
void EmitUnsignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
Xbyak::Reg op_result = ctx.reg_alloc.UseScratchGpr(code, args[0]).changeBit(size);
|
||||
Xbyak::Reg addend = ctx.reg_alloc.UseScratchGpr(code, args[1]).changeBit(size);
|
||||
|
||||
constexpr u64 boundary = op == Op::Add ? (std::numeric_limits<mcl::unsigned_integer_of_size<size>>::max)() : 0;
|
||||
constexpr u64 boundary = op == SaturationOp::Add ? (std::numeric_limits<mcl::unsigned_integer_of_size<size>>::max)() : 0;
|
||||
|
||||
if constexpr (op == Op::Add) {
|
||||
if constexpr (op == SaturationOp::Add) {
|
||||
code.add(op_result, addend);
|
||||
} else {
|
||||
code.sub(op_result, addend);
|
||||
|
|
@ -106,11 +106,11 @@ void EmitUnsignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst
|
|||
} // anonymous namespace
|
||||
|
||||
void EmitX64::EmitSignedSaturatedAddWithFlag32(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitSignedSaturatedOp<Op::Add, 32, true>(code, ctx, inst);
|
||||
EmitSignedSaturatedOp<SaturationOp::Add, 32, true>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitSignedSaturatedSubWithFlag32(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitSignedSaturatedOp<Op::Sub, 32, true>(code, ctx, inst);
|
||||
EmitSignedSaturatedOp<SaturationOp::Sub, 32, true>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) {
|
||||
|
|
@ -192,19 +192,19 @@ void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
|
|||
}
|
||||
|
||||
void EmitX64::EmitSignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitSignedSaturatedOp<Op::Add, 8>(code, ctx, inst);
|
||||
EmitSignedSaturatedOp<SaturationOp::Add, 8>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitSignedSaturatedAdd16(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitSignedSaturatedOp<Op::Add, 16>(code, ctx, inst);
|
||||
EmitSignedSaturatedOp<SaturationOp::Add, 16>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitSignedSaturatedAdd32(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitSignedSaturatedOp<Op::Add, 32>(code, ctx, inst);
|
||||
EmitSignedSaturatedOp<SaturationOp::Add, 32>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitSignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitSignedSaturatedOp<Op::Add, 64>(code, ctx, inst);
|
||||
EmitSignedSaturatedOp<SaturationOp::Add, 64>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh16(EmitContext& ctx, IR::Inst* inst) {
|
||||
|
|
@ -256,51 +256,51 @@ void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh32(EmitContext& ctx,
|
|||
}
|
||||
|
||||
void EmitX64::EmitSignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitSignedSaturatedOp<Op::Sub, 8>(code, ctx, inst);
|
||||
EmitSignedSaturatedOp<SaturationOp::Sub, 8>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitSignedSaturatedSub16(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitSignedSaturatedOp<Op::Sub, 16>(code, ctx, inst);
|
||||
EmitSignedSaturatedOp<SaturationOp::Sub, 16>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitSignedSaturatedSub32(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitSignedSaturatedOp<Op::Sub, 32>(code, ctx, inst);
|
||||
EmitSignedSaturatedOp<SaturationOp::Sub, 32>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitSignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitSignedSaturatedOp<Op::Sub, 64>(code, ctx, inst);
|
||||
EmitSignedSaturatedOp<SaturationOp::Sub, 64>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitUnsignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitUnsignedSaturatedOp<Op::Add, 8>(code, ctx, inst);
|
||||
EmitUnsignedSaturatedOp<SaturationOp::Add, 8>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitUnsignedSaturatedAdd16(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitUnsignedSaturatedOp<Op::Add, 16>(code, ctx, inst);
|
||||
EmitUnsignedSaturatedOp<SaturationOp::Add, 16>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitUnsignedSaturatedAdd32(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitUnsignedSaturatedOp<Op::Add, 32>(code, ctx, inst);
|
||||
EmitUnsignedSaturatedOp<SaturationOp::Add, 32>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitUnsignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitUnsignedSaturatedOp<Op::Add, 64>(code, ctx, inst);
|
||||
EmitUnsignedSaturatedOp<SaturationOp::Add, 64>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitUnsignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitUnsignedSaturatedOp<Op::Sub, 8>(code, ctx, inst);
|
||||
EmitUnsignedSaturatedOp<SaturationOp::Sub, 8>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitUnsignedSaturatedSub16(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitUnsignedSaturatedOp<Op::Sub, 16>(code, ctx, inst);
|
||||
EmitUnsignedSaturatedOp<SaturationOp::Sub, 16>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitUnsignedSaturatedSub32(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitUnsignedSaturatedOp<Op::Sub, 32>(code, ctx, inst);
|
||||
EmitUnsignedSaturatedOp<SaturationOp::Sub, 32>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitUnsignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitUnsignedSaturatedOp<Op::Sub, 64>(code, ctx, inst);
|
||||
EmitUnsignedSaturatedOp<SaturationOp::Sub, 64>(code, ctx, inst);
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::X64
|
||||
|
|
|
|||
|
|
@ -6213,3 +6213,5 @@ void EmitX64::EmitZeroVector(EmitContext& ctx, IR::Inst* inst) {
|
|||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::X64
|
||||
|
||||
#undef ICODE
|
||||
|
|
|
|||
|
|
@ -52,12 +52,12 @@ void EmitVectorSaturatedNative(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
|
|||
ctx.reg_alloc.DefineValue(code, inst, result);
|
||||
}
|
||||
|
||||
enum class Op {
|
||||
enum class VectorSaturationOp {
|
||||
Add,
|
||||
Sub,
|
||||
};
|
||||
|
||||
template<Op op, size_t esize>
|
||||
template<VectorSaturationOp op, size_t esize>
|
||||
void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
static_assert(esize == 32 || esize == 64);
|
||||
constexpr u64 msb_mask = esize == 32 ? 0x8000000080000000 : 0x8000000000000000;
|
||||
|
|
@ -72,7 +72,7 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
|
|||
|
||||
code.movaps(xmm0, operand1);
|
||||
|
||||
if constexpr (op == Op::Add) {
|
||||
if constexpr (op == VectorSaturationOp::Add) {
|
||||
ICODE(vpadd)(result, operand1, operand2);
|
||||
code.vpternlogd(xmm0, result, operand2, 0b00100100);
|
||||
} else {
|
||||
|
|
@ -102,7 +102,7 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
|
|||
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
|
||||
|
||||
if (code.HasHostFeature(HostFeature::AVX)) {
|
||||
if constexpr (op == Op::Add) {
|
||||
if constexpr (op == VectorSaturationOp::Add) {
|
||||
ICODE(vpadd)(result, operand1, operand2);
|
||||
} else {
|
||||
ICODE(vpsub)(result, operand1, operand2);
|
||||
|
|
@ -112,7 +112,7 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
|
|||
} else {
|
||||
code.movaps(xmm0, operand1);
|
||||
code.movaps(tmp, operand1);
|
||||
if constexpr (op == Op::Add) {
|
||||
if constexpr (op == VectorSaturationOp::Add) {
|
||||
ICODE(padd)(result, operand2);
|
||||
} else {
|
||||
ICODE(psub)(result, operand2);
|
||||
|
|
@ -121,7 +121,7 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
|
|||
code.pxor(tmp, result);
|
||||
}
|
||||
|
||||
if constexpr (op == Op::Add) {
|
||||
if constexpr (op == VectorSaturationOp::Add) {
|
||||
code.pandn(xmm0, tmp);
|
||||
} else {
|
||||
code.pand(xmm0, tmp);
|
||||
|
|
@ -165,7 +165,7 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
|
|||
}
|
||||
}
|
||||
|
||||
template<Op op, size_t esize>
|
||||
template<VectorSaturationOp op, size_t esize>
|
||||
void EmitVectorUnsignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
static_assert(esize == 32 || esize == 64);
|
||||
|
||||
|
|
@ -177,7 +177,7 @@ void EmitVectorUnsignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst*
|
|||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
|
||||
const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr(code).cvt8();
|
||||
|
||||
if constexpr (op == Op::Add) {
|
||||
if constexpr (op == VectorSaturationOp::Add) {
|
||||
ICODE(vpadd)(result, operand1, operand2);
|
||||
ICODE(vpcmpu)(k1, result, operand1, CmpInt::LessThan);
|
||||
ICODE(vpternlog)(result | k1, result, result, u8(0xFF));
|
||||
|
|
@ -201,7 +201,7 @@ void EmitVectorUnsignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst*
|
|||
const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr(code).cvt8();
|
||||
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
|
||||
|
||||
if constexpr (op == Op::Add) {
|
||||
if constexpr (op == VectorSaturationOp::Add) {
|
||||
if (code.HasHostFeature(HostFeature::AVX)) {
|
||||
code.vpxor(xmm0, operand1, operand2);
|
||||
code.vpand(tmp, operand1, operand2);
|
||||
|
|
@ -250,7 +250,7 @@ void EmitVectorUnsignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst*
|
|||
code.setnz(overflow);
|
||||
code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
|
||||
|
||||
if constexpr (op == Op::Add) {
|
||||
if constexpr (op == VectorSaturationOp::Add) {
|
||||
code.por(result, tmp);
|
||||
ctx.reg_alloc.DefineValue(code, inst, result);
|
||||
} else {
|
||||
|
|
@ -270,11 +270,11 @@ void EmitX64::EmitVectorSignedSaturatedAdd16(EmitContext& ctx, IR::Inst* inst) {
|
|||
}
|
||||
|
||||
void EmitX64::EmitVectorSignedSaturatedAdd32(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitVectorSignedSaturated<Op::Add, 32>(code, ctx, inst);
|
||||
EmitVectorSignedSaturated<VectorSaturationOp::Add, 32>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitVectorSignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitVectorSignedSaturated<Op::Add, 64>(code, ctx, inst);
|
||||
EmitVectorSignedSaturated<VectorSaturationOp::Add, 64>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitVectorSignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) {
|
||||
|
|
@ -286,11 +286,11 @@ void EmitX64::EmitVectorSignedSaturatedSub16(EmitContext& ctx, IR::Inst* inst) {
|
|||
}
|
||||
|
||||
void EmitX64::EmitVectorSignedSaturatedSub32(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitVectorSignedSaturated<Op::Sub, 32>(code, ctx, inst);
|
||||
EmitVectorSignedSaturated<VectorSaturationOp::Sub, 32>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitVectorSignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitVectorSignedSaturated<Op::Sub, 64>(code, ctx, inst);
|
||||
EmitVectorSignedSaturated<VectorSaturationOp::Sub, 64>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitVectorUnsignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
|
||||
|
|
@ -302,11 +302,11 @@ void EmitX64::EmitVectorUnsignedSaturatedAdd16(EmitContext& ctx, IR::Inst* inst)
|
|||
}
|
||||
|
||||
void EmitX64::EmitVectorUnsignedSaturatedAdd32(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitVectorUnsignedSaturated<Op::Add, 32>(code, ctx, inst);
|
||||
EmitVectorUnsignedSaturated<VectorSaturationOp::Add, 32>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitVectorUnsignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitVectorUnsignedSaturated<Op::Add, 64>(code, ctx, inst);
|
||||
EmitVectorUnsignedSaturated<VectorSaturationOp::Add, 64>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitVectorUnsignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) {
|
||||
|
|
@ -318,11 +318,11 @@ void EmitX64::EmitVectorUnsignedSaturatedSub16(EmitContext& ctx, IR::Inst* inst)
|
|||
}
|
||||
|
||||
void EmitX64::EmitVectorUnsignedSaturatedSub32(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitVectorUnsignedSaturated<Op::Sub, 32>(code, ctx, inst);
|
||||
EmitVectorUnsignedSaturated<VectorSaturationOp::Sub, 32>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitVectorUnsignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitVectorUnsignedSaturated<Op::Sub, 64>(code, ctx, inst);
|
||||
EmitVectorUnsignedSaturated<VectorSaturationOp::Sub, 64>(code, ctx, inst);
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::Backend::X64
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2) {
|
|||
return std::make_tuple(exponent, value);
|
||||
}();
|
||||
|
||||
if (product_value == 0) {
|
||||
if (product_value == u128(0, 0)) {
|
||||
return addend;
|
||||
}
|
||||
|
||||
|
|
@ -55,13 +55,13 @@ FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2) {
|
|||
}
|
||||
|
||||
// addend < product
|
||||
const u128 result = product_value + StickyLogicalShiftRight(addend.mantissa, exp_diff - normalized_point_position);
|
||||
const u128 result = product_value + StickyLogicalShiftRight(u128(addend.mantissa, 0), exp_diff - normalized_point_position);
|
||||
return ReduceMantissa(product_sign, product_exponent, result);
|
||||
}
|
||||
|
||||
// Subtraction
|
||||
|
||||
const u128 addend_long = u128(addend.mantissa) << normalized_point_position;
|
||||
const u128 addend_long = u128(addend.mantissa, 0) << normalized_point_position;
|
||||
|
||||
bool result_sign;
|
||||
u128 result;
|
||||
|
|
|
|||
|
|
@ -137,7 +137,7 @@ u128 StickyLogicalShiftRight(u128 operand, int amount) {
|
|||
}
|
||||
|
||||
if (operand.lower != 0 || operand.upper != 0) {
|
||||
return u128(1);
|
||||
return u128(1, 0);
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,22 +22,13 @@ struct u128 {
|
|||
u128(u128&&) = default;
|
||||
u128& operator=(const u128&) = default;
|
||||
u128& operator=(u128&&) = default;
|
||||
|
||||
u128(u64 lower_, u64 upper_)
|
||||
: lower(lower_), upper(upper_) {}
|
||||
|
||||
template<typename T>
|
||||
/* implicit */ u128(T value)
|
||||
: lower(value), upper(0) {
|
||||
static_assert(std::is_integral_v<T>);
|
||||
static_assert(mcl::bitsizeof<T> <= mcl::bitsizeof<u64>);
|
||||
}
|
||||
explicit u128(u64 lower_, u64 upper_) : lower(lower_), upper(upper_) {}
|
||||
|
||||
u64 lower = 0;
|
||||
u64 upper = 0;
|
||||
|
||||
template<size_t bit_position>
|
||||
bool Bit() const {
|
||||
[[nodiscard]] inline bool Bit() const {
|
||||
static_assert(bit_position < 128);
|
||||
if constexpr (bit_position < 64) {
|
||||
return mcl::bit::get_bit<bit_position>(lower);
|
||||
|
|
|
|||
|
|
@ -228,7 +228,7 @@ public:
|
|||
}
|
||||
|
||||
ResultAndCarry<U32> RotateRight(const U32& value_in, const U8& shift_amount, const U1& carry_in) {
|
||||
const auto result = Inst<U32>(Opcode::RotateRight32, value_in, shift_amount, carry_in);
|
||||
const auto result = Inst<U32>(Opcode::BitRotateRight32, value_in, shift_amount, carry_in);
|
||||
const auto carry_out = Inst<U1>(Opcode::GetCarryFromOp, result);
|
||||
return {result, carry_out};
|
||||
}
|
||||
|
|
@ -265,9 +265,9 @@ public:
|
|||
|
||||
U32U64 RotateRight(const U32U64& value_in, const U8& shift_amount) {
|
||||
if (value_in.GetType() == Type::U32) {
|
||||
return Inst<U32>(Opcode::RotateRight32, value_in, shift_amount, Imm1(0));
|
||||
return Inst<U32>(Opcode::BitRotateRight32, value_in, shift_amount, Imm1(0));
|
||||
} else {
|
||||
return Inst<U64>(Opcode::RotateRight64, value_in, shift_amount);
|
||||
return Inst<U64>(Opcode::BitRotateRight64, value_in, shift_amount);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -45,8 +45,8 @@ constexpr bool IsArithmeticShift(const Opcode op) noexcept {
|
|||
|
||||
/// @brief Determines whether or not this instruction performs a circular shift (rotate).
|
||||
constexpr bool IsCircularShift(const Opcode op) noexcept {
|
||||
return op == Opcode::RotateRight32
|
||||
|| op == Opcode::RotateRight64
|
||||
return op == Opcode::BitRotateRight32
|
||||
|| op == Opcode::BitRotateRight64
|
||||
|| op == Opcode::RotateRightExtended;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -49,8 +49,9 @@ OPCODE(LogicalShiftRight32, U32, U32,
|
|||
OPCODE(LogicalShiftRight64, U64, U64, U8 )
|
||||
OPCODE(ArithmeticShiftRight32, U32, U32, U8, U1 )
|
||||
OPCODE(ArithmeticShiftRight64, U64, U64, U8 )
|
||||
OPCODE(RotateRight32, U32, U32, U8, U1 )
|
||||
OPCODE(RotateRight64, U64, U64, U8 )
|
||||
// windows.h defines RotateRight32 and RotateRight64 as macros, so these opcodes carry a Bit prefix to avoid the name clash
|
||||
OPCODE(BitRotateRight32, U32, U32, U8, U1 )
|
||||
OPCODE(BitRotateRight64, U64, U64, U8 )
|
||||
OPCODE(RotateRightExtended, U32, U32, U1 )
|
||||
OPCODE(LogicalShiftLeftMasked32, U32, U32, U32 )
|
||||
OPCODE(LogicalShiftLeftMasked64, U64, U64, U64 )
|
||||
|
|
|
|||
|
|
@ -1072,12 +1072,12 @@ static void ConstantPropagation(IR::Block& block) {
|
|||
ReplaceUsesWith(inst, false, Safe::ArithmeticShiftRight<u64>(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8()));
|
||||
}
|
||||
break;
|
||||
case Op::RotateRight32:
|
||||
case Op::BitRotateRight32:
|
||||
if (FoldShifts(inst)) {
|
||||
ReplaceUsesWith(inst, true, mcl::bit::rotate_right<u32>(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8()));
|
||||
}
|
||||
break;
|
||||
case Op::RotateRight64:
|
||||
case Op::BitRotateRight64:
|
||||
if (FoldShifts(inst)) {
|
||||
ReplaceUsesWith(inst, false, mcl::bit::rotate_right<u64>(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8()));
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue