mirror of
https://git.eden-emu.dev/eden-emu/eden
synced 2026-04-10 05:28:56 +02:00
Compare commits
6 commits
4d8acf2490
...
7e0cc4aac3
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7e0cc4aac3 | ||
|
|
e9f4541069 | ||
|
|
cf7086de7c | ||
|
|
8e14f07a69 | ||
|
|
0b179517b3 | ||
|
|
7a8176f63f |
20 changed files with 1705 additions and 1213 deletions
|
|
@ -226,7 +226,7 @@ void ArmDynarmic64::MakeJit(Common::PageTable* page_table, std::size_t address_s
|
|||
config.only_detect_misalignment_via_page_table_on_page_boundary = true;
|
||||
|
||||
config.fastmem_pointer = page_table->fastmem_arena ?
|
||||
std::optional<uintptr_t>{reinterpret_cast<uintptr_t>(page_table->fastmem_arena)} :
|
||||
std::optional<uintptr_t>{uintptr_t(page_table->fastmem_arena)} :
|
||||
std::nullopt;
|
||||
config.fastmem_address_space_bits = std::uint32_t(address_space_bits);
|
||||
config.silently_mirror_fastmem = false;
|
||||
|
|
|
|||
|
|
@ -48,6 +48,8 @@ public:
|
|||
mapped_ranges{mapped_ranges_}, parent{parent_} {}
|
||||
|
||||
std::optional<std::uint32_t> MemoryReadCode(VAddr vaddr) override {
|
||||
if (!memory.IsValidVirtualAddressRange(vaddr, sizeof(u32)))
|
||||
return std::nullopt;
|
||||
static_assert(Core::Memory::YUZU_PAGESIZE == Dynarmic::CODE_PAGE_SIZE);
|
||||
auto const aligned_vaddr = vaddr & ~Core::Memory::YUZU_PAGEMASK;
|
||||
if (last_code_addr != aligned_vaddr) {
|
||||
|
|
|
|||
|
|
@ -59,8 +59,10 @@ static Xbyak::Address MJitStateExtReg(A32::ExtReg reg) {
|
|||
UNREACHABLE();
|
||||
}
|
||||
|
||||
// Three-argument form: hands the register allocator and IR block to the
// EmitContext base and initialises the `conf` member from the given user config.
A32EmitContext::A32EmitContext(const A32::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block)
|
||||
: EmitContext(reg_alloc, block), conf(conf) {}
|
||||
// Four-argument form: same as above, but additionally forwards the caller's
// `shared_labels` container to the EmitContext base.
A32EmitContext::A32EmitContext(const A32::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block, boost::container::stable_vector<Xbyak::Label>& shared_labels)
|
||||
: EmitContext(reg_alloc, block, shared_labels)
|
||||
, conf(conf)
|
||||
{}
|
||||
|
||||
A32::LocationDescriptor A32EmitContext::Location() const {
|
||||
return A32::LocationDescriptor{block.Location()};
|
||||
|
|
@ -109,35 +111,59 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {
|
|||
gprs.reset(size_t(HostLoc::R14));
|
||||
return gprs;
|
||||
}(), any_xmm);
|
||||
A32EmitContext ctx{conf, reg_alloc, block};
|
||||
|
||||
A32EmitContext ctx{conf, reg_alloc, block, shared_labels};
|
||||
|
||||
// Start emitting.
|
||||
code.align();
|
||||
const u8* const entrypoint = code.getCurr();
|
||||
code.mov(code.qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, abi_base_pointer)], rbp);
|
||||
code.lea(rbp, code.ptr[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, abi_base_pointer) - 8]);
|
||||
|
||||
EmitCondPrelude(ctx);
|
||||
|
||||
for (auto iter = block.instructions.begin(); iter != block.instructions.end(); ++iter) [[likely]] {
|
||||
auto* inst = &*iter;
|
||||
// Call the relevant Emit* member function.
|
||||
switch (inst->GetOpcode()) {
|
||||
#define OPCODE(name, type, ...) \
|
||||
case IR::Opcode::name: \
|
||||
A32EmitX64::Emit##name(ctx, inst); \
|
||||
break;
|
||||
#define A32OPC(name, type, ...) \
|
||||
case IR::Opcode::A32##name: \
|
||||
A32EmitX64::EmitA32##name(ctx, inst);\
|
||||
break;
|
||||
typedef void (EmitX64::*EmitHandlerFn)(EmitContext& context, IR::Inst* inst);
|
||||
constexpr EmitHandlerFn opcode_handlers[] = {
|
||||
#define OPCODE(name, type, ...) &EmitX64::Emit##name,
|
||||
#define A32OPC(name, type, ...)
|
||||
#define A64OPC(name, type, ...)
|
||||
#include "dynarmic/ir/opcodes.inc"
|
||||
#undef OPCODE
|
||||
#undef A32OPC
|
||||
#undef A64OPC
|
||||
};
|
||||
typedef void (A32EmitX64::*A32EmitHandlerFn)(A32EmitContext& context, IR::Inst* inst);
|
||||
constexpr A32EmitHandlerFn a32_handlers[] = {
|
||||
#define OPCODE(...)
|
||||
#define A32OPC(name, type, ...) &A32EmitX64::EmitA32##name,
|
||||
#define A64OPC(...)
|
||||
#include "dynarmic/ir/opcodes.inc"
|
||||
#undef OPCODE
|
||||
#undef A32OPC
|
||||
#undef A64OPC
|
||||
};
|
||||
|
||||
for (auto& inst : block.instructions) {
|
||||
auto const opcode = inst.GetOpcode();
|
||||
// Call the relevant Emit* member function.
|
||||
switch (opcode) {
|
||||
#define OPCODE(name, type, ...) case IR::Opcode::name: goto opcode_branch;
|
||||
#define A32OPC(name, type, ...) case IR::Opcode::A32##name: goto a32_branch;
|
||||
#define A64OPC(name, type, ...)
|
||||
#include "dynarmic/ir/opcodes.inc"
|
||||
#undef OPCODE
|
||||
#undef A32OPC
|
||||
#undef A64OPC
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
reg_alloc.EndOfAllocScope();
|
||||
opcode_branch:
|
||||
(this->*opcode_handlers[size_t(opcode)])(ctx, &inst);
|
||||
goto finish_this_inst;
|
||||
a32_branch:
|
||||
// Update with FIRST A32 instruction
|
||||
(this->*a32_handlers[size_t(opcode) - size_t(IR::Opcode::A32SetCheckBit)])(ctx, &inst);
|
||||
finish_this_inst:
|
||||
ctx.reg_alloc.EndOfAllocScope();
|
||||
#ifndef NDEBUG
|
||||
if (conf.very_verbose_debugging_output)
|
||||
EmitVerboseDebuggingOutput(reg_alloc);
|
||||
|
|
@ -146,15 +172,14 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {
|
|||
|
||||
reg_alloc.AssertNoMoreUses();
|
||||
|
||||
if (conf.enable_cycle_counting) {
|
||||
if (conf.enable_cycle_counting)
|
||||
EmitAddCycles(block.CycleCount());
|
||||
}
|
||||
code.mov(rbp, code.qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, abi_base_pointer)]);
|
||||
EmitTerminal(block.GetTerminal(), ctx.Location().SetSingleStepping(false), ctx.IsSingleStep());
|
||||
code.int3();
|
||||
|
||||
for (auto& deferred_emit : ctx.deferred_emits) {
|
||||
for (auto& deferred_emit : ctx.deferred_emits)
|
||||
deferred_emit();
|
||||
}
|
||||
code.int3();
|
||||
|
||||
const size_t size = size_t(code.getCurr() - entrypoint);
|
||||
|
|
@ -167,6 +192,7 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {
|
|||
|
||||
auto const bdesc = RegisterBlock(descriptor, entrypoint, size);
|
||||
code.DisableWriting();
|
||||
shared_labels.clear();
|
||||
return bdesc;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
|
|
@ -29,7 +29,7 @@ namespace Dynarmic::Backend::X64 {
|
|||
class RegAlloc;
|
||||
|
||||
struct A32EmitContext final : public EmitContext {
|
||||
A32EmitContext(const A32::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block);
|
||||
A32EmitContext(const A32::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block, boost::container::stable_vector<Xbyak::Label>& shared_labels);
|
||||
|
||||
A32::LocationDescriptor Location() const;
|
||||
A32::LocationDescriptor EndLocation() const;
|
||||
|
|
@ -130,6 +130,7 @@ public:
|
|||
ankerl::unordered_dense::map<std::tuple<bool, size_t, int, int>, void (*)()> write_fallbacks;
|
||||
ankerl::unordered_dense::map<std::tuple<bool, size_t, int, int>, void (*)()> exclusive_write_fallbacks;
|
||||
ankerl::unordered_dense::set<DoNotFastmemMarker> do_not_fastmem;
|
||||
boost::container::stable_vector<Xbyak::Label> shared_labels;
|
||||
void (*memory_read_128)() = nullptr; // Dummy
|
||||
void (*memory_write_128)() = nullptr; // Dummy
|
||||
const void* terminal_handler_pop_rsb_hint;
|
||||
|
|
|
|||
|
|
@ -37,8 +37,10 @@ namespace Dynarmic::Backend::X64 {
|
|||
|
||||
using namespace Xbyak::util;
|
||||
|
||||
// Three-argument form: hands the register allocator and IR block to the
// EmitContext base and initialises the `conf` member from the given user config.
A64EmitContext::A64EmitContext(const A64::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block)
|
||||
: EmitContext(reg_alloc, block), conf(conf) {}
|
||||
// Four-argument form: same as above, but additionally forwards the caller's
// `shared_labels` container to the EmitContext base.
A64EmitContext::A64EmitContext(const A64::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block, boost::container::stable_vector<Xbyak::Label>& shared_labels)
|
||||
: EmitContext(reg_alloc, block, shared_labels)
|
||||
, conf(conf)
|
||||
{}
|
||||
|
||||
A64::LocationDescriptor A64EmitContext::Location() const {
|
||||
return A64::LocationDescriptor{block.Location()};
|
||||
|
|
@ -83,11 +85,14 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) noexcept {
|
|||
gprs.reset(size_t(HostLoc::R14));
|
||||
return gprs;
|
||||
}(), any_xmm};
|
||||
A64EmitContext ctx{conf, reg_alloc, block};
|
||||
|
||||
A64EmitContext ctx{conf, reg_alloc, block, shared_labels};
|
||||
|
||||
// Start emitting.
|
||||
code.align();
|
||||
const auto* const entrypoint = code.getCurr();
|
||||
code.mov(code.qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, abi_base_pointer)], rbp);
|
||||
code.lea(rbp, code.ptr[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, abi_base_pointer) - 8]);
|
||||
|
||||
DEBUG_ASSERT(block.GetCondition() == IR::Cond::AL);
|
||||
typedef void (EmitX64::*EmitHandlerFn)(EmitContext& context, IR::Inst* inst);
|
||||
|
|
@ -139,16 +144,13 @@ finish_this_inst:
|
|||
}
|
||||
|
||||
reg_alloc.AssertNoMoreUses();
|
||||
|
||||
if (conf.enable_cycle_counting) {
|
||||
if (conf.enable_cycle_counting)
|
||||
EmitAddCycles(block.CycleCount());
|
||||
}
|
||||
code.mov(rbp, code.qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, abi_base_pointer)]);
|
||||
EmitTerminal(block.GetTerminal(), ctx.Location().SetSingleStepping(false), ctx.IsSingleStep());
|
||||
code.int3();
|
||||
|
||||
for (auto& deferred_emit : ctx.deferred_emits) {
|
||||
for (auto& deferred_emit : ctx.deferred_emits)
|
||||
deferred_emit();
|
||||
}
|
||||
code.int3();
|
||||
|
||||
const size_t size = size_t(code.getCurr() - entrypoint);
|
||||
|
|
@ -161,6 +163,7 @@ finish_this_inst:
|
|||
|
||||
auto bdesc = RegisterBlock(descriptor, entrypoint, size);
|
||||
code.DisableWriting();
|
||||
shared_labels.clear();
|
||||
return bdesc;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
|
|
@ -27,7 +27,7 @@
|
|||
namespace Dynarmic::Backend::X64 {
|
||||
|
||||
struct A64EmitContext final : public EmitContext {
|
||||
A64EmitContext(const A64::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block);
|
||||
A64EmitContext(const A64::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block, boost::container::stable_vector<Xbyak::Label>& shared_labels);
|
||||
|
||||
A64::LocationDescriptor Location() const;
|
||||
bool IsSingleStep() const;
|
||||
|
|
@ -126,6 +126,7 @@ public:
|
|||
ankerl::unordered_dense::map<std::tuple<bool, size_t, int, int>, void (*)()> write_fallbacks;
|
||||
ankerl::unordered_dense::map<std::tuple<bool, size_t, int, int>, void (*)()> exclusive_write_fallbacks;
|
||||
ankerl::unordered_dense::set<DoNotFastmemMarker> do_not_fastmem;
|
||||
boost::container::stable_vector<Xbyak::Label> shared_labels;
|
||||
const void* terminal_handler_pop_rsb_hint = nullptr;
|
||||
const void* terminal_handler_fast_dispatch_hint = nullptr;
|
||||
FastDispatchEntry& (*fast_dispatch_table_lookup)(u64) = nullptr;
|
||||
|
|
|
|||
|
|
@ -32,8 +32,11 @@ namespace Dynarmic::Backend::X64 {
|
|||
|
||||
using namespace Xbyak::util;
|
||||
|
||||
// Two-argument form: initialises the `reg_alloc` and `block` members from the
// arguments.
EmitContext::EmitContext(RegAlloc& reg_alloc, IR::Block& block)
|
||||
: reg_alloc(reg_alloc), block(block) {}
|
||||
// Three-argument form: additionally initialises `shared_labels` from the
// caller-provided label container.
// NOTE(review): the arguments are taken by reference; if the members are
// references too, the referenced objects must outlive this context - confirm
// against the struct declaration.
EmitContext::EmitContext(RegAlloc& reg_alloc, IR::Block& block, boost::container::stable_vector<Xbyak::Label>& shared_labels)
|
||||
: reg_alloc(reg_alloc)
|
||||
, block(block)
|
||||
, shared_labels(shared_labels)
|
||||
{}
|
||||
|
||||
EmitContext::~EmitContext() = default;
|
||||
|
||||
|
|
|
|||
|
|
@ -16,11 +16,12 @@
|
|||
#include <type_traits>
|
||||
#include <vector>
|
||||
|
||||
#include "dynarmic/mcl/bit.hpp"
|
||||
#include <ankerl/unordered_dense.h>
|
||||
#include "dynarmic/backend/x64/xbyak.h"
|
||||
#include <boost/container/stable_vector.hpp>
|
||||
#include <boost/container/small_vector.hpp>
|
||||
|
||||
#include "dynarmic/backend/x64/xbyak.h"
|
||||
#include "dynarmic/mcl/bit.hpp"
|
||||
#include "dynarmic/backend/exception_handler.h"
|
||||
#include "dynarmic/backend/x64/reg_alloc.h"
|
||||
#include "dynarmic/common/fp/fpcr.h"
|
||||
|
|
@ -52,24 +53,23 @@ using VectorArray = std::array<T, A64FullVectorWidth::value / mcl::bitsizeof<T>>
|
|||
template<typename T>
|
||||
using HalfVectorArray = std::array<T, A64FullVectorWidth::value / mcl::bitsizeof<T> / 2>;
|
||||
|
||||
using SharedLabel = Xbyak::Label*;
|
||||
struct EmitContext {
|
||||
EmitContext(RegAlloc& reg_alloc, IR::Block& block);
|
||||
EmitContext(RegAlloc& reg_alloc, IR::Block& block, boost::container::stable_vector<Xbyak::Label>& shared_labels);
|
||||
virtual ~EmitContext();
|
||||
virtual FP::FPCR FPCR(bool fpcr_controlled = true) const = 0;
|
||||
virtual bool HasOptimization(OptimizationFlag flag) const = 0;
|
||||
|
||||
RegAlloc& reg_alloc;
|
||||
IR::Block& block;
|
||||
// Appends a default-constructed Xbyak::Label to `shared_labels` and returns a
// pointer to the new element. The context does not own the label; it lives in
// the container until that container drops it.
// NOTE(review): relies on the container keeping element addresses valid while
// further labels are appended - presumably why a stable_vector is used; confirm.
// NOTE(review): declared noexcept, so an exception thrown by emplace_back
// (e.g. on allocation failure) would end in std::terminate.
[[nodiscard]] inline Xbyak::Label* GenSharedLabel() noexcept {
|
||||
return &shared_labels.emplace_back();
|
||||
}
|
||||
|
||||
std::vector<std::function<void()>> deferred_emits;
|
||||
RegAlloc& reg_alloc;
|
||||
IR::Block& block;
|
||||
boost::container::stable_vector<Xbyak::Label>& shared_labels;
|
||||
};
|
||||
|
||||
using SharedLabel = std::shared_ptr<Xbyak::Label>;
|
||||
|
||||
// Returns a new default-constructed Xbyak::Label allocated with
// std::make_shared.
inline SharedLabel GenSharedLabel() {
|
||||
return std::make_shared<Xbyak::Label>();
|
||||
}
|
||||
|
||||
class EmitX64 {
|
||||
public:
|
||||
struct BlockDescriptor {
|
||||
|
|
|
|||
|
|
@ -136,7 +136,7 @@ void ForceToDefaultNaN(BlockOfCode& code, Xbyak::Xmm result) {
|
|||
|
||||
template<size_t fsize>
|
||||
SharedLabel ProcessNaN(BlockOfCode& code, EmitContext& ctx, Xbyak::Xmm a) {
|
||||
SharedLabel nan = GenSharedLabel(), end = GenSharedLabel();
|
||||
SharedLabel nan = ctx.GenSharedLabel(), end = ctx.GenSharedLabel();
|
||||
|
||||
FCODE(ucomis)(a, a);
|
||||
code.jp(*nan, code.T_NEAR);
|
||||
|
|
@ -251,7 +251,7 @@ template<size_t fsize, typename Function>
|
|||
void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
SharedLabel end = GenSharedLabel();
|
||||
SharedLabel end = ctx.GenSharedLabel();
|
||||
|
||||
Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]);
|
||||
|
||||
|
|
@ -304,7 +304,7 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn)
|
|||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
|
||||
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(code);
|
||||
|
||||
SharedLabel end = GenSharedLabel(), nan = GenSharedLabel();
|
||||
SharedLabel end = ctx.GenSharedLabel(), nan = ctx.GenSharedLabel();
|
||||
|
||||
code.movaps(result, op1);
|
||||
if constexpr (std::is_member_function_pointer_v<Function>) {
|
||||
|
|
@ -413,7 +413,7 @@ static void EmitFPMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, bo
|
|||
|
||||
DenormalsAreZero<fsize>(code, ctx, {result, operand});
|
||||
|
||||
SharedLabel equal = GenSharedLabel(), end = GenSharedLabel();
|
||||
SharedLabel equal = ctx.GenSharedLabel(), end = ctx.GenSharedLabel();
|
||||
|
||||
FCODE(ucomis)(result, operand);
|
||||
code.jz(*equal, code.T_NEAR);
|
||||
|
|
@ -484,7 +484,7 @@ static inline void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::
|
|||
}
|
||||
};
|
||||
|
||||
SharedLabel end = GenSharedLabel(), z = GenSharedLabel();
|
||||
SharedLabel end = ctx.GenSharedLabel(), z = ctx.GenSharedLabel();
|
||||
|
||||
FCODE(ucomis)(op1, op2);
|
||||
code.jz(*z, code.T_NEAR);
|
||||
|
|
@ -632,7 +632,7 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, bo
|
|||
}
|
||||
|
||||
if (code.HasHostFeature(HostFeature::FMA | HostFeature::AVX)) {
|
||||
SharedLabel fallback = GenSharedLabel(), end = GenSharedLabel();
|
||||
SharedLabel fallback = ctx.GenSharedLabel(), end = ctx.GenSharedLabel();
|
||||
|
||||
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]);
|
||||
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
|
||||
|
|
@ -843,7 +843,7 @@ static void EmitFPMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
|||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
|
||||
const Xbyak::Reg64 tmp = do_default_nan ? INVALID_REG : ctx.reg_alloc.ScratchGpr(code);
|
||||
|
||||
SharedLabel end = GenSharedLabel(), nan = GenSharedLabel();
|
||||
SharedLabel end = ctx.GenSharedLabel(), nan = ctx.GenSharedLabel();
|
||||
|
||||
if (code.HasHostFeature(HostFeature::AVX)) {
|
||||
FCODE(vmuls)(result, op1, op2);
|
||||
|
|
@ -981,7 +981,7 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
|
|||
}
|
||||
|
||||
if (code.HasHostFeature(HostFeature::FMA)) {
|
||||
SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel();
|
||||
SharedLabel end = ctx.GenSharedLabel(), fallback = ctx.GenSharedLabel();
|
||||
|
||||
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]);
|
||||
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
|
||||
|
|
@ -1129,7 +1129,7 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
|
|||
const Xbyak::Xmm value = ctx.reg_alloc.ScratchXmm(code);
|
||||
[[maybe_unused]] const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32();
|
||||
|
||||
SharedLabel bad_values = GenSharedLabel(), end = GenSharedLabel();
|
||||
SharedLabel bad_values = ctx.GenSharedLabel(), end = ctx.GenSharedLabel();
|
||||
|
||||
code.movaps(value, operand);
|
||||
|
||||
|
|
@ -1296,7 +1296,7 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
|
|||
}
|
||||
|
||||
if (code.HasHostFeature(HostFeature::FMA | HostFeature::AVX)) {
|
||||
SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel();
|
||||
SharedLabel end = ctx.GenSharedLabel(), fallback = ctx.GenSharedLabel();
|
||||
|
||||
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]);
|
||||
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
|
||||
|
|
@ -1641,7 +1641,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
|||
const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm(code);
|
||||
|
||||
if (!unsigned_) {
|
||||
SharedLabel saturate_max = GenSharedLabel(), end = GenSharedLabel();
|
||||
SharedLabel saturate_max = ctx.GenSharedLabel(), end = ctx.GenSharedLabel();
|
||||
|
||||
ZeroIfNaN<64>(code, src, scratch);
|
||||
|
||||
|
|
|
|||
|
|
@ -86,7 +86,7 @@ void AxxEmitX64::EmitMemoryRead(AxxEmitContext& ctx, IR::Inst* inst) {
|
|||
|
||||
const auto wrapped_fn = read_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value_idx)];
|
||||
|
||||
SharedLabel abort = GenSharedLabel(), end = GenSharedLabel();
|
||||
SharedLabel abort = ctx.GenSharedLabel(), end = ctx.GenSharedLabel();
|
||||
|
||||
if (fastmem_marker) {
|
||||
// Use fastmem
|
||||
|
|
@ -108,7 +108,7 @@ void AxxEmitX64::EmitMemoryRead(AxxEmitContext& ctx, IR::Inst* inst) {
|
|||
conf.recompile_on_fastmem_failure,
|
||||
});
|
||||
|
||||
EmitCheckMemoryAbort(ctx, inst, end.get());
|
||||
EmitCheckMemoryAbort(ctx, inst, end);
|
||||
code.jmp(*end, code.T_NEAR);
|
||||
});
|
||||
} else {
|
||||
|
|
@ -120,7 +120,7 @@ void AxxEmitX64::EmitMemoryRead(AxxEmitContext& ctx, IR::Inst* inst) {
|
|||
ctx.deferred_emits.emplace_back([=, this, &ctx] {
|
||||
code.L(*abort);
|
||||
code.call(wrapped_fn);
|
||||
EmitCheckMemoryAbort(ctx, inst, end.get());
|
||||
EmitCheckMemoryAbort(ctx, inst, end);
|
||||
code.jmp(*end, code.T_NEAR);
|
||||
});
|
||||
}
|
||||
|
|
@ -173,7 +173,7 @@ void AxxEmitX64::EmitMemoryWrite(AxxEmitContext& ctx, IR::Inst* inst) {
|
|||
|
||||
const auto wrapped_fn = write_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value_idx)];
|
||||
|
||||
SharedLabel abort = GenSharedLabel(), end = GenSharedLabel();
|
||||
SharedLabel abort = ctx.GenSharedLabel(), end = ctx.GenSharedLabel();
|
||||
|
||||
if (fastmem_marker) {
|
||||
// Use fastmem
|
||||
|
|
@ -195,7 +195,7 @@ void AxxEmitX64::EmitMemoryWrite(AxxEmitContext& ctx, IR::Inst* inst) {
|
|||
conf.recompile_on_fastmem_failure,
|
||||
});
|
||||
|
||||
EmitCheckMemoryAbort(ctx, inst, end.get());
|
||||
EmitCheckMemoryAbort(ctx, inst, end);
|
||||
code.jmp(*end, code.T_NEAR);
|
||||
});
|
||||
} else {
|
||||
|
|
@ -207,7 +207,7 @@ void AxxEmitX64::EmitMemoryWrite(AxxEmitContext& ctx, IR::Inst* inst) {
|
|||
ctx.deferred_emits.emplace_back([=, this, &ctx] {
|
||||
code.L(*abort);
|
||||
code.call(wrapped_fn);
|
||||
EmitCheckMemoryAbort(ctx, inst, end.get());
|
||||
EmitCheckMemoryAbort(ctx, inst, end);
|
||||
code.jmp(*end, code.T_NEAR);
|
||||
});
|
||||
}
|
||||
|
|
@ -352,7 +352,7 @@ void AxxEmitX64::EmitExclusiveReadMemoryInline(AxxEmitContext& ctx, IR::Inst* in
|
|||
|
||||
const auto fastmem_marker = ShouldFastmem(ctx, inst);
|
||||
if (fastmem_marker) {
|
||||
SharedLabel abort = GenSharedLabel(), end = GenSharedLabel();
|
||||
SharedLabel abort = ctx.GenSharedLabel(), end = ctx.GenSharedLabel();
|
||||
bool require_abort_handling = false;
|
||||
|
||||
const auto src_ptr = EmitFastmemVAddr(code, ctx, *abort, vaddr, require_abort_handling);
|
||||
|
|
@ -427,7 +427,7 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
|
|||
|
||||
EmitExclusiveLock(code, conf, tmp, tmp2.cvt32());
|
||||
|
||||
SharedLabel end = GenSharedLabel();
|
||||
SharedLabel end = ctx.GenSharedLabel();
|
||||
|
||||
code.mov(status, u32(1));
|
||||
code.movzx(tmp.cvt32(), code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)]);
|
||||
|
|
@ -460,7 +460,7 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
|
|||
|
||||
const auto fastmem_marker = ShouldFastmem(ctx, inst);
|
||||
if (fastmem_marker) {
|
||||
SharedLabel abort = GenSharedLabel();
|
||||
SharedLabel abort = ctx.GenSharedLabel();
|
||||
bool require_abort_handling = false;
|
||||
|
||||
const auto dest_ptr = EmitFastmemVAddr(code, ctx, *abort, vaddr, require_abort_handling, tmp);
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ void EmitDetectMisalignedVAddr(BlockOfCode& code, EmitContext& ctx, size_t bitsi
|
|||
if (ctx.conf.only_detect_misalignment_via_page_table_on_page_boundary) {
|
||||
const u32 page_align_mask = static_cast<u32>(page_table_const_size - 1) & ~align_mask;
|
||||
|
||||
SharedLabel detect_boundary = GenSharedLabel(), resume = GenSharedLabel();
|
||||
SharedLabel detect_boundary = ctx.GenSharedLabel(), resume = ctx.GenSharedLabel();
|
||||
|
||||
code.jnz(*detect_boundary, code.T_NEAR);
|
||||
code.L(*resume);
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -24,6 +24,7 @@
|
|||
#include "dynarmic/common/fp/fpcr.h"
|
||||
#include "dynarmic/common/fp/info.h"
|
||||
#include "dynarmic/common/fp/op.h"
|
||||
#include "dynarmic/common/fp/rounding_mode.h"
|
||||
#include "dynarmic/common/fp/util.h"
|
||||
#include "dynarmic/interface/optimization_flags.h"
|
||||
#include "dynarmic/ir/basic_block.h"
|
||||
|
|
@ -93,7 +94,7 @@ void HandleNaNs(BlockOfCode& code, EmitContext& ctx, bool fpcr_controlled, std::
|
|||
code.cmp(bitmask, 0);
|
||||
}
|
||||
|
||||
SharedLabel end = GenSharedLabel(), nan = GenSharedLabel();
|
||||
SharedLabel end = ctx.GenSharedLabel(), nan = ctx.GenSharedLabel();
|
||||
|
||||
code.jnz(*nan, code.T_NEAR);
|
||||
code.L(*end);
|
||||
|
|
@ -188,23 +189,6 @@ void ForceToDefaultNaN(BlockOfCode& code, FP::FPCR fpcr, Xbyak::Xmm result) {
|
|||
}
|
||||
}
|
||||
|
||||
// Emits packed floating-point code that rewrites `result` in place, picking one
// of three instruction sequences according to the host features reported by
// `code`. Going by the name and the ordered-compare masking below, the intent
// is to turn NaN lanes into zero while leaving other lanes unchanged.
// The two non-AVX512 paths write to xmm0 (used as `nan_mask`), so any value the
// caller held there is overwritten.
template<size_t fsize>
|
||||
void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm result) {
|
||||
const Xbyak::Xmm nan_mask = xmm0;
|
||||
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
|
||||
// Single fix-up instruction driven by a lookup table built from two PosZero
// entries. NOTE(review): presumably those two entries are the NaN classes -
// confirm against FixupLUT's parameter order.
constexpr u32 nan_to_zero = FixupLUT(FpFixup::PosZero,
|
||||
FpFixup::PosZero);
|
||||
FCODE(vfixupimmp)(result, result, code.BConst<32>(ptr_b, nan_to_zero), u8(0));
|
||||
} else if (code.HasHostFeature(HostFeature::AVX)) {
|
||||
// Compare `result` with itself using the "ordered" predicate to build the
// mask, then AND the mask into `result`.
FCODE(vcmpordp)(nan_mask, result, result);
|
||||
FCODE(vandp)(result, result, nan_mask);
|
||||
} else {
|
||||
// Same masking idea with two-operand instructions: copy first, compare the
// copy against itself, then AND.
code.movaps(nan_mask, result);
|
||||
FCODE(cmpordp)(nan_mask, nan_mask);
|
||||
code.andps(result, nan_mask);
|
||||
}
|
||||
}
|
||||
|
||||
template<size_t fsize>
|
||||
void DenormalsAreZero(BlockOfCode& code, FP::FPCR fpcr, std::initializer_list<Xbyak::Xmm> to_daz, Xbyak::Xmm tmp) {
|
||||
if (fpcr.FZ()) {
|
||||
|
|
@ -1330,7 +1314,7 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
|||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
|
||||
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
|
||||
|
||||
SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel();
|
||||
SharedLabel end = ctx.GenSharedLabel(), fallback = ctx.GenSharedLabel();
|
||||
|
||||
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
|
||||
code.movaps(result, xmm_a);
|
||||
|
|
@ -1603,7 +1587,7 @@ static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
|
|||
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
|
||||
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
|
||||
|
||||
SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel();
|
||||
SharedLabel end = ctx.GenSharedLabel(), fallback = ctx.GenSharedLabel();
|
||||
|
||||
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
|
||||
code.movaps(result, GetVectorOf<fsize, false, 0, 2>(code));
|
||||
|
|
@ -1776,7 +1760,7 @@ static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
|
|||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
|
||||
const Xbyak::Xmm value = ctx.reg_alloc.ScratchXmm(code);
|
||||
|
||||
SharedLabel bad_values = GenSharedLabel(), end = GenSharedLabel();
|
||||
SharedLabel bad_values = ctx.GenSharedLabel(), end = ctx.GenSharedLabel();
|
||||
|
||||
code.movaps(value, operand);
|
||||
|
||||
|
|
@ -1867,7 +1851,7 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
|
|||
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
|
||||
const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm(code);
|
||||
|
||||
SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel();
|
||||
SharedLabel end = ctx.GenSharedLabel(), fallback = ctx.GenSharedLabel();
|
||||
|
||||
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
|
||||
code.vmovaps(result, GetVectorOf<fsize, false, 0, 3>(code));
|
||||
|
|
@ -2004,120 +1988,123 @@ void EmitX64::EmitFPVectorToHalf32(EmitContext& ctx, IR::Inst* inst) {
|
|||
template<size_t fsize, bool unsigned_>
|
||||
void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
const size_t fbits = inst->GetArg(1).GetU8();
|
||||
const auto rounding = static_cast<FP::RoundingMode>(inst->GetArg(2).GetU8());
|
||||
const auto rounding = FP::RoundingMode(inst->GetArg(2).GetU8());
|
||||
[[maybe_unused]] const bool fpcr_controlled = inst->GetArg(3).GetU1();
|
||||
|
||||
if constexpr (fsize != 16) {
|
||||
if (code.HasHostFeature(HostFeature::SSE41) && rounding != FP::RoundingMode::ToNearest_TieAwayFromZero) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(code, args[0]);
|
||||
|
||||
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
|
||||
const int round_imm = [&] {
|
||||
switch (rounding) {
|
||||
case FP::RoundingMode::ToNearest_TieEven:
|
||||
default:
|
||||
return 0b00;
|
||||
case FP::RoundingMode::TowardsPlusInfinity:
|
||||
return 0b10;
|
||||
case FP::RoundingMode::TowardsMinusInfinity:
|
||||
return 0b01;
|
||||
case FP::RoundingMode::TowardsZero:
|
||||
return 0b11;
|
||||
}
|
||||
}();
|
||||
|
||||
const auto perform_conversion = [&code, &ctx](const Xbyak::Xmm& src) {
|
||||
// MSVC doesn't allow us to use a [&] capture, so we have to do this instead.
|
||||
(void)ctx;
|
||||
|
||||
if constexpr (fsize == 32) {
|
||||
code.cvttps2dq(src, src);
|
||||
} else {
|
||||
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
|
||||
code.vcvttpd2qq(src, src);
|
||||
} else {
|
||||
const Xbyak::Reg64 hi = ctx.reg_alloc.ScratchGpr(code);
|
||||
const Xbyak::Reg64 lo = ctx.reg_alloc.ScratchGpr(code);
|
||||
|
||||
code.cvttsd2si(lo, src);
|
||||
code.punpckhqdq(src, src);
|
||||
code.cvttsd2si(hi, src);
|
||||
code.movq(src, lo);
|
||||
code.pinsrq(src, hi, 1);
|
||||
|
||||
ctx.reg_alloc.Release(hi);
|
||||
ctx.reg_alloc.Release(lo);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
if (fbits != 0) {
|
||||
const u64 scale_factor = fsize == 32
|
||||
? static_cast<u64>(fbits + 127) << 23
|
||||
: static_cast<u64>(fbits + 1023) << 52;
|
||||
FCODE(mulp)(src, GetVectorOf<fsize>(code, scale_factor));
|
||||
if (code.HasHostFeature(HostFeature::SSE41) && fsize != 16 && rounding != FP::RoundingMode::ToNearest_TieAwayFromZero) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(code, args[0]);
|
||||
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
|
||||
const int round_imm = [&] {
|
||||
switch (rounding) {
|
||||
case FP::RoundingMode::ToNearest_TieEven:
|
||||
default:
|
||||
return 0b00;
|
||||
case FP::RoundingMode::TowardsPlusInfinity:
|
||||
return 0b10;
|
||||
case FP::RoundingMode::TowardsMinusInfinity:
|
||||
return 0b01;
|
||||
case FP::RoundingMode::TowardsZero:
|
||||
return 0b11;
|
||||
}
|
||||
}();
|
||||
const auto perform_conversion = [&code, &ctx](const Xbyak::Xmm& src) {
|
||||
// MSVC doesn't allow us to use a [&] capture, so we have to do this instead.
|
||||
(void)ctx;
|
||||
|
||||
FCODE(roundp)(src, src, static_cast<u8>(round_imm));
|
||||
ZeroIfNaN<fsize>(code, src);
|
||||
|
||||
constexpr u64 float_upper_limit_signed = fsize == 32 ? 0x4f000000 : 0x43e0000000000000;
|
||||
[[maybe_unused]] constexpr u64 float_upper_limit_unsigned = fsize == 32 ? 0x4f800000 : 0x43f0000000000000;
|
||||
|
||||
if constexpr (unsigned_) {
|
||||
if constexpr (fsize == 32) {
|
||||
code.cvttps2dq(src, src);
|
||||
} else {
|
||||
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
|
||||
// Mask positive values
|
||||
code.xorps(xmm0, xmm0);
|
||||
FCODE(vcmpp)(k1, src, xmm0, Cmp::GreaterEqual_OQ);
|
||||
|
||||
// Convert positive values to unsigned integers, write 0 anywhere else
|
||||
// vcvttp*2u*q already saturates out-of-range values to (0xFFFF...)
|
||||
if constexpr (fsize == 32) {
|
||||
code.vcvttps2udq(src | k1 | T_z, src);
|
||||
} else {
|
||||
code.vcvttpd2uqq(src | k1 | T_z, src);
|
||||
}
|
||||
code.vcvttpd2qq(src, src);
|
||||
} else {
|
||||
// Zero is minimum
|
||||
code.xorps(xmm0, xmm0);
|
||||
FCODE(cmplep)(xmm0, src);
|
||||
FCODE(andp)(src, xmm0);
|
||||
const Xbyak::Reg64 hi = ctx.reg_alloc.ScratchGpr(code);
|
||||
const Xbyak::Reg64 lo = ctx.reg_alloc.ScratchGpr(code);
|
||||
|
||||
// Will we exceed unsigned range?
|
||||
const Xbyak::Xmm exceed_unsigned = ctx.reg_alloc.ScratchXmm(code);
|
||||
code.movaps(exceed_unsigned, GetVectorOf<fsize, float_upper_limit_unsigned>(code));
|
||||
FCODE(cmplep)(exceed_unsigned, src);
|
||||
code.cvttsd2si(lo, src);
|
||||
code.punpckhqdq(src, src);
|
||||
code.cvttsd2si(hi, src);
|
||||
code.movq(src, lo);
|
||||
code.pinsrq(src, hi, 1);
|
||||
|
||||
// Will we exceed signed range?
|
||||
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
|
||||
code.movaps(tmp, GetVectorOf<fsize, float_upper_limit_signed>(code));
|
||||
code.movaps(xmm0, tmp);
|
||||
FCODE(cmplep)(xmm0, src);
|
||||
FCODE(andp)(tmp, xmm0);
|
||||
FCODE(subp)(src, tmp);
|
||||
perform_conversion(src);
|
||||
ICODE(psll)(xmm0, u8(fsize - 1));
|
||||
FCODE(orp)(src, xmm0);
|
||||
ctx.reg_alloc.Release(hi);
|
||||
ctx.reg_alloc.Release(lo);
|
||||
}
|
||||
}
|
||||
};
|
||||
if (fbits != 0) {
|
||||
const u64 scale_factor = fsize == 32
|
||||
? u64(fbits + 127) << 23
|
||||
: u64(fbits + 1023) << 52;
|
||||
FCODE(mulp)(src, GetVectorOf<fsize>(code, scale_factor));
|
||||
}
|
||||
|
||||
// Saturate to max
|
||||
FCODE(orp)(src, exceed_unsigned);
|
||||
FCODE(roundp)(src, src, u8(round_imm));
|
||||
const Xbyak::Xmm nan_mask = xmm0;
|
||||
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
|
||||
static constexpr u32 nan_to_zero = FixupLUT(FpFixup::PosZero, FpFixup::PosZero);
|
||||
FCODE(vfixupimmp)(src, src, code.BConst<32>(ptr_b, nan_to_zero), u8(0));
|
||||
} else if (code.HasHostFeature(HostFeature::AVX)) {
|
||||
FCODE(vcmpordp)(nan_mask, src, src);
|
||||
FCODE(vandp)(src, src, nan_mask);
|
||||
} else {
|
||||
code.movaps(nan_mask, src);
|
||||
FCODE(cmpordp)(nan_mask, nan_mask);
|
||||
code.andps(src, nan_mask);
|
||||
}
|
||||
|
||||
constexpr u64 float_upper_limit_signed = fsize == 32 ? 0x4f000000 : 0x43e0000000000000;
|
||||
[[maybe_unused]] constexpr u64 float_upper_limit_unsigned = fsize == 32 ? 0x4f800000 : 0x43f0000000000000;
|
||||
|
||||
if constexpr (unsigned_) {
|
||||
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
|
||||
// Mask positive values
|
||||
code.xorps(xmm0, xmm0);
|
||||
FCODE(vcmpp)(k1, src, xmm0, Cmp::GreaterEqual_OQ);
|
||||
|
||||
// Convert positive values to unsigned integers, write 0 anywhere else
|
||||
// vcvttp*2u*q already saturates out-of-range values to (0xFFFF...)
|
||||
if (fsize == 32) {
|
||||
code.vcvttps2udq(src | k1 | T_z, src);
|
||||
} else {
|
||||
code.vcvttpd2uqq(src | k1 | T_z, src);
|
||||
}
|
||||
} else {
|
||||
using FPT = mcl::unsigned_integer_of_size<fsize>; // WORKAROUND: For issue 678 on MSVC
|
||||
constexpr u64 integer_max = FPT((std::numeric_limits<std::conditional_t<unsigned_, FPT, std::make_signed_t<FPT>>>::max)());
|
||||
|
||||
code.movaps(xmm0, GetVectorOf<fsize, float_upper_limit_signed>(code));
|
||||
// Zero is minimum
|
||||
code.xorps(xmm0, xmm0);
|
||||
FCODE(cmplep)(xmm0, src);
|
||||
perform_conversion(src);
|
||||
FCODE(blendvp)(src, GetVectorOf<fsize, integer_max>(code));
|
||||
}
|
||||
});
|
||||
FCODE(andp)(src, xmm0);
|
||||
|
||||
ctx.reg_alloc.DefineValue(code, inst, src);
|
||||
return;
|
||||
}
|
||||
// Will we exceed unsigned range?
|
||||
const Xbyak::Xmm exceed_unsigned = ctx.reg_alloc.ScratchXmm(code);
|
||||
code.movaps(exceed_unsigned, GetVectorOf<fsize, float_upper_limit_unsigned>(code));
|
||||
FCODE(cmplep)(exceed_unsigned, src);
|
||||
|
||||
// Will we exceed signed range?
|
||||
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
|
||||
code.movaps(tmp, GetVectorOf<fsize, float_upper_limit_signed>(code));
|
||||
code.movaps(xmm0, tmp);
|
||||
FCODE(cmplep)(xmm0, src);
|
||||
FCODE(andp)(tmp, xmm0);
|
||||
FCODE(subp)(src, tmp);
|
||||
perform_conversion(src);
|
||||
ICODE(psll)(xmm0, u8(fsize - 1));
|
||||
FCODE(orp)(src, xmm0);
|
||||
|
||||
// Saturate to max
|
||||
FCODE(orp)(src, exceed_unsigned);
|
||||
}
|
||||
} else {
|
||||
using FPT = mcl::unsigned_integer_of_size<fsize>; // WORKAROUND: For issue 678 on MSVC
|
||||
constexpr u64 integer_max = FPT((std::numeric_limits<std::conditional_t<unsigned_, FPT, std::make_signed_t<FPT>>>::max)());
|
||||
code.movaps(xmm0, GetVectorOf<fsize, float_upper_limit_signed>(code));
|
||||
FCODE(cmplep)(xmm0, src);
|
||||
perform_conversion(src);
|
||||
FCODE(blendvp)(src, GetVectorOf<fsize, integer_max>(code));
|
||||
}
|
||||
});
|
||||
ctx.reg_alloc.DefineValue(code, inst, src);
|
||||
return;
|
||||
}
|
||||
|
||||
using FPT = mcl::unsigned_integer_of_size<fsize>; // WORKAROUND: For issue 678 on MSVC
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
|
|
@ -176,7 +176,7 @@ struct ExceptionHandler::Impl final {
|
|||
|
||||
code.align(16);
|
||||
const u8* exception_handler_without_cb = code.getCurr<u8*>();
|
||||
code.mov(code.eax, static_cast<u32>(ExceptionContinueSearch));
|
||||
code.mov(code.eax, u32(ExceptionContinueSearch));
|
||||
code.ret();
|
||||
|
||||
code.align(16);
|
||||
|
|
@ -192,20 +192,18 @@ struct ExceptionHandler::Impl final {
|
|||
code.lea(code.rsp, code.ptr[code.rsp - 8]);
|
||||
code.mov(code.ABI_PARAM1, std::bit_cast<u64>(&cb));
|
||||
code.mov(code.ABI_PARAM2, code.ABI_PARAM3);
|
||||
code.CallLambda(
|
||||
[](const std::function<FakeCall(u64)>& cb_, PCONTEXT ctx) {
|
||||
FakeCall fc = cb_(ctx->Rip);
|
||||
|
||||
ctx->Rsp -= sizeof(u64);
|
||||
*std::bit_cast<u64*>(ctx->Rsp) = fc.ret_rip;
|
||||
ctx->Rip = fc.call_rip;
|
||||
});
|
||||
code.CallLambda([](const std::function<FakeCall(u64)>& cb_, PCONTEXT ctx) {
|
||||
FakeCall fc = cb_(ctx->Rip);
|
||||
ctx->Rsp -= sizeof(u64);
|
||||
*std::bit_cast<u64*>(ctx->Rsp) = fc.ret_rip;
|
||||
ctx->Rip = fc.call_rip;
|
||||
});
|
||||
code.add(code.rsp, 8);
|
||||
code.mov(code.eax, static_cast<u32>(ExceptionContinueExecution));
|
||||
code.mov(code.eax, u32(ExceptionContinueExecution));
|
||||
code.ret();
|
||||
|
||||
exception_handler_without_cb_offset = static_cast<ULONG>(exception_handler_without_cb - code.getCode<u8*>());
|
||||
exception_handler_with_cb_offset = static_cast<ULONG>(exception_handler_with_cb - code.getCode<u8*>());
|
||||
exception_handler_without_cb_offset = ULONG(exception_handler_without_cb - code.getCode<u8*>());
|
||||
exception_handler_with_cb_offset = ULONG(exception_handler_with_cb - code.getCode<u8*>());
|
||||
|
||||
code.align(16);
|
||||
UNWIND_INFO* unwind_info = static_cast<UNWIND_INFO*>(code.AllocateFromCodeSpace(sizeof(UNWIND_INFO)));
|
||||
|
|
|
|||
|
|
@ -417,7 +417,8 @@ HostLoc RegAlloc::SelectARegister(std::bitset<32> desired_locations) const noexc
|
|||
// While R13 and R14 are technically available, we avoid allocating for them
|
||||
// at all costs, because theoretically skipping them is better than spilling
|
||||
// all over the place - it also fixes bugs with high reg pressure
|
||||
} else if (i >= HostLoc::R13 && i <= HostLoc::R15) {
|
||||
// %rbp must not be trashed, so skip it as well
|
||||
} else if (i == HostLoc::RBP || (i >= HostLoc::R13 && i <= HostLoc::R15)) {
|
||||
// skip, do not touch
|
||||
// Intel recommends to reuse registers as soon as they're overwritable (DO NOT SPILL)
|
||||
} else if (loc_info.IsEmpty()) {
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
|
|
@ -22,14 +22,13 @@ constexpr size_t SpillCount = 64;
|
|||
#endif
|
||||
|
||||
struct alignas(16) StackLayout {
|
||||
// Needs alignment for VMOV and XMM spills
|
||||
alignas(16) std::array<std::array<u64, 2>, SpillCount> spill;
|
||||
s64 cycles_remaining;
|
||||
s64 cycles_to_run;
|
||||
|
||||
std::array<std::array<u64, 2>, SpillCount> spill;
|
||||
|
||||
u32 save_host_MXCSR;
|
||||
|
||||
bool check_bit;
|
||||
u64 abi_base_pointer;
|
||||
};
|
||||
|
||||
#ifdef _MSC_VER
|
||||
|
|
|
|||
|
|
@ -1,3 +1,6 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// First we list common shared opcodes
|
||||
// Since we give priority to A64 performance, we include them first, this is so we
|
||||
// can discard all A32 opcodes instead of having a "hole" in our checks
|
||||
|
|
@ -710,6 +713,8 @@ A64OPC(ExclusiveWriteMemory32, U32, U64,
|
|||
A64OPC(ExclusiveWriteMemory64, U32, U64, U64, U64, AccType )
|
||||
A64OPC(ExclusiveWriteMemory128, U32, U64, U64, U128, AccType )
|
||||
|
||||
// Remember to update:
|
||||
// - a32_emit_x64.cpp
|
||||
|
||||
// A32 Context getters/setters
|
||||
A32OPC(SetCheckBit, Void, U1 )
|
||||
|
|
|
|||
|
|
@ -415,6 +415,105 @@ TEST_CASE("A64: URSHL", "[a64]") {
|
|||
CHECK(jit.GetVector(9) == Vector{0x0000000000000002, 0x12db8b8280e0ba});
|
||||
}
|
||||
|
||||
// Regression test for the A64 SQSHLU (signed saturating shift left unsigned,
// immediate form) instruction. Eight SQSHLU instructions are emitted across
// the B16, H8, S4 and D2 arrangements, reading V0-V3 and writing V8-V15; the
// results are then compared against hard-coded expected vectors.
TEST_CASE("A64: SQSHLU", "[a64]") {
|
||||
A64TestEnv env;
|
||||
A64::UserConfig jit_user_config{};
|
||||
jit_user_config.callbacks = &env;
|
||||
A64::Jit jit{jit_user_config};
|
||||
|
||||
// Assemble the instructions under test directly into the test environment's code memory.
oaknut::VectorCodeGenerator code{env.code_mem, nullptr};
|
||||
code.SQSHLU(V8.B16(), V0.B16(), 1);
|
||||
code.SQSHLU(V9.H8(), V1.H8(), 2);
|
||||
code.SQSHLU(V10.S4(), V2.S4(), 28);
|
||||
code.SQSHLU(V11.D2(), V3.D2(), 4);
|
||||
code.SQSHLU(V12.S4(), V0.S4(), 1);
|
||||
code.SQSHLU(V13.S4(), V1.S4(), 3);
|
||||
// Shift amount 0: V14/V15 exercise the no-shift case.
code.SQSHLU(V14.S4(), V2.S4(), 0);
|
||||
code.SQSHLU(V15.S4(), V3.S4(), 0);
|
||||
|
||||
// Fixed source operands (V0-V3).
// NOTE(review): unlike the SMIN and SMINP test cases, this test does not call
// jit.SetPC(0) before running - presumably the default PC is already 0; confirm.
jit.SetVector(0, Vector{0xffffffff'18ba6a6a, 0x7fffffff'943b954f});
|
||||
jit.SetVector(1, Vector{0x0000000b'0000000f, 0xffffffff'ffffffff});
|
||||
jit.SetVector(2, Vector{0x00000001'000000ff, 0x00000010'0000007f});
|
||||
jit.SetVector(3, Vector{0xffffffffffffffff, 0x96dc5c140705cd04});
|
||||
|
||||
// The tick budget is taken from the size of env.code_mem.
env.ticks_left = env.code_mem.size();
|
||||
CheckedRun([&]() { jit.Run(); });
|
||||
|
||||
// Expected destination values for V8-V15. CHECK (not REQUIRE) is used, so
// every comparison is evaluated even if an earlier one fails.
CHECK(jit.GetVector(8) == Vector{0x3000d4d4, 0xfe0000000076009e});
|
||||
CHECK(jit.GetVector(9) == Vector{0x2c0000003c, 0});
|
||||
CHECK(jit.GetVector(10) == Vector{0x10000000'ffffffff, 0xffffffff'ffffffff});
|
||||
CHECK(jit.GetVector(11) == Vector{0, 0});
|
||||
CHECK(jit.GetVector(12) == Vector{0x3174d4d4, 0xfffffffe00000000});
|
||||
CHECK(jit.GetVector(13) == Vector{0x5800000078, 0});
|
||||
CHECK(jit.GetVector(14) == Vector{0x1000000ff, 0x100000007f});
|
||||
CHECK(jit.GetVector(15) == Vector{0, 0x705cd04});
|
||||
}
|
||||
|
||||
// Regression test for the A64 SMIN (signed minimum, vector) instruction.
// Eight SMIN instructions are emitted over the B16, H8 and S4 arrangements,
// reading V0-V3 and writing V8-V15; V8-V10 are compared against hard-coded
// expected vectors.
TEST_CASE("A64: SMIN", "[a64]") {
|
||||
A64TestEnv env;
|
||||
A64::UserConfig jit_user_config{};
|
||||
jit_user_config.callbacks = &env;
|
||||
A64::Jit jit{jit_user_config};
|
||||
|
||||
// Assemble the instructions under test directly into the test environment's code memory.
oaknut::VectorCodeGenerator code{env.code_mem, nullptr};
|
||||
code.SMIN(V8.B16(), V0.B16(), V3.B16());
|
||||
code.SMIN(V9.H8(), V1.H8(), V2.H8());
|
||||
code.SMIN(V10.S4(), V2.S4(), V3.S4());
|
||||
code.SMIN(V11.S4(), V3.S4(), V3.S4());
|
||||
code.SMIN(V12.S4(), V0.S4(), V3.S4());
|
||||
code.SMIN(V13.S4(), V1.S4(), V2.S4());
|
||||
code.SMIN(V14.S4(), V2.S4(), V1.S4());
|
||||
code.SMIN(V15.S4(), V3.S4(), V0.S4());
|
||||
|
||||
// Start execution at address 0 with fixed source operands in V0-V3.
jit.SetPC(0);
|
||||
jit.SetVector(0, Vector{0xffffffff'18ba6a6a, 0x7fffffff'943b954f});
|
||||
jit.SetVector(1, Vector{0x0000000b'0000000f, 0xffffffff'ffffffff});
|
||||
jit.SetVector(2, Vector{0x00000001'000000ff, 0x00000010'0000007f});
|
||||
jit.SetVector(3, Vector{0xffffffff'ffffffff, 0x96dc5c14'0705cd04});
|
||||
|
||||
// NOTE(review): the tick budget is 4 although eight instructions are emitted
// above - confirm this is intentional.
env.ticks_left = 4;
|
||||
CheckedRun([&]() { jit.Run(); });
|
||||
|
||||
// NOTE(review): only V8-V10 are asserted; V11-V15 are written by the emitted
// code but never checked - confirm whether expectations are missing.
REQUIRE(jit.GetVector(8) == Vector{0xffffffffffbaffff, 0x96dcffff94059504});
|
||||
REQUIRE(jit.GetVector(9) == Vector{0x10000000f, 0xffffffffffffffff});
|
||||
REQUIRE(jit.GetVector(10) == Vector{0xffffffffffffffff, 0x96dc5c140000007f});
|
||||
}
|
||||
|
||||
// Regression test for the A64 SMINP (signed minimum pairwise, vector)
// instruction. Eight SMINP instructions are emitted over the B16, H8 and S4
// arrangements, reading V0-V3 and writing V8-V15; all eight destinations are
// compared against hard-coded expected vectors.
TEST_CASE("A64: SMINP", "[a64]") {
|
||||
A64TestEnv env;
|
||||
A64::UserConfig jit_user_config{};
|
||||
jit_user_config.callbacks = &env;
|
||||
A64::Jit jit{jit_user_config};
|
||||
|
||||
// Assemble the instructions under test directly into the test environment's code memory.
oaknut::VectorCodeGenerator code{env.code_mem, nullptr};
|
||||
code.SMINP(V8.B16(), V0.B16(), V3.B16());
|
||||
code.SMINP(V9.H8(), V1.H8(), V2.H8());
|
||||
code.SMINP(V10.S4(), V2.S4(), V1.S4());
|
||||
code.SMINP(V11.S4(), V3.S4(), V3.S4());
|
||||
code.SMINP(V12.S4(), V0.S4(), V3.S4());
|
||||
code.SMINP(V13.S4(), V1.S4(), V2.S4());
|
||||
code.SMINP(V14.S4(), V2.S4(), V1.S4());
|
||||
code.SMINP(V15.S4(), V3.S4(), V0.S4());
|
||||
|
||||
// Start execution at address 0 with fixed source operands in V0-V3.
jit.SetPC(0);
|
||||
jit.SetVector(0, Vector{0xffffffff'18ba6a6a, 0x7fffffff'943b954f});
|
||||
jit.SetVector(1, Vector{0x0000000b'0000000f, 0xffffffff'ffffffff});
|
||||
jit.SetVector(2, Vector{0x00000001'000000ff, 0x00000010'0000007f});
|
||||
jit.SetVector(3, Vector{0xffffffff'ffffffff, 0x96dc5c14'0705cd04});
|
||||
|
||||
// NOTE(review): the tick budget is 4 although eight instructions are emitted
// and all of V8-V15 are asserted below - presumably the whole emitted block
// still executes; confirm.
env.ticks_left = 4;
|
||||
CheckedRun([&]() { jit.Run(); });
|
||||
|
||||
// Expected destination values for V8-V15; REQUIRE aborts the test case at the
// first mismatch.
REQUIRE(jit.GetVector(8) == Vector{0xffff9495ffffba6a, 0x961405cdffffffff});
|
||||
REQUIRE(jit.GetVector(9) == Vector{0xffffffff00000000, 0});
|
||||
REQUIRE(jit.GetVector(10) == Vector{0x1000000001, 0xffffffff0000000b});
|
||||
REQUIRE(jit.GetVector(11) == Vector{0x96dc5c14ffffffff, 0x96dc5c14ffffffff});
|
||||
REQUIRE(jit.GetVector(12) == Vector{0x943b954fffffffff, 0x96dc5c14ffffffff});
|
||||
REQUIRE(jit.GetVector(13) == Vector{0xffffffff0000000b, 0x1000000001});
|
||||
REQUIRE(jit.GetVector(14) == Vector{0x1000000001, 0xffffffff0000000b});
|
||||
REQUIRE(jit.GetVector(15) == Vector{0x96dc5c14ffffffff, 0x943b954fffffffff});
|
||||
}
|
||||
|
||||
TEST_CASE("A64: XTN", "[a64]") {
|
||||
A64TestEnv env;
|
||||
A64::UserConfig jit_user_config{};
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ using VideoCore::Surface::PixelFormatFromDepthFormat;
|
|||
using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
|
||||
|
||||
constexpr size_t NUM_STAGES = Maxwell::MaxShaderStage;
|
||||
constexpr size_t MAX_IMAGE_ELEMENTS = 64;
|
||||
constexpr size_t INLINE_IMAGE_ELEMENTS = 64;
|
||||
|
||||
DescriptorLayoutBuilder MakeBuilder(const Device& device, std::span<const Shader::Info> infos) {
|
||||
DescriptorLayoutBuilder builder{device};
|
||||
|
|
@ -264,7 +264,11 @@ GraphicsPipeline::GraphicsPipeline(
|
|||
stage_infos[stage] = *info;
|
||||
enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask;
|
||||
std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin());
|
||||
num_image_elements += Shader::NumDescriptors(info->texture_buffer_descriptors);
|
||||
num_image_elements += Shader::NumDescriptors(info->image_buffer_descriptors);
|
||||
num_textures += Shader::NumDescriptors(info->texture_descriptors);
|
||||
num_image_elements += Shader::NumDescriptors(info->texture_descriptors);
|
||||
num_image_elements += Shader::NumDescriptors(info->image_descriptors);
|
||||
}
|
||||
fragment_has_color0_output = stage_infos[NUM_STAGES - 1].stores_frag_color[0];
|
||||
auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool, pipeline_statistics] {
|
||||
|
|
@ -310,10 +314,10 @@ void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) {
|
|||
|
||||
template <typename Spec>
|
||||
bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
||||
std::array<VideoCommon::ImageViewInOut, MAX_IMAGE_ELEMENTS> views;
|
||||
std::array<VideoCommon::SamplerId, MAX_IMAGE_ELEMENTS> samplers;
|
||||
size_t sampler_index{};
|
||||
size_t view_index{};
|
||||
small_vector<VideoCommon::ImageViewInOut, INLINE_IMAGE_ELEMENTS> views;
|
||||
small_vector<VideoCommon::SamplerId, INLINE_IMAGE_ELEMENTS> samplers;
|
||||
views.reserve(num_image_elements);
|
||||
samplers.reserve(num_textures);
|
||||
|
||||
texture_cache.SynchronizeGraphicsDescriptors();
|
||||
|
||||
|
|
@ -358,11 +362,11 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
|||
const auto add_image{[&](const auto& desc, bool blacklist) LAMBDA_FORCEINLINE {
|
||||
for (u32 index = 0; index < desc.count; ++index) {
|
||||
const auto handle{read_handle(desc, index)};
|
||||
views[view_index++] = {
|
||||
views.push_back({
|
||||
.index = handle.first,
|
||||
.blacklist = blacklist,
|
||||
.id = {}
|
||||
};
|
||||
});
|
||||
}
|
||||
}};
|
||||
if constexpr (Spec::has_texture_buffers) {
|
||||
|
|
@ -378,10 +382,10 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
|||
for (const auto& desc : info.texture_descriptors) {
|
||||
for (u32 index = 0; index < desc.count; ++index) {
|
||||
const auto handle{read_handle(desc, index)};
|
||||
views[view_index++] = {handle.first};
|
||||
views.push_back({handle.first});
|
||||
|
||||
VideoCommon::SamplerId sampler{texture_cache.GetGraphicsSamplerId(handle.second)};
|
||||
samplers[sampler_index++] = sampler;
|
||||
samplers.push_back(sampler);
|
||||
}
|
||||
}
|
||||
if constexpr (Spec::has_images) {
|
||||
|
|
@ -407,7 +411,9 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
|||
if constexpr (Spec::enabled_stages[4]) {
|
||||
config_stage(4);
|
||||
}
|
||||
texture_cache.FillGraphicsImageViews<Spec::has_images>(std::span(views.data(), view_index));
|
||||
ASSERT(views.size() == num_image_elements);
|
||||
ASSERT(samplers.size() == num_textures);
|
||||
texture_cache.FillGraphicsImageViews<Spec::has_images>(std::span(views.data(), views.size()));
|
||||
|
||||
VideoCommon::ImageViewInOut* texture_buffer_it{views.data()};
|
||||
const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE {
|
||||
|
|
@ -501,7 +507,8 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
|||
buffer_cache.any_buffer_uploaded = false;
|
||||
}
|
||||
texture_cache.UpdateRenderTargets(false);
|
||||
texture_cache.CheckFeedbackLoop(views);
|
||||
texture_cache.CheckFeedbackLoop(std::span<const VideoCommon::ImageViewInOut>{views.data(),
|
||||
views.size()});
|
||||
ConfigureDraw(rescaling, render_area);
|
||||
|
||||
return true;
|
||||
|
|
@ -987,7 +994,7 @@ void GraphicsPipeline::Validate() {
|
|||
num_images += Shader::NumDescriptors(info.texture_descriptors);
|
||||
num_images += Shader::NumDescriptors(info.image_descriptors);
|
||||
}
|
||||
ASSERT(num_images <= MAX_IMAGE_ELEMENTS);
|
||||
ASSERT(num_images == num_image_elements);
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
|
|
@ -159,6 +159,7 @@ private:
|
|||
std::array<Shader::Info, NUM_STAGES> stage_infos;
|
||||
std::array<u32, 5> enabled_uniform_buffer_masks{};
|
||||
VideoCommon::UniformBufferSizes uniform_buffer_sizes{};
|
||||
size_t num_image_elements{};
|
||||
u32 num_textures{};
|
||||
bool fragment_has_color0_output{};
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue