mirror of
https://git.eden-emu.dev/eden-emu/eden
synced 2026-04-10 14:08:54 +02:00
[dynarmic, macroHLE] Use faster ankerl for xbyak maps
Signed-off-by: lizzie <lizzie@eden-emu.dev>
This commit is contained in:
parent
f0a4ac7359
commit
fc8dbc402f
10 changed files with 91 additions and 97 deletions
|
|
@ -3,11 +3,32 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <type_traits>
|
||||
#include <bitset>
|
||||
#include <initializer_list>
|
||||
#include <xbyak/xbyak.h>
|
||||
#include "common/assert.h"
|
||||
|
||||
// xbyak hates human beings: silence -Wconversion and -Wshadow before including it
|
||||
#ifdef __GNUC__
|
||||
#pragma GCC diagnostic ignored "-Wconversion"
|
||||
#pragma GCC diagnostic ignored "-Wshadow"
|
||||
#endif
|
||||
#ifdef __clang__
|
||||
#pragma clang diagnostic ignored "-Wconversion"
|
||||
#pragma clang diagnostic ignored "-Wshadow"
|
||||
#endif
|
||||
|
||||
// You must ensure this matches with src/common/x64/xbyak.h on root dir
|
||||
#include <ankerl/unordered_dense.h>
|
||||
#include <boost/unordered_map.hpp>
|
||||
#define XBYAK_STD_UNORDERED_SET ankerl::unordered_dense::set
|
||||
#define XBYAK_STD_UNORDERED_MAP ankerl::unordered_dense::map
|
||||
#define XBYAK_STD_UNORDERED_MULTIMAP boost::unordered_multimap
|
||||
#include <xbyak/xbyak.h>
|
||||
#include <xbyak/xbyak_util.h>
|
||||
|
||||
#include <xbyak/xbyak.h>
|
||||
|
||||
namespace Common::X64 {
|
||||
|
||||
constexpr size_t RegToIndex(const Xbyak::Reg& reg) {
|
||||
|
|
@ -174,12 +195,13 @@ inline ABIFrameInfo ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alig
|
|||
rsp_alignment -= subtraction;
|
||||
subtraction += rsp_alignment & 0xF;
|
||||
|
||||
return ABIFrameInfo{static_cast<s32>(subtraction),
|
||||
static_cast<s32>(subtraction - xmm_base_subtraction)};
|
||||
return ABIFrameInfo{
|
||||
s32(subtraction),
|
||||
s32(subtraction - xmm_base_subtraction)
|
||||
};
|
||||
}
|
||||
|
||||
inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
|
||||
size_t rsp_alignment, size_t needed_frame_size = 0) {
|
||||
inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, size_t rsp_alignment, size_t needed_frame_size = 0) {
|
||||
auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size);
|
||||
|
||||
for (size_t i = 0; i < regs.size(); ++i) {
|
||||
|
|
@ -202,8 +224,7 @@ inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::b
|
|||
return ABI_SHADOW_SPACE;
|
||||
}
|
||||
|
||||
inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
|
||||
size_t rsp_alignment, size_t needed_frame_size = 0) {
|
||||
inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, size_t rsp_alignment, size_t needed_frame_size = 0) {
|
||||
auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size);
|
||||
|
||||
for (size_t i = 0; i < regs.size(); ++i) {
|
||||
|
|
@ -226,4 +247,38 @@ inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bits
|
|||
}
|
||||
}
|
||||
|
||||
// Comparison-predicate constants for use as the immediate operand of cmpps/cmpss.
// The numeric values are the instruction's predicate encodings and must not be renumbered.
|
||||
enum {
|
||||
CMP_EQ = 0, // equal
|
||||
CMP_LT = 1, // less-than
|
||||
CMP_LE = 2, // less-than-or-equal
|
||||
CMP_UNORD = 3, // unordered (at least one operand is NaN)
|
||||
CMP_NEQ = 4, // not-equal
|
||||
CMP_NLT = 5, // not-less-than
|
||||
CMP_NLE = 6, // not-less-than-or-equal
|
||||
CMP_ORD = 7, // ordered (neither operand is NaN)
|
||||
};
|
||||
|
||||
constexpr bool IsWithin2G(uintptr_t ref, uintptr_t target) {
|
||||
const u64 distance = target - (ref + 5);
|
||||
return !(distance >= 0x8000'0000ULL && distance <= ~0x8000'0000ULL);
|
||||
}
|
||||
|
||||
// Overload that measures from the code generator's current emit position
// (code.getCurr()) instead of an explicit reference address.
inline bool IsWithin2G(const Xbyak::CodeGenerator& code, uintptr_t target) {
    const auto here = reinterpret_cast<uintptr_t>(code.getCurr());
    return IsWithin2G(here, target);
}
|
||||
|
||||
template <typename T>
|
||||
inline void CallFarFunction(Xbyak::CodeGenerator& code, const T f) {
|
||||
static_assert(std::is_pointer_v<T>, "Argument must be a (function) pointer.");
|
||||
size_t addr = reinterpret_cast<size_t>(f);
|
||||
if (IsWithin2G(code, addr)) {
|
||||
code.call(f);
|
||||
} else {
|
||||
// ABI_RETURN is a safe temp register to use before a call
|
||||
code.mov(ABI_RETURN, addr);
|
||||
code.call(ABI_RETURN);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Common::X64
|
||||
|
|
@ -1,46 +0,0 @@
|
|||
// SPDX-FileCopyrightText: 2016 Citra Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <type_traits>
|
||||
#include <xbyak/xbyak.h>
|
||||
#include "common/x64/xbyak_abi.h"
|
||||
|
||||
namespace Common::X64 {
|
||||
|
||||
// Comparison-predicate constants for use as the immediate operand of cmpps/cmpss.
// The numeric values are the instruction's predicate encodings and must not be renumbered.
|
||||
enum {
|
||||
CMP_EQ = 0, // equal
|
||||
CMP_LT = 1, // less-than
|
||||
CMP_LE = 2, // less-than-or-equal
|
||||
CMP_UNORD = 3, // unordered (at least one operand is NaN)
|
||||
CMP_NEQ = 4, // not-equal
|
||||
CMP_NLT = 5, // not-less-than
|
||||
CMP_NLE = 6, // not-less-than-or-equal
|
||||
CMP_ORD = 7, // ordered (neither operand is NaN)
|
||||
};
|
||||
|
||||
constexpr bool IsWithin2G(uintptr_t ref, uintptr_t target) {
|
||||
const u64 distance = target - (ref + 5);
|
||||
return !(distance >= 0x8000'0000ULL && distance <= ~0x8000'0000ULL);
|
||||
}
|
||||
|
||||
// Overload that measures from the code generator's current emit position
// (code.getCurr()) instead of an explicit reference address.
inline bool IsWithin2G(const Xbyak::CodeGenerator& code, uintptr_t target) {
    const auto here = reinterpret_cast<uintptr_t>(code.getCurr());
    return IsWithin2G(here, target);
}
|
||||
|
||||
template <typename T>
|
||||
inline void CallFarFunction(Xbyak::CodeGenerator& code, const T f) {
|
||||
static_assert(std::is_pointer_v<T>, "Argument must be a (function) pointer.");
|
||||
size_t addr = reinterpret_cast<size_t>(f);
|
||||
if (IsWithin2G(code, addr)) {
|
||||
code.call(f);
|
||||
} else {
|
||||
// ABI_RETURN is a safe temp register to use before a call
|
||||
code.mov(ABI_RETURN, addr);
|
||||
code.call(ABI_RETURN);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Common::X64
|
||||
|
|
@ -222,7 +222,7 @@ void A64EmitX64::GenTerminalHandlers() {
|
|||
terminal_handler_fast_dispatch_hint = code.getCurr<const void*>();
|
||||
calculate_location_descriptor();
|
||||
code.L(rsb_cache_miss);
|
||||
code.mov(r8, reinterpret_cast<u64>(fast_dispatch_table.data()));
|
||||
code.mov(r8, u64(fast_dispatch_table.data()));
|
||||
//code.mov(r12, qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)]);
|
||||
code.mov(r12, rbx);
|
||||
if (code.HasHostFeature(HostFeature::SSE42)) {
|
||||
|
|
@ -242,7 +242,7 @@ void A64EmitX64::GenTerminalHandlers() {
|
|||
|
||||
code.align();
|
||||
fast_dispatch_table_lookup = code.getCurr<FastDispatchEntry& (*)(u64)>();
|
||||
code.mov(code.ABI_PARAM2, reinterpret_cast<u64>(fast_dispatch_table.data()));
|
||||
code.mov(code.ABI_PARAM2, u64(fast_dispatch_table.data()));
|
||||
if (code.HasHostFeature(HostFeature::SSE42)) {
|
||||
code.crc32(code.ABI_PARAM1, code.ABI_PARAM2);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ struct FrameInfo {
|
|||
};
|
||||
static_assert(ABI_SHADOW_SPACE <= 32);
|
||||
|
||||
static FrameInfo CalculateFrameInfo(const size_t num_gprs, const size_t num_xmms, size_t frame_size) {
|
||||
static FrameInfo CalculateFrameInfo(const size_t num_gprs, const size_t num_xmms, size_t frame_size) noexcept {
|
||||
// We are initially 8 byte aligned because the return value is pushed onto an aligned stack after a call.
|
||||
const size_t rsp_alignment = (num_gprs % 2 == 0) ? 8 : 0;
|
||||
const size_t total_xmm_size = num_xmms * XMM_SIZE;
|
||||
|
|
@ -40,7 +40,7 @@ static FrameInfo CalculateFrameInfo(const size_t num_gprs, const size_t num_xmms
|
|||
};
|
||||
}
|
||||
|
||||
void ABI_PushRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size, std::bitset<32> const& regs) {
|
||||
static void ABI_PushRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size, std::bitset<32> regs) noexcept {
|
||||
using namespace Xbyak::util;
|
||||
|
||||
const size_t num_gprs = (ABI_ALL_GPRS & regs).count();
|
||||
|
|
@ -65,7 +65,7 @@ void ABI_PushRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size,
|
|||
}
|
||||
}
|
||||
|
||||
void ABI_PopRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size, std::bitset<32> const& regs) {
|
||||
static void ABI_PopRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size, std::bitset<32> regs) noexcept {
|
||||
using namespace Xbyak::util;
|
||||
|
||||
const size_t num_gprs = (ABI_ALL_GPRS & regs).count();
|
||||
|
|
@ -107,13 +107,13 @@ void ABI_PopCallerSaveRegistersAndAdjustStack(BlockOfCode& code, const std::size
|
|||
|
||||
// Windows ABI registers are not in the same allocation algorithm as unix's
|
||||
// Forwards ABI_ALL_CALLER_SAVE, with the bit for `exception` cleared, to
// ABI_PushRegistersAndAdjustStack using a frame size of 0.
// NOTE(review): this span is a rendered diff; the two `regs` declarations below are
// presumably the removed and added versions of one line -- confirm against the real file.
void ABI_PushCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, const HostLoc exception) {
|
||||
std::bitset<32> regs = ABI_ALL_CALLER_SAVE;
|
||||
auto regs = ABI_ALL_CALLER_SAVE;
|
||||
regs.reset(size_t(exception)); // drop the excluded location from the set
|
||||
ABI_PushRegistersAndAdjustStack(code, 0, regs);
|
||||
}
|
||||
|
||||
// Forwards ABI_ALL_CALLER_SAVE, with the bit for `exception` cleared, to
// ABI_PopRegistersAndAdjustStack using a frame size of 0.
// NOTE(review): this span is a rendered diff; the two `regs` declarations below are
// presumably the removed and added versions of one line -- confirm against the real file.
void ABI_PopCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, const HostLoc exception) {
|
||||
std::bitset<32> regs = ABI_ALL_CALLER_SAVE;
|
||||
auto regs = ABI_ALL_CALLER_SAVE;
|
||||
regs.reset(size_t(exception)); // drop the excluded location from the set
|
||||
ABI_PopRegistersAndAdjustStack(code, 0, regs);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
|
|
@ -6,23 +6,21 @@
|
|||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include "dynarmic/backend/x64/constant_pool.h"
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#include "dynarmic/common/assert.h"
|
||||
|
||||
#include "dynarmic/backend/x64/block_of_code.h"
|
||||
#include "dynarmic/backend/x64/constant_pool.h"
|
||||
|
||||
namespace Dynarmic::Backend::X64 {
|
||||
|
||||
// Sets up the constant pool inside the code buffer of `code`.
// Calls code.EnsureMemoryCommitted(align_size + size), emits an int3, aligns the emit
// position to align_size, then takes `size` bytes from code.AllocateFromCodeSpace and
// views them as a span of size / align_size ConstantT entries.
// NOTE(review): this span is a rendered diff; both the one-line and the multi-line
// member-initializer lists, and both layouts of the `pool` assignment, appear below.
// Presumably only one of each exists in the real file -- confirm.
ConstantPool::ConstantPool(BlockOfCode& code, size_t size)
|
||||
: code(code), insertion_point(0) {
|
||||
: code(code)
|
||||
, insertion_point(0)
|
||||
{
|
||||
code.EnsureMemoryCommitted(align_size + size);
|
||||
// NOTE(review): presumably a trap so execution cannot fall through into the pool -- confirm
code.int3();
|
||||
code.align(align_size);
|
||||
pool = std::span<ConstantT>(
|
||||
reinterpret_cast<ConstantT*>(code.AllocateFromCodeSpace(size)), size / align_size);
|
||||
pool = std::span<ConstantT>(reinterpret_cast<ConstantT*>(code.AllocateFromCodeSpace(size)), size / align_size);
|
||||
}
|
||||
|
||||
Xbyak::Address ConstantPool::GetConstant(const Xbyak::AddressFrame& frame, u64 lower, u64 upper) {
|
||||
|
|
|
|||
|
|
@ -8,8 +8,6 @@
|
|||
#pragma once
|
||||
|
||||
#include <bitset>
|
||||
#include <xbyak/xbyak.h>
|
||||
|
||||
#include "dynarmic/common/assert.h"
|
||||
#include "dynarmic/common/common_types.h"
|
||||
#include "dynarmic/backend/x64/xbyak.h"
|
||||
|
|
|
|||
|
|
@ -3,13 +3,11 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
|
||||
// TODO: Defining this crashes e v e r y t h i n g
|
||||
// #define XBYAK_STD_UNORDERED_SET ankerl::unordered_dense::set
|
||||
// #define XBYAK_STD_UNORDERED_MAP ankerl::unordered_dense::map
|
||||
// #define XBYAK_STD_UNORDERED_MULTIMAP boost::unordered_multimap
|
||||
|
||||
// You must ensure this matches with src/common/x64/xbyak.h on root dir
|
||||
#include <ankerl/unordered_dense.h>
|
||||
#include <boost/unordered_map.hpp>
|
||||
#define XBYAK_STD_UNORDERED_SET ankerl::unordered_dense::set
|
||||
#define XBYAK_STD_UNORDERED_MAP ankerl::unordered_dense::map
|
||||
#define XBYAK_STD_UNORDERED_MULTIMAP boost::unordered_multimap
|
||||
#include <xbyak/xbyak.h>
|
||||
#include <xbyak/xbyak_util.h>
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
|
|
@ -79,7 +79,7 @@ u64 FPToFixed(size_t ibits, FPT op, size_t fbits, bool unsigned_, FPCR fpcr, Rou
|
|||
}
|
||||
|
||||
// Detect Overflow
|
||||
const int min_exponent_for_overflow = static_cast<int>(ibits) - static_cast<int>(mcl::bit::highest_set_bit(value.mantissa + (round_up ? Safe::LogicalShiftRight<u64>(1, exponent) : 0))) - (unsigned_ ? 0 : 1);
|
||||
const int min_exponent_for_overflow = int(ibits) - int(mcl::bit::highest_set_bit(value.mantissa + (round_up ? Safe::LogicalShiftRight<u64>(1, exponent) : 0))) - (unsigned_ ? 0 : 1);
|
||||
if (exponent >= min_exponent_for_overflow) {
|
||||
// Positive overflow
|
||||
if (unsigned_ || !sign) {
|
||||
|
|
@ -88,10 +88,10 @@ u64 FPToFixed(size_t ibits, FPT op, size_t fbits, bool unsigned_, FPCR fpcr, Rou
|
|||
}
|
||||
|
||||
// Negative overflow
|
||||
const u64 min_value = Safe::Negate<u64>(static_cast<u64>(1) << (ibits - 1));
|
||||
const u64 min_value = Safe::Negate<u64>(u64(1) << (ibits - 1));
|
||||
if (!(exponent == min_exponent_for_overflow && int_result == min_value)) {
|
||||
FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
|
||||
return static_cast<u64>(1) << (ibits - 1);
|
||||
return u64(1) << (ibits - 1);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,3 +1,6 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2020 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
|
|
@ -11,7 +14,7 @@
|
|||
#include <utility>
|
||||
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include <xbyak/xbyak_util.h>
|
||||
#include "dynarmic/backend/x64/xbyak.h"
|
||||
|
||||
TEST_CASE("Host CPU supports", "[a64]") {
|
||||
using Cpu = Xbyak::util::Cpu;
|
||||
|
|
|
|||
|
|
@ -11,17 +11,9 @@
|
|||
|
||||
#include <fstream>
|
||||
#include <variant>
|
||||
|
||||
#ifdef ARCHITECTURE_x86_64
|
||||
// xbyak hates human beings
|
||||
#ifdef __GNUC__
|
||||
#pragma GCC diagnostic ignored "-Wconversion"
|
||||
#pragma GCC diagnostic ignored "-Wshadow"
|
||||
#endif
|
||||
#ifdef __clang__
|
||||
#pragma clang diagnostic ignored "-Wconversion"
|
||||
#pragma clang diagnostic ignored "-Wshadow"
|
||||
#endif
|
||||
#include <xbyak/xbyak.h>
|
||||
#include "common/x64/xbyak.h"
|
||||
#endif
|
||||
|
||||
#include "common/assert.h"
|
||||
|
|
@ -39,10 +31,6 @@
|
|||
#include "common/assert.h"
|
||||
#include "common/bit_field.h"
|
||||
#include "common/logging.h"
|
||||
#ifdef ARCHITECTURE_x86_64
|
||||
#include "common/x64/xbyak_abi.h"
|
||||
#include "common/x64/xbyak_util.h"
|
||||
#endif
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
|
||||
namespace Tegra {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue