mirror of
https://git.eden-emu.dev/eden-emu/eden
synced 2026-04-10 03:18:55 +02:00
[dynarmic] avoid stable_vector<> reallocations for shared labels (#3717)
This reduces some overhead due to frequent reallocations.

Signed-off-by: lizzie <lizzie@eden-emu.dev>
Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3717
Reviewed-by: Maufeat <sahyno1996@gmail.com>
Reviewed-by: CamilleLaVey <camillelavey99@gmail.com>
Co-authored-by: lizzie <lizzie@eden-emu.dev>
Co-committed-by: lizzie <lizzie@eden-emu.dev>
This commit is contained in:
parent
8e14f07a69
commit
cf7086de7c
10 changed files with 167 additions and 167 deletions
|
|
@ -59,8 +59,10 @@ static Xbyak::Address MJitStateExtReg(A32::ExtReg reg) {
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
|
|
||||||
A32EmitContext::A32EmitContext(const A32::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block)
|
A32EmitContext::A32EmitContext(const A32::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block, boost::container::stable_vector<Xbyak::Label>& shared_labels)
|
||||||
: EmitContext(reg_alloc, block), conf(conf) {}
|
: EmitContext(reg_alloc, block, shared_labels)
|
||||||
|
, conf(conf)
|
||||||
|
{}
|
||||||
|
|
||||||
A32::LocationDescriptor A32EmitContext::Location() const {
|
A32::LocationDescriptor A32EmitContext::Location() const {
|
||||||
return A32::LocationDescriptor{block.Location()};
|
return A32::LocationDescriptor{block.Location()};
|
||||||
|
|
@ -109,7 +111,8 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {
|
||||||
gprs.reset(size_t(HostLoc::R14));
|
gprs.reset(size_t(HostLoc::R14));
|
||||||
return gprs;
|
return gprs;
|
||||||
}(), any_xmm);
|
}(), any_xmm);
|
||||||
A32EmitContext ctx{conf, reg_alloc, block};
|
|
||||||
|
A32EmitContext ctx{conf, reg_alloc, block, shared_labels};
|
||||||
|
|
||||||
// Start emitting.
|
// Start emitting.
|
||||||
code.align();
|
code.align();
|
||||||
|
|
@ -168,6 +171,7 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {
|
||||||
|
|
||||||
auto const bdesc = RegisterBlock(descriptor, entrypoint, size);
|
auto const bdesc = RegisterBlock(descriptor, entrypoint, size);
|
||||||
code.DisableWriting();
|
code.DisableWriting();
|
||||||
|
shared_labels.clear();
|
||||||
return bdesc;
|
return bdesc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
||||||
/* This file is part of the dynarmic project.
|
/* This file is part of the dynarmic project.
|
||||||
|
|
@ -29,7 +29,7 @@ namespace Dynarmic::Backend::X64 {
|
||||||
class RegAlloc;
|
class RegAlloc;
|
||||||
|
|
||||||
struct A32EmitContext final : public EmitContext {
|
struct A32EmitContext final : public EmitContext {
|
||||||
A32EmitContext(const A32::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block);
|
A32EmitContext(const A32::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block, boost::container::stable_vector<Xbyak::Label>& shared_labels);
|
||||||
|
|
||||||
A32::LocationDescriptor Location() const;
|
A32::LocationDescriptor Location() const;
|
||||||
A32::LocationDescriptor EndLocation() const;
|
A32::LocationDescriptor EndLocation() const;
|
||||||
|
|
@ -130,6 +130,7 @@ public:
|
||||||
ankerl::unordered_dense::map<std::tuple<bool, size_t, int, int>, void (*)()> write_fallbacks;
|
ankerl::unordered_dense::map<std::tuple<bool, size_t, int, int>, void (*)()> write_fallbacks;
|
||||||
ankerl::unordered_dense::map<std::tuple<bool, size_t, int, int>, void (*)()> exclusive_write_fallbacks;
|
ankerl::unordered_dense::map<std::tuple<bool, size_t, int, int>, void (*)()> exclusive_write_fallbacks;
|
||||||
ankerl::unordered_dense::set<DoNotFastmemMarker> do_not_fastmem;
|
ankerl::unordered_dense::set<DoNotFastmemMarker> do_not_fastmem;
|
||||||
|
boost::container::stable_vector<Xbyak::Label> shared_labels;
|
||||||
void (*memory_read_128)() = nullptr; // Dummy
|
void (*memory_read_128)() = nullptr; // Dummy
|
||||||
void (*memory_write_128)() = nullptr; // Dummy
|
void (*memory_write_128)() = nullptr; // Dummy
|
||||||
const void* terminal_handler_pop_rsb_hint;
|
const void* terminal_handler_pop_rsb_hint;
|
||||||
|
|
|
||||||
|
|
@ -37,8 +37,10 @@ namespace Dynarmic::Backend::X64 {
|
||||||
|
|
||||||
using namespace Xbyak::util;
|
using namespace Xbyak::util;
|
||||||
|
|
||||||
A64EmitContext::A64EmitContext(const A64::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block)
|
A64EmitContext::A64EmitContext(const A64::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block, boost::container::stable_vector<Xbyak::Label>& shared_labels)
|
||||||
: EmitContext(reg_alloc, block), conf(conf) {}
|
: EmitContext(reg_alloc, block, shared_labels)
|
||||||
|
, conf(conf)
|
||||||
|
{}
|
||||||
|
|
||||||
A64::LocationDescriptor A64EmitContext::Location() const {
|
A64::LocationDescriptor A64EmitContext::Location() const {
|
||||||
return A64::LocationDescriptor{block.Location()};
|
return A64::LocationDescriptor{block.Location()};
|
||||||
|
|
@ -83,7 +85,8 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) noexcept {
|
||||||
gprs.reset(size_t(HostLoc::R14));
|
gprs.reset(size_t(HostLoc::R14));
|
||||||
return gprs;
|
return gprs;
|
||||||
}(), any_xmm};
|
}(), any_xmm};
|
||||||
A64EmitContext ctx{conf, reg_alloc, block};
|
|
||||||
|
A64EmitContext ctx{conf, reg_alloc, block, shared_labels};
|
||||||
|
|
||||||
// Start emitting.
|
// Start emitting.
|
||||||
code.align();
|
code.align();
|
||||||
|
|
@ -160,6 +163,7 @@ finish_this_inst:
|
||||||
|
|
||||||
auto bdesc = RegisterBlock(descriptor, entrypoint, size);
|
auto bdesc = RegisterBlock(descriptor, entrypoint, size);
|
||||||
code.DisableWriting();
|
code.DisableWriting();
|
||||||
|
shared_labels.clear();
|
||||||
return bdesc;
|
return bdesc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
||||||
/* This file is part of the dynarmic project.
|
/* This file is part of the dynarmic project.
|
||||||
|
|
@ -27,7 +27,7 @@
|
||||||
namespace Dynarmic::Backend::X64 {
|
namespace Dynarmic::Backend::X64 {
|
||||||
|
|
||||||
struct A64EmitContext final : public EmitContext {
|
struct A64EmitContext final : public EmitContext {
|
||||||
A64EmitContext(const A64::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block);
|
A64EmitContext(const A64::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block, boost::container::stable_vector<Xbyak::Label>& shared_labels);
|
||||||
|
|
||||||
A64::LocationDescriptor Location() const;
|
A64::LocationDescriptor Location() const;
|
||||||
bool IsSingleStep() const;
|
bool IsSingleStep() const;
|
||||||
|
|
@ -126,6 +126,7 @@ public:
|
||||||
ankerl::unordered_dense::map<std::tuple<bool, size_t, int, int>, void (*)()> write_fallbacks;
|
ankerl::unordered_dense::map<std::tuple<bool, size_t, int, int>, void (*)()> write_fallbacks;
|
||||||
ankerl::unordered_dense::map<std::tuple<bool, size_t, int, int>, void (*)()> exclusive_write_fallbacks;
|
ankerl::unordered_dense::map<std::tuple<bool, size_t, int, int>, void (*)()> exclusive_write_fallbacks;
|
||||||
ankerl::unordered_dense::set<DoNotFastmemMarker> do_not_fastmem;
|
ankerl::unordered_dense::set<DoNotFastmemMarker> do_not_fastmem;
|
||||||
|
boost::container::stable_vector<Xbyak::Label> shared_labels;
|
||||||
const void* terminal_handler_pop_rsb_hint = nullptr;
|
const void* terminal_handler_pop_rsb_hint = nullptr;
|
||||||
const void* terminal_handler_fast_dispatch_hint = nullptr;
|
const void* terminal_handler_fast_dispatch_hint = nullptr;
|
||||||
FastDispatchEntry& (*fast_dispatch_table_lookup)(u64) = nullptr;
|
FastDispatchEntry& (*fast_dispatch_table_lookup)(u64) = nullptr;
|
||||||
|
|
|
||||||
|
|
@ -32,8 +32,11 @@ namespace Dynarmic::Backend::X64 {
|
||||||
|
|
||||||
using namespace Xbyak::util;
|
using namespace Xbyak::util;
|
||||||
|
|
||||||
EmitContext::EmitContext(RegAlloc& reg_alloc, IR::Block& block)
|
EmitContext::EmitContext(RegAlloc& reg_alloc, IR::Block& block, boost::container::stable_vector<Xbyak::Label>& shared_labels)
|
||||||
: reg_alloc(reg_alloc), block(block) {}
|
: reg_alloc(reg_alloc)
|
||||||
|
, block(block)
|
||||||
|
, shared_labels(shared_labels)
|
||||||
|
{}
|
||||||
|
|
||||||
EmitContext::~EmitContext() = default;
|
EmitContext::~EmitContext() = default;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -16,11 +16,12 @@
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "dynarmic/mcl/bit.hpp"
|
|
||||||
#include <ankerl/unordered_dense.h>
|
#include <ankerl/unordered_dense.h>
|
||||||
#include "dynarmic/backend/x64/xbyak.h"
|
#include <boost/container/stable_vector.hpp>
|
||||||
#include <boost/container/small_vector.hpp>
|
#include <boost/container/small_vector.hpp>
|
||||||
|
|
||||||
|
#include "dynarmic/backend/x64/xbyak.h"
|
||||||
|
#include "dynarmic/mcl/bit.hpp"
|
||||||
#include "dynarmic/backend/exception_handler.h"
|
#include "dynarmic/backend/exception_handler.h"
|
||||||
#include "dynarmic/backend/x64/reg_alloc.h"
|
#include "dynarmic/backend/x64/reg_alloc.h"
|
||||||
#include "dynarmic/common/fp/fpcr.h"
|
#include "dynarmic/common/fp/fpcr.h"
|
||||||
|
|
@ -52,24 +53,23 @@ using VectorArray = std::array<T, A64FullVectorWidth::value / mcl::bitsizeof<T>>
|
||||||
template<typename T>
|
template<typename T>
|
||||||
using HalfVectorArray = std::array<T, A64FullVectorWidth::value / mcl::bitsizeof<T> / 2>;
|
using HalfVectorArray = std::array<T, A64FullVectorWidth::value / mcl::bitsizeof<T> / 2>;
|
||||||
|
|
||||||
|
using SharedLabel = Xbyak::Label*;
|
||||||
struct EmitContext {
|
struct EmitContext {
|
||||||
EmitContext(RegAlloc& reg_alloc, IR::Block& block);
|
EmitContext(RegAlloc& reg_alloc, IR::Block& block, boost::container::stable_vector<Xbyak::Label>& shared_labels);
|
||||||
virtual ~EmitContext();
|
virtual ~EmitContext();
|
||||||
virtual FP::FPCR FPCR(bool fpcr_controlled = true) const = 0;
|
virtual FP::FPCR FPCR(bool fpcr_controlled = true) const = 0;
|
||||||
virtual bool HasOptimization(OptimizationFlag flag) const = 0;
|
virtual bool HasOptimization(OptimizationFlag flag) const = 0;
|
||||||
|
|
||||||
RegAlloc& reg_alloc;
|
[[nodiscard]] inline Xbyak::Label* GenSharedLabel() noexcept {
|
||||||
IR::Block& block;
|
return &shared_labels.emplace_back();
|
||||||
|
}
|
||||||
|
|
||||||
std::vector<std::function<void()>> deferred_emits;
|
std::vector<std::function<void()>> deferred_emits;
|
||||||
|
RegAlloc& reg_alloc;
|
||||||
|
IR::Block& block;
|
||||||
|
boost::container::stable_vector<Xbyak::Label>& shared_labels;
|
||||||
};
|
};
|
||||||
|
|
||||||
using SharedLabel = std::shared_ptr<Xbyak::Label>;
|
|
||||||
|
|
||||||
inline SharedLabel GenSharedLabel() {
|
|
||||||
return std::make_shared<Xbyak::Label>();
|
|
||||||
}
|
|
||||||
|
|
||||||
class EmitX64 {
|
class EmitX64 {
|
||||||
public:
|
public:
|
||||||
struct BlockDescriptor {
|
struct BlockDescriptor {
|
||||||
|
|
|
||||||
|
|
@ -136,7 +136,7 @@ void ForceToDefaultNaN(BlockOfCode& code, Xbyak::Xmm result) {
|
||||||
|
|
||||||
template<size_t fsize>
|
template<size_t fsize>
|
||||||
SharedLabel ProcessNaN(BlockOfCode& code, EmitContext& ctx, Xbyak::Xmm a) {
|
SharedLabel ProcessNaN(BlockOfCode& code, EmitContext& ctx, Xbyak::Xmm a) {
|
||||||
SharedLabel nan = GenSharedLabel(), end = GenSharedLabel();
|
SharedLabel nan = ctx.GenSharedLabel(), end = ctx.GenSharedLabel();
|
||||||
|
|
||||||
FCODE(ucomis)(a, a);
|
FCODE(ucomis)(a, a);
|
||||||
code.jp(*nan, code.T_NEAR);
|
code.jp(*nan, code.T_NEAR);
|
||||||
|
|
@ -251,7 +251,7 @@ template<size_t fsize, typename Function>
|
||||||
void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
|
void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
SharedLabel end = GenSharedLabel();
|
SharedLabel end = ctx.GenSharedLabel();
|
||||||
|
|
||||||
Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]);
|
Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]);
|
||||||
|
|
||||||
|
|
@ -304,7 +304,7 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn)
|
||||||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
|
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
|
||||||
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(code);
|
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(code);
|
||||||
|
|
||||||
SharedLabel end = GenSharedLabel(), nan = GenSharedLabel();
|
SharedLabel end = ctx.GenSharedLabel(), nan = ctx.GenSharedLabel();
|
||||||
|
|
||||||
code.movaps(result, op1);
|
code.movaps(result, op1);
|
||||||
if constexpr (std::is_member_function_pointer_v<Function>) {
|
if constexpr (std::is_member_function_pointer_v<Function>) {
|
||||||
|
|
@ -413,7 +413,7 @@ static void EmitFPMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, bo
|
||||||
|
|
||||||
DenormalsAreZero<fsize>(code, ctx, {result, operand});
|
DenormalsAreZero<fsize>(code, ctx, {result, operand});
|
||||||
|
|
||||||
SharedLabel equal = GenSharedLabel(), end = GenSharedLabel();
|
SharedLabel equal = ctx.GenSharedLabel(), end = ctx.GenSharedLabel();
|
||||||
|
|
||||||
FCODE(ucomis)(result, operand);
|
FCODE(ucomis)(result, operand);
|
||||||
code.jz(*equal, code.T_NEAR);
|
code.jz(*equal, code.T_NEAR);
|
||||||
|
|
@ -484,7 +484,7 @@ static inline void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
SharedLabel end = GenSharedLabel(), z = GenSharedLabel();
|
SharedLabel end = ctx.GenSharedLabel(), z = ctx.GenSharedLabel();
|
||||||
|
|
||||||
FCODE(ucomis)(op1, op2);
|
FCODE(ucomis)(op1, op2);
|
||||||
code.jz(*z, code.T_NEAR);
|
code.jz(*z, code.T_NEAR);
|
||||||
|
|
@ -632,7 +632,7 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, bo
|
||||||
}
|
}
|
||||||
|
|
||||||
if (code.HasHostFeature(HostFeature::FMA | HostFeature::AVX)) {
|
if (code.HasHostFeature(HostFeature::FMA | HostFeature::AVX)) {
|
||||||
SharedLabel fallback = GenSharedLabel(), end = GenSharedLabel();
|
SharedLabel fallback = ctx.GenSharedLabel(), end = ctx.GenSharedLabel();
|
||||||
|
|
||||||
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]);
|
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]);
|
||||||
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
|
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
|
||||||
|
|
@ -843,7 +843,7 @@ static void EmitFPMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
|
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
|
||||||
const Xbyak::Reg64 tmp = do_default_nan ? INVALID_REG : ctx.reg_alloc.ScratchGpr(code);
|
const Xbyak::Reg64 tmp = do_default_nan ? INVALID_REG : ctx.reg_alloc.ScratchGpr(code);
|
||||||
|
|
||||||
SharedLabel end = GenSharedLabel(), nan = GenSharedLabel();
|
SharedLabel end = ctx.GenSharedLabel(), nan = ctx.GenSharedLabel();
|
||||||
|
|
||||||
if (code.HasHostFeature(HostFeature::AVX)) {
|
if (code.HasHostFeature(HostFeature::AVX)) {
|
||||||
FCODE(vmuls)(result, op1, op2);
|
FCODE(vmuls)(result, op1, op2);
|
||||||
|
|
@ -981,7 +981,7 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
|
||||||
}
|
}
|
||||||
|
|
||||||
if (code.HasHostFeature(HostFeature::FMA)) {
|
if (code.HasHostFeature(HostFeature::FMA)) {
|
||||||
SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel();
|
SharedLabel end = ctx.GenSharedLabel(), fallback = ctx.GenSharedLabel();
|
||||||
|
|
||||||
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]);
|
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]);
|
||||||
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
|
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
|
||||||
|
|
@ -1129,7 +1129,7 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
|
||||||
const Xbyak::Xmm value = ctx.reg_alloc.ScratchXmm(code);
|
const Xbyak::Xmm value = ctx.reg_alloc.ScratchXmm(code);
|
||||||
[[maybe_unused]] const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32();
|
[[maybe_unused]] const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32();
|
||||||
|
|
||||||
SharedLabel bad_values = GenSharedLabel(), end = GenSharedLabel();
|
SharedLabel bad_values = ctx.GenSharedLabel(), end = ctx.GenSharedLabel();
|
||||||
|
|
||||||
code.movaps(value, operand);
|
code.movaps(value, operand);
|
||||||
|
|
||||||
|
|
@ -1296,7 +1296,7 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
|
||||||
}
|
}
|
||||||
|
|
||||||
if (code.HasHostFeature(HostFeature::FMA | HostFeature::AVX)) {
|
if (code.HasHostFeature(HostFeature::FMA | HostFeature::AVX)) {
|
||||||
SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel();
|
SharedLabel end = ctx.GenSharedLabel(), fallback = ctx.GenSharedLabel();
|
||||||
|
|
||||||
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]);
|
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]);
|
||||||
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
|
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
|
||||||
|
|
@ -1641,7 +1641,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm(code);
|
const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm(code);
|
||||||
|
|
||||||
if (!unsigned_) {
|
if (!unsigned_) {
|
||||||
SharedLabel saturate_max = GenSharedLabel(), end = GenSharedLabel();
|
SharedLabel saturate_max = ctx.GenSharedLabel(), end = ctx.GenSharedLabel();
|
||||||
|
|
||||||
ZeroIfNaN<64>(code, src, scratch);
|
ZeroIfNaN<64>(code, src, scratch);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -86,7 +86,7 @@ void AxxEmitX64::EmitMemoryRead(AxxEmitContext& ctx, IR::Inst* inst) {
|
||||||
|
|
||||||
const auto wrapped_fn = read_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value_idx)];
|
const auto wrapped_fn = read_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value_idx)];
|
||||||
|
|
||||||
SharedLabel abort = GenSharedLabel(), end = GenSharedLabel();
|
SharedLabel abort = ctx.GenSharedLabel(), end = ctx.GenSharedLabel();
|
||||||
|
|
||||||
if (fastmem_marker) {
|
if (fastmem_marker) {
|
||||||
// Use fastmem
|
// Use fastmem
|
||||||
|
|
@ -108,7 +108,7 @@ void AxxEmitX64::EmitMemoryRead(AxxEmitContext& ctx, IR::Inst* inst) {
|
||||||
conf.recompile_on_fastmem_failure,
|
conf.recompile_on_fastmem_failure,
|
||||||
});
|
});
|
||||||
|
|
||||||
EmitCheckMemoryAbort(ctx, inst, end.get());
|
EmitCheckMemoryAbort(ctx, inst, end);
|
||||||
code.jmp(*end, code.T_NEAR);
|
code.jmp(*end, code.T_NEAR);
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -120,7 +120,7 @@ void AxxEmitX64::EmitMemoryRead(AxxEmitContext& ctx, IR::Inst* inst) {
|
||||||
ctx.deferred_emits.emplace_back([=, this, &ctx] {
|
ctx.deferred_emits.emplace_back([=, this, &ctx] {
|
||||||
code.L(*abort);
|
code.L(*abort);
|
||||||
code.call(wrapped_fn);
|
code.call(wrapped_fn);
|
||||||
EmitCheckMemoryAbort(ctx, inst, end.get());
|
EmitCheckMemoryAbort(ctx, inst, end);
|
||||||
code.jmp(*end, code.T_NEAR);
|
code.jmp(*end, code.T_NEAR);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
@ -173,7 +173,7 @@ void AxxEmitX64::EmitMemoryWrite(AxxEmitContext& ctx, IR::Inst* inst) {
|
||||||
|
|
||||||
const auto wrapped_fn = write_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value_idx)];
|
const auto wrapped_fn = write_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value_idx)];
|
||||||
|
|
||||||
SharedLabel abort = GenSharedLabel(), end = GenSharedLabel();
|
SharedLabel abort = ctx.GenSharedLabel(), end = ctx.GenSharedLabel();
|
||||||
|
|
||||||
if (fastmem_marker) {
|
if (fastmem_marker) {
|
||||||
// Use fastmem
|
// Use fastmem
|
||||||
|
|
@ -195,7 +195,7 @@ void AxxEmitX64::EmitMemoryWrite(AxxEmitContext& ctx, IR::Inst* inst) {
|
||||||
conf.recompile_on_fastmem_failure,
|
conf.recompile_on_fastmem_failure,
|
||||||
});
|
});
|
||||||
|
|
||||||
EmitCheckMemoryAbort(ctx, inst, end.get());
|
EmitCheckMemoryAbort(ctx, inst, end);
|
||||||
code.jmp(*end, code.T_NEAR);
|
code.jmp(*end, code.T_NEAR);
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -207,7 +207,7 @@ void AxxEmitX64::EmitMemoryWrite(AxxEmitContext& ctx, IR::Inst* inst) {
|
||||||
ctx.deferred_emits.emplace_back([=, this, &ctx] {
|
ctx.deferred_emits.emplace_back([=, this, &ctx] {
|
||||||
code.L(*abort);
|
code.L(*abort);
|
||||||
code.call(wrapped_fn);
|
code.call(wrapped_fn);
|
||||||
EmitCheckMemoryAbort(ctx, inst, end.get());
|
EmitCheckMemoryAbort(ctx, inst, end);
|
||||||
code.jmp(*end, code.T_NEAR);
|
code.jmp(*end, code.T_NEAR);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
@ -352,7 +352,7 @@ void AxxEmitX64::EmitExclusiveReadMemoryInline(AxxEmitContext& ctx, IR::Inst* in
|
||||||
|
|
||||||
const auto fastmem_marker = ShouldFastmem(ctx, inst);
|
const auto fastmem_marker = ShouldFastmem(ctx, inst);
|
||||||
if (fastmem_marker) {
|
if (fastmem_marker) {
|
||||||
SharedLabel abort = GenSharedLabel(), end = GenSharedLabel();
|
SharedLabel abort = ctx.GenSharedLabel(), end = ctx.GenSharedLabel();
|
||||||
bool require_abort_handling = false;
|
bool require_abort_handling = false;
|
||||||
|
|
||||||
const auto src_ptr = EmitFastmemVAddr(code, ctx, *abort, vaddr, require_abort_handling);
|
const auto src_ptr = EmitFastmemVAddr(code, ctx, *abort, vaddr, require_abort_handling);
|
||||||
|
|
@ -427,7 +427,7 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
|
||||||
|
|
||||||
EmitExclusiveLock(code, conf, tmp, tmp2.cvt32());
|
EmitExclusiveLock(code, conf, tmp, tmp2.cvt32());
|
||||||
|
|
||||||
SharedLabel end = GenSharedLabel();
|
SharedLabel end = ctx.GenSharedLabel();
|
||||||
|
|
||||||
code.mov(status, u32(1));
|
code.mov(status, u32(1));
|
||||||
code.movzx(tmp.cvt32(), code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)]);
|
code.movzx(tmp.cvt32(), code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)]);
|
||||||
|
|
@ -460,7 +460,7 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
|
||||||
|
|
||||||
const auto fastmem_marker = ShouldFastmem(ctx, inst);
|
const auto fastmem_marker = ShouldFastmem(ctx, inst);
|
||||||
if (fastmem_marker) {
|
if (fastmem_marker) {
|
||||||
SharedLabel abort = GenSharedLabel();
|
SharedLabel abort = ctx.GenSharedLabel();
|
||||||
bool require_abort_handling = false;
|
bool require_abort_handling = false;
|
||||||
|
|
||||||
const auto dest_ptr = EmitFastmemVAddr(code, ctx, *abort, vaddr, require_abort_handling, tmp);
|
const auto dest_ptr = EmitFastmemVAddr(code, ctx, *abort, vaddr, require_abort_handling, tmp);
|
||||||
|
|
|
||||||
|
|
@ -54,7 +54,7 @@ void EmitDetectMisalignedVAddr(BlockOfCode& code, EmitContext& ctx, size_t bitsi
|
||||||
if (ctx.conf.only_detect_misalignment_via_page_table_on_page_boundary) {
|
if (ctx.conf.only_detect_misalignment_via_page_table_on_page_boundary) {
|
||||||
const u32 page_align_mask = static_cast<u32>(page_table_const_size - 1) & ~align_mask;
|
const u32 page_align_mask = static_cast<u32>(page_table_const_size - 1) & ~align_mask;
|
||||||
|
|
||||||
SharedLabel detect_boundary = GenSharedLabel(), resume = GenSharedLabel();
|
SharedLabel detect_boundary = ctx.GenSharedLabel(), resume = ctx.GenSharedLabel();
|
||||||
|
|
||||||
code.jnz(*detect_boundary, code.T_NEAR);
|
code.jnz(*detect_boundary, code.T_NEAR);
|
||||||
code.L(*resume);
|
code.L(*resume);
|
||||||
|
|
|
||||||
|
|
@ -24,6 +24,7 @@
|
||||||
#include "dynarmic/common/fp/fpcr.h"
|
#include "dynarmic/common/fp/fpcr.h"
|
||||||
#include "dynarmic/common/fp/info.h"
|
#include "dynarmic/common/fp/info.h"
|
||||||
#include "dynarmic/common/fp/op.h"
|
#include "dynarmic/common/fp/op.h"
|
||||||
|
#include "dynarmic/common/fp/rounding_mode.h"
|
||||||
#include "dynarmic/common/fp/util.h"
|
#include "dynarmic/common/fp/util.h"
|
||||||
#include "dynarmic/interface/optimization_flags.h"
|
#include "dynarmic/interface/optimization_flags.h"
|
||||||
#include "dynarmic/ir/basic_block.h"
|
#include "dynarmic/ir/basic_block.h"
|
||||||
|
|
@ -93,7 +94,7 @@ void HandleNaNs(BlockOfCode& code, EmitContext& ctx, bool fpcr_controlled, std::
|
||||||
code.cmp(bitmask, 0);
|
code.cmp(bitmask, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
SharedLabel end = GenSharedLabel(), nan = GenSharedLabel();
|
SharedLabel end = ctx.GenSharedLabel(), nan = ctx.GenSharedLabel();
|
||||||
|
|
||||||
code.jnz(*nan, code.T_NEAR);
|
code.jnz(*nan, code.T_NEAR);
|
||||||
code.L(*end);
|
code.L(*end);
|
||||||
|
|
@ -188,23 +189,6 @@ void ForceToDefaultNaN(BlockOfCode& code, FP::FPCR fpcr, Xbyak::Xmm result) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<size_t fsize>
|
|
||||||
void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm result) {
|
|
||||||
const Xbyak::Xmm nan_mask = xmm0;
|
|
||||||
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
|
|
||||||
constexpr u32 nan_to_zero = FixupLUT(FpFixup::PosZero,
|
|
||||||
FpFixup::PosZero);
|
|
||||||
FCODE(vfixupimmp)(result, result, code.BConst<32>(ptr_b, nan_to_zero), u8(0));
|
|
||||||
} else if (code.HasHostFeature(HostFeature::AVX)) {
|
|
||||||
FCODE(vcmpordp)(nan_mask, result, result);
|
|
||||||
FCODE(vandp)(result, result, nan_mask);
|
|
||||||
} else {
|
|
||||||
code.movaps(nan_mask, result);
|
|
||||||
FCODE(cmpordp)(nan_mask, nan_mask);
|
|
||||||
code.andps(result, nan_mask);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template<size_t fsize>
|
template<size_t fsize>
|
||||||
void DenormalsAreZero(BlockOfCode& code, FP::FPCR fpcr, std::initializer_list<Xbyak::Xmm> to_daz, Xbyak::Xmm tmp) {
|
void DenormalsAreZero(BlockOfCode& code, FP::FPCR fpcr, std::initializer_list<Xbyak::Xmm> to_daz, Xbyak::Xmm tmp) {
|
||||||
if (fpcr.FZ()) {
|
if (fpcr.FZ()) {
|
||||||
|
|
@ -1330,7 +1314,7 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
|
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
|
||||||
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
|
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
|
||||||
|
|
||||||
SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel();
|
SharedLabel end = ctx.GenSharedLabel(), fallback = ctx.GenSharedLabel();
|
||||||
|
|
||||||
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
|
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
|
||||||
code.movaps(result, xmm_a);
|
code.movaps(result, xmm_a);
|
||||||
|
|
@ -1603,7 +1587,7 @@ static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
|
||||||
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
|
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
|
||||||
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
|
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
|
||||||
|
|
||||||
SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel();
|
SharedLabel end = ctx.GenSharedLabel(), fallback = ctx.GenSharedLabel();
|
||||||
|
|
||||||
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
|
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
|
||||||
code.movaps(result, GetVectorOf<fsize, false, 0, 2>(code));
|
code.movaps(result, GetVectorOf<fsize, false, 0, 2>(code));
|
||||||
|
|
@ -1776,7 +1760,7 @@ static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
|
||||||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
|
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
|
||||||
const Xbyak::Xmm value = ctx.reg_alloc.ScratchXmm(code);
|
const Xbyak::Xmm value = ctx.reg_alloc.ScratchXmm(code);
|
||||||
|
|
||||||
SharedLabel bad_values = GenSharedLabel(), end = GenSharedLabel();
|
SharedLabel bad_values = ctx.GenSharedLabel(), end = ctx.GenSharedLabel();
|
||||||
|
|
||||||
code.movaps(value, operand);
|
code.movaps(value, operand);
|
||||||
|
|
||||||
|
|
@ -1867,7 +1851,7 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
|
||||||
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
|
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
|
||||||
const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm(code);
|
const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm(code);
|
||||||
|
|
||||||
SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel();
|
SharedLabel end = ctx.GenSharedLabel(), fallback = ctx.GenSharedLabel();
|
||||||
|
|
||||||
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
|
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
|
||||||
code.vmovaps(result, GetVectorOf<fsize, false, 0, 3>(code));
|
code.vmovaps(result, GetVectorOf<fsize, false, 0, 3>(code));
|
||||||
|
|
@ -2004,15 +1988,12 @@ void EmitX64::EmitFPVectorToHalf32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
template<size_t fsize, bool unsigned_>
|
template<size_t fsize, bool unsigned_>
|
||||||
void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
const size_t fbits = inst->GetArg(1).GetU8();
|
const size_t fbits = inst->GetArg(1).GetU8();
|
||||||
const auto rounding = static_cast<FP::RoundingMode>(inst->GetArg(2).GetU8());
|
const auto rounding = FP::RoundingMode(inst->GetArg(2).GetU8());
|
||||||
[[maybe_unused]] const bool fpcr_controlled = inst->GetArg(3).GetU1();
|
[[maybe_unused]] const bool fpcr_controlled = inst->GetArg(3).GetU1();
|
||||||
|
|
||||||
if constexpr (fsize != 16) {
|
if (code.HasHostFeature(HostFeature::SSE41) && fsize != 16 && rounding != FP::RoundingMode::ToNearest_TieAwayFromZero) {
|
||||||
if (code.HasHostFeature(HostFeature::SSE41) && rounding != FP::RoundingMode::ToNearest_TieAwayFromZero) {
|
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(code, args[0]);
|
const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(code, args[0]);
|
||||||
|
|
||||||
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
|
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
|
||||||
const int round_imm = [&] {
|
const int round_imm = [&] {
|
||||||
switch (rounding) {
|
switch (rounding) {
|
||||||
|
|
@ -2027,7 +2008,6 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
return 0b11;
|
return 0b11;
|
||||||
}
|
}
|
||||||
}();
|
}();
|
||||||
|
|
||||||
const auto perform_conversion = [&code, &ctx](const Xbyak::Xmm& src) {
|
const auto perform_conversion = [&code, &ctx](const Xbyak::Xmm& src) {
|
||||||
// MSVC doesn't allow us to use a [&] capture, so we have to do this instead.
|
// MSVC doesn't allow us to use a [&] capture, so we have to do this instead.
|
||||||
(void)ctx;
|
(void)ctx;
|
||||||
|
|
@ -2052,16 +2032,26 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
if (fbits != 0) {
|
if (fbits != 0) {
|
||||||
const u64 scale_factor = fsize == 32
|
const u64 scale_factor = fsize == 32
|
||||||
? static_cast<u64>(fbits + 127) << 23
|
? u64(fbits + 127) << 23
|
||||||
: static_cast<u64>(fbits + 1023) << 52;
|
: u64(fbits + 1023) << 52;
|
||||||
FCODE(mulp)(src, GetVectorOf<fsize>(code, scale_factor));
|
FCODE(mulp)(src, GetVectorOf<fsize>(code, scale_factor));
|
||||||
}
|
}
|
||||||
|
|
||||||
FCODE(roundp)(src, src, static_cast<u8>(round_imm));
|
FCODE(roundp)(src, src, u8(round_imm));
|
||||||
ZeroIfNaN<fsize>(code, src);
|
const Xbyak::Xmm nan_mask = xmm0;
|
||||||
|
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
|
||||||
|
static constexpr u32 nan_to_zero = FixupLUT(FpFixup::PosZero, FpFixup::PosZero);
|
||||||
|
FCODE(vfixupimmp)(src, src, code.BConst<32>(ptr_b, nan_to_zero), u8(0));
|
||||||
|
} else if (code.HasHostFeature(HostFeature::AVX)) {
|
||||||
|
FCODE(vcmpordp)(nan_mask, src, src);
|
||||||
|
FCODE(vandp)(src, src, nan_mask);
|
||||||
|
} else {
|
||||||
|
code.movaps(nan_mask, src);
|
||||||
|
FCODE(cmpordp)(nan_mask, nan_mask);
|
||||||
|
code.andps(src, nan_mask);
|
||||||
|
}
|
||||||
|
|
||||||
constexpr u64 float_upper_limit_signed = fsize == 32 ? 0x4f000000 : 0x43e0000000000000;
|
constexpr u64 float_upper_limit_signed = fsize == 32 ? 0x4f000000 : 0x43e0000000000000;
|
||||||
[[maybe_unused]] constexpr u64 float_upper_limit_unsigned = fsize == 32 ? 0x4f800000 : 0x43f0000000000000;
|
[[maybe_unused]] constexpr u64 float_upper_limit_unsigned = fsize == 32 ? 0x4f800000 : 0x43f0000000000000;
|
||||||
|
|
@ -2074,7 +2064,7 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
|
||||||
// Convert positive values to unsigned integers, write 0 anywhere else
|
// Convert positive values to unsigned integers, write 0 anywhere else
|
||||||
// vcvttp*2u*q already saturates out-of-range values to (0xFFFF...)
|
// vcvttp*2u*q already saturates out-of-range values to (0xFFFF...)
|
||||||
if constexpr (fsize == 32) {
|
if (fsize == 32) {
|
||||||
code.vcvttps2udq(src | k1 | T_z, src);
|
code.vcvttps2udq(src | k1 | T_z, src);
|
||||||
} else {
|
} else {
|
||||||
code.vcvttpd2uqq(src | k1 | T_z, src);
|
code.vcvttpd2uqq(src | k1 | T_z, src);
|
||||||
|
|
@ -2107,18 +2097,15 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
} else {
|
} else {
|
||||||
using FPT = mcl::unsigned_integer_of_size<fsize>; // WORKAROUND: For issue 678 on MSVC
|
using FPT = mcl::unsigned_integer_of_size<fsize>; // WORKAROUND: For issue 678 on MSVC
|
||||||
constexpr u64 integer_max = FPT((std::numeric_limits<std::conditional_t<unsigned_, FPT, std::make_signed_t<FPT>>>::max)());
|
constexpr u64 integer_max = FPT((std::numeric_limits<std::conditional_t<unsigned_, FPT, std::make_signed_t<FPT>>>::max)());
|
||||||
|
|
||||||
code.movaps(xmm0, GetVectorOf<fsize, float_upper_limit_signed>(code));
|
code.movaps(xmm0, GetVectorOf<fsize, float_upper_limit_signed>(code));
|
||||||
FCODE(cmplep)(xmm0, src);
|
FCODE(cmplep)(xmm0, src);
|
||||||
perform_conversion(src);
|
perform_conversion(src);
|
||||||
FCODE(blendvp)(src, GetVectorOf<fsize, integer_max>(code));
|
FCODE(blendvp)(src, GetVectorOf<fsize, integer_max>(code));
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(code, inst, src);
|
ctx.reg_alloc.DefineValue(code, inst, src);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
using FPT = mcl::unsigned_integer_of_size<fsize>; // WORKAROUND: For issue 678 on MSVC
|
using FPT = mcl::unsigned_integer_of_size<fsize>; // WORKAROUND: For issue 678 on MSVC
|
||||||
auto const func = [rounding]() -> void(*)(VectorArray<FPT>& output, const VectorArray<FPT>& input, FP::FPCR fpcr, FP::FPSR& fpsr) {
|
auto const func = [rounding]() -> void(*)(VectorArray<FPT>& output, const VectorArray<FPT>& input, FP::FPCR fpcr, FP::FPSR& fpsr) {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue