fixes for dynarmic on higher batch sizes

This commit is contained in:
lizzie 2026-03-17 23:37:47 +00:00
parent 5146e41b80
commit cbd9d7a421
4 changed files with 8 additions and 15 deletions

View file

@@ -6,6 +6,8 @@
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include <bit>
#include "dynarmic/backend/x64/xbyak.h"

View file

@@ -32,7 +32,7 @@ FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2) {
return std::make_tuple(exponent, value);
}();
if (product_value == 0) {
if (product_value == u128(0, 0)) {
return addend;
}
@@ -52,13 +52,13 @@ FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2) {
}
// addend < product
const u128 result = product_value + StickyLogicalShiftRight(addend.mantissa, exp_diff - normalized_point_position);
const u128 result = product_value + StickyLogicalShiftRight(u128(addend.mantissa, 0), exp_diff - normalized_point_position);
return ReduceMantissa(product_sign, product_exponent, result);
}
// Subtraction
const u128 addend_long = u128(addend.mantissa) << normalized_point_position;
const u128 addend_long = u128(addend.mantissa, 0) << normalized_point_position;
bool result_sign;
u128 result;

View file

@@ -137,7 +137,7 @@ u128 StickyLogicalShiftRight(u128 operand, int amount) {
}
if (operand.lower != 0 || operand.upper != 0) {
return u128(1);
return u128(1, 0);
}
return {};
}

View file

@@ -23,22 +23,13 @@ struct u128 {
u128(u128&&) = default;
u128& operator=(const u128&) = default;
u128& operator=(u128&&) = default;
u128(u64 lower_, u64 upper_)
: lower(lower_), upper(upper_) {}
template<typename T>
/* implicit */ u128(T value)
: lower(value), upper(0) {
static_assert(std::is_integral_v<T>);
static_assert(mcl::bitsizeof<T> <= mcl::bitsizeof<u64>);
}
explicit u128(u64 lower_, u64 upper_) : lower(lower_), upper(upper_) {}
u64 lower = 0;
u64 upper = 0;
template<size_t bit_position>
bool Bit() const {
[[nodiscard]] inline bool Bit() const {
static_assert(bit_position < 128);
if constexpr (bit_position < 64) {
return mcl::bit::get_bit<bit_position>(lower);