mirror of
https://git.eden-emu.dev/eden-emu/eden
synced 2026-06-01 20:47:08 +02:00
[core/core_timing] better MWAITX and WAITPKG delays (#3984)
This implements the MWAITX and WAITPKG extensions (umonitor, mwait) for CPUs that support them. It reduces wait times and bypasses the OS timing facilities, which are slow (notably on Windows). In general it should respond within 0.2 to 0.5 microseconds (since most requests wait for that long). It also includes a general rework of static constructors and related code. Signed-off-by: lizzie <lizzie@eden-emu.dev> Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3984 Reviewed-by: MaranBr <maranbr@eden-emu.dev> Reviewed-by: crueter <crueter@eden-emu.dev>
This commit is contained in:
parent
ff7bbaea7d
commit
7c32cf03a1
19 changed files with 477 additions and 533 deletions
|
|
@ -141,12 +141,12 @@ add_library(
|
||||||
vector_math.h
|
vector_math.h
|
||||||
virtual_buffer.cpp
|
virtual_buffer.cpp
|
||||||
virtual_buffer.h
|
virtual_buffer.h
|
||||||
wall_clock.cpp
|
|
||||||
wall_clock.h
|
|
||||||
zstd_compression.cpp
|
zstd_compression.cpp
|
||||||
zstd_compression.h
|
zstd_compression.h
|
||||||
fs/ryujinx_compat.h fs/ryujinx_compat.cpp
|
fs/ryujinx_compat.h fs/ryujinx_compat.cpp
|
||||||
fs/symlink.h fs/symlink.cpp
|
fs/symlink.h fs/symlink.cpp
|
||||||
|
cpu_features.cpp
|
||||||
|
cpu_features.h
|
||||||
httplib.h
|
httplib.h
|
||||||
net/net.h net/net.cpp)
|
net/net.h net/net.cpp)
|
||||||
|
|
||||||
|
|
@ -180,10 +180,7 @@ endif()
|
||||||
if(ARCHITECTURE_x86_64)
|
if(ARCHITECTURE_x86_64)
|
||||||
target_sources(
|
target_sources(
|
||||||
common
|
common
|
||||||
PRIVATE x64/cpu_detect.cpp
|
PRIVATE
|
||||||
x64/cpu_detect.h
|
|
||||||
x64/cpu_wait.cpp
|
|
||||||
x64/cpu_wait.h
|
|
||||||
x64/rdtsc.cpp
|
x64/rdtsc.cpp
|
||||||
x64/rdtsc.h
|
x64/rdtsc.h
|
||||||
x64/xbyak.h)
|
x64/xbyak.h)
|
||||||
|
|
@ -234,7 +231,7 @@ if(CXX_CLANG)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (BOOST_NO_HEADERS)
|
if (BOOST_NO_HEADERS)
|
||||||
target_link_libraries(common PUBLIC Boost::algorithm Boost::icl Boost::pool)
|
target_link_libraries(common PUBLIC Boost::algorithm Boost::heap Boost::icl Boost::pool)
|
||||||
else()
|
else()
|
||||||
target_link_libraries(common PUBLIC Boost::headers)
|
target_link_libraries(common PUBLIC Boost::headers)
|
||||||
endif()
|
endif()
|
||||||
|
|
|
||||||
|
|
@ -10,34 +10,32 @@
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <iterator>
|
#include <iterator>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
|
#include <algorithm>
|
||||||
#include <string_view>
|
#include <string_view>
|
||||||
#include <thread>
|
#include <thread>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include "common/bit_util.h"
|
|
||||||
#include "common/common_types.h"
|
|
||||||
#include "common/logging.h"
|
|
||||||
#include "common/x64/cpu_detect.h"
|
|
||||||
#include "common/x64/rdtsc.h"
|
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
#include <windows.h>
|
#include <windows.h>
|
||||||
#endif
|
#elif defined(__DragonFly__) || defined(__FreeBSD__)
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
|
||||||
#include <intrin.h>
|
|
||||||
|
|
||||||
static inline u64 xgetbv(u32 index) {
|
|
||||||
return _xgetbv(index);
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
|
|
||||||
#if defined(__DragonFly__) || defined(__FreeBSD__)
|
|
||||||
// clang-format off
|
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <machine/cpufunc.h>
|
#include <machine/cpufunc.h>
|
||||||
// clang-format on
|
#elif defined(__ANDROID__)
|
||||||
|
#include <sys/system_properties.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include "common/steady_clock.h"
|
||||||
|
#include "common/uint128.h"
|
||||||
|
#include "common/bit_util.h"
|
||||||
|
#include "common/common_types.h"
|
||||||
|
#include "common/cpu_features.h"
|
||||||
|
#include "common/logging.h"
|
||||||
|
|
||||||
|
#ifdef ARCHITECTURE_x86_64
|
||||||
|
#include "common/x64/rdtsc.h"
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#include <intrin.h>
|
||||||
|
static inline u64 xgetbv(u32 index) { return _xgetbv(index); }
|
||||||
|
#else
|
||||||
static inline void __cpuidex(int info[4], u32 function_id, u32 subfunction_id) {
|
static inline void __cpuidex(int info[4], u32 function_id, u32 subfunction_id) {
|
||||||
#if defined(__DragonFly__) || defined(__FreeBSD__)
|
#if defined(__DragonFly__) || defined(__FreeBSD__)
|
||||||
// Despite the name, this is just do_cpuid() with ECX as second input.
|
// Despite the name, this is just do_cpuid() with ECX as second input.
|
||||||
|
|
@ -50,11 +48,7 @@ static inline void __cpuidex(int info[4], u32 function_id, u32 subfunction_id) {
|
||||||
: "a"(function_id), "c"(subfunction_id));
|
: "a"(function_id), "c"(subfunction_id));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
static inline void __cpuid(int info[4], u32 function_id) { return __cpuidex(info, function_id, 0); }
|
||||||
static inline void __cpuid(int info[4], u32 function_id) {
|
|
||||||
return __cpuidex(info, function_id, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
#define _XCR_XFEATURE_ENABLED_MASK 0
|
#define _XCR_XFEATURE_ENABLED_MASK 0
|
||||||
static inline u64 xgetbv(u32 index) {
|
static inline u64 xgetbv(u32 index) {
|
||||||
u32 eax, edx;
|
u32 eax, edx;
|
||||||
|
|
@ -62,9 +56,10 @@ static inline u64 xgetbv(u32 index) {
|
||||||
return ((u64)edx << 32) | eax;
|
return ((u64)edx << 32) | eax;
|
||||||
}
|
}
|
||||||
#endif // _MSC_VER
|
#endif // _MSC_VER
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace Common {
|
namespace Common {
|
||||||
|
#ifdef ARCHITECTURE_x86_64
|
||||||
CPUCaps::Manufacturer CPUCaps::ParseManufacturer(std::string_view brand_string) {
|
CPUCaps::Manufacturer CPUCaps::ParseManufacturer(std::string_view brand_string) {
|
||||||
if (brand_string == "GenuineIntel") {
|
if (brand_string == "GenuineIntel") {
|
||||||
return Manufacturer::Intel;
|
return Manufacturer::Intel;
|
||||||
|
|
@ -76,13 +71,53 @@ CPUCaps::Manufacturer CPUCaps::ParseManufacturer(std::string_view brand_string)
|
||||||
return Manufacturer::Unknown;
|
return Manufacturer::Unknown;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Detects the various CPU features
|
std::optional<int> GetProcessorCount() {
|
||||||
static CPUCaps Detect() {
|
#if defined(_WIN32)
|
||||||
|
// Get the buffer length.
|
||||||
|
DWORD length = 0;
|
||||||
|
GetLogicalProcessorInformation(nullptr, &length);
|
||||||
|
if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
|
||||||
|
LOG_ERROR(Frontend, "Failed to query core count.");
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
std::vector<SYSTEM_LOGICAL_PROCESSOR_INFORMATION> buffer(
|
||||||
|
length / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION));
|
||||||
|
// Now query the core count.
|
||||||
|
if (!GetLogicalProcessorInformation(buffer.data(), &length)) {
|
||||||
|
LOG_ERROR(Frontend, "Failed to query core count.");
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
return static_cast<int>(
|
||||||
|
std::count_if(buffer.cbegin(), buffer.cend(), [](const auto& proc_info) {
|
||||||
|
return proc_info.Relationship == RelationProcessorCore;
|
||||||
|
}));
|
||||||
|
#elif defined(__unix__)
|
||||||
|
const int thread_count = std::thread::hardware_concurrency();
|
||||||
|
std::ifstream smt("/sys/devices/system/cpu/smt/active");
|
||||||
|
char state = '0';
|
||||||
|
if (smt) {
|
||||||
|
smt.read(&state, sizeof(state));
|
||||||
|
}
|
||||||
|
switch (state) {
|
||||||
|
case '0':
|
||||||
|
return thread_count;
|
||||||
|
case '1':
|
||||||
|
return thread_count / 2;
|
||||||
|
default:
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
// Shame on you
|
||||||
|
return std::nullopt;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/// @brief Detects the various CPU features
|
||||||
|
const CPUCaps g_cpu_caps = [] {
|
||||||
CPUCaps caps = {};
|
CPUCaps caps = {};
|
||||||
|
|
||||||
// Assumes the CPU supports the CPUID instruction. Those that don't would likely not support
|
// Assumes the CPU supports the CPUID instruction. Those that don't would likely not support
|
||||||
// yuzu at all anyway
|
// yuzu at all anyway
|
||||||
|
|
||||||
int cpu_id[4];
|
int cpu_id[4];
|
||||||
|
|
||||||
// Detect CPU's CPUID capabilities and grab manufacturer string
|
// Detect CPU's CPUID capabilities and grab manufacturer string
|
||||||
|
|
@ -183,9 +218,8 @@ static CPUCaps Detect() {
|
||||||
// https://github.com/torvalds/linux/blob/master/tools/power/x86/turbostat/turbostat.c#L5569
|
// https://github.com/torvalds/linux/blob/master/tools/power/x86/turbostat/turbostat.c#L5569
|
||||||
// but it's easier to just estimate the TSC tick rate for these cases.
|
// but it's easier to just estimate the TSC tick rate for these cases.
|
||||||
if (caps.tsc_crystal_ratio_denominator) {
|
if (caps.tsc_crystal_ratio_denominator) {
|
||||||
caps.tsc_frequency = static_cast<u64>(caps.crystal_frequency) *
|
caps.tsc_frequency = u64(caps.crystal_frequency)
|
||||||
caps.tsc_crystal_ratio_numerator /
|
* caps.tsc_crystal_ratio_numerator / caps.tsc_crystal_ratio_denominator;
|
||||||
caps.tsc_crystal_ratio_denominator;
|
|
||||||
} else {
|
} else {
|
||||||
caps.tsc_frequency = X64::EstimateRDTSCFrequency();
|
caps.tsc_frequency = X64::EstimateRDTSCFrequency();
|
||||||
}
|
}
|
||||||
|
|
@ -197,54 +231,196 @@ static CPUCaps Detect() {
|
||||||
caps.max_frequency = cpu_id[1];
|
caps.max_frequency = cpu_id[1];
|
||||||
caps.bus_frequency = cpu_id[2];
|
caps.bus_frequency = cpu_id[2];
|
||||||
}
|
}
|
||||||
|
|
||||||
return caps;
|
return caps;
|
||||||
}
|
}();
|
||||||
|
|
||||||
const CPUCaps& GetCPUCaps() {
|
|
||||||
static CPUCaps caps = Detect();
|
|
||||||
return caps;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::optional<int> GetProcessorCount() {
|
|
||||||
#if defined(_WIN32)
|
|
||||||
// Get the buffer length.
|
|
||||||
DWORD length = 0;
|
|
||||||
GetLogicalProcessorInformation(nullptr, &length);
|
|
||||||
if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
|
|
||||||
LOG_ERROR(Frontend, "Failed to query core count.");
|
|
||||||
return std::nullopt;
|
|
||||||
}
|
|
||||||
std::vector<SYSTEM_LOGICAL_PROCESSOR_INFORMATION> buffer(
|
|
||||||
length / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION));
|
|
||||||
// Now query the core count.
|
|
||||||
if (!GetLogicalProcessorInformation(buffer.data(), &length)) {
|
|
||||||
LOG_ERROR(Frontend, "Failed to query core count.");
|
|
||||||
return std::nullopt;
|
|
||||||
}
|
|
||||||
return static_cast<int>(
|
|
||||||
std::count_if(buffer.cbegin(), buffer.cend(), [](const auto& proc_info) {
|
|
||||||
return proc_info.Relationship == RelationProcessorCore;
|
|
||||||
}));
|
|
||||||
#elif defined(__unix__)
|
|
||||||
const int thread_count = std::thread::hardware_concurrency();
|
|
||||||
std::ifstream smt("/sys/devices/system/cpu/smt/active");
|
|
||||||
char state = '0';
|
|
||||||
if (smt) {
|
|
||||||
smt.read(&state, sizeof(state));
|
|
||||||
}
|
|
||||||
switch (state) {
|
|
||||||
case '0':
|
|
||||||
return thread_count;
|
|
||||||
case '1':
|
|
||||||
return thread_count / 2;
|
|
||||||
default:
|
|
||||||
return std::nullopt;
|
|
||||||
}
|
|
||||||
#else
|
#else
|
||||||
// Shame on you
|
|
||||||
return std::nullopt;
|
#endif
|
||||||
|
|
||||||
|
#if defined(ARCHITECTURE_x86_64)
|
||||||
|
WallClock::WallClock(bool invariant_, u64 rdtsc_frequency_) noexcept
|
||||||
|
: rdtsc_frequency{rdtsc_frequency_}
|
||||||
|
, ns_rdtsc_factor{invariant_ ? GetFixedPoint64Factor(NsRatio::den, rdtsc_frequency_) : 0}
|
||||||
|
, us_rdtsc_factor{invariant_ ? GetFixedPoint64Factor(UsRatio::den, rdtsc_frequency_) : 0}
|
||||||
|
, ms_rdtsc_factor{invariant_ ? GetFixedPoint64Factor(MsRatio::den, rdtsc_frequency_) : 0}
|
||||||
|
, rdtsc_ns_factor{invariant_ ? GetFixedPoint64Factor(rdtsc_frequency_, NsRatio::den) : 1}
|
||||||
|
, cntpct_rdtsc_factor{invariant_ ? GetFixedPoint64Factor(CNTFRQ, rdtsc_frequency_) : 0}
|
||||||
|
, gputick_rdtsc_factor{invariant_ ? GetFixedPoint64Factor(GPUTickFreq, rdtsc_frequency_) : 0}
|
||||||
|
, invariant{invariant_}
|
||||||
|
{}
|
||||||
|
|
||||||
|
std::chrono::nanoseconds WallClock::GetTimeNS() const {
|
||||||
|
if (!invariant)
|
||||||
|
return std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::steady_clock::now().time_since_epoch());
|
||||||
|
return std::chrono::nanoseconds{MultiplyHigh(GetUptime(), ns_rdtsc_factor)};
|
||||||
|
}
|
||||||
|
|
||||||
|
std::chrono::microseconds WallClock::GetTimeUS() const {
|
||||||
|
if (!invariant)
|
||||||
|
return std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::steady_clock::now().time_since_epoch());
|
||||||
|
return std::chrono::microseconds{MultiplyHigh(GetUptime(), us_rdtsc_factor)};
|
||||||
|
}
|
||||||
|
|
||||||
|
std::chrono::milliseconds WallClock::GetTimeMS() const {
|
||||||
|
if (!invariant)
|
||||||
|
return std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now().time_since_epoch());
|
||||||
|
return std::chrono::milliseconds{MultiplyHigh(GetUptime(), ms_rdtsc_factor)};
|
||||||
|
}
|
||||||
|
|
||||||
|
/// @returns The uptime counter scaled by cntpct_rdtsc_factor when invariant, otherwise
/// the uptime scaled by NsToCNTPCTRatio.
s64 WallClock::GetCNTPCT() const {
    if (invariant) {
        return MultiplyHigh(GetUptime(), cntpct_rdtsc_factor);
    }
    return GetUptime() * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den;
}
|
||||||
|
|
||||||
|
/// @returns The uptime counter scaled by gputick_rdtsc_factor when invariant, otherwise
/// the uptime scaled by NsToGPUTickRatio.
s64 WallClock::GetGPUTick() const {
    if (invariant) {
        return MultiplyHigh(GetUptime(), gputick_rdtsc_factor);
    }
    return GetUptime() * NsToGPUTickRatio::num / NsToGPUTickRatio::den;
}
|
||||||
|
|
||||||
|
/// @returns The fenced RDTSC value when invariant, otherwise the steady clock's time
/// since epoch in nanoseconds. The unit therefore depends on `invariant`.
s64 WallClock::GetUptime() const {
    if (invariant) {
        return s64(Common::X64::FencedRDTSC());
    }
    const auto since_epoch = std::chrono::steady_clock::now().time_since_epoch();
    return std::chrono::duration_cast<std::chrono::nanoseconds>(since_epoch).count();
}
|
||||||
|
|
||||||
|
bool WallClock::IsNative() const {
|
||||||
|
return invariant;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Converts a nanosecond duration to this clock's ticks.
/// @returns `ns` scaled by rdtsc_ns_factor when invariant, otherwise the nanosecond
/// count unchanged.
/// NOTE(review): the result is only meaningful if rdtsc_ns_factor can represent
/// frequency / 1e9; confirm GetFixedPoint64Factor handles ratios >= 1.
u64 WallClock::NsToTicks(std::chrono::nanoseconds ns) const {
    if (!invariant) {
        return ns.count();
    }
    return MultiplyHigh(ns.count(), rdtsc_ns_factor);
}
|
||||||
|
#elif defined(HAS_NCE)
|
||||||
|
namespace {
|
||||||
|
/// @returns (num << 64) / den, computed in WallClock::FactorType.
[[nodiscard]] Common::WallClock::FactorType GetFixedPointFactor(u64 num, u64 den) noexcept {
    using Factor = Common::WallClock::FactorType;
    const Factor shifted = Factor(num) << 64;
    return shifted / den;
}
|
||||||
|
/// @returns (m * factor) >> 64, truncated to 64 bits.
[[nodiscard]] u64 MultiplyHigh(u64 m, Common::WallClock::FactorType factor) noexcept {
    const auto product = m * factor;
    return static_cast<u64>(product >> 64);
}
|
||||||
|
[[nodiscard]] s64 GetHostCNTFRQ() noexcept {
|
||||||
|
u64 cntfrq_el0 = 0;
|
||||||
|
#ifdef ANDROID
|
||||||
|
std::string_view board{""};
|
||||||
|
char buffer[PROP_VALUE_MAX];
|
||||||
|
int len{__system_property_get("ro.product.board", buffer)};
|
||||||
|
board = std::string_view(buffer, static_cast<size_t>(len));
|
||||||
|
if (board == "s5e9925") { // Exynos 2200
|
||||||
|
cntfrq_el0 = 25600000;
|
||||||
|
} else if (board == "exynos2100") { // Exynos 2100
|
||||||
|
cntfrq_el0 = 26000000;
|
||||||
|
} else if (board == "exynos9810") { // Exynos 9810
|
||||||
|
cntfrq_el0 = 26000000;
|
||||||
|
} else if (board == "s5e8825") { // Exynos 1280
|
||||||
|
cntfrq_el0 = 26000000;
|
||||||
|
} else {
|
||||||
|
asm volatile("mrs %[cntfrq_el0], cntfrq_el0" : [cntfrq_el0] "=r"(cntfrq_el0));
|
||||||
|
}
|
||||||
|
return cntfrq_el0;
|
||||||
|
#else
|
||||||
|
asm volatile("mrs %[cntfrq_el0], cntfrq_el0" : [cntfrq_el0] "=r"(cntfrq_el0));
|
||||||
|
return cntfrq_el0;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
/// Builds the NCE wall clock from the host counter frequency.
/// Both parameters are unused in this variant; every factor is derived from
/// GetHostCNTFRQ(), clamped to at least 1 so it is never used as a zero divisor.
WallClock::WallClock(bool invariant_, u64 rdtsc_frequency_) noexcept {
    const u64 host_cntfrq = std::max<u64>(GetHostCNTFRQ(), 1);
    // Scale factor for "target units per host counter tick".
    const auto per_host_tick = [host_cntfrq](u64 target_frequency) {
        return GetFixedPointFactor(target_frequency, host_cntfrq);
    };
    ns_cntfrq_factor = per_host_tick(NsRatio::den);
    us_cntfrq_factor = per_host_tick(UsRatio::den);
    ms_cntfrq_factor = per_host_tick(MsRatio::den);
    // Inverse direction: host counter ticks per nanosecond.
    cntfrq_ns_factor = GetFixedPointFactor(host_cntfrq, NsRatio::den);
    guest_cntfrq_factor = per_host_tick(CNTFRQ);
    gputick_cntfrq_factor = per_host_tick(GPUTickFreq);
}
|
||||||
|
|
||||||
|
std::chrono::nanoseconds WallClock::GetTimeNS() const {
|
||||||
|
return std::chrono::nanoseconds{MultiplyHigh(GetUptime(), ns_cntfrq_factor)};
|
||||||
|
}
|
||||||
|
|
||||||
|
std::chrono::microseconds WallClock::GetTimeUS() const {
|
||||||
|
return std::chrono::microseconds{MultiplyHigh(GetUptime(), us_cntfrq_factor)};
|
||||||
|
}
|
||||||
|
|
||||||
|
std::chrono::milliseconds WallClock::GetTimeMS() const {
|
||||||
|
return std::chrono::milliseconds{MultiplyHigh(GetUptime(), ms_cntfrq_factor)};
|
||||||
|
}
|
||||||
|
|
||||||
|
/// @returns The host counter scaled by guest_cntfrq_factor.
s64 WallClock::GetCNTPCT() const {
    const auto scaled = MultiplyHigh(GetUptime(), guest_cntfrq_factor);
    return scaled;
}
|
||||||
|
|
||||||
|
/// @returns The host counter scaled by gputick_cntfrq_factor.
s64 WallClock::GetGPUTick() const {
    const auto scaled = MultiplyHigh(GetUptime(), gputick_cntfrq_factor);
    return scaled;
}
|
||||||
|
|
||||||
|
/// @returns The raw value of the cntvct_el0 counter register.
s64 WallClock::GetUptime() const {
    s64 counter = 0;
    // The read is bracketed by "dsb ish" barriers on both sides.
    asm volatile("dsb ish\n\t"
                 "mrs %[cntvct_el0], cntvct_el0\n\t"
                 "dsb ish\n\t"
                 : [cntvct_el0] "=r"(counter));
    return counter;
}
|
||||||
|
|
||||||
|
/// @returns Always true for this variant.
bool WallClock::IsNative() const {
    constexpr bool is_native = true;
    return is_native;
}
|
||||||
|
|
||||||
|
/// @returns The nanosecond count scaled by cntfrq_ns_factor.
u64 WallClock::NsToTicks(std::chrono::nanoseconds ns) const {
    const auto nanoseconds = ns.count();
    return MultiplyHigh(nanoseconds, cntfrq_ns_factor);
}
|
||||||
|
#else
|
||||||
|
/// Generic fallback constructor: both arguments are ignored and no state is set up.
WallClock::WallClock([[maybe_unused]] bool invariant_,
                     [[maybe_unused]] u64 rdtsc_frequency_) noexcept {}
|
||||||
|
|
||||||
|
/// @returns std::chrono::steady_clock's time since epoch, in nanoseconds.
std::chrono::nanoseconds WallClock::GetTimeNS() const {
    const auto since_epoch = std::chrono::steady_clock::now().time_since_epoch();
    return std::chrono::duration_cast<std::chrono::nanoseconds>(since_epoch);
}
|
||||||
|
|
||||||
|
/// @returns std::chrono::steady_clock's time since epoch, in microseconds.
std::chrono::microseconds WallClock::GetTimeUS() const {
    const auto since_epoch = std::chrono::steady_clock::now().time_since_epoch();
    return std::chrono::duration_cast<std::chrono::microseconds>(since_epoch);
}
|
||||||
|
|
||||||
|
/// @returns std::chrono::steady_clock's time since epoch, in milliseconds.
std::chrono::milliseconds WallClock::GetTimeMS() const {
    const auto since_epoch = std::chrono::steady_clock::now().time_since_epoch();
    return std::chrono::duration_cast<std::chrono::milliseconds>(since_epoch);
}
|
||||||
|
|
||||||
|
/// @returns The uptime scaled by NsToCNTPCTRatio.
s64 WallClock::GetCNTPCT() const {
    const s64 uptime = GetUptime();
    return uptime * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den;
}
|
||||||
|
|
||||||
|
/// @returns The uptime scaled by NsToGPUTickRatio.
s64 WallClock::GetGPUTick() const {
    const s64 uptime = GetUptime();
    return uptime * NsToGPUTickRatio::num / NsToGPUTickRatio::den;
}
|
||||||
|
|
||||||
|
/// @returns std::chrono::steady_clock's time since epoch as a nanosecond count.
s64 WallClock::GetUptime() const {
    const auto since_epoch = std::chrono::steady_clock::now().time_since_epoch();
    return std::chrono::duration_cast<std::chrono::nanoseconds>(since_epoch).count();
}
|
||||||
|
|
||||||
|
/// @returns Always false for this variant.
bool WallClock::IsNative() const {
    constexpr bool is_native = false;
    return is_native;
}
|
||||||
|
|
||||||
|
/// @returns The nanosecond count unchanged (this variant's GetUptime() already
/// counts nanoseconds).
u64 WallClock::NsToTicks(std::chrono::nanoseconds ns) const {
    const auto nanoseconds = ns.count();
    return nanoseconds;
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Wall clock MUST be initialized AFTER g_cpu_caps
|
||||||
|
// C++ only guarantees ctor init in the order they appear in TU
|
||||||
|
/// Global wall clock, built once during static initialization.
/// On x86-64 the TSC is only selected when g_cpu_caps reports an invariant TSC and a
/// tsc_frequency of at least std::nano::den; on the other variants the constructor
/// arguments are fixed literals.
const WallClock g_wall_clock = [] {
#if defined(ARCHITECTURE_x86_64)
    auto const& detected = Common::g_cpu_caps;
    const bool use_tsc = detected.invariant_tsc && detected.tsc_frequency >= std::nano::den;
    return WallClock(use_tsc, detected.tsc_frequency);
#elif defined(HAS_NCE)
    return WallClock(false, 1);
#else
    return WallClock(true, 1);
#endif
}();
|
||||||
} // namespace Common
|
} // namespace Common
|
||||||
|
|
@ -1,11 +1,14 @@
|
||||||
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
||||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <optional>
|
||||||
|
#include <string_view>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <ratio>
|
#include <ratio>
|
||||||
|
|
@ -43,6 +46,9 @@ public:
|
||||||
/// @returns Whether the clock directly uses the host's hardware clock.
|
/// @returns Whether the clock directly uses the host's hardware clock.
|
||||||
bool IsNative() const;
|
bool IsNative() const;
|
||||||
|
|
||||||
|
// @returns Nanoseconds to native ticks
|
||||||
|
u64 NsToTicks(std::chrono::nanoseconds ns) const;
|
||||||
|
|
||||||
static inline u64 NSToCNTPCT(u64 ns) {
|
static inline u64 NSToCNTPCT(u64 ns) {
|
||||||
return ns * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den;
|
return ns * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den;
|
||||||
}
|
}
|
||||||
|
|
@ -69,7 +75,6 @@ public:
|
||||||
return cpu_tick * CPUTickToGPUTickRatio::num / CPUTickToGPUTickRatio::den;
|
return cpu_tick * CPUTickToGPUTickRatio::num / CPUTickToGPUTickRatio::den;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected:
|
|
||||||
using NsRatio = std::nano;
|
using NsRatio = std::nano;
|
||||||
using UsRatio = std::micro;
|
using UsRatio = std::micro;
|
||||||
using MsRatio = std::milli;
|
using MsRatio = std::milli;
|
||||||
|
|
@ -87,31 +92,96 @@ protected:
|
||||||
using CPUTickToGPUTickRatio = std::ratio<GPUTickFreq, CPUTickFreq>;
|
using CPUTickToGPUTickRatio = std::ratio<GPUTickFreq, CPUTickFreq>;
|
||||||
|
|
||||||
#if defined(ARCHITECTURE_x86_64)
|
#if defined(ARCHITECTURE_x86_64)
|
||||||
bool invariant;
|
|
||||||
u64 rdtsc_frequency;
|
u64 rdtsc_frequency;
|
||||||
u64 ns_rdtsc_factor;
|
u64 ns_rdtsc_factor;
|
||||||
u64 us_rdtsc_factor;
|
u64 us_rdtsc_factor;
|
||||||
u64 ms_rdtsc_factor;
|
u64 ms_rdtsc_factor;
|
||||||
|
u64 rdtsc_ns_factor;
|
||||||
u64 cntpct_rdtsc_factor;
|
u64 cntpct_rdtsc_factor;
|
||||||
u64 gputick_rdtsc_factor;
|
u64 gputick_rdtsc_factor;
|
||||||
|
bool invariant;
|
||||||
#elif defined(HAS_NCE)
|
#elif defined(HAS_NCE)
|
||||||
public:
|
|
||||||
using FactorType = unsigned __int128;
|
using FactorType = unsigned __int128;
|
||||||
|
[[nodiscard]] inline FactorType GetGuestCNTFRQFactor() const {
|
||||||
FactorType GetGuestCNTFRQFactor() const {
|
|
||||||
return guest_cntfrq_factor;
|
return guest_cntfrq_factor;
|
||||||
}
|
}
|
||||||
protected:
|
|
||||||
FactorType ns_cntfrq_factor;
|
FactorType ns_cntfrq_factor;
|
||||||
FactorType us_cntfrq_factor;
|
FactorType us_cntfrq_factor;
|
||||||
FactorType ms_cntfrq_factor;
|
FactorType ms_cntfrq_factor;
|
||||||
|
FactorType cntfrq_ns_factor;
|
||||||
FactorType guest_cntfrq_factor;
|
FactorType guest_cntfrq_factor;
|
||||||
FactorType gputick_cntfrq_factor;
|
FactorType gputick_cntfrq_factor;
|
||||||
#else
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
[[nodiscard]] WallClock CreateOptimalClock() noexcept;
|
#ifdef ARCHITECTURE_x86_64
|
||||||
|
/// x86/x64 CPU capabilities that may be detected by this module
|
||||||
|
struct CPUCaps {
|
||||||
|
enum class Manufacturer : u8 {
|
||||||
|
Unknown = 0,
|
||||||
|
Intel = 1,
|
||||||
|
AMD = 2,
|
||||||
|
Hygon = 3,
|
||||||
|
};
|
||||||
|
|
||||||
|
static Manufacturer ParseManufacturer(std::string_view brand_string);
|
||||||
|
|
||||||
|
Manufacturer manufacturer;
|
||||||
|
char brand_string[13];
|
||||||
|
|
||||||
|
char cpu_string[48];
|
||||||
|
|
||||||
|
u32 base_frequency;
|
||||||
|
u32 max_frequency;
|
||||||
|
u32 bus_frequency;
|
||||||
|
|
||||||
|
u32 tsc_crystal_ratio_denominator;
|
||||||
|
u32 tsc_crystal_ratio_numerator;
|
||||||
|
u32 crystal_frequency;
|
||||||
|
u64 tsc_frequency; // Derived from the above three values
|
||||||
|
|
||||||
|
bool sse3 : 1;
|
||||||
|
bool ssse3 : 1;
|
||||||
|
bool sse4_1 : 1;
|
||||||
|
bool sse4_2 : 1;
|
||||||
|
|
||||||
|
bool avx : 1;
|
||||||
|
bool avx2 : 1;
|
||||||
|
bool avx512f : 1;
|
||||||
|
bool avx512dq : 1;
|
||||||
|
bool avx512cd : 1;
|
||||||
|
bool avx512bw : 1;
|
||||||
|
bool avx512vl : 1;
|
||||||
|
bool avx512vbmi : 1;
|
||||||
|
bool avx512bitalg : 1;
|
||||||
|
|
||||||
|
bool aes : 1;
|
||||||
|
bool bmi1 : 1;
|
||||||
|
bool bmi2 : 1;
|
||||||
|
bool f16c : 1;
|
||||||
|
bool fma : 1;
|
||||||
|
bool gfni : 1;
|
||||||
|
bool invariant_tsc : 1;
|
||||||
|
bool lzcnt : 1;
|
||||||
|
bool monitorx : 1;
|
||||||
|
bool movbe : 1;
|
||||||
|
bool pclmulqdq : 1;
|
||||||
|
bool popcnt : 1;
|
||||||
|
bool sha : 1;
|
||||||
|
bool waitpkg : 1;
|
||||||
|
};
|
||||||
|
#else
|
||||||
|
/// Placeholder used on non-x86-64 builds so that g_cpu_caps still has a type.
struct CPUCaps {
    bool padding; // No capabilities are recorded in this variant.
};
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/// Detects CPU core count
|
||||||
|
std::optional<int> GetProcessorCount();
|
||||||
|
|
||||||
|
/// @brief Global cpu caps
|
||||||
|
extern const CPUCaps g_cpu_caps;
|
||||||
|
/// @brief Global wall clock
|
||||||
|
extern const WallClock g_wall_clock;
|
||||||
|
|
||||||
} // namespace Common
|
} // namespace Common
|
||||||
|
|
@ -4,6 +4,8 @@
|
||||||
// SPDX-FileCopyrightText: 2014 Citra Emulator Project
|
// SPDX-FileCopyrightText: 2014 Citra Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#include <chrono>
|
||||||
|
#include <limits>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <thread>
|
#include <thread>
|
||||||
|
|
||||||
|
|
@ -18,24 +20,35 @@
|
||||||
#elif defined(_WIN32)
|
#elif defined(_WIN32)
|
||||||
#include <windows.h>
|
#include <windows.h>
|
||||||
#include "common/string_util.h"
|
#include "common/string_util.h"
|
||||||
|
#include "common/windows/timer_resolution.h"
|
||||||
#else
|
#else
|
||||||
#if defined(__FreeBSD__)
|
#if defined(__FreeBSD__)
|
||||||
#include <sys/cpuset.h>
|
#include <sys/cpuset.h>
|
||||||
#include <sys/_cpuset.h>
|
#include <sys/_cpuset.h>
|
||||||
#include <pthread_np.h>
|
#include <pthread_np.h>
|
||||||
|
// Compatibility with CPUset
|
||||||
|
#define cpu_set_t cpuset_t
|
||||||
#elif defined(__DragonFly__) || defined(__OpenBSD__) || defined(__Bitrig__)
|
#elif defined(__DragonFly__) || defined(__OpenBSD__) || defined(__Bitrig__)
|
||||||
#include <pthread_np.h>
|
#include <pthread_np.h>
|
||||||
#endif
|
#endif
|
||||||
#include <pthread.h>
|
#include <pthread.h>
|
||||||
#include <sched.h>
|
#include <sched.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef _WIN32
|
#ifndef _WIN32
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __FreeBSD__
|
#include "common/cpu_features.h"
|
||||||
# define cpu_set_t cpuset_t
|
#ifdef ARCHITECTURE_x86_64
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#include <intrin.h>
|
||||||
|
#else
|
||||||
|
#include <x86intrin.h>
|
||||||
#endif
|
#endif
|
||||||
|
#include "common/x64/rdtsc.h"
|
||||||
|
#endif
|
||||||
|
#include "core/core_timing.h"
|
||||||
|
|
||||||
namespace Common {
|
namespace Common {
|
||||||
|
|
||||||
|
|
@ -144,4 +157,93 @@ void PinCurrentThreadToPerformanceCore(size_t core_id) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef ARCHITECTURE_x86_64
|
||||||
|
// On Linux and UNIX systems, a futex would nominally be used to cover the costs
|
||||||
|
// the idea is that it's intuitively cheaper to use a direct instruction as opposed to a full futex call
|
||||||
|
// the underlying libc++ implementation uses pthread_cond_timedwait which MAY invoke a futex
|
||||||
|
// Let's pretend the OS is too expensive to jump into, and avoid ANY context switches
|
||||||
|
// this should *IN THEORY* lower CPU usage while just waiting for stuff effectively
|
||||||
|
// For windows the minimal quanta resolution is about 500us, and normal CRT cond var is 1.5ms(?)
|
||||||
|
// so may as well avoid that too
|
||||||
|
// Let's just give ALL platforms the same mechanisms (almost) for when they have umonitor OR waitpkg
|
||||||
|
#ifdef __clang__
|
||||||
|
__attribute__((target("waitpkg,mwaitx")))
|
||||||
|
#elif defined(__GNUC__)
|
||||||
|
#pragma GCC target("waitpkg")
|
||||||
|
#pragma GCC target("mwaitx")
|
||||||
|
#endif
|
||||||
|
bool Event::WaitFor(const std::chrono::nanoseconds time) {
|
||||||
|
#ifdef _WIN32
|
||||||
|
auto const start = Common::X64::FencedRDTSC();
|
||||||
|
auto const& caps = Common::g_cpu_caps;
|
||||||
|
[[maybe_unused]] auto const end = start + Common::g_wall_clock.NsToTicks(time);
|
||||||
|
if (caps.monitorx) {
|
||||||
|
while (true) {
|
||||||
|
// Armed monitor, as per manual, MWAITX must be conditional if the condition isn't satisfied
|
||||||
|
// to prevent a race condition.
|
||||||
|
_mm_monitorx(reinterpret_cast<u64*>(std::addressof(is_set)), 0, 0);
|
||||||
|
if (!is_set.load()) {
|
||||||
|
// RDTSC may be fenced here due to atomic load
|
||||||
|
auto const now = _rdtsc();
|
||||||
|
if (end > now) {
|
||||||
|
u32 const cycles = std::min<u32>((std::numeric_limits<u32>::max)(), s64(end) - s64(now));
|
||||||
|
// See here: https://github.com/torvalds/linux/blob/948a64995aca6820abefd17f1a4258f5835c5ad9/arch/x86/lib/delay.c#L93
|
||||||
|
// MWAITX accepts a 32-bit input timer which determines the total number of cycles to wait for
|
||||||
|
// NOT THE TOTAL ABSOLUTE TSC VALUE, it's just a delta
|
||||||
|
// BIT[1] = use a timer
|
||||||
|
// Hint = 0: Use C1 state when sleepy (means slower wakeup but better savings)
|
||||||
|
_mm_mwaitx(1 << 1, 0u, cycles);
|
||||||
|
if (!is_set.load())
|
||||||
|
return false;
|
||||||
|
} else
|
||||||
|
return false; //timeout
|
||||||
|
}
|
||||||
|
bool expected = true;
|
||||||
|
if (is_set.compare_exchange_weak(expected, false, std::memory_order_release))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
} else if (caps.waitpkg) {
|
||||||
|
// #UD If CPUID.7.0:ECX.WAITPKG[bit 5]=0.
|
||||||
|
while (true) {
|
||||||
|
_umonitor(std::addressof(is_set));
|
||||||
|
if (!is_set.load() && !_umwait(0, end)) //umwait is absolute time!!!
|
||||||
|
return false;
|
||||||
|
bool expected = true;
|
||||||
|
if (is_set.compare_exchange_weak(expected, false, std::memory_order_release))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
while (!is_set.load() && end > _rdtsc())
|
||||||
|
Common::Windows::SleepForOneTick();
|
||||||
|
if (is_set.load())
|
||||||
|
Reset();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
std::unique_lock lk{mutex};
|
||||||
|
if (!condvar.wait_for(lk, time, [this] { return is_set.load(); }))
|
||||||
|
return false;
|
||||||
|
is_set = false;
|
||||||
|
return true;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
bool Event::WaitFor(const std::chrono::nanoseconds time) {
|
||||||
|
#ifdef _WIN32
|
||||||
|
auto const end = Common::g_wall_clock.GetTimeNS() + time;
|
||||||
|
while (!is_set.load() && end > Common::g_wall_clock.GetTimeNS())
|
||||||
|
Common::Windows::SleepForOneTick();
|
||||||
|
if (is_set.load())
|
||||||
|
Reset();
|
||||||
|
return true;
|
||||||
|
#else
|
||||||
|
std::unique_lock lk{mutex};
|
||||||
|
if (!condvar.wait_for(lk, time, [this] { return is_set.load(); }))
|
||||||
|
return false;
|
||||||
|
is_set = false;
|
||||||
|
return true;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
} // namespace Common
|
} // namespace Common
|
||||||
|
|
|
||||||
|
|
@ -34,16 +34,10 @@ public:
|
||||||
is_set = false;
|
is_set = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool WaitFor(const std::chrono::nanoseconds& time) {
|
bool WaitFor(const std::chrono::nanoseconds time);
|
||||||
std::unique_lock lk{mutex};
|
|
||||||
if (!condvar.wait_for(lk, time, [this] { return is_set.load(); }))
|
|
||||||
return false;
|
|
||||||
is_set = false;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <class Clock, class Duration>
|
template<class Clock, class Duration>
|
||||||
bool WaitUntil(const std::chrono::time_point<Clock, Duration>& time) {
|
bool WaitUntil(const std::chrono::time_point<Clock, Duration> time) {
|
||||||
std::unique_lock lk{mutex};
|
std::unique_lock lk{mutex};
|
||||||
if (!condvar.wait_until(lk, time, [this] { return is_set.load(); }))
|
if (!condvar.wait_until(lk, time, [this] { return is_set.load(); }))
|
||||||
return false;
|
return false;
|
||||||
|
|
@ -63,9 +57,9 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
alignas(64) std::atomic<bool> is_set{false};
|
||||||
std::condition_variable condvar;
|
std::condition_variable condvar;
|
||||||
std::mutex mutex;
|
std::mutex mutex;
|
||||||
std::atomic_bool is_set{false};
|
|
||||||
};
|
};
|
||||||
|
|
||||||
class Barrier {
|
class Barrier {
|
||||||
|
|
|
||||||
|
|
@ -1,197 +0,0 @@
|
||||||
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
|
||||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
|
||||||
|
|
||||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
||||||
|
|
||||||
#include "common/steady_clock.h"
|
|
||||||
#include "common/uint128.h"
|
|
||||||
#include "common/wall_clock.h"
|
|
||||||
|
|
||||||
#ifdef __ANDROID__
|
|
||||||
#include <sys/system_properties.h>
|
|
||||||
#endif
|
|
||||||
#ifdef ARCHITECTURE_x86_64
|
|
||||||
#include "common/x64/cpu_detect.h"
|
|
||||||
#include "common/x64/rdtsc.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
namespace Common {
|
|
||||||
|
|
||||||
#if defined(ARCHITECTURE_x86_64)
|
|
||||||
WallClock::WallClock(bool invariant_, u64 rdtsc_frequency_) noexcept
|
|
||||||
: invariant{invariant_}
|
|
||||||
, rdtsc_frequency{rdtsc_frequency_}
|
|
||||||
, ns_rdtsc_factor{invariant_ ? 0 : GetFixedPoint64Factor(NsRatio::den, rdtsc_frequency_)}
|
|
||||||
, us_rdtsc_factor{invariant_ ? 0 : GetFixedPoint64Factor(UsRatio::den, rdtsc_frequency_)}
|
|
||||||
, ms_rdtsc_factor{invariant_ ? 0 : GetFixedPoint64Factor(MsRatio::den, rdtsc_frequency_)}
|
|
||||||
, cntpct_rdtsc_factor{invariant_ ? 0 : GetFixedPoint64Factor(CNTFRQ, rdtsc_frequency_)}
|
|
||||||
, gputick_rdtsc_factor{invariant_ ? 0 : GetFixedPoint64Factor(GPUTickFreq, rdtsc_frequency_)}
|
|
||||||
{}
|
|
||||||
|
|
||||||
std::chrono::nanoseconds WallClock::GetTimeNS() const {
|
|
||||||
if (invariant)
|
|
||||||
return std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::system_clock::now().time_since_epoch());
|
|
||||||
return std::chrono::nanoseconds{MultiplyHigh(GetUptime(), ns_rdtsc_factor)};
|
|
||||||
}
|
|
||||||
|
|
||||||
std::chrono::microseconds WallClock::GetTimeUS() const {
|
|
||||||
if (invariant)
|
|
||||||
return std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::system_clock::now().time_since_epoch());
|
|
||||||
return std::chrono::microseconds{MultiplyHigh(GetUptime(), us_rdtsc_factor)};
|
|
||||||
}
|
|
||||||
|
|
||||||
std::chrono::milliseconds WallClock::GetTimeMS() const {
|
|
||||||
if (invariant)
|
|
||||||
return std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now().time_since_epoch());
|
|
||||||
return std::chrono::milliseconds{MultiplyHigh(GetUptime(), ms_rdtsc_factor)};
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Uptime converted to guest CNTPCT ticks.
s64 WallClock::GetCNTPCT() const {
    if (!invariant) {
        // Native path: uptime is a raw TSC value, scale it with the fixed-point factor.
        return MultiplyHigh(GetUptime(), cntpct_rdtsc_factor);
    }
    // Fallback path: uptime is in nanoseconds, apply the ns -> CNTPCT ratio.
    return GetUptime() * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den;
}
|
|
||||||
|
|
||||||
/// Uptime converted to GPU ticks.
s64 WallClock::GetGPUTick() const {
    if (!invariant) {
        // Native path: uptime is a raw TSC value, scale it with the fixed-point factor.
        return MultiplyHigh(GetUptime(), gputick_rdtsc_factor);
    }
    // Fallback path: uptime is in nanoseconds, apply the ns -> GPU tick ratio.
    return GetUptime() * NsToGPUTickRatio::num / NsToGPUTickRatio::den;
}
|
|
||||||
|
|
||||||
/// Raw uptime counter: fenced RDTSC value on the native path,
/// steady_clock nanoseconds on the fallback path.
s64 WallClock::GetUptime() const {
    if (!invariant) {
        return static_cast<s64>(Common::X64::FencedRDTSC());
    }
    const auto since_epoch = std::chrono::steady_clock::now().time_since_epoch();
    return std::chrono::duration_cast<std::chrono::nanoseconds>(since_epoch).count();
}
|
|
||||||
|
|
||||||
bool WallClock::IsNative() const {
|
|
||||||
if (invariant)
|
|
||||||
return false;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
#elif defined(HAS_NCE)
|
|
||||||
namespace {
|
|
||||||
|
|
||||||
/// Returns (num << 64) / den, i.e. the ratio num/den as a fixed-point factor
/// with 64 fractional bits.
[[nodiscard]] WallClock::FactorType GetFixedPointFactor(u64 num, u64 den) noexcept {
    const WallClock::FactorType scaled_num = WallClock::FactorType(num) << 64;
    return scaled_num / den;
}
|
|
||||||
|
|
||||||
/// Multiplies `m` by a 64-fractional-bit fixed-point factor and drops the
/// fractional part (shifts the product right by 64).
[[nodiscard]] u64 MultiplyHigh(u64 m, WallClock::FactorType factor) noexcept {
    const WallClock::FactorType product = m * factor;
    return static_cast<u64>(product >> 64);
}
|
|
||||||
|
|
||||||
/// Returns the host counter frequency in Hz.
/// On Android a few Exynos boards get a hard-coded value instead of the
/// register reading (presumably because CNTFRQ_EL0 is unreliable there -- TODO confirm);
/// everything else reads CNTFRQ_EL0 directly.
[[nodiscard]] s64 GetHostCNTFRQ() noexcept {
// Use the same macro that guards the <sys/system_properties.h> include at the top
// of this file, so PROP_VALUE_MAX / __system_property_get are always declared here.
#ifdef __ANDROID__
    char buffer[PROP_VALUE_MAX]{};
    const int len = __system_property_get("ro.product.board", buffer);
    // A non-positive length means "no value"; never turn it into a huge size_t.
    const std::string_view board{buffer, len > 0 ? static_cast<size_t>(len) : size_t{0}};
    if (board == "s5e9925") { // Exynos 2200
        return 25600000;
    }
    if (board == "exynos2100"    // Exynos 2100
        || board == "exynos9810" // Exynos 9810
        || board == "s5e8825") { // Exynos 1280
        return 26000000;
    }
#endif
    u64 cntfrq_el0 = 0;
    asm volatile("mrs %[cntfrq_el0], cntfrq_el0" : [cntfrq_el0] "=r"(cntfrq_el0));
    return static_cast<s64>(cntfrq_el0);
}
|
|
||||||
|
|
||||||
} // namespace
|
|
||||||
|
|
||||||
/// NCE clock constructor: derives every conversion factor from the host counter
/// frequency (clamped to at least 1). Both parameters are unused in this variant.
WallClock::WallClock(bool invariant_, u64 rdtsc_frequency_) noexcept {
    const u64 host_cntfrq = std::max<u64>(GetHostCNTFRQ(), 1);
    const auto factor_for = [host_cntfrq](u64 target_frequency) {
        return GetFixedPointFactor(target_frequency, host_cntfrq);
    };
    ns_cntfrq_factor = factor_for(NsRatio::den);
    us_cntfrq_factor = factor_for(UsRatio::den);
    ms_cntfrq_factor = factor_for(MsRatio::den);
    guest_cntfrq_factor = factor_for(CNTFRQ);
    gputick_cntfrq_factor = factor_for(GPUTickFreq);
}
|
|
||||||
|
|
||||||
std::chrono::nanoseconds WallClock::GetTimeNS() const {
|
|
||||||
return std::chrono::nanoseconds{MultiplyHigh(GetUptime(), ns_cntfrq_factor)};
|
|
||||||
}
|
|
||||||
|
|
||||||
std::chrono::microseconds WallClock::GetTimeUS() const {
|
|
||||||
return std::chrono::microseconds{MultiplyHigh(GetUptime(), us_cntfrq_factor)};
|
|
||||||
}
|
|
||||||
|
|
||||||
std::chrono::milliseconds WallClock::GetTimeMS() const {
|
|
||||||
return std::chrono::milliseconds{MultiplyHigh(GetUptime(), ms_cntfrq_factor)};
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Host counter uptime rescaled to the guest CNTPCT frequency.
s64 WallClock::GetCNTPCT() const {
    const u64 guest_ticks = MultiplyHigh(GetUptime(), guest_cntfrq_factor);
    return static_cast<s64>(guest_ticks);
}
|
|
||||||
|
|
||||||
/// Host counter uptime rescaled to the GPU tick frequency.
s64 WallClock::GetGPUTick() const {
    const u64 gpu_ticks = MultiplyHigh(GetUptime(), gputick_cntfrq_factor);
    return static_cast<s64>(gpu_ticks);
}
|
|
||||||
|
|
||||||
/// Reads the virtual counter register (cntvct_el0), with a `dsb ish` barrier
/// issued before and after the read.
s64 WallClock::GetUptime() const {
    s64 counter = 0;
    asm volatile("dsb ish\n\t"
                 "mrs %[cntvct_el0], cntvct_el0\n\t"
                 "dsb ish\n\t"
                 : [cntvct_el0] "=r"(counter));
    return counter;
}
|
|
||||||
|
|
||||||
/// The NCE clock always reads the hardware counter, so it always reports native.
bool WallClock::IsNative() const {
    return true;
}
|
|
||||||
#else
|
|
||||||
/// Generic fallback constructor: this variant keeps no state, both parameters are ignored.
WallClock::WallClock(bool invariant_, u64 rdtsc_frequency_) noexcept {
}
|
|
||||||
|
|
||||||
/// system_clock time since epoch, in nanoseconds.
std::chrono::nanoseconds WallClock::GetTimeNS() const {
    const auto now = std::chrono::system_clock::now();
    return std::chrono::duration_cast<std::chrono::nanoseconds>(now.time_since_epoch());
}
|
|
||||||
|
|
||||||
/// system_clock time since epoch, in microseconds.
std::chrono::microseconds WallClock::GetTimeUS() const {
    const auto now = std::chrono::system_clock::now();
    return std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch());
}
|
|
||||||
|
|
||||||
/// system_clock time since epoch, in milliseconds.
std::chrono::milliseconds WallClock::GetTimeMS() const {
    const auto now = std::chrono::system_clock::now();
    return std::chrono::duration_cast<std::chrono::milliseconds>(now.time_since_epoch());
}
|
|
||||||
|
|
||||||
/// Nanosecond uptime converted to guest CNTPCT ticks via NsToCNTPCTRatio.
s64 WallClock::GetCNTPCT() const {
    const s64 uptime_ns = GetUptime();
    return uptime_ns * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den;
}
|
|
||||||
|
|
||||||
/// Nanosecond uptime converted to GPU ticks via NsToGPUTickRatio.
s64 WallClock::GetGPUTick() const {
    const s64 uptime_ns = GetUptime();
    return uptime_ns * NsToGPUTickRatio::num / NsToGPUTickRatio::den;
}
|
|
||||||
|
|
||||||
/// steady_clock time since its epoch, in nanoseconds.
s64 WallClock::GetUptime() const {
    const auto since_epoch = std::chrono::steady_clock::now().time_since_epoch();
    return std::chrono::duration_cast<std::chrono::nanoseconds>(since_epoch).count();
}
|
|
||||||
|
|
||||||
/// The generic fallback is built purely on std::chrono, so it never reports native.
bool WallClock::IsNative() const {
    return false;
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/// Builds the clock variant suited to the current build target.
/// On x86-64 the TSC is used only when it is invariant and its detected
/// frequency is at least std::nano::den; otherwise the fallback is selected.
WallClock CreateOptimalClock() noexcept {
#if defined(ARCHITECTURE_x86_64)
    auto const& caps = GetCPUCaps();
    const bool tsc_usable = caps.invariant_tsc && caps.tsc_frequency >= std::nano::den;
    // First constructor argument is true when the fallback must be used.
    return WallClock(!tsc_usable, caps.tsc_frequency);
#elif defined(HAS_NCE)
    return WallClock(false, 1);
#else
    return WallClock(true, 1);
#endif
}
|
|
||||||
|
|
||||||
} // namespace Common
|
|
||||||
|
|
@ -1,82 +0,0 @@
|
||||||
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
|
||||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
|
||||||
|
|
||||||
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
|
|
||||||
// SPDX-FileCopyrightText: Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project
|
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include <optional>
|
|
||||||
#include <string_view>
|
|
||||||
#include "common/common_types.h"
|
|
||||||
|
|
||||||
namespace Common {
|
|
||||||
|
|
||||||
/// x86/x64 CPU capabilities that may be detected by this module
|
|
||||||
struct CPUCaps {
|
|
||||||
|
|
||||||
enum class Manufacturer : u8 {
|
|
||||||
Unknown = 0,
|
|
||||||
Intel = 1,
|
|
||||||
AMD = 2,
|
|
||||||
Hygon = 3,
|
|
||||||
};
|
|
||||||
|
|
||||||
static Manufacturer ParseManufacturer(std::string_view brand_string);
|
|
||||||
|
|
||||||
Manufacturer manufacturer;
|
|
||||||
char brand_string[13];
|
|
||||||
|
|
||||||
char cpu_string[48];
|
|
||||||
|
|
||||||
u32 base_frequency;
|
|
||||||
u32 max_frequency;
|
|
||||||
u32 bus_frequency;
|
|
||||||
|
|
||||||
u32 tsc_crystal_ratio_denominator;
|
|
||||||
u32 tsc_crystal_ratio_numerator;
|
|
||||||
u32 crystal_frequency;
|
|
||||||
u64 tsc_frequency; // Derived from the above three values
|
|
||||||
|
|
||||||
bool sse3 : 1;
|
|
||||||
bool ssse3 : 1;
|
|
||||||
bool sse4_1 : 1;
|
|
||||||
bool sse4_2 : 1;
|
|
||||||
|
|
||||||
bool avx : 1;
|
|
||||||
bool avx2 : 1;
|
|
||||||
bool avx512f : 1;
|
|
||||||
bool avx512dq : 1;
|
|
||||||
bool avx512cd : 1;
|
|
||||||
bool avx512bw : 1;
|
|
||||||
bool avx512vl : 1;
|
|
||||||
bool avx512vbmi : 1;
|
|
||||||
bool avx512bitalg : 1;
|
|
||||||
|
|
||||||
bool aes : 1;
|
|
||||||
bool bmi1 : 1;
|
|
||||||
bool bmi2 : 1;
|
|
||||||
bool f16c : 1;
|
|
||||||
bool fma : 1;
|
|
||||||
bool gfni : 1;
|
|
||||||
bool invariant_tsc : 1;
|
|
||||||
bool lzcnt : 1;
|
|
||||||
bool monitorx : 1;
|
|
||||||
bool movbe : 1;
|
|
||||||
bool pclmulqdq : 1;
|
|
||||||
bool popcnt : 1;
|
|
||||||
bool sha : 1;
|
|
||||||
bool waitpkg : 1;
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the supported capabilities of the host CPU
|
|
||||||
* @return Reference to a CPUCaps struct with the detected host CPU capabilities
|
|
||||||
*/
|
|
||||||
const CPUCaps& GetCPUCaps();
|
|
||||||
|
|
||||||
/// Detects CPU core count
|
|
||||||
std::optional<int> GetProcessorCount();
|
|
||||||
|
|
||||||
} // namespace Common
|
|
||||||
|
|
@ -1,75 +0,0 @@
|
||||||
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
|
||||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
|
||||||
|
|
||||||
#include <thread>
|
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
|
||||||
#include <intrin.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include "common/x64/cpu_detect.h"
|
|
||||||
#include "common/x64/cpu_wait.h"
|
|
||||||
#include "common/x64/rdtsc.h"
|
|
||||||
|
|
||||||
namespace Common::X64 {
|
|
||||||
|
|
||||||
namespace {

// Length of one micro-sleep, in cycles. 100,000 cycles is a reasonable
// amount of time to wait to save on CPU resources. For reference:
//   1 GHz -> 100us
//   2 GHz ->  50us
//   4 GHz ->  25us
constexpr unsigned int PauseCycles = 100'000U;

} // Anonymous namespace
|
|
||||||
|
|
||||||
#if defined(_MSC_VER) && !defined(__clang__)
|
|
||||||
/// Issues `_tpause` with a wake-up TSC value PauseCycles after the current fenced TSC reading.
__forceinline static void TPAUSE() {
    static constexpr auto RequestC02State = 0U;
    const auto wake_tsc = FencedRDTSC() + PauseCycles;
    _tpause(RequestC02State, wake_tsc);
}
|
|
||||||
|
|
||||||
/// Arms MONITORX on a local dummy variable, then issues MWAITX with the timer
/// flag set and a PauseCycles timeout.
__forceinline static void MWAITX() {
    static constexpr auto RequestC1State = 0U;
    static constexpr auto EnableWaitTimeFlag = 1U << 1;

    // The monitored variable should be aligned to a cache line.
    alignas(64) u64 monitor_var{};
    _mm_monitorx(&monitor_var, 0, 0);
    _mm_mwaitx(EnableWaitTimeFlag, RequestC1State, PauseCycles);
}
|
|
||||||
#else
|
|
||||||
static void TPAUSE() {
|
|
||||||
static constexpr auto RequestC02State = 0U;
|
|
||||||
const auto tsc = FencedRDTSC() + PauseCycles;
|
|
||||||
const auto eax = static_cast<u32>(tsc & 0xFFFFFFFF);
|
|
||||||
const auto edx = static_cast<u32>(tsc >> 32);
|
|
||||||
asm volatile("tpause %0" : : "r"(RequestC02State), "d"(edx), "a"(eax));
|
|
||||||
}
|
|
||||||
|
|
||||||
static void MWAITX() {
|
|
||||||
static constexpr auto EnableWaitTimeFlag = 1U << 1;
|
|
||||||
static constexpr auto RequestC1State = 0U;
|
|
||||||
|
|
||||||
// monitor_var should be aligned to a cache line.
|
|
||||||
alignas(64) u64 monitor_var{};
|
|
||||||
asm volatile("monitorx" : : "a"(&monitor_var), "c"(0), "d"(0));
|
|
||||||
asm volatile("mwaitx" : : "a"(RequestC1State), "b"(PauseCycles), "c"(EnableWaitTimeFlag));
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
void MicroSleep() {
|
|
||||||
static const bool has_waitpkg = GetCPUCaps().waitpkg;
|
|
||||||
static const bool has_monitorx = GetCPUCaps().monitorx;
|
|
||||||
|
|
||||||
if (has_waitpkg) {
|
|
||||||
TPAUSE();
|
|
||||||
} else if (has_monitorx) {
|
|
||||||
MWAITX();
|
|
||||||
} else {
|
|
||||||
std::this_thread::yield();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace Common::X64
|
|
||||||
|
|
@ -1,10 +0,0 @@
|
||||||
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
|
||||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
namespace Common::X64 {
|
|
||||||
|
|
||||||
void MicroSleep();
|
|
||||||
|
|
||||||
} // namespace Common::X64
|
|
||||||
|
|
@ -3,7 +3,7 @@
|
||||||
|
|
||||||
#include <numeric>
|
#include <numeric>
|
||||||
#include <bit>
|
#include <bit>
|
||||||
#include "common/wall_clock.h"
|
#include "common/cpu_features.h"
|
||||||
#include "common/alignment.h"
|
#include "common/alignment.h"
|
||||||
#include "common/literals.h"
|
#include "common/literals.h"
|
||||||
#include "core/arm/nce/arm_nce.h"
|
#include "core/arm/nce/arm_nce.h"
|
||||||
|
|
|
||||||
|
|
@ -8,15 +8,13 @@
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <tuple>
|
#include <tuple>
|
||||||
|
#include "common/cpu_features.h"
|
||||||
|
#include "common/cpu_features.h"
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
#include "common/windows/timer_resolution.h"
|
#include "common/windows/timer_resolution.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef ARCHITECTURE_x86_64
|
|
||||||
#include "common/x64/cpu_wait.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include "common/settings.h"
|
#include "common/settings.h"
|
||||||
#include "core/core_timing.h"
|
#include "core/core_timing.h"
|
||||||
#include "core/hardware_properties.h"
|
#include "core/hardware_properties.h"
|
||||||
|
|
@ -47,8 +45,7 @@ struct CoreTiming::Event {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
CoreTiming::CoreTiming() : clock{Common::CreateOptimalClock()} {}
|
CoreTiming::CoreTiming() = default;
|
||||||
|
|
||||||
CoreTiming::~CoreTiming() {
|
CoreTiming::~CoreTiming() {
|
||||||
Reset();
|
Reset();
|
||||||
}
|
}
|
||||||
|
|
@ -64,31 +61,16 @@ void CoreTiming::Initialize(std::function<void()>&& on_thread_init_) {
|
||||||
Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
|
Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
|
||||||
on_thread_init();
|
on_thread_init();
|
||||||
has_started = true;
|
has_started = true;
|
||||||
|
|
||||||
|
// base frequency in MHz: 1ns (10^-9) = 1GHz (10^9)
|
||||||
while (!stop_token.stop_requested()) {
|
while (!stop_token.stop_requested()) {
|
||||||
while (!paused && !stop_token.stop_requested()) {
|
while (!paused && !stop_token.stop_requested()) {
|
||||||
paused_set = false;
|
paused_set = false;
|
||||||
if (auto const next_time = Advance(); next_time) {
|
if (auto const next_time = Advance(); next_time) {
|
||||||
// There are more events left in the queue, wait until the next event.
|
// There are more events left in the queue, wait until the next event.
|
||||||
auto wait_time = *next_time - GetGlobalTimeNs().count();
|
auto const wait_time = *next_time - GetGlobalTimeNs().count();
|
||||||
if (wait_time > 0) {
|
if (wait_time > 0) {
|
||||||
#ifdef _WIN32
|
|
||||||
while (!paused && !event.IsSet() && wait_time > 0) {
|
|
||||||
wait_time = *next_time - GetGlobalTimeNs().count();
|
|
||||||
if (wait_time >= timer_resolution_ns) {
|
|
||||||
Common::Windows::SleepForOneTick();
|
|
||||||
} else {
|
|
||||||
#ifdef ARCHITECTURE_x86_64
|
|
||||||
Common::X64::MicroSleep();
|
|
||||||
#else
|
|
||||||
std::this_thread::yield();
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (event.IsSet())
|
|
||||||
event.Reset();
|
|
||||||
#else
|
|
||||||
event.WaitFor(std::chrono::nanoseconds(wait_time));
|
event.WaitFor(std::chrono::nanoseconds(wait_time));
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Queue is empty, wait until another event is scheduled and signals us to
|
// Queue is empty, wait until another event is scheduled and signals us to
|
||||||
|
|
@ -226,7 +208,7 @@ void CoreTiming::ResetTicks() {
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 CoreTiming::GetClockTicks() const {
|
u64 CoreTiming::GetClockTicks() const {
|
||||||
u64 fres = is_multicore ? clock.GetCNTPCT() : Common::WallClock::CPUTickToCNTPCT(cpu_ticks);
|
u64 fres = is_multicore ? Common::g_wall_clock.GetCNTPCT() : Common::WallClock::CPUTickToCNTPCT(cpu_ticks);
|
||||||
if (auto const overclock = Settings::values.fast_cpu_time.GetValue(); overclock != Settings::CpuClock::Off) {
|
if (auto const overclock = Settings::values.fast_cpu_time.GetValue(); overclock != Settings::CpuClock::Off) {
|
||||||
fres = u64(f64(fres) * (1.7 + 0.3 * u32(overclock)));
|
fres = u64(f64(fres) * (1.7 + 0.3 * u32(overclock)));
|
||||||
}
|
}
|
||||||
|
|
@ -240,7 +222,7 @@ u64 CoreTiming::GetClockTicks() const {
|
||||||
|
|
||||||
u64 CoreTiming::GetGPUTicks() const {
|
u64 CoreTiming::GetGPUTicks() const {
|
||||||
return is_multicore
|
return is_multicore
|
||||||
? clock.GetGPUTick()
|
? Common::g_wall_clock.GetGPUTick()
|
||||||
: Common::WallClock::CPUTickToGPUTick(cpu_ticks);
|
: Common::WallClock::CPUTickToGPUTick(cpu_ticks);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -317,14 +299,14 @@ void CoreTiming::Reset() {
|
||||||
/// @brief Returns current time in nanoseconds.
|
/// @brief Returns current time in nanoseconds.
|
||||||
std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const noexcept {
|
std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const noexcept {
|
||||||
return is_multicore
|
return is_multicore
|
||||||
? clock.GetTimeNS()
|
? Common::g_wall_clock.GetTimeNS()
|
||||||
: std::chrono::nanoseconds{Common::WallClock::CPUTickToNS(cpu_ticks)};
|
: std::chrono::nanoseconds{Common::WallClock::CPUTickToNS(cpu_ticks)};
|
||||||
}
|
}
|
||||||
|
|
||||||
/// @brief Returns current time in microseconds.
|
/// @brief Returns current time in microseconds.
|
||||||
std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const noexcept {
|
std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const noexcept {
|
||||||
return is_multicore
|
return is_multicore
|
||||||
? clock.GetTimeUS()
|
? Common::g_wall_clock.GetTimeUS()
|
||||||
: std::chrono::microseconds{Common::WallClock::CPUTickToUS(cpu_ticks)};
|
: std::chrono::microseconds{Common::WallClock::CPUTickToUS(cpu_ticks)};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -19,7 +19,7 @@
|
||||||
|
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "common/thread.h"
|
#include "common/thread.h"
|
||||||
#include "common/wall_clock.h"
|
#include "common/cpu_features.h"
|
||||||
|
|
||||||
namespace Core::Timing {
|
namespace Core::Timing {
|
||||||
|
|
||||||
|
|
@ -142,37 +142,28 @@ public:
|
||||||
|
|
||||||
void Reset();
|
void Reset();
|
||||||
|
|
||||||
Common::WallClock clock;
|
using heap_t = boost::heap::fibonacci_heap<CoreTiming::Event, boost::heap::compare<std::greater<>>>;
|
||||||
|
heap_t event_queue;
|
||||||
s64 global_timer = 0;
|
s64 global_timer = 0;
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
s64 timer_resolution_ns;
|
s64 timer_resolution_ns;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
using heap_t =
|
|
||||||
boost::heap::fibonacci_heap<CoreTiming::Event, boost::heap::compare<std::greater<>>>;
|
|
||||||
|
|
||||||
heap_t event_queue;
|
|
||||||
u64 event_fifo_id = 0;
|
u64 event_fifo_id = 0;
|
||||||
|
s64 pause_end_time{};
|
||||||
|
/// Cycle timing
|
||||||
|
u64 cpu_ticks{};
|
||||||
|
s64 downcount{};
|
||||||
Common::Event event{};
|
Common::Event event{};
|
||||||
Common::Event pause_event{};
|
Common::Event pause_event{};
|
||||||
|
std::function<void()> on_thread_init{};
|
||||||
|
std::jthread timer_thread;
|
||||||
mutable std::mutex basic_lock;
|
mutable std::mutex basic_lock;
|
||||||
std::mutex advance_lock;
|
std::mutex advance_lock;
|
||||||
std::jthread timer_thread;
|
|
||||||
std::atomic<bool> paused{};
|
std::atomic<bool> paused{};
|
||||||
std::atomic<bool> paused_set{};
|
std::atomic<bool> paused_set{};
|
||||||
std::atomic<bool> wait_set{};
|
std::atomic<bool> wait_set{};
|
||||||
std::atomic<bool> has_started{};
|
std::atomic<bool> has_started{};
|
||||||
std::function<void()> on_thread_init{};
|
|
||||||
|
|
||||||
bool is_multicore{};
|
bool is_multicore{};
|
||||||
s64 pause_end_time{};
|
|
||||||
|
|
||||||
/// Cycle timing
|
|
||||||
u64 cpu_ticks{};
|
|
||||||
s64 downcount{};
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Creates a core timing event with the given name and callback.
|
/// Creates a core timing event with the given name and callback.
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,7 @@
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/logging.h"
|
#include "common/logging.h"
|
||||||
#include "common/settings.h"
|
#include "common/settings.h"
|
||||||
|
#include "common/cpu_features.h"
|
||||||
#include "core/hle/kernel/k_event.h"
|
#include "core/hle/kernel/k_event.h"
|
||||||
#include "core/hle/kernel/k_readable_event.h"
|
#include "core/hle/kernel/k_readable_event.h"
|
||||||
#include "core/hle/kernel/kernel.h"
|
#include "core/hle/kernel/kernel.h"
|
||||||
|
|
@ -28,7 +29,6 @@ BufferQueueProducer::BufferQueueProducer(Service::KernelHelpers::ServiceContext&
|
||||||
Service::Nvidia::NvCore::NvMap& nvmap_)
|
Service::Nvidia::NvCore::NvMap& nvmap_)
|
||||||
: service_context{service_context_}, core{std::move(buffer_queue_core_)}
|
: service_context{service_context_}, core{std::move(buffer_queue_core_)}
|
||||||
, slots(core->slots)
|
, slots(core->slots)
|
||||||
, clock{Common::CreateOptimalClock()}
|
|
||||||
, nvmap(nvmap_)
|
, nvmap(nvmap_)
|
||||||
{
|
{
|
||||||
buffer_wait_event = service_context.CreateEvent("BufferQueue:WaitEvent");
|
buffer_wait_event = service_context.CreateEvent("BufferQueue:WaitEvent");
|
||||||
|
|
@ -488,7 +488,7 @@ Status BufferQueueProducer::QueueBuffer(s32 slot, const QueueBufferInput& input,
|
||||||
slots[slot].buffer_state = BufferState::Queued;
|
slots[slot].buffer_state = BufferState::Queued;
|
||||||
slots[slot].frame_number = core->frame_counter;
|
slots[slot].frame_number = core->frame_counter;
|
||||||
slots[slot].queue_time = timestamp;
|
slots[slot].queue_time = timestamp;
|
||||||
slots[slot].presentation_time = clock.GetTimeNS().count();
|
slots[slot].presentation_time = Common::g_wall_clock.GetTimeNS().count();
|
||||||
slots[slot].fence = fence;
|
slots[slot].fence = fence;
|
||||||
|
|
||||||
item.slot = slot;
|
item.slot = slot;
|
||||||
|
|
|
||||||
|
|
@ -14,7 +14,7 @@
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
|
|
||||||
#include "common/common_funcs.h"
|
#include "common/common_funcs.h"
|
||||||
#include "common/wall_clock.h"
|
#include "common/cpu_features.h"
|
||||||
#include "core/hle/service/nvdrv/nvdata.h"
|
#include "core/hle/service/nvdrv/nvdata.h"
|
||||||
#include "core/hle/service/nvnflinger/binder.h"
|
#include "core/hle/service/nvnflinger/binder.h"
|
||||||
#include "core/hle/service/nvnflinger/buffer_queue_defs.h"
|
#include "core/hle/service/nvnflinger/buffer_queue_defs.h"
|
||||||
|
|
@ -89,7 +89,6 @@ private:
|
||||||
s32 next_callback_ticket{};
|
s32 next_callback_ticket{};
|
||||||
s32 current_callback_ticket{};
|
s32 current_callback_ticket{};
|
||||||
std::condition_variable_any callback_condition;
|
std::condition_variable_any callback_condition;
|
||||||
Common::WallClock clock;
|
|
||||||
Service::Nvidia::NvCore::NvMap& nvmap;
|
Service::Nvidia::NvCore::NvMap& nvmap;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
||||||
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||||
|
|
@ -13,7 +13,7 @@
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "common/intrusive_list.h"
|
#include "common/intrusive_list.h"
|
||||||
#include "common/uuid.h"
|
#include "common/uuid.h"
|
||||||
#include "common/wall_clock.h"
|
#include "common/cpu_features.h"
|
||||||
#include "core/hle/kernel/k_event.h"
|
#include "core/hle/kernel/k_event.h"
|
||||||
#include "core/hle/service/kernel_helpers.h"
|
#include "core/hle/service/kernel_helpers.h"
|
||||||
#include "core/hle/service/psc/time/errors.h"
|
#include "core/hle/service/psc/time/errors.h"
|
||||||
|
|
|
||||||
|
|
@ -13,12 +13,9 @@
|
||||||
#include "common/fs/path_util.h"
|
#include "common/fs/path_util.h"
|
||||||
#include "common/logging.h"
|
#include "common/logging.h"
|
||||||
#include "common/scm_rev.h"
|
#include "common/scm_rev.h"
|
||||||
|
#include "common/cpu_features.h"
|
||||||
#include "core/memory.h"
|
#include "core/memory.h"
|
||||||
|
|
||||||
#ifdef ARCHITECTURE_x86_64
|
|
||||||
#include "common/x64/cpu_detect.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <QGuiApplication>
|
#include <QGuiApplication>
|
||||||
#include <QStringLiteral>
|
#include <QStringLiteral>
|
||||||
#include "core/frontend/emu_window.h"
|
#include "core/frontend/emu_window.h"
|
||||||
|
|
@ -214,7 +211,7 @@ void Init(QWidget* root) {
|
||||||
LOG_INFO(Frontend, "Eden Version: {}", yuzu_build_version);
|
LOG_INFO(Frontend, "Eden Version: {}", yuzu_build_version);
|
||||||
LogRuntimes();
|
LogRuntimes();
|
||||||
#ifdef ARCHITECTURE_x86_64
|
#ifdef ARCHITECTURE_x86_64
|
||||||
const auto& caps = Common::GetCPUCaps();
|
const auto& caps = Common::g_cpu_caps;
|
||||||
std::string cpu_string = caps.cpu_string;
|
std::string cpu_string = caps.cpu_string;
|
||||||
if (caps.avx || caps.avx2 || caps.avx512f) {
|
if (caps.avx || caps.avx2 || caps.avx512f) {
|
||||||
cpu_string += " | AVX";
|
cpu_string += " | AVX";
|
||||||
|
|
|
||||||
|
|
@ -39,7 +39,7 @@ extern "C" {
|
||||||
#include "video_core/textures/decoders.h"
|
#include "video_core/textures/decoders.h"
|
||||||
|
|
||||||
#if defined(ARCHITECTURE_x86_64)
|
#if defined(ARCHITECTURE_x86_64)
|
||||||
#include "common/x64/cpu_detect.h"
|
#include "common/cpu_features.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(ARCHITECTURE_x86_64) \
|
#if defined(ARCHITECTURE_x86_64) \
|
||||||
|
|
@ -55,9 +55,8 @@ namespace Tegra::Host1x {
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
static bool HasSSE41() {
|
static bool HasSSE41() {
|
||||||
#if defined(ARCHITECTURE_x86_64)
|
#ifdef ARCHITECTURE_x86_64
|
||||||
static bool has_sse41 = Common::GetCPUCaps().sse4_1;
|
return Common::g_cpu_caps.sse4_1;
|
||||||
return has_sse41;
|
|
||||||
#else
|
#else
|
||||||
return false;
|
return false;
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -254,11 +254,12 @@ std::optional<u64> GenericEnvironment::TryFindSize() {
|
||||||
static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL;
|
static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL;
|
||||||
static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL;
|
static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL;
|
||||||
|
|
||||||
|
code.resize(MAXIMUM_SIZE / INST_SIZE);
|
||||||
|
|
||||||
GPUVAddr guest_addr{program_base + start_address};
|
GPUVAddr guest_addr{program_base + start_address};
|
||||||
size_t offset{0};
|
size_t offset{0};
|
||||||
size_t size{BLOCK_SIZE};
|
size_t size{BLOCK_SIZE};
|
||||||
while (size <= MAXIMUM_SIZE) {
|
while (size <= MAXIMUM_SIZE) {
|
||||||
code.resize(size / INST_SIZE);
|
|
||||||
u64* const data = code.data() + offset / INST_SIZE;
|
u64* const data = code.data() + offset / INST_SIZE;
|
||||||
gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE);
|
gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE);
|
||||||
for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) {
|
for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) {
|
||||||
|
|
|
||||||
|
|
@ -124,7 +124,7 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
|
||||||
#include "common/string_util.h"
|
#include "common/string_util.h"
|
||||||
|
|
||||||
#ifdef ARCHITECTURE_x86_64
|
#ifdef ARCHITECTURE_x86_64
|
||||||
#include "common/x64/cpu_detect.h"
|
#include "common/cpu_features.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Core //
|
// Core //
|
||||||
|
|
@ -3119,7 +3119,7 @@ void MainWindow::OnMenuReportCompatibility() {
|
||||||
tr("Compatibility list reporting is currently disabled. Check back later!"));
|
tr("Compatibility list reporting is currently disabled. Check back later!"));
|
||||||
|
|
||||||
// #if defined(ARCHITECTURE_x86_64) && !defined(__APPLE__)
|
// #if defined(ARCHITECTURE_x86_64) && !defined(__APPLE__)
|
||||||
// const auto& caps = Common::GetCPUCaps();
|
// const auto& caps = g_cpu_caps;
|
||||||
// const bool has_fma = caps.fma;
|
// const bool has_fma = caps.fma;
|
||||||
// const auto processor_count = std::thread::hardware_concurrency();
|
// const auto processor_count = std::thread::hardware_concurrency();
|
||||||
// const bool has_4threads = processor_count == 0 || processor_count >= 4;
|
// const bool has_4threads = processor_count == 0 || processor_count >= 4;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue