From e086c112e04ff255ee91f3634b10204557207a73 Mon Sep 17 00:00:00 2001 From: lizzie Date: Sun, 24 May 2026 01:26:46 +0000 Subject: [PATCH] coalesced cpu caps + wall clock timer --- src/common/CMakeLists.txt | 7 +- .../{x64/cpu_detect.cpp => cpu_features.cpp} | 304 ++++++++++++++---- src/common/{wall_clock.h => cpu_features.h} | 78 ++++- src/common/thread.cpp | 16 +- src/common/wall_clock.cpp | 195 ----------- src/common/x64/cpu_detect.h | 79 ----- src/core/arm/nce/patcher.cpp | 2 +- src/core/core_timing.cpp | 14 +- src/core/core_timing.h | 27 +- .../nvnflinger/buffer_queue_producer.cpp | 4 +- .../nvnflinger/buffer_queue_producer.h | 3 +- src/core/hle/service/psc/time/common.h | 4 +- src/qt_common/qt_common.cpp | 5 +- src/video_core/host1x/vic.cpp | 2 +- src/yuzu/main_window.cpp | 2 +- 15 files changed, 362 insertions(+), 380 deletions(-) rename src/common/{x64/cpu_detect.cpp => cpu_features.cpp} (53%) rename src/common/{wall_clock.h => cpu_features.h} (67%) delete mode 100644 src/common/wall_clock.cpp delete mode 100644 src/common/x64/cpu_detect.h diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 85bb9e5292..3833b56dc3 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -141,12 +141,12 @@ add_library( vector_math.h virtual_buffer.cpp virtual_buffer.h - wall_clock.cpp - wall_clock.h zstd_compression.cpp zstd_compression.h fs/ryujinx_compat.h fs/ryujinx_compat.cpp fs/symlink.h fs/symlink.cpp + cpu_features.cpp + cpu_features.h httplib.h net/net.h net/net.cpp) @@ -180,8 +180,7 @@ endif() if(ARCHITECTURE_x86_64) target_sources( common - PRIVATE x64/cpu_detect.cpp - x64/cpu_detect.h + PRIVATE x64/rdtsc.cpp x64/rdtsc.h x64/xbyak.h) diff --git a/src/common/x64/cpu_detect.cpp b/src/common/cpu_features.cpp similarity index 53% rename from src/common/x64/cpu_detect.cpp rename to src/common/cpu_features.cpp index cddcdba7a8..3805fa7b2d 100644 --- a/src/common/x64/cpu_detect.cpp +++ b/src/common/cpu_features.cpp @@ -13,33 +13,42 @@ 
#include #include #include -#include "common/bit_util.h" -#include "common/common_types.h" -#include "common/logging.h" -#ifdef ARCHITECTURE_x86_64 -#include "common/x64/cpu_detect.h" -#include "common/x64/rdtsc.h" -#endif - #ifdef _WIN32 #include #endif +#if defined(__DragonFly__) || defined(__FreeBSD__) +#include +#include +#endif +#include "common/steady_clock.h" +#include "common/uint128.h" +#include "common/bit_util.h" +#include "common/common_types.h" +#include "common/cpu_detect.h" + +#ifdef ARCHITECTURE_x86_64 +#include "common/x64/rdtsc.h" #ifdef _MSC_VER #include - static inline u64 xgetbv(u32 index) { return _xgetbv(index); } #else - -#if defined(__DragonFly__) || defined(__FreeBSD__) -// clang-format off -#include -#include -// clang-format on #endif +#ifdef __ANDROID__ +#include +#endif +#ifdef ARCHITECTURE_x86_64 +#include "common/x64/rdtsc.h" +#endif + +namespace Common { + +#ifdef ARCHITECTURE_x86_64 + +namespace { static inline void __cpuidex(int info[4], u32 function_id, u32 subfunction_id) { #if defined(__DragonFly__) || defined(__FreeBSD__) // Despite the name, this is just do_cpuid() with ECX as second input. @@ -64,8 +73,7 @@ static inline u64 xgetbv(u32 index) { return ((u64)edx << 32) | eax; } #endif // _MSC_VER - -namespace Common { +} CPUCaps::Manufacturer CPUCaps::ParseManufacturer(std::string_view brand_string) { if (brand_string == "GenuineIntel") { @@ -78,6 +86,47 @@ CPUCaps::Manufacturer CPUCaps::ParseManufacturer(std::string_view brand_string) return Manufacturer::Unknown; } +std::optional GetProcessorCount() { +#if defined(_WIN32) + // Get the buffer length. + DWORD length = 0; + GetLogicalProcessorInformation(nullptr, &length); + if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) { + LOG_ERROR(Frontend, "Failed to query core count."); + return std::nullopt; + } + std::vector buffer( + length / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION)); + // Now query the core count. 
+ if (!GetLogicalProcessorInformation(buffer.data(), &length)) { + LOG_ERROR(Frontend, "Failed to query core count."); + return std::nullopt; + } + return static_cast( + std::count_if(buffer.cbegin(), buffer.cend(), [](const auto& proc_info) { + return proc_info.Relationship == RelationProcessorCore; + })); +#elif defined(__unix__) + const int thread_count = std::thread::hardware_concurrency(); + std::ifstream smt("/sys/devices/system/cpu/smt/active"); + char state = '0'; + if (smt) { + smt.read(&state, sizeof(state)); + } + switch (state) { + case '0': + return thread_count; + case '1': + return thread_count / 2; + default: + return std::nullopt; + } +#else + // Shame on you + return std::nullopt; +#endif +} + /// @brief Detects the various CPU features const CPUCaps g_cpu_caps = [] { CPUCaps caps = {}; @@ -184,12 +233,14 @@ const CPUCaps g_cpu_caps = [] { // https://github.com/torvalds/linux/blob/master/tools/power/x86/turbostat/turbostat.c#L5569 // but it's easier to just estimate the TSC tick rate for these cases. if (caps.tsc_crystal_ratio_denominator) { - caps.tsc_frequency = static_cast(caps.crystal_frequency) * - caps.tsc_crystal_ratio_numerator / - caps.tsc_crystal_ratio_denominator; + caps.tsc_frequency = u64(caps.crystal_frequency) + * caps.tsc_crystal_ratio_numerator / caps.tsc_crystal_ratio_denominator; } else { caps.tsc_frequency = X64::EstimateRDTSCFrequency(); } + caps.tsc_to_ns_ratio = GetFixedPoint64Factor(NsRatio::den, caps.tsc_frequency); + } else { + caps.tsc_to_ns_ratio = 1; } if (max_std_fn >= 0x16) { @@ -201,45 +252,184 @@ const CPUCaps g_cpu_caps = [] { return caps; }(); -std::optional GetProcessorCount() { -#if defined(_WIN32) - // Get the buffer length. 
- DWORD length = 0; - GetLogicalProcessorInformation(nullptr, &length); - if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) { - LOG_ERROR(Frontend, "Failed to query core count."); - return std::nullopt; - } - std::vector buffer( - length / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION)); - // Now query the core count. - if (!GetLogicalProcessorInformation(buffer.data(), &length)) { - LOG_ERROR(Frontend, "Failed to query core count."); - return std::nullopt; - } - return static_cast( - std::count_if(buffer.cbegin(), buffer.cend(), [](const auto& proc_info) { - return proc_info.Relationship == RelationProcessorCore; - })); -#elif defined(__unix__) - const int thread_count = std::thread::hardware_concurrency(); - std::ifstream smt("/sys/devices/system/cpu/smt/active"); - char state = '0'; - if (smt) { - smt.read(&state, sizeof(state)); - } - switch (state) { - case '0': - return thread_count; - case '1': - return thread_count / 2; - default: - return std::nullopt; - } #else - // Shame on you - return std::nullopt; + +#endif + +#if defined(ARCHITECTURE_x86_64) +WallClock::WallClock(bool invariant_, u64 rdtsc_frequency_) noexcept + : rdtsc_frequency{rdtsc_frequency_} + , ns_rdtsc_factor{invariant_ ? GetFixedPoint64Factor(NsRatio::den, rdtsc_frequency_) : 0} + , us_rdtsc_factor{invariant_ ? GetFixedPoint64Factor(UsRatio::den, rdtsc_frequency_) : 0} + , ms_rdtsc_factor{invariant_ ? GetFixedPoint64Factor(MsRatio::den, rdtsc_frequency_) : 0} + , cntpct_rdtsc_factor{invariant_ ? GetFixedPoint64Factor(CNTFRQ, rdtsc_frequency_) : 0} + , gputick_rdtsc_factor{invariant_ ? 
GetFixedPoint64Factor(GPUTickFreq, rdtsc_frequency_) : 0} + , invariant{invariant_} +{} + +std::chrono::nanoseconds WallClock::GetTimeNS() const { + if (!invariant) + return std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()); + return std::chrono::nanoseconds{MultiplyHigh(GetUptime(), ns_rdtsc_factor)}; +} + +std::chrono::microseconds WallClock::GetTimeUS() const { + if (!invariant) + return std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()); + return std::chrono::microseconds{MultiplyHigh(GetUptime(), us_rdtsc_factor)}; +} + +std::chrono::milliseconds WallClock::GetTimeMS() const { + if (!invariant) + return std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()); + return std::chrono::milliseconds{MultiplyHigh(GetUptime(), ms_rdtsc_factor)}; +} + +s64 WallClock::GetCNTPCT() const { + if (!invariant) + return GetUptime() * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den; + return MultiplyHigh(GetUptime(), cntpct_rdtsc_factor); +} + +s64 WallClock::GetGPUTick() const { + if (!invariant) + return GetUptime() * NsToGPUTickRatio::num / NsToGPUTickRatio::den; + return MultiplyHigh(GetUptime(), gputick_rdtsc_factor); +} + +s64 WallClock::GetUptime() const { + if (!invariant) + return std::chrono::duration_cast(std::chrono::steady_clock::now().time_since_epoch()).count(); + return s64(Common::X64::FencedRDTSC()); +} + +bool WallClock::IsNative() const { + return invariant; +} +#elif defined(HAS_NCE) +namespace { + +[[nodiscard]] WallClock::FactorType GetFixedPointFactor(u64 num, u64 den) noexcept { + return (WallClock::FactorType(num) << 64) / den; +} + +[[nodiscard]] u64 MultiplyHigh(u64 m, WallClock::FactorType factor) noexcept { + return static_cast((m * factor) >> 64); +} + +[[nodiscard]] s64 GetHostCNTFRQ() noexcept { + u64 cntfrq_el0 = 0; +#ifdef ANDROID + std::string_view board{""}; + char buffer[PROP_VALUE_MAX]; + int len{__system_property_get("ro.product.board", buffer)}; + board = 
std::string_view(buffer, static_cast(len)); + if (board == "s5e9925") { // Exynos 2200 + cntfrq_el0 = 25600000; + } else if (board == "exynos2100") { // Exynos 2100 + cntfrq_el0 = 26000000; + } else if (board == "exynos9810") { // Exynos 9810 + cntfrq_el0 = 26000000; + } else if (board == "s5e8825") { // Exynos 1280 + cntfrq_el0 = 26000000; + } else { + asm volatile("mrs %[cntfrq_el0], cntfrq_el0" : [cntfrq_el0] "=r"(cntfrq_el0)); + } + return cntfrq_el0; +#else + asm volatile("mrs %[cntfrq_el0], cntfrq_el0" : [cntfrq_el0] "=r"(cntfrq_el0)); + return cntfrq_el0; #endif } +} // namespace + +WallClock::WallClock(bool invariant_, u64 rdtsc_frequency_) noexcept { + const u64 host_cntfrq = std::max(GetHostCNTFRQ(), 1); + ns_cntfrq_factor = GetFixedPointFactor(NsRatio::den, host_cntfrq); + us_cntfrq_factor = GetFixedPointFactor(UsRatio::den, host_cntfrq); + ms_cntfrq_factor = GetFixedPointFactor(MsRatio::den, host_cntfrq); + guest_cntfrq_factor = GetFixedPointFactor(CNTFRQ, host_cntfrq); + gputick_cntfrq_factor = GetFixedPointFactor(GPUTickFreq, host_cntfrq); +} + +std::chrono::nanoseconds WallClock::GetTimeNS() const { + return std::chrono::nanoseconds{MultiplyHigh(GetUptime(), ns_cntfrq_factor)}; +} + +std::chrono::microseconds WallClock::GetTimeUS() const { + return std::chrono::microseconds{MultiplyHigh(GetUptime(), us_cntfrq_factor)}; +} + +std::chrono::milliseconds WallClock::GetTimeMS() const { + return std::chrono::milliseconds{MultiplyHigh(GetUptime(), ms_cntfrq_factor)}; +} + +s64 WallClock::GetCNTPCT() const { + return MultiplyHigh(GetUptime(), guest_cntfrq_factor); +} + +s64 WallClock::GetGPUTick() const { + return MultiplyHigh(GetUptime(), gputick_cntfrq_factor); +} + +s64 WallClock::GetUptime() const { + s64 cntvct_el0 = 0; + asm volatile( + "dsb ish\n\t" + "mrs %[cntvct_el0], cntvct_el0\n\t" + "dsb ish\n\t" + : [cntvct_el0] "=r"(cntvct_el0) + ); + return cntvct_el0; +} + +bool WallClock::IsNative() const { + return true; +} +#else 
+WallClock::WallClock(bool invariant_, u64 rdtsc_frequency_) noexcept {} + +std::chrono::nanoseconds WallClock::GetTimeNS() const { + return std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()); +} + +std::chrono::microseconds WallClock::GetTimeUS() const { + return std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()); +} + +std::chrono::milliseconds WallClock::GetTimeMS() const { + return std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()); +} + +s64 WallClock::GetCNTPCT() const { + return GetUptime() * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den; +} + +s64 WallClock::GetGPUTick() const { + return GetUptime() * NsToGPUTickRatio::num / NsToGPUTickRatio::den; +} + +s64 WallClock::GetUptime() const { + return std::chrono::duration_cast(std::chrono::steady_clock::now().time_since_epoch()).count(); +} + +bool WallClock::IsNative() const { + return false; +} +#endif + +// Wall clock MUST be initialized AFTER g_cpu_caps +// C++ only guarantees ctor init in the order they appear in TU +const WallClock g_wall_clock = [] { +#if defined(ARCHITECTURE_x86_64) + auto const& caps = Common::g_cpu_caps; + return WallClock(caps.invariant_tsc && caps.tsc_frequency >= std::nano::den, caps.tsc_frequency); +#elif defined(HAS_NCE) + return WallClock(false, 1); +#else + return WallClock(true, 1); +#endif +}(); + } // namespace Common diff --git a/src/common/wall_clock.h b/src/common/cpu_features.h similarity index 67% rename from src/common/wall_clock.h rename to src/common/cpu_features.h index 73fea96967..b332dbde85 100644 --- a/src/common/wall_clock.h +++ b/src/common/cpu_features.h @@ -1,11 +1,14 @@ // SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later -// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2013 Dolphin Emulator Project / 
2015 Citra Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #pragma once +#include +#include #include #include #include @@ -107,11 +110,78 @@ protected: FactorType ms_cntfrq_factor; FactorType guest_cntfrq_factor; FactorType gputick_cntfrq_factor; -#else - #endif }; -[[nodiscard]] WallClock CreateOptimalClock() noexcept; +#ifdef ARCHITECTURE_x86_64 +/// x86/x64 CPU capabilities that may be detected by this module +struct CPUCaps { + enum class Manufacturer : u8 { + Unknown = 0, + Intel = 1, + AMD = 2, + Hygon = 3, + }; + + static Manufacturer ParseManufacturer(std::string_view brand_string); + + Manufacturer manufacturer; + char brand_string[13]; + + char cpu_string[48]; + + u32 base_frequency; + u32 max_frequency; + u32 bus_frequency; + + u32 tsc_crystal_ratio_denominator; + u32 tsc_crystal_ratio_numerator; + u32 crystal_frequency; + u64 tsc_frequency; // Derived from the above three values + u64 tsc_to_ns_ratio; // Derived + + bool sse3 : 1; + bool ssse3 : 1; + bool sse4_1 : 1; + bool sse4_2 : 1; + + bool avx : 1; + bool avx2 : 1; + bool avx512f : 1; + bool avx512dq : 1; + bool avx512cd : 1; + bool avx512bw : 1; + bool avx512vl : 1; + bool avx512vbmi : 1; + bool avx512bitalg : 1; + + bool aes : 1; + bool bmi1 : 1; + bool bmi2 : 1; + bool f16c : 1; + bool fma : 1; + bool gfni : 1; + bool invariant_tsc : 1; + bool lzcnt : 1; + bool monitorx : 1; + bool movbe : 1; + bool pclmulqdq : 1; + bool popcnt : 1; + bool sha : 1; + bool waitpkg : 1; +}; +#else +struct CPUCaps { + bool padding; +}; +#endif + +/// Detects CPU core count +std::optional GetProcessorCount(); + +/// @brief Global cpu caps +extern const CPUCaps g_cpu_caps; +/// @brief Global wall clock +extern const WallClock g_wall_clock; } // namespace Common diff --git a/src/common/thread.cpp b/src/common/thread.cpp index 51d22d2a7f..1a02adc4eb 100644 --- a/src/common/thread.cpp +++ b/src/common/thread.cpp @@ -38,13 +38,13 @@ #include #endif +#include "common/cpu_features.h" #ifdef 
ARCHITECTURE_x86_64 #ifdef _MSC_VER #include #else #include #endif -#include "common/x64/cpu_detect.h" #include "common/x64/rdtsc.h" #endif #include "core/core_timing.h" @@ -174,8 +174,7 @@ __attribute__((target("waitpkg,mwaitx"))) bool Event::WaitFor(const std::chrono::nanoseconds time) { auto const start = Common::X64::FencedRDTSC(); auto const& caps = Common::g_cpu_caps; - auto const ns_ratio = std::max(1, caps.tsc_frequency / 1'000); - [[maybe_unused]] auto const end = start + time.count() * ns_ratio; + [[maybe_unused]] auto const end = start + time.count() * caps.tsc_to_ns_ratio; if (caps.monitorx) { while (true) { // Armed monitor, as per manual, MWAITX must be conditional if the condition isn't satisfied @@ -232,11 +231,22 @@ bool Event::WaitFor(const std::chrono::nanoseconds time) { } #else bool Event::WaitFor(const std::chrono::nanoseconds time) { +#ifdef _WIN32 + s64 rem = s64(time.count()); //98 years + while (!is_set.load() && rem > 0) { + Common::Windows::SleepForOneTick(); + rem = s64(Common::g_wall_clock.GetGlobalTimeNs().count()) - s64(time.count()); + } + if (is_set.load()) + Reset(); + return true; +#else std::unique_lock lk{mutex}; if (!condvar.wait_for(lk, time, [this] { return is_set.load(); })) return false; is_set = false; return true; +#endif } #endif diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp deleted file mode 100644 index 0498df5c1b..0000000000 --- a/src/common/wall_clock.cpp +++ /dev/null @@ -1,195 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#include "common/steady_clock.h" -#include "common/uint128.h" -#include "common/wall_clock.h" - -#ifdef __ANDROID__ -#include -#endif -#ifdef ARCHITECTURE_x86_64 -#include "common/x64/cpu_detect.h" -#include "common/x64/rdtsc.h" -#endif - -namespace Common { - -#if 
defined(ARCHITECTURE_x86_64) -WallClock::WallClock(bool invariant_, u64 rdtsc_frequency_) noexcept - : rdtsc_frequency{rdtsc_frequency_} - , ns_rdtsc_factor{invariant_ ? GetFixedPoint64Factor(NsRatio::den, rdtsc_frequency_) : 0} - , us_rdtsc_factor{invariant_ ? GetFixedPoint64Factor(UsRatio::den, rdtsc_frequency_) : 0} - , ms_rdtsc_factor{invariant_ ? GetFixedPoint64Factor(MsRatio::den, rdtsc_frequency_) : 0} - , cntpct_rdtsc_factor{invariant_ ? GetFixedPoint64Factor(CNTFRQ, rdtsc_frequency_) : 0} - , gputick_rdtsc_factor{invariant_ ? GetFixedPoint64Factor(GPUTickFreq, rdtsc_frequency_) : 0} - , invariant{invariant_} -{} - -std::chrono::nanoseconds WallClock::GetTimeNS() const { - if (!invariant) - return std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()); - return std::chrono::nanoseconds{MultiplyHigh(GetUptime(), ns_rdtsc_factor)}; -} - -std::chrono::microseconds WallClock::GetTimeUS() const { - if (!invariant) - return std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()); - return std::chrono::microseconds{MultiplyHigh(GetUptime(), us_rdtsc_factor)}; -} - -std::chrono::milliseconds WallClock::GetTimeMS() const { - if (!invariant) - return std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()); - return std::chrono::milliseconds{MultiplyHigh(GetUptime(), ms_rdtsc_factor)}; -} - -s64 WallClock::GetCNTPCT() const { - if (!invariant) - return GetUptime() * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den; - return MultiplyHigh(GetUptime(), cntpct_rdtsc_factor); -} - -s64 WallClock::GetGPUTick() const { - if (!invariant) - return GetUptime() * NsToGPUTickRatio::num / NsToGPUTickRatio::den; - return MultiplyHigh(GetUptime(), gputick_rdtsc_factor); -} - -s64 WallClock::GetUptime() const { - if (!invariant) - return std::chrono::duration_cast(std::chrono::steady_clock::now().time_since_epoch()).count(); - return s64(Common::X64::FencedRDTSC()); -} - -bool WallClock::IsNative() const { - return 
invariant; -} -#elif defined(HAS_NCE) -namespace { - -[[nodiscard]] WallClock::FactorType GetFixedPointFactor(u64 num, u64 den) noexcept { - return (WallClock::FactorType(num) << 64) / den; -} - -[[nodiscard]] u64 MultiplyHigh(u64 m, WallClock::FactorType factor) noexcept { - return static_cast((m * factor) >> 64); -} - -[[nodiscard]] s64 GetHostCNTFRQ() noexcept { - u64 cntfrq_el0 = 0; -#ifdef ANDROID - std::string_view board{""}; - char buffer[PROP_VALUE_MAX]; - int len{__system_property_get("ro.product.board", buffer)}; - board = std::string_view(buffer, static_cast(len)); - if (board == "s5e9925") { // Exynos 2200 - cntfrq_el0 = 25600000; - } else if (board == "exynos2100") { // Exynos 2100 - cntfrq_el0 = 26000000; - } else if (board == "exynos9810") { // Exynos 9810 - cntfrq_el0 = 26000000; - } else if (board == "s5e8825") { // Exynos 1280 - cntfrq_el0 = 26000000; - } else { - asm volatile("mrs %[cntfrq_el0], cntfrq_el0" : [cntfrq_el0] "=r"(cntfrq_el0)); - } - return cntfrq_el0; -#else - asm volatile("mrs %[cntfrq_el0], cntfrq_el0" : [cntfrq_el0] "=r"(cntfrq_el0)); - return cntfrq_el0; -#endif -} - -} // namespace - -WallClock::WallClock(bool invariant_, u64 rdtsc_frequency_) noexcept { - const u64 host_cntfrq = std::max(GetHostCNTFRQ(), 1); - ns_cntfrq_factor = GetFixedPointFactor(NsRatio::den, host_cntfrq); - us_cntfrq_factor = GetFixedPointFactor(UsRatio::den, host_cntfrq); - ms_cntfrq_factor = GetFixedPointFactor(MsRatio::den, host_cntfrq); - guest_cntfrq_factor = GetFixedPointFactor(CNTFRQ, host_cntfrq); - gputick_cntfrq_factor = GetFixedPointFactor(GPUTickFreq, host_cntfrq); -} - -std::chrono::nanoseconds WallClock::GetTimeNS() const { - return std::chrono::nanoseconds{MultiplyHigh(GetUptime(), ns_cntfrq_factor)}; -} - -std::chrono::microseconds WallClock::GetTimeUS() const { - return std::chrono::microseconds{MultiplyHigh(GetUptime(), us_cntfrq_factor)}; -} - -std::chrono::milliseconds WallClock::GetTimeMS() const { - return 
std::chrono::milliseconds{MultiplyHigh(GetUptime(), ms_cntfrq_factor)}; -} - -s64 WallClock::GetCNTPCT() const { - return MultiplyHigh(GetUptime(), guest_cntfrq_factor); -} - -s64 WallClock::GetGPUTick() const { - return MultiplyHigh(GetUptime(), gputick_cntfrq_factor); -} - -s64 WallClock::GetUptime() const { - s64 cntvct_el0 = 0; - asm volatile( - "dsb ish\n\t" - "mrs %[cntvct_el0], cntvct_el0\n\t" - "dsb ish\n\t" - : [cntvct_el0] "=r"(cntvct_el0) - ); - return cntvct_el0; -} - -bool WallClock::IsNative() const { - return true; -} -#else -WallClock::WallClock(bool invariant_, u64 rdtsc_frequency_) noexcept {} - -std::chrono::nanoseconds WallClock::GetTimeNS() const { - return std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()); -} - -std::chrono::microseconds WallClock::GetTimeUS() const { - return std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()); -} - -std::chrono::milliseconds WallClock::GetTimeMS() const { - return std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()); -} - -s64 WallClock::GetCNTPCT() const { - return GetUptime() * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den; -} - -s64 WallClock::GetGPUTick() const { - return GetUptime() * NsToGPUTickRatio::num / NsToGPUTickRatio::den; -} - -s64 WallClock::GetUptime() const { - return std::chrono::duration_cast(std::chrono::steady_clock::now().time_since_epoch()).count(); -} - -bool WallClock::IsNative() const { - return false; -} -#endif - -WallClock CreateOptimalClock() noexcept { -#if defined(ARCHITECTURE_x86_64) - auto const& caps = Common::g_cpu_caps; - return WallClock(caps.invariant_tsc && caps.tsc_frequency >= std::nano::den, caps.tsc_frequency); -#elif defined(HAS_NCE) - return WallClock(false, 1); -#else - return WallClock(true, 1); -#endif -} - -} // namespace Common diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h deleted file mode 100644 index bf60be4ef2..0000000000 --- 
a/src/common/x64/cpu_detect.h +++ /dev/null @@ -1,79 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project -// SPDX-FileCopyrightText: Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include -#include -#include "common/common_types.h" - -namespace Common { - -/// x86/x64 CPU capabilities that may be detected by this module -struct CPUCaps { - - enum class Manufacturer : u8 { - Unknown = 0, - Intel = 1, - AMD = 2, - Hygon = 3, - }; - - static Manufacturer ParseManufacturer(std::string_view brand_string); - - Manufacturer manufacturer; - char brand_string[13]; - - char cpu_string[48]; - - u32 base_frequency; - u32 max_frequency; - u32 bus_frequency; - - u32 tsc_crystal_ratio_denominator; - u32 tsc_crystal_ratio_numerator; - u32 crystal_frequency; - u64 tsc_frequency; // Derived from the above three values - - bool sse3 : 1; - bool ssse3 : 1; - bool sse4_1 : 1; - bool sse4_2 : 1; - - bool avx : 1; - bool avx2 : 1; - bool avx512f : 1; - bool avx512dq : 1; - bool avx512cd : 1; - bool avx512bw : 1; - bool avx512vl : 1; - bool avx512vbmi : 1; - bool avx512bitalg : 1; - - bool aes : 1; - bool bmi1 : 1; - bool bmi2 : 1; - bool f16c : 1; - bool fma : 1; - bool gfni : 1; - bool invariant_tsc : 1; - bool lzcnt : 1; - bool monitorx : 1; - bool movbe : 1; - bool pclmulqdq : 1; - bool popcnt : 1; - bool sha : 1; - bool waitpkg : 1; -}; - -/// @brief Global cpu caps -extern const CPUCaps g_cpu_caps; - -/// Detects CPU core count -std::optional GetProcessorCount(); - -} // namespace Common diff --git a/src/core/arm/nce/patcher.cpp b/src/core/arm/nce/patcher.cpp index bf72b207b9..79ff3b1e31 100644 --- a/src/core/arm/nce/patcher.cpp +++ b/src/core/arm/nce/patcher.cpp @@ -3,7 +3,7 @@ #include #include -#include "common/wall_clock.h" +#include "common/cpu_features.h" 
#include "common/alignment.h" #include "common/literals.h" #include "core/arm/nce/arm_nce.h" diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index 7fccada29f..3e4e7bdc68 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -8,7 +8,8 @@ #include #include #include -#include "common/x64/cpu_detect.h" +#include "common/cpu_features.h" +#include "common/cpu_features.h" #ifdef _WIN32 #include "common/windows/timer_resolution.h" @@ -44,8 +45,7 @@ struct CoreTiming::Event { } }; -CoreTiming::CoreTiming() : clock{Common::CreateOptimalClock()} {} - +CoreTiming::CoreTiming() = default; CoreTiming::~CoreTiming() { Reset(); } @@ -208,7 +208,7 @@ void CoreTiming::ResetTicks() { } u64 CoreTiming::GetClockTicks() const { - u64 fres = is_multicore ? clock.GetCNTPCT() : Common::WallClock::CPUTickToCNTPCT(cpu_ticks); + u64 fres = is_multicore ? Common::g_wall_clock.GetCNTPCT() : Common::WallClock::CPUTickToCNTPCT(cpu_ticks); if (auto const overclock = Settings::values.fast_cpu_time.GetValue(); overclock != Settings::CpuClock::Off) { fres = u64(f64(fres) * (1.7 + 0.3 * u32(overclock))); } @@ -222,7 +222,7 @@ u64 CoreTiming::GetClockTicks() const { u64 CoreTiming::GetGPUTicks() const { return is_multicore - ? clock.GetGPUTick() + ? Common::g_wall_clock.GetGPUTick() : Common::WallClock::CPUTickToGPUTick(cpu_ticks); } @@ -299,14 +299,14 @@ void CoreTiming::Reset() { /// @brief Returns current time in nanoseconds. std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const noexcept { return is_multicore - ? clock.GetTimeNS() + ? Common::g_wall_clock.GetTimeNS() : std::chrono::nanoseconds{Common::WallClock::CPUTickToNS(cpu_ticks)}; } /// @brief Returns current time in microseconds. std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const noexcept { return is_multicore - ? clock.GetTimeUS() + ? 
Common::g_wall_clock.GetTimeUS() : std::chrono::microseconds{Common::WallClock::CPUTickToUS(cpu_ticks)}; } diff --git a/src/core/core_timing.h b/src/core/core_timing.h index 5967c83b57..298fc9595d 100644 --- a/src/core/core_timing.h +++ b/src/core/core_timing.h @@ -19,7 +19,7 @@ #include "common/common_types.h" #include "common/thread.h" -#include "common/wall_clock.h" +#include "common/cpu_features.h" namespace Core::Timing { @@ -142,37 +142,28 @@ public: void Reset(); - Common::WallClock clock; - + using heap_t = boost::heap::fibonacci_heap>>; + heap_t event_queue; s64 global_timer = 0; - #ifdef _WIN32 s64 timer_resolution_ns; #endif - - using heap_t = - boost::heap::fibonacci_heap>>; - - heap_t event_queue; u64 event_fifo_id = 0; - + s64 pause_end_time{}; + /// Cycle timing + u64 cpu_ticks{}; + s64 downcount{}; Common::Event event{}; Common::Event pause_event{}; + std::function on_thread_init{}; + std::jthread timer_thread; mutable std::mutex basic_lock; std::mutex advance_lock; - std::jthread timer_thread; std::atomic paused{}; std::atomic paused_set{}; std::atomic wait_set{}; std::atomic has_started{}; - std::function on_thread_init{}; - bool is_multicore{}; - s64 pause_end_time{}; - - /// Cycle timing - u64 cpu_ticks{}; - s64 downcount{}; }; /// Creates a core timing event with the given name and callback. 
diff --git a/src/core/hle/service/nvnflinger/buffer_queue_producer.cpp b/src/core/hle/service/nvnflinger/buffer_queue_producer.cpp index d36b30a2d2..5bd53fb99e 100644 --- a/src/core/hle/service/nvnflinger/buffer_queue_producer.cpp +++ b/src/core/hle/service/nvnflinger/buffer_queue_producer.cpp @@ -10,6 +10,7 @@ #include "common/assert.h" #include "common/logging.h" #include "common/settings.h" +#include "common/cpu_features.h" #include "core/hle/kernel/k_event.h" #include "core/hle/kernel/k_readable_event.h" #include "core/hle/kernel/kernel.h" @@ -28,7 +29,6 @@ BufferQueueProducer::BufferQueueProducer(Service::KernelHelpers::ServiceContext& Service::Nvidia::NvCore::NvMap& nvmap_) : service_context{service_context_}, core{std::move(buffer_queue_core_)} , slots(core->slots) - , clock{Common::CreateOptimalClock()} , nvmap(nvmap_) { buffer_wait_event = service_context.CreateEvent("BufferQueue:WaitEvent"); @@ -488,7 +488,7 @@ Status BufferQueueProducer::QueueBuffer(s32 slot, const QueueBufferInput& input, slots[slot].buffer_state = BufferState::Queued; slots[slot].frame_number = core->frame_counter; slots[slot].queue_time = timestamp; - slots[slot].presentation_time = clock.GetTimeNS().count(); + slots[slot].presentation_time = Common::g_wall_clock.GetTimeNS().count(); slots[slot].fence = fence; item.slot = slot; diff --git a/src/core/hle/service/nvnflinger/buffer_queue_producer.h b/src/core/hle/service/nvnflinger/buffer_queue_producer.h index 51c0801a0a..697de0ac9b 100644 --- a/src/core/hle/service/nvnflinger/buffer_queue_producer.h +++ b/src/core/hle/service/nvnflinger/buffer_queue_producer.h @@ -14,7 +14,7 @@ #include #include "common/common_funcs.h" -#include "common/wall_clock.h" +#include "common/cpu_features.h" #include "core/hle/service/nvdrv/nvdata.h" #include "core/hle/service/nvnflinger/binder.h" #include "core/hle/service/nvnflinger/buffer_queue_defs.h" @@ -89,7 +89,6 @@ private: s32 next_callback_ticket{}; s32 current_callback_ticket{}; 
std::condition_variable_any callback_condition; - Common::WallClock clock; Service::Nvidia::NvCore::NvMap& nvmap; }; diff --git a/src/core/hle/service/psc/time/common.h b/src/core/hle/service/psc/time/common.h index 0ad2ed51f8..36a90cd828 100644 --- a/src/core/hle/service/psc/time/common.h +++ b/src/core/hle/service/psc/time/common.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project @@ -13,7 +13,7 @@ #include "common/common_types.h" #include "common/intrusive_list.h" #include "common/uuid.h" -#include "common/wall_clock.h" +#include "common/cpu_features.h" #include "core/hle/kernel/k_event.h" #include "core/hle/service/kernel_helpers.h" #include "core/hle/service/psc/time/errors.h" diff --git a/src/qt_common/qt_common.cpp b/src/qt_common/qt_common.cpp index 216551835d..7550e931af 100644 --- a/src/qt_common/qt_common.cpp +++ b/src/qt_common/qt_common.cpp @@ -13,12 +13,9 @@ #include "common/fs/path_util.h" #include "common/logging.h" #include "common/scm_rev.h" +#include "common/cpu_features.h" #include "core/memory.h" -#ifdef ARCHITECTURE_x86_64 -#include "common/x64/cpu_detect.h" -#endif - #include #include #include "core/frontend/emu_window.h" diff --git a/src/video_core/host1x/vic.cpp b/src/video_core/host1x/vic.cpp index c808489214..bafbb0ed25 100644 --- a/src/video_core/host1x/vic.cpp +++ b/src/video_core/host1x/vic.cpp @@ -39,7 +39,7 @@ extern "C" { #include "video_core/textures/decoders.h" #if defined(ARCHITECTURE_x86_64) -#include "common/x64/cpu_detect.h" +#include "common/cpu_features.h" #endif #if defined(ARCHITECTURE_x86_64) \ diff --git a/src/yuzu/main_window.cpp b/src/yuzu/main_window.cpp index 494fe657b7..8c362d229b 100644 --- a/src/yuzu/main_window.cpp +++ b/src/yuzu/main_window.cpp @@ -124,7 +124,7 @@ static FileSys::VirtualFile 
VfsDirectoryCreateFileWrapper(const FileSys::Virtual #include "common/string_util.h" #ifdef ARCHITECTURE_x86_64 -#include "common/x64/cpu_detect.h" +#include "common/cpu_features.h" #endif // Core //