diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index c5740bfb83..5896ae4d1f 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -141,12 +141,12 @@ add_library( vector_math.h virtual_buffer.cpp virtual_buffer.h - wall_clock.cpp - wall_clock.h zstd_compression.cpp zstd_compression.h fs/ryujinx_compat.h fs/ryujinx_compat.cpp fs/symlink.h fs/symlink.cpp + cpu_features.cpp + cpu_features.h httplib.h net/net.h net/net.cpp) @@ -180,10 +180,7 @@ endif() if(ARCHITECTURE_x86_64) target_sources( common - PRIVATE x64/cpu_detect.cpp - x64/cpu_detect.h - x64/cpu_wait.cpp - x64/cpu_wait.h + PRIVATE x64/rdtsc.cpp x64/rdtsc.h x64/xbyak.h) @@ -234,7 +231,7 @@ if(CXX_CLANG) endif() if (BOOST_NO_HEADERS) - target_link_libraries(common PUBLIC Boost::algorithm Boost::icl Boost::pool) + target_link_libraries(common PUBLIC Boost::algorithm Boost::heap Boost::icl Boost::pool) else() target_link_libraries(common PUBLIC Boost::headers) endif() diff --git a/src/common/x64/cpu_detect.cpp b/src/common/cpu_features.cpp similarity index 50% rename from src/common/x64/cpu_detect.cpp rename to src/common/cpu_features.cpp index d613954eb5..2f2fae3a77 100644 --- a/src/common/x64/cpu_detect.cpp +++ b/src/common/cpu_features.cpp @@ -10,34 +10,32 @@ #include #include #include +#include #include #include #include -#include "common/bit_util.h" -#include "common/common_types.h" -#include "common/logging.h" -#include "common/x64/cpu_detect.h" -#include "common/x64/rdtsc.h" - #ifdef _WIN32 #include -#endif - -#ifdef _MSC_VER -#include - -static inline u64 xgetbv(u32 index) { - return _xgetbv(index); -} -#else - -#if defined(__DragonFly__) || defined(__FreeBSD__) -// clang-format off +#elif defined(__DragonFly__) || defined(__FreeBSD__) #include #include -// clang-format on +#elif defined(__ANDROID__) +#include #endif +#include "common/steady_clock.h" +#include "common/uint128.h" +#include "common/bit_util.h" +#include "common/common_types.h" +#include 
"common/cpu_features.h" +#include "common/logging.h" + +#ifdef ARCHITECTURE_x86_64 +#include "common/x64/rdtsc.h" +#ifdef _MSC_VER +#include +static inline u64 xgetbv(u32 index) { return _xgetbv(index); } +#else static inline void __cpuidex(int info[4], u32 function_id, u32 subfunction_id) { #if defined(__DragonFly__) || defined(__FreeBSD__) // Despite the name, this is just do_cpuid() with ECX as second input. @@ -50,11 +48,7 @@ static inline void __cpuidex(int info[4], u32 function_id, u32 subfunction_id) { : "a"(function_id), "c"(subfunction_id)); #endif } - -static inline void __cpuid(int info[4], u32 function_id) { - return __cpuidex(info, function_id, 0); -} - +static inline void __cpuid(int info[4], u32 function_id) { return __cpuidex(info, function_id, 0); } #define _XCR_XFEATURE_ENABLED_MASK 0 static inline u64 xgetbv(u32 index) { u32 eax, edx; @@ -62,9 +56,10 @@ static inline u64 xgetbv(u32 index) { return ((u64)edx << 32) | eax; } #endif // _MSC_VER +#endif namespace Common { - +#ifdef ARCHITECTURE_x86_64 CPUCaps::Manufacturer CPUCaps::ParseManufacturer(std::string_view brand_string) { if (brand_string == "GenuineIntel") { return Manufacturer::Intel; @@ -76,13 +71,53 @@ CPUCaps::Manufacturer CPUCaps::ParseManufacturer(std::string_view brand_string) return Manufacturer::Unknown; } -// Detects the various CPU features -static CPUCaps Detect() { +std::optional GetProcessorCount() { +#if defined(_WIN32) + // Get the buffer length. + DWORD length = 0; + GetLogicalProcessorInformation(nullptr, &length); + if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) { + LOG_ERROR(Frontend, "Failed to query core count."); + return std::nullopt; + } + std::vector buffer( + length / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION)); + // Now query the core count. 
+ if (!GetLogicalProcessorInformation(buffer.data(), &length)) { + LOG_ERROR(Frontend, "Failed to query core count."); + return std::nullopt; + } + return static_cast( + std::count_if(buffer.cbegin(), buffer.cend(), [](const auto& proc_info) { + return proc_info.Relationship == RelationProcessorCore; + })); +#elif defined(__unix__) + const int thread_count = std::thread::hardware_concurrency(); + std::ifstream smt("/sys/devices/system/cpu/smt/active"); + char state = '0'; + if (smt) { + smt.read(&state, sizeof(state)); + } + switch (state) { + case '0': + return thread_count; + case '1': + return thread_count / 2; + default: + return std::nullopt; + } +#else + // Shame on you + return std::nullopt; +#endif +} + +/// @brief Detects the various CPU features +const CPUCaps g_cpu_caps = [] { CPUCaps caps = {}; // Assumes the CPU supports the CPUID instruction. Those that don't would likely not support // yuzu at all anyway - int cpu_id[4]; // Detect CPU's CPUID capabilities and grab manufacturer string @@ -183,9 +218,8 @@ static CPUCaps Detect() { // https://github.com/torvalds/linux/blob/master/tools/power/x86/turbostat/turbostat.c#L5569 // but it's easier to just estimate the TSC tick rate for these cases. if (caps.tsc_crystal_ratio_denominator) { - caps.tsc_frequency = static_cast(caps.crystal_frequency) * - caps.tsc_crystal_ratio_numerator / - caps.tsc_crystal_ratio_denominator; + caps.tsc_frequency = u64(caps.crystal_frequency) + * caps.tsc_crystal_ratio_numerator / caps.tsc_crystal_ratio_denominator; } else { caps.tsc_frequency = X64::EstimateRDTSCFrequency(); } @@ -197,54 +231,196 @@ static CPUCaps Detect() { caps.max_frequency = cpu_id[1]; caps.bus_frequency = cpu_id[2]; } - return caps; -} +}(); -const CPUCaps& GetCPUCaps() { - static CPUCaps caps = Detect(); - return caps; -} - -std::optional GetProcessorCount() { -#if defined(_WIN32) - // Get the buffer length. 
- DWORD length = 0; - GetLogicalProcessorInformation(nullptr, &length); - if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) { - LOG_ERROR(Frontend, "Failed to query core count."); - return std::nullopt; - } - std::vector buffer( - length / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION)); - // Now query the core count. - if (!GetLogicalProcessorInformation(buffer.data(), &length)) { - LOG_ERROR(Frontend, "Failed to query core count."); - return std::nullopt; - } - return static_cast( - std::count_if(buffer.cbegin(), buffer.cend(), [](const auto& proc_info) { - return proc_info.Relationship == RelationProcessorCore; - })); -#elif defined(__unix__) - const int thread_count = std::thread::hardware_concurrency(); - std::ifstream smt("/sys/devices/system/cpu/smt/active"); - char state = '0'; - if (smt) { - smt.read(&state, sizeof(state)); - } - switch (state) { - case '0': - return thread_count; - case '1': - return thread_count / 2; - default: - return std::nullopt; - } #else - // Shame on you - return std::nullopt; + +#endif + +#if defined(ARCHITECTURE_x86_64) +WallClock::WallClock(bool invariant_, u64 rdtsc_frequency_) noexcept + : rdtsc_frequency{rdtsc_frequency_} + , ns_rdtsc_factor{invariant_ ? GetFixedPoint64Factor(NsRatio::den, rdtsc_frequency_) : 0} + , us_rdtsc_factor{invariant_ ? GetFixedPoint64Factor(UsRatio::den, rdtsc_frequency_) : 0} + , ms_rdtsc_factor{invariant_ ? GetFixedPoint64Factor(MsRatio::den, rdtsc_frequency_) : 0} + , rdtsc_ns_factor{invariant_ ? GetFixedPoint64Factor(rdtsc_frequency_, NsRatio::den) : 1} + , cntpct_rdtsc_factor{invariant_ ? GetFixedPoint64Factor(CNTFRQ, rdtsc_frequency_) : 0} + , gputick_rdtsc_factor{invariant_ ? 
GetFixedPoint64Factor(GPUTickFreq, rdtsc_frequency_) : 0} + , invariant{invariant_} +{} + +std::chrono::nanoseconds WallClock::GetTimeNS() const { + if (!invariant) + return std::chrono::duration_cast(std::chrono::steady_clock::now().time_since_epoch()); + return std::chrono::nanoseconds{MultiplyHigh(GetUptime(), ns_rdtsc_factor)}; +} + +std::chrono::microseconds WallClock::GetTimeUS() const { + if (!invariant) + return std::chrono::duration_cast(std::chrono::steady_clock::now().time_since_epoch()); + return std::chrono::microseconds{MultiplyHigh(GetUptime(), us_rdtsc_factor)}; +} + +std::chrono::milliseconds WallClock::GetTimeMS() const { + if (!invariant) + return std::chrono::duration_cast(std::chrono::steady_clock::now().time_since_epoch()); + return std::chrono::milliseconds{MultiplyHigh(GetUptime(), ms_rdtsc_factor)}; +} + +s64 WallClock::GetCNTPCT() const { + if (!invariant) + return GetUptime() * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den; + return MultiplyHigh(GetUptime(), cntpct_rdtsc_factor); +} + +s64 WallClock::GetGPUTick() const { + if (!invariant) + return GetUptime() * NsToGPUTickRatio::num / NsToGPUTickRatio::den; + return MultiplyHigh(GetUptime(), gputick_rdtsc_factor); +} + +s64 WallClock::GetUptime() const { + if (!invariant) + return std::chrono::duration_cast(std::chrono::steady_clock::now().time_since_epoch()).count(); + return s64(Common::X64::FencedRDTSC()); +} + +bool WallClock::IsNative() const { + return invariant; +} + +u64 WallClock::NsToTicks(std::chrono::nanoseconds ns) const { + return invariant ? 
MultiplyHigh(ns.count(), rdtsc_ns_factor) : ns.count(); +} +#elif defined(HAS_NCE) +namespace { +[[nodiscard]] Common::WallClock::FactorType GetFixedPointFactor(u64 num, u64 den) noexcept { + return (Common::WallClock::FactorType(num) << 64) / den; +} +[[nodiscard]] u64 MultiplyHigh(u64 m, Common::WallClock::FactorType factor) noexcept { + return static_cast((m * factor) >> 64); +} +[[nodiscard]] s64 GetHostCNTFRQ() noexcept { + u64 cntfrq_el0 = 0; +#ifdef ANDROID + std::string_view board{""}; + char buffer[PROP_VALUE_MAX]; + int len{__system_property_get("ro.product.board", buffer)}; + board = std::string_view(buffer, static_cast(len)); + if (board == "s5e9925") { // Exynos 2200 + cntfrq_el0 = 25600000; + } else if (board == "exynos2100") { // Exynos 2100 + cntfrq_el0 = 26000000; + } else if (board == "exynos9810") { // Exynos 9810 + cntfrq_el0 = 26000000; + } else if (board == "s5e8825") { // Exynos 1280 + cntfrq_el0 = 26000000; + } else { + asm volatile("mrs %[cntfrq_el0], cntfrq_el0" : [cntfrq_el0] "=r"(cntfrq_el0)); + } + return cntfrq_el0; +#else + asm volatile("mrs %[cntfrq_el0], cntfrq_el0" : [cntfrq_el0] "=r"(cntfrq_el0)); + return cntfrq_el0; #endif } +} // namespace +WallClock::WallClock(bool invariant_, u64 rdtsc_frequency_) noexcept { + const u64 host_cntfrq = std::max(GetHostCNTFRQ(), 1); + ns_cntfrq_factor = GetFixedPointFactor(NsRatio::den, host_cntfrq); + us_cntfrq_factor = GetFixedPointFactor(UsRatio::den, host_cntfrq); + ms_cntfrq_factor = GetFixedPointFactor(MsRatio::den, host_cntfrq); + cntfrq_ns_factor = GetFixedPointFactor(host_cntfrq, NsRatio::den); + guest_cntfrq_factor = GetFixedPointFactor(CNTFRQ, host_cntfrq); + gputick_cntfrq_factor = GetFixedPointFactor(GPUTickFreq, host_cntfrq); +} + +std::chrono::nanoseconds WallClock::GetTimeNS() const { + return std::chrono::nanoseconds{MultiplyHigh(GetUptime(), ns_cntfrq_factor)}; +} + +std::chrono::microseconds WallClock::GetTimeUS() const { + return 
std::chrono::microseconds{MultiplyHigh(GetUptime(), us_cntfrq_factor)}; +} + +std::chrono::milliseconds WallClock::GetTimeMS() const { + return std::chrono::milliseconds{MultiplyHigh(GetUptime(), ms_cntfrq_factor)}; +} + +s64 WallClock::GetCNTPCT() const { + return MultiplyHigh(GetUptime(), guest_cntfrq_factor); +} + +s64 WallClock::GetGPUTick() const { + return MultiplyHigh(GetUptime(), gputick_cntfrq_factor); +} + +s64 WallClock::GetUptime() const { + s64 cntvct_el0 = 0; + asm volatile( + "dsb ish\n\t" + "mrs %[cntvct_el0], cntvct_el0\n\t" + "dsb ish\n\t" + : [cntvct_el0] "=r"(cntvct_el0) + ); + return cntvct_el0; +} + +bool WallClock::IsNative() const { + return true; +} + +u64 WallClock::NsToTicks(std::chrono::nanoseconds ns) const { + return MultiplyHigh(ns.count(), cntfrq_ns_factor); +} +#else +WallClock::WallClock(bool invariant_, u64 rdtsc_frequency_) noexcept {} + +std::chrono::nanoseconds WallClock::GetTimeNS() const { + return std::chrono::duration_cast(std::chrono::steady_clock::now().time_since_epoch()); +} + +std::chrono::microseconds WallClock::GetTimeUS() const { + return std::chrono::duration_cast(std::chrono::steady_clock::now().time_since_epoch()); +} + +std::chrono::milliseconds WallClock::GetTimeMS() const { + return std::chrono::duration_cast(std::chrono::steady_clock::now().time_since_epoch()); +} + +s64 WallClock::GetCNTPCT() const { + return GetUptime() * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den; +} + +s64 WallClock::GetGPUTick() const { + return GetUptime() * NsToGPUTickRatio::num / NsToGPUTickRatio::den; +} + +s64 WallClock::GetUptime() const { + return std::chrono::duration_cast(std::chrono::steady_clock::now().time_since_epoch()).count(); +} + +bool WallClock::IsNative() const { + return false; +} + +u64 WallClock::NsToTicks(std::chrono::nanoseconds ns) const { + return ns.count(); +} +#endif + +// Wall clock MUST be initialized AFTER g_cpu_caps +// C++ only guarantees ctor init in the order they appear in TU +const WallClock 
g_wall_clock = [] { +#if defined(ARCHITECTURE_x86_64) + auto const& caps = Common::g_cpu_caps; + return WallClock(caps.invariant_tsc && caps.tsc_frequency >= std::nano::den, caps.tsc_frequency); +#elif defined(HAS_NCE) + return WallClock(false, 1); +#else + return WallClock(true, 1); +#endif +}(); } // namespace Common diff --git a/src/common/wall_clock.h b/src/common/cpu_features.h similarity index 64% rename from src/common/wall_clock.h rename to src/common/cpu_features.h index 6a6d56a610..7dcb550f8f 100644 --- a/src/common/wall_clock.h +++ b/src/common/cpu_features.h @@ -1,11 +1,14 @@ // SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later -// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #pragma once +#include +#include #include #include #include @@ -43,6 +46,9 @@ public: /// @returns Whether the clock directly uses the host's hardware clock. 
bool IsNative() const; + // @returns Nanoseconds to native ticks + u64 NsToTicks(std::chrono::nanoseconds ns) const; + static inline u64 NSToCNTPCT(u64 ns) { return ns * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den; } @@ -69,7 +75,6 @@ public: return cpu_tick * CPUTickToGPUTickRatio::num / CPUTickToGPUTickRatio::den; } -protected: using NsRatio = std::nano; using UsRatio = std::micro; using MsRatio = std::milli; @@ -87,31 +92,96 @@ protected: using CPUTickToGPUTickRatio = std::ratio; #if defined(ARCHITECTURE_x86_64) - bool invariant; u64 rdtsc_frequency; u64 ns_rdtsc_factor; u64 us_rdtsc_factor; u64 ms_rdtsc_factor; + u64 rdtsc_ns_factor; u64 cntpct_rdtsc_factor; u64 gputick_rdtsc_factor; + bool invariant; #elif defined(HAS_NCE) -public: using FactorType = unsigned __int128; - - FactorType GetGuestCNTFRQFactor() const { + [[nodiscard]] inline FactorType GetGuestCNTFRQFactor() const { return guest_cntfrq_factor; } -protected: FactorType ns_cntfrq_factor; FactorType us_cntfrq_factor; FactorType ms_cntfrq_factor; + FactorType cntfrq_ns_factor; FactorType guest_cntfrq_factor; FactorType gputick_cntfrq_factor; -#else - #endif }; -[[nodiscard]] WallClock CreateOptimalClock() noexcept; +#ifdef ARCHITECTURE_x86_64 +/// x86/x64 CPU capabilities that may be detected by this module +struct CPUCaps { + enum class Manufacturer : u8 { + Unknown = 0, + Intel = 1, + AMD = 2, + Hygon = 3, + }; + + static Manufacturer ParseManufacturer(std::string_view brand_string); + + Manufacturer manufacturer; + char brand_string[13]; + + char cpu_string[48]; + + u32 base_frequency; + u32 max_frequency; + u32 bus_frequency; + + u32 tsc_crystal_ratio_denominator; + u32 tsc_crystal_ratio_numerator; + u32 crystal_frequency; + u64 tsc_frequency; // Derived from the above three values + + bool sse3 : 1; + bool ssse3 : 1; + bool sse4_1 : 1; + bool sse4_2 : 1; + + bool avx : 1; + bool avx2 : 1; + bool avx512f : 1; + bool avx512dq : 1; + bool avx512cd : 1; + bool avx512bw : 1; + bool avx512vl : 1; + bool 
avx512vbmi : 1; + bool avx512bitalg : 1; + + bool aes : 1; + bool bmi1 : 1; + bool bmi2 : 1; + bool f16c : 1; + bool fma : 1; + bool gfni : 1; + bool invariant_tsc : 1; + bool lzcnt : 1; + bool monitorx : 1; + bool movbe : 1; + bool pclmulqdq : 1; + bool popcnt : 1; + bool sha : 1; + bool waitpkg : 1; +}; +#else +struct CPUCaps { + bool padding; +}; +#endif + +/// Detects CPU core count +std::optional GetProcessorCount(); + +/// @brief Global cpu caps +extern const CPUCaps g_cpu_caps; +/// @brief Global wall clock +extern const WallClock g_wall_clock; } // namespace Common diff --git a/src/common/thread.cpp b/src/common/thread.cpp index f4bdb3f7c0..49f6f5cf98 100644 --- a/src/common/thread.cpp +++ b/src/common/thread.cpp @@ -4,6 +4,8 @@ // SPDX-FileCopyrightText: 2014 Citra Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include +#include #include #include @@ -18,24 +20,35 @@ #elif defined(_WIN32) #include #include "common/string_util.h" +#include "common/windows/timer_resolution.h" #else #if defined(__FreeBSD__) #include #include #include +// Compatibility with CPUset +#define cpu_set_t cpuset_t #elif defined(__DragonFly__) || defined(__OpenBSD__) || defined(__Bitrig__) #include #endif #include #include #endif + #ifndef _WIN32 #include #endif -#ifdef __FreeBSD__ -# define cpu_set_t cpuset_t +#include "common/cpu_features.h" +#ifdef ARCHITECTURE_x86_64 +#ifdef _MSC_VER +#include +#else +#include #endif +#include "common/x64/rdtsc.h" +#endif +#include "core/core_timing.h" namespace Common { @@ -144,4 +157,93 @@ void PinCurrentThreadToPerformanceCore(size_t core_id) { } } +#ifdef ARCHITECTURE_x86_64 +// On Linux and UNIX systems, a futex would nominally be used to cover the costs +// the idea is that it's intuitivelly cheaper to use a direct instruction as opposed to a full futex call +// the underlying libc++ implementation uses pthread_cond_timedwait which MAY invoke a futex +// Let's pretend the OS is too expensive to jump into, and avoid ANY 
context switches +// this should *IN THEORY* lower CPU usage while just waiting for stuff effectively +// For windows the minimal quanta resolution is about 500us, and normal CRT cond var is 1.5ms(?) +// so may as well avoid that too +// Let's just give ALL platforms the same mechanisms (almost) for when they have umonitor OR waitpkg +#ifdef __clang__ +__attribute__((target("waitpkg,mwaitx"))) +#elif defined(__GNUC__) +#pragma GCC target("waitpkg") +#pragma GCC target("mwaitx") +#endif +bool Event::WaitFor(const std::chrono::nanoseconds time) { +#ifdef _WIN32 + auto const start = Common::X64::FencedRDTSC(); + auto const& caps = Common::g_cpu_caps; + [[maybe_unused]] auto const end = start + Common::g_wall_clock.NsToTicks(time); + if (caps.monitorx) { + while (true) { + // Armed monitor, as per manual, MWAITX must be conditional if the condition isn't satisfied + // to prevent a race condition. + _mm_monitorx(reinterpret_cast(std::addressof(is_set)), 0, 0); + if (!is_set.load()) { + // RDTSC may be fenced here due to atomic load + auto const now = _rdtsc(); + if (end > now) { + u32 const cycles = std::min((std::numeric_limits::max)(), s64(end) - s64(now)); + // See here: https://github.com/torvalds/linux/blob/948a64995aca6820abefd17f1a4258f5835c5ad9/arch/x86/lib/delay.c#L93 + // MWAITX accepts a 32-bit input timer which determines the total number of cycles to wait for + // NOT THE TOTAL ABSOLUTE TSC VALUE, it's just a delta + // BIT[1] = use a timer + // Hint = 0: Use C1 state when sleepy (means slower wakeup but better savings) + _mm_mwaitx(1 << 1, 0u, cycles); + if (!is_set.load()) + return false; + } else + return false; //timeout + } + bool expected = true; + if (is_set.compare_exchange_weak(expected, false, std::memory_order_release)) + return true; + } + } else if (caps.waitpkg) { + // #UD If CPUID.7.0:ECX.WAITPKG[bit 5]=0. + while (true) { + _umonitor(std::addressof(is_set)); + if (!is_set.load() && !_umwait(0, end)) //umwait is absolute time!!! 
+ return false; + bool expected = true; + if (is_set.compare_exchange_weak(expected, false, std::memory_order_release)) + return true; + } + } else { + while (!is_set.load() && end > _rdtsc()) + Common::Windows::SleepForOneTick(); + if (is_set.load()) + Reset(); + return true; + } +#else + std::unique_lock lk{mutex}; + if (!condvar.wait_for(lk, time, [this] { return is_set.load(); })) + return false; + is_set = false; + return true; +#endif +} +#else +bool Event::WaitFor(const std::chrono::nanoseconds time) { +#ifdef _WIN32 + auto const end = Common::g_wall_clock.GetTimeNS() + time; + while (!is_set.load() && end > Common::g_wall_clock.GetTimeNS()) + Common::Windows::SleepForOneTick(); + if (is_set.load()) + Reset(); + return true; +#else + std::unique_lock lk{mutex}; + if (!condvar.wait_for(lk, time, [this] { return is_set.load(); })) + return false; + is_set = false; + return true; +#endif +} +#endif + } // namespace Common diff --git a/src/common/thread.h b/src/common/thread.h index ea6f5d6b3b..a75e342802 100644 --- a/src/common/thread.h +++ b/src/common/thread.h @@ -34,16 +34,10 @@ public: is_set = false; } - bool WaitFor(const std::chrono::nanoseconds& time) { - std::unique_lock lk{mutex}; - if (!condvar.wait_for(lk, time, [this] { return is_set.load(); })) - return false; - is_set = false; - return true; - } + bool WaitFor(const std::chrono::nanoseconds time); - template - bool WaitUntil(const std::chrono::time_point& time) { + template + bool WaitUntil(const std::chrono::time_point time) { std::unique_lock lk{mutex}; if (!condvar.wait_until(lk, time, [this] { return is_set.load(); })) return false; @@ -63,9 +57,9 @@ public: } private: + alignas(64) std::atomic is_set{false}; std::condition_variable condvar; std::mutex mutex; - std::atomic_bool is_set{false}; }; class Barrier { diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp deleted file mode 100644 index c06f9b6318..0000000000 --- a/src/common/wall_clock.cpp +++ /dev/null @@ -1,197 +0,0 @@ 
-// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#include "common/steady_clock.h" -#include "common/uint128.h" -#include "common/wall_clock.h" - -#ifdef __ANDROID__ -#include -#endif -#ifdef ARCHITECTURE_x86_64 -#include "common/x64/cpu_detect.h" -#include "common/x64/rdtsc.h" -#endif - -namespace Common { - -#if defined(ARCHITECTURE_x86_64) -WallClock::WallClock(bool invariant_, u64 rdtsc_frequency_) noexcept - : invariant{invariant_} - , rdtsc_frequency{rdtsc_frequency_} - , ns_rdtsc_factor{invariant_ ? 0 : GetFixedPoint64Factor(NsRatio::den, rdtsc_frequency_)} - , us_rdtsc_factor{invariant_ ? 0 : GetFixedPoint64Factor(UsRatio::den, rdtsc_frequency_)} - , ms_rdtsc_factor{invariant_ ? 0 : GetFixedPoint64Factor(MsRatio::den, rdtsc_frequency_)} - , cntpct_rdtsc_factor{invariant_ ? 0 : GetFixedPoint64Factor(CNTFRQ, rdtsc_frequency_)} - , gputick_rdtsc_factor{invariant_ ? 
0 : GetFixedPoint64Factor(GPUTickFreq, rdtsc_frequency_)} -{} - -std::chrono::nanoseconds WallClock::GetTimeNS() const { - if (invariant) - return std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()); - return std::chrono::nanoseconds{MultiplyHigh(GetUptime(), ns_rdtsc_factor)}; -} - -std::chrono::microseconds WallClock::GetTimeUS() const { - if (invariant) - return std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()); - return std::chrono::microseconds{MultiplyHigh(GetUptime(), us_rdtsc_factor)}; -} - -std::chrono::milliseconds WallClock::GetTimeMS() const { - if (invariant) - return std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()); - return std::chrono::milliseconds{MultiplyHigh(GetUptime(), ms_rdtsc_factor)}; -} - -s64 WallClock::GetCNTPCT() const { - if (invariant) - return GetUptime() * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den; - return MultiplyHigh(GetUptime(), cntpct_rdtsc_factor); -} - -s64 WallClock::GetGPUTick() const { - if (invariant) - return GetUptime() * NsToGPUTickRatio::num / NsToGPUTickRatio::den; - return MultiplyHigh(GetUptime(), gputick_rdtsc_factor); -} - -s64 WallClock::GetUptime() const { - if (invariant) - return std::chrono::duration_cast(std::chrono::steady_clock::now().time_since_epoch()).count(); - return s64(Common::X64::FencedRDTSC()); -} - -bool WallClock::IsNative() const { - if (invariant) - return false; - return true; -} -#elif defined(HAS_NCE) -namespace { - -[[nodiscard]] WallClock::FactorType GetFixedPointFactor(u64 num, u64 den) noexcept { - return (WallClock::FactorType(num) << 64) / den; -} - -[[nodiscard]] u64 MultiplyHigh(u64 m, WallClock::FactorType factor) noexcept { - return static_cast((m * factor) >> 64); -} - -[[nodiscard]] s64 GetHostCNTFRQ() noexcept { - u64 cntfrq_el0 = 0; -#ifdef ANDROID - std::string_view board{""}; - char buffer[PROP_VALUE_MAX]; - int len{__system_property_get("ro.product.board", buffer)}; - board = 
std::string_view(buffer, static_cast(len)); - if (board == "s5e9925") { // Exynos 2200 - cntfrq_el0 = 25600000; - } else if (board == "exynos2100") { // Exynos 2100 - cntfrq_el0 = 26000000; - } else if (board == "exynos9810") { // Exynos 9810 - cntfrq_el0 = 26000000; - } else if (board == "s5e8825") { // Exynos 1280 - cntfrq_el0 = 26000000; - } else { - asm volatile("mrs %[cntfrq_el0], cntfrq_el0" : [cntfrq_el0] "=r"(cntfrq_el0)); - } - return cntfrq_el0; -#else - asm volatile("mrs %[cntfrq_el0], cntfrq_el0" : [cntfrq_el0] "=r"(cntfrq_el0)); - return cntfrq_el0; -#endif -} - -} // namespace - -WallClock::WallClock(bool invariant_, u64 rdtsc_frequency_) noexcept { - const u64 host_cntfrq = std::max(GetHostCNTFRQ(), 1); - ns_cntfrq_factor = GetFixedPointFactor(NsRatio::den, host_cntfrq); - us_cntfrq_factor = GetFixedPointFactor(UsRatio::den, host_cntfrq); - ms_cntfrq_factor = GetFixedPointFactor(MsRatio::den, host_cntfrq); - guest_cntfrq_factor = GetFixedPointFactor(CNTFRQ, host_cntfrq); - gputick_cntfrq_factor = GetFixedPointFactor(GPUTickFreq, host_cntfrq); -} - -std::chrono::nanoseconds WallClock::GetTimeNS() const { - return std::chrono::nanoseconds{MultiplyHigh(GetUptime(), ns_cntfrq_factor)}; -} - -std::chrono::microseconds WallClock::GetTimeUS() const { - return std::chrono::microseconds{MultiplyHigh(GetUptime(), us_cntfrq_factor)}; -} - -std::chrono::milliseconds WallClock::GetTimeMS() const { - return std::chrono::milliseconds{MultiplyHigh(GetUptime(), ms_cntfrq_factor)}; -} - -s64 WallClock::GetCNTPCT() const { - return MultiplyHigh(GetUptime(), guest_cntfrq_factor); -} - -s64 WallClock::GetGPUTick() const { - return MultiplyHigh(GetUptime(), gputick_cntfrq_factor); -} - -s64 WallClock::GetUptime() const { - s64 cntvct_el0 = 0; - asm volatile( - "dsb ish\n\t" - "mrs %[cntvct_el0], cntvct_el0\n\t" - "dsb ish\n\t" - : [cntvct_el0] "=r"(cntvct_el0) - ); - return cntvct_el0; -} - -bool WallClock::IsNative() const { - return true; -} -#else 
-WallClock::WallClock(bool invariant_, u64 rdtsc_frequency_) noexcept {} - -std::chrono::nanoseconds WallClock::GetTimeNS() const { - return std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()); -} - -std::chrono::microseconds WallClock::GetTimeUS() const { - return std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()); -} - -std::chrono::milliseconds WallClock::GetTimeMS() const { - return std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()); -} - -s64 WallClock::GetCNTPCT() const { - return GetUptime() * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den; -} - -s64 WallClock::GetGPUTick() const { - return GetUptime() * NsToGPUTickRatio::num / NsToGPUTickRatio::den; -} - -s64 WallClock::GetUptime() const { - return std::chrono::duration_cast(std::chrono::steady_clock::now().time_since_epoch()).count(); -} - -bool WallClock::IsNative() const { - return false; -} -#endif - -WallClock CreateOptimalClock() noexcept { -#if defined(ARCHITECTURE_x86_64) - auto const& caps = GetCPUCaps(); - return WallClock(!(caps.invariant_tsc && caps.tsc_frequency >= std::nano::den), caps.tsc_frequency); -#elif defined(HAS_NCE) - return WallClock(false, 1); -#else - return WallClock(true, 1); -#endif -} - -} // namespace Common diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h deleted file mode 100644 index af93c36e2c..0000000000 --- a/src/common/x64/cpu_detect.h +++ /dev/null @@ -1,82 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project -// SPDX-FileCopyrightText: Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include -#include -#include "common/common_types.h" - -namespace Common { - -/// x86/x64 CPU capabilities that may be detected by this module -struct CPUCaps { - - enum 
class Manufacturer : u8 { - Unknown = 0, - Intel = 1, - AMD = 2, - Hygon = 3, - }; - - static Manufacturer ParseManufacturer(std::string_view brand_string); - - Manufacturer manufacturer; - char brand_string[13]; - - char cpu_string[48]; - - u32 base_frequency; - u32 max_frequency; - u32 bus_frequency; - - u32 tsc_crystal_ratio_denominator; - u32 tsc_crystal_ratio_numerator; - u32 crystal_frequency; - u64 tsc_frequency; // Derived from the above three values - - bool sse3 : 1; - bool ssse3 : 1; - bool sse4_1 : 1; - bool sse4_2 : 1; - - bool avx : 1; - bool avx2 : 1; - bool avx512f : 1; - bool avx512dq : 1; - bool avx512cd : 1; - bool avx512bw : 1; - bool avx512vl : 1; - bool avx512vbmi : 1; - bool avx512bitalg : 1; - - bool aes : 1; - bool bmi1 : 1; - bool bmi2 : 1; - bool f16c : 1; - bool fma : 1; - bool gfni : 1; - bool invariant_tsc : 1; - bool lzcnt : 1; - bool monitorx : 1; - bool movbe : 1; - bool pclmulqdq : 1; - bool popcnt : 1; - bool sha : 1; - bool waitpkg : 1; -}; - -/** - * Gets the supported capabilities of the host CPU - * @return Reference to a CPUCaps struct with the detected host CPU capabilities - */ -const CPUCaps& GetCPUCaps(); - -/// Detects CPU core count -std::optional GetProcessorCount(); - -} // namespace Common diff --git a/src/common/x64/cpu_wait.cpp b/src/common/x64/cpu_wait.cpp deleted file mode 100644 index 85d27161ba..0000000000 --- a/src/common/x64/cpu_wait.cpp +++ /dev/null @@ -1,75 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -#include - -#ifdef _MSC_VER -#include -#endif - -#include "common/x64/cpu_detect.h" -#include "common/x64/cpu_wait.h" -#include "common/x64/rdtsc.h" - -namespace Common::X64 { - -namespace { - -// 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. 
-// For reference: -// At 1 GHz, 100K cycles is 100us -// At 2 GHz, 100K cycles is 50us -// At 4 GHz, 100K cycles is 25us -constexpr auto PauseCycles = 100'000U; - -} // Anonymous namespace - -#if defined(_MSC_VER) && !defined(__clang__) -__forceinline static void TPAUSE() { - static constexpr auto RequestC02State = 0U; - _tpause(RequestC02State, FencedRDTSC() + PauseCycles); -} - -__forceinline static void MWAITX() { - static constexpr auto EnableWaitTimeFlag = 1U << 1; - static constexpr auto RequestC1State = 0U; - - // monitor_var should be aligned to a cache line. - alignas(64) u64 monitor_var{}; - _mm_monitorx(&monitor_var, 0, 0); - _mm_mwaitx(EnableWaitTimeFlag, RequestC1State, PauseCycles); -} -#else -static void TPAUSE() { - static constexpr auto RequestC02State = 0U; - const auto tsc = FencedRDTSC() + PauseCycles; - const auto eax = static_cast(tsc & 0xFFFFFFFF); - const auto edx = static_cast(tsc >> 32); - asm volatile("tpause %0" : : "r"(RequestC02State), "d"(edx), "a"(eax)); -} - -static void MWAITX() { - static constexpr auto EnableWaitTimeFlag = 1U << 1; - static constexpr auto RequestC1State = 0U; - - // monitor_var should be aligned to a cache line. 
- alignas(64) u64 monitor_var{}; - asm volatile("monitorx" : : "a"(&monitor_var), "c"(0), "d"(0)); - asm volatile("mwaitx" : : "a"(RequestC1State), "b"(PauseCycles), "c"(EnableWaitTimeFlag)); -} -#endif - -void MicroSleep() { - static const bool has_waitpkg = GetCPUCaps().waitpkg; - static const bool has_monitorx = GetCPUCaps().monitorx; - - if (has_waitpkg) { - TPAUSE(); - } else if (has_monitorx) { - MWAITX(); - } else { - std::this_thread::yield(); - } -} - -} // namespace Common::X64 diff --git a/src/common/x64/cpu_wait.h b/src/common/x64/cpu_wait.h deleted file mode 100644 index 472ddca815..0000000000 --- a/src/common/x64/cpu_wait.h +++ /dev/null @@ -1,10 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -#pragma once - -namespace Common::X64 { - -void MicroSleep(); - -} // namespace Common::X64 diff --git a/src/core/arm/nce/patcher.cpp b/src/core/arm/nce/patcher.cpp index bf72b207b9..79ff3b1e31 100644 --- a/src/core/arm/nce/patcher.cpp +++ b/src/core/arm/nce/patcher.cpp @@ -3,7 +3,7 @@ #include #include -#include "common/wall_clock.h" +#include "common/cpu_features.h" #include "common/alignment.h" #include "common/literals.h" #include "core/arm/nce/arm_nce.h" diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index 6738a0d73d..3e4e7bdc68 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -8,15 +8,13 @@ #include #include #include +#include "common/cpu_features.h" +#include "common/cpu_features.h" #ifdef _WIN32 #include "common/windows/timer_resolution.h" #endif -#ifdef ARCHITECTURE_x86_64 -#include "common/x64/cpu_wait.h" -#endif - #include "common/settings.h" #include "core/core_timing.h" #include "core/hardware_properties.h" @@ -47,8 +45,7 @@ struct CoreTiming::Event { } }; -CoreTiming::CoreTiming() : clock{Common::CreateOptimalClock()} {} - +CoreTiming::CoreTiming() = default; CoreTiming::~CoreTiming() { Reset(); } @@ -64,31 +61,16 @@ void 
CoreTiming::Initialize(std::function&& on_thread_init_) { Common::SetCurrentThreadPriority(Common::ThreadPriority::High); on_thread_init(); has_started = true; + + // base frequency in MHz: 1ns (10^-9) = 1GHz (10^9) while (!stop_token.stop_requested()) { while (!paused && !stop_token.stop_requested()) { paused_set = false; if (auto const next_time = Advance(); next_time) { // There are more events left in the queue, wait until the next event. - auto wait_time = *next_time - GetGlobalTimeNs().count(); + auto const wait_time = *next_time - GetGlobalTimeNs().count(); if (wait_time > 0) { -#ifdef _WIN32 - while (!paused && !event.IsSet() && wait_time > 0) { - wait_time = *next_time - GetGlobalTimeNs().count(); - if (wait_time >= timer_resolution_ns) { - Common::Windows::SleepForOneTick(); - } else { -#ifdef ARCHITECTURE_x86_64 - Common::X64::MicroSleep(); -#else - std::this_thread::yield(); -#endif - } - } - if (event.IsSet()) - event.Reset(); -#else event.WaitFor(std::chrono::nanoseconds(wait_time)); -#endif } } else { // Queue is empty, wait until another event is scheduled and signals us to @@ -226,7 +208,7 @@ void CoreTiming::ResetTicks() { } u64 CoreTiming::GetClockTicks() const { - u64 fres = is_multicore ? clock.GetCNTPCT() : Common::WallClock::CPUTickToCNTPCT(cpu_ticks); + u64 fres = is_multicore ? Common::g_wall_clock.GetCNTPCT() : Common::WallClock::CPUTickToCNTPCT(cpu_ticks); if (auto const overclock = Settings::values.fast_cpu_time.GetValue(); overclock != Settings::CpuClock::Off) { fres = u64(f64(fres) * (1.7 + 0.3 * u32(overclock))); } @@ -240,7 +222,7 @@ u64 CoreTiming::GetClockTicks() const { u64 CoreTiming::GetGPUTicks() const { return is_multicore - ? clock.GetGPUTick() + ? Common::g_wall_clock.GetGPUTick() : Common::WallClock::CPUTickToGPUTick(cpu_ticks); } @@ -317,14 +299,14 @@ void CoreTiming::Reset() { /// @brief Returns current time in nanoseconds. std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const noexcept { return is_multicore - ? 
clock.GetTimeNS() + ? Common::g_wall_clock.GetTimeNS() : std::chrono::nanoseconds{Common::WallClock::CPUTickToNS(cpu_ticks)}; } /// @brief Returns current time in microseconds. std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const noexcept { return is_multicore - ? clock.GetTimeUS() + ? Common::g_wall_clock.GetTimeUS() : std::chrono::microseconds{Common::WallClock::CPUTickToUS(cpu_ticks)}; } diff --git a/src/core/core_timing.h b/src/core/core_timing.h index 5967c83b57..298fc9595d 100644 --- a/src/core/core_timing.h +++ b/src/core/core_timing.h @@ -19,7 +19,7 @@ #include "common/common_types.h" #include "common/thread.h" -#include "common/wall_clock.h" +#include "common/cpu_features.h" namespace Core::Timing { @@ -142,37 +142,28 @@ public: void Reset(); - Common::WallClock clock; - + using heap_t = boost::heap::fibonacci_heap>>; + heap_t event_queue; s64 global_timer = 0; - #ifdef _WIN32 s64 timer_resolution_ns; #endif - - using heap_t = - boost::heap::fibonacci_heap>>; - - heap_t event_queue; u64 event_fifo_id = 0; - + s64 pause_end_time{}; + /// Cycle timing + u64 cpu_ticks{}; + s64 downcount{}; Common::Event event{}; Common::Event pause_event{}; + std::function on_thread_init{}; + std::jthread timer_thread; mutable std::mutex basic_lock; std::mutex advance_lock; - std::jthread timer_thread; std::atomic paused{}; std::atomic paused_set{}; std::atomic wait_set{}; std::atomic has_started{}; - std::function on_thread_init{}; - bool is_multicore{}; - s64 pause_end_time{}; - - /// Cycle timing - u64 cpu_ticks{}; - s64 downcount{}; }; /// Creates a core timing event with the given name and callback. 
diff --git a/src/core/hle/service/nvnflinger/buffer_queue_producer.cpp b/src/core/hle/service/nvnflinger/buffer_queue_producer.cpp index d36b30a2d2..5bd53fb99e 100644 --- a/src/core/hle/service/nvnflinger/buffer_queue_producer.cpp +++ b/src/core/hle/service/nvnflinger/buffer_queue_producer.cpp @@ -10,6 +10,7 @@ #include "common/assert.h" #include "common/logging.h" #include "common/settings.h" +#include "common/cpu_features.h" #include "core/hle/kernel/k_event.h" #include "core/hle/kernel/k_readable_event.h" #include "core/hle/kernel/kernel.h" @@ -28,7 +29,6 @@ BufferQueueProducer::BufferQueueProducer(Service::KernelHelpers::ServiceContext& Service::Nvidia::NvCore::NvMap& nvmap_) : service_context{service_context_}, core{std::move(buffer_queue_core_)} , slots(core->slots) - , clock{Common::CreateOptimalClock()} , nvmap(nvmap_) { buffer_wait_event = service_context.CreateEvent("BufferQueue:WaitEvent"); @@ -488,7 +488,7 @@ Status BufferQueueProducer::QueueBuffer(s32 slot, const QueueBufferInput& input, slots[slot].buffer_state = BufferState::Queued; slots[slot].frame_number = core->frame_counter; slots[slot].queue_time = timestamp; - slots[slot].presentation_time = clock.GetTimeNS().count(); + slots[slot].presentation_time = Common::g_wall_clock.GetTimeNS().count(); slots[slot].fence = fence; item.slot = slot; diff --git a/src/core/hle/service/nvnflinger/buffer_queue_producer.h b/src/core/hle/service/nvnflinger/buffer_queue_producer.h index 51c0801a0a..697de0ac9b 100644 --- a/src/core/hle/service/nvnflinger/buffer_queue_producer.h +++ b/src/core/hle/service/nvnflinger/buffer_queue_producer.h @@ -14,7 +14,7 @@ #include #include "common/common_funcs.h" -#include "common/wall_clock.h" +#include "common/cpu_features.h" #include "core/hle/service/nvdrv/nvdata.h" #include "core/hle/service/nvnflinger/binder.h" #include "core/hle/service/nvnflinger/buffer_queue_defs.h" @@ -89,7 +89,6 @@ private: s32 next_callback_ticket{}; s32 current_callback_ticket{}; 
std::condition_variable_any callback_condition; - Common::WallClock clock; Service::Nvidia::NvCore::NvMap& nvmap; }; diff --git a/src/core/hle/service/psc/time/common.h b/src/core/hle/service/psc/time/common.h index 0ad2ed51f8..36a90cd828 100644 --- a/src/core/hle/service/psc/time/common.h +++ b/src/core/hle/service/psc/time/common.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project @@ -13,7 +13,7 @@ #include "common/common_types.h" #include "common/intrusive_list.h" #include "common/uuid.h" -#include "common/wall_clock.h" +#include "common/cpu_features.h" #include "core/hle/kernel/k_event.h" #include "core/hle/service/kernel_helpers.h" #include "core/hle/service/psc/time/errors.h" diff --git a/src/qt_common/qt_common.cpp b/src/qt_common/qt_common.cpp index 753c537736..7550e931af 100644 --- a/src/qt_common/qt_common.cpp +++ b/src/qt_common/qt_common.cpp @@ -13,12 +13,9 @@ #include "common/fs/path_util.h" #include "common/logging.h" #include "common/scm_rev.h" +#include "common/cpu_features.h" #include "core/memory.h" -#ifdef ARCHITECTURE_x86_64 -#include "common/x64/cpu_detect.h" -#endif - #include #include #include "core/frontend/emu_window.h" @@ -214,7 +211,7 @@ void Init(QWidget* root) { LOG_INFO(Frontend, "Eden Version: {}", yuzu_build_version); LogRuntimes(); #ifdef ARCHITECTURE_x86_64 - const auto& caps = Common::GetCPUCaps(); + const auto& caps = Common::g_cpu_caps; std::string cpu_string = caps.cpu_string; if (caps.avx || caps.avx2 || caps.avx512f) { cpu_string += " | AVX"; diff --git a/src/video_core/host1x/vic.cpp b/src/video_core/host1x/vic.cpp index 906714cc16..bafbb0ed25 100644 --- a/src/video_core/host1x/vic.cpp +++ b/src/video_core/host1x/vic.cpp @@ -39,7 +39,7 @@ extern "C" { #include "video_core/textures/decoders.h" #if 
defined(ARCHITECTURE_x86_64) -#include "common/x64/cpu_detect.h" +#include "common/cpu_features.h" #endif #if defined(ARCHITECTURE_x86_64) \ @@ -55,9 +55,8 @@ namespace Tegra::Host1x { namespace { static bool HasSSE41() { -#if defined(ARCHITECTURE_x86_64) - static bool has_sse41 = Common::GetCPUCaps().sse4_1; - return has_sse41; +#ifdef ARCHITECTURE_x86_64 + return Common::g_cpu_caps.sse4_1; #else return false; #endif diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index 5f5633d4d1..29a79810cd 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -254,11 +254,12 @@ std::optional GenericEnvironment::TryFindSize() { static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; + code.resize(MAXIMUM_SIZE / INST_SIZE); + GPUVAddr guest_addr{program_base + start_address}; size_t offset{0}; size_t size{BLOCK_SIZE}; while (size <= MAXIMUM_SIZE) { - code.resize(size / INST_SIZE); u64* const data = code.data() + offset / INST_SIZE; gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE); for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) { diff --git a/src/yuzu/main_window.cpp b/src/yuzu/main_window.cpp index 5d60bd3a8f..8c362d229b 100644 --- a/src/yuzu/main_window.cpp +++ b/src/yuzu/main_window.cpp @@ -124,7 +124,7 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual #include "common/string_util.h" #ifdef ARCHITECTURE_x86_64 -#include "common/x64/cpu_detect.h" +#include "common/cpu_features.h" #endif // Core // @@ -3119,7 +3119,7 @@ void MainWindow::OnMenuReportCompatibility() { tr("Compatibility list reporting is currently disabled. 
Check back later!")); // #if defined(ARCHITECTURE_x86_64) && !defined(__APPLE__) - // const auto& caps = Common::GetCPUCaps(); + // const auto& caps = Common::g_cpu_caps; // const bool has_fma = caps.fma; // const auto processor_count = std::thread::hardware_concurrency(); // const bool has_4threads = processor_count == 0 || processor_count >= 4;