Convert CPU caps from a lazily-initialized, thread-safe function-local static (GetCPUCaps()) into a plain const global (g_cpu_caps)

This commit is contained in:
lizzie 2026-05-23 21:08:24 +00:00
parent f40da0638b
commit 98232563e9
9 changed files with 31 additions and 35 deletions

View file

@ -4,6 +4,7 @@
// SPDX-FileCopyrightText: 2014 Citra Emulator Project // SPDX-FileCopyrightText: 2014 Citra Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include <chrono>
#include <string> #include <string>
#include <thread> #include <thread>
@ -168,17 +169,19 @@ __attribute__((target("waitpkg,mwaitx")))
#pragma GCC target("waitpkg") #pragma GCC target("waitpkg")
#pragma GCC target("mwaitx") #pragma GCC target("mwaitx")
#endif #endif
bool Event::WaitFor(const std::chrono::nanoseconds& time) { bool Event::WaitFor(const std::chrono::nanoseconds time) {
auto const& caps = Common::GetCPUCaps(); auto const start = Common::X64::FencedRDTSC();
auto const& caps = Common::g_cpu_caps;
auto const ns_ratio = std::max<u64>(1, caps.base_frequency / 1'000); auto const ns_ratio = std::max<u64>(1, caps.base_frequency / 1'000);
auto const target_tsc = Common::X64::FencedRDTSC() + time.count() * ns_ratio; auto const end = start + time.count() * ns_ratio;
if (caps.monitorx) { if (caps.monitorx) {
while (true) { while (true) {
_mm_monitorx(reinterpret_cast<u64*>(std::addressof(is_set)), 0, 0); _mm_monitorx(reinterpret_cast<u64*>(std::addressof(is_set)), 0, 0);
if (!IsSet()) { if (!is_set.load()) {
constexpr auto EnableWaitTimeFlag = 1U << 1; constexpr auto EnableWaitTimeFlag = 1U << 1;
constexpr auto RequestC1State = 0U; constexpr auto RequestC1State = 0U;
_mm_mwaitx(EnableWaitTimeFlag, RequestC1State, target_tsc); _mm_mwaitx(EnableWaitTimeFlag, RequestC1State, end);
if (!is_set.load()) if (!is_set.load())
return false; return false;
} }
@ -190,7 +193,7 @@ bool Event::WaitFor(const std::chrono::nanoseconds& time) {
// #UD If CPUID.7.0:ECX.WAITPKG[bit 5]=0. // #UD If CPUID.7.0:ECX.WAITPKG[bit 5]=0.
while (true) { while (true) {
_umonitor(std::addressof(is_set)); _umonitor(std::addressof(is_set));
if (!IsSet() && !_umwait(1, target_tsc)) if (!is_set.load() && !_umwait(1, end))
return false; return false;
bool expected = true; bool expected = true;
if (is_set.compare_exchange_weak(expected, false, std::memory_order_release)) if (is_set.compare_exchange_weak(expected, false, std::memory_order_release))
@ -198,9 +201,9 @@ bool Event::WaitFor(const std::chrono::nanoseconds& time) {
} }
} else { } else {
#ifdef _WIN32 #ifdef _WIN32
while (!IsSet() && _rdtsc() < target_tsc) while (!is_set.load() && _rdtsc() < end)
Common::Windows::SleepForOneTick(); Common::Windows::SleepForOneTick();
if (IsSet()) if (is_set.load())
Reset(); Reset();
return true; return true;
#else #else
@ -213,11 +216,14 @@ bool Event::WaitFor(const std::chrono::nanoseconds& time) {
} }
} }
#else #else
bool Event::WaitFor(const std::chrono::nanoseconds& time) { bool Event::WaitFor(const std::chrono::nanoseconds time) {
#ifdef _WIN32 #ifdef _WIN32
while (!IsSet() && _rdtsc() < target_tsc) s64 rem = s64(time.count()); //98 years
while (!is_set.load() && rem > 0) {
Common::Windows::SleepForOneTick(); Common::Windows::SleepForOneTick();
if (IsSet()) rem = s64(GetGlobalTimeNs().count()) - s64(time.count());
}
if (is_set.load())
Reset(); Reset();
return true; return true;
#else #else

View file

@ -35,10 +35,10 @@ public:
is_set = false; is_set = false;
} }
bool WaitFor(const std::chrono::nanoseconds& time); bool WaitFor(const std::chrono::nanoseconds time);
template<class Clock, class Duration> template<class Clock, class Duration>
bool WaitUntil(const std::chrono::time_point<Clock, Duration>& time) { bool WaitUntil(const std::chrono::time_point<Clock, Duration> time) {
std::unique_lock lk{mutex}; std::unique_lock lk{mutex};
if (!condvar.wait_until(lk, time, [this] { return is_set.load(); })) if (!condvar.wait_until(lk, time, [this] { return is_set.load(); }))
return false; return false;

View file

@ -183,7 +183,7 @@ bool WallClock::IsNative() const {
WallClock CreateOptimalClock() noexcept { WallClock CreateOptimalClock() noexcept {
#if defined(ARCHITECTURE_x86_64) #if defined(ARCHITECTURE_x86_64)
auto const& caps = GetCPUCaps(); auto const& caps = Common::g_cpu_caps;
return WallClock(caps.invariant_tsc && caps.tsc_frequency >= std::nano::den, caps.tsc_frequency); return WallClock(caps.invariant_tsc && caps.tsc_frequency >= std::nano::den, caps.tsc_frequency);
#elif defined(HAS_NCE) #elif defined(HAS_NCE)
return WallClock(false, 1); return WallClock(false, 1);

View file

@ -76,13 +76,12 @@ CPUCaps::Manufacturer CPUCaps::ParseManufacturer(std::string_view brand_string)
return Manufacturer::Unknown; return Manufacturer::Unknown;
} }
// Detects the various CPU features /// @brief Detects the various CPU features
static CPUCaps Detect() { const CPUCaps g_cpu_caps = [] {
CPUCaps caps = {}; CPUCaps caps = {};
// Assumes the CPU supports the CPUID instruction. Those that don't would likely not support // Assumes the CPU supports the CPUID instruction. Those that don't would likely not support
// yuzu at all anyway // yuzu at all anyway
int cpu_id[4]; int cpu_id[4];
// Detect CPU's CPUID capabilities and grab manufacturer string // Detect CPU's CPUID capabilities and grab manufacturer string
@ -197,14 +196,8 @@ static CPUCaps Detect() {
caps.max_frequency = cpu_id[1]; caps.max_frequency = cpu_id[1];
caps.bus_frequency = cpu_id[2]; caps.bus_frequency = cpu_id[2];
} }
return caps; return caps;
} }();
const CPUCaps& GetCPUCaps() {
static CPUCaps caps = Detect();
return caps;
}
std::optional<int> GetProcessorCount() { std::optional<int> GetProcessorCount() {
#if defined(_WIN32) #if defined(_WIN32)

View file

@ -70,11 +70,8 @@ struct CPUCaps {
bool waitpkg : 1; bool waitpkg : 1;
}; };
/** /// @brief Global cpu caps
* Gets the supported capabilities of the host CPU extern const CPUCaps g_cpu_caps;
* @return Reference to a CPUCaps struct with the detected host CPU capabilities
*/
const CPUCaps& GetCPUCaps();
/// Detects CPU core count /// Detects CPU core count
std::optional<int> GetProcessorCount(); std::optional<int> GetProcessorCount();

View file

@ -214,7 +214,7 @@ void Init(QWidget* root) {
LOG_INFO(Frontend, "Eden Version: {}", yuzu_build_version); LOG_INFO(Frontend, "Eden Version: {}", yuzu_build_version);
LogRuntimes(); LogRuntimes();
#ifdef ARCHITECTURE_x86_64 #ifdef ARCHITECTURE_x86_64
const auto& caps = Common::GetCPUCaps(); const auto& caps = Common::g_cpu_caps;
std::string cpu_string = caps.cpu_string; std::string cpu_string = caps.cpu_string;
if (caps.avx || caps.avx2 || caps.avx512f) { if (caps.avx || caps.avx2 || caps.avx512f) {
cpu_string += " | AVX"; cpu_string += " | AVX";

View file

@ -55,9 +55,8 @@ namespace Tegra::Host1x {
namespace { namespace {
static bool HasSSE41() { static bool HasSSE41() {
#if defined(ARCHITECTURE_x86_64) #ifdef ARCHITECTURE_x86_64
static bool has_sse41 = Common::GetCPUCaps().sse4_1; return Common::g_cpu_caps.sse4_1;
return has_sse41;
#else #else
return false; return false;
#endif #endif

View file

@ -254,11 +254,12 @@ std::optional<u64> GenericEnvironment::TryFindSize() {
static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL;
static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL;
code.resize(MAXIMUM_SIZE / INST_SIZE);
GPUVAddr guest_addr{program_base + start_address}; GPUVAddr guest_addr{program_base + start_address};
size_t offset{0}; size_t offset{0};
size_t size{BLOCK_SIZE}; size_t size{BLOCK_SIZE};
while (size <= MAXIMUM_SIZE) { while (size <= MAXIMUM_SIZE) {
code.resize(size / INST_SIZE);
u64* const data = code.data() + offset / INST_SIZE; u64* const data = code.data() + offset / INST_SIZE;
gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE); gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE);
for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) { for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) {

View file

@ -3119,7 +3119,7 @@ void MainWindow::OnMenuReportCompatibility() {
tr("Compatibility list reporting is currently disabled. Check back later!")); tr("Compatibility list reporting is currently disabled. Check back later!"));
// #if defined(ARCHITECTURE_x86_64) && !defined(__APPLE__) // #if defined(ARCHITECTURE_x86_64) && !defined(__APPLE__)
// const auto& caps = Common::GetCPUCaps(); // const auto& caps = g_cpu_caps;
// const bool has_fma = caps.fma; // const bool has_fma = caps.fma;
// const auto processor_count = std::thread::hardware_concurrency(); // const auto processor_count = std::thread::hardware_concurrency();
// const bool has_4threads = processor_count == 0 || processor_count >= 4; // const bool has_4threads = processor_count == 0 || processor_count >= 4;