diff --git a/src/common/thread.cpp b/src/common/thread.cpp index 29ee69c145..f6a2c176e4 100644 --- a/src/common/thread.cpp +++ b/src/common/thread.cpp @@ -4,6 +4,7 @@ // SPDX-FileCopyrightText: 2014 Citra Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include #include @@ -168,17 +169,19 @@ __attribute__((target("waitpkg,mwaitx"))) #pragma GCC target("waitpkg") #pragma GCC target("mwaitx") #endif -bool Event::WaitFor(const std::chrono::nanoseconds& time) { - auto const& caps = Common::GetCPUCaps(); +bool Event::WaitFor(const std::chrono::nanoseconds time) { + auto const start = Common::X64::FencedRDTSC(); + + auto const& caps = Common::g_cpu_caps; auto const ns_ratio = std::max(1, caps.base_frequency / 1'000); - auto const target_tsc = Common::X64::FencedRDTSC() + time.count() * ns_ratio; + auto const end = start + time.count() * ns_ratio; if (caps.monitorx) { while (true) { _mm_monitorx(reinterpret_cast(std::addressof(is_set)), 0, 0); - if (!IsSet()) { + if (!is_set.load()) { constexpr auto EnableWaitTimeFlag = 1U << 1; constexpr auto RequestC1State = 0U; - _mm_mwaitx(EnableWaitTimeFlag, RequestC1State, target_tsc); + _mm_mwaitx(EnableWaitTimeFlag, RequestC1State, end); if (!is_set.load()) return false; } @@ -190,7 +193,7 @@ bool Event::WaitFor(const std::chrono::nanoseconds& time) { // #UD If CPUID.7.0:ECX.WAITPKG[bit 5]=0. 
while (true) { _umonitor(std::addressof(is_set)); - if (!IsSet() && !_umwait(1, target_tsc)) + if (!is_set.load() && !_umwait(1, end)) return false; bool expected = true; if (is_set.compare_exchange_weak(expected, false, std::memory_order_release)) @@ -198,9 +201,9 @@ bool Event::WaitFor(const std::chrono::nanoseconds& time) { } } else { #ifdef _WIN32 - while (!IsSet() && _rdtsc() < target_tsc) + while (!is_set.load() && _rdtsc() < end) Common::Windows::SleepForOneTick(); - if (IsSet()) + if (is_set.load()) Reset(); return true; #else @@ -213,11 +216,14 @@ bool Event::WaitFor(const std::chrono::nanoseconds& time) { } } #else -bool Event::WaitFor(const std::chrono::nanoseconds& time) { +bool Event::WaitFor(const std::chrono::nanoseconds time) { #ifdef _WIN32 - while (!IsSet() && _rdtsc() < target_tsc) + s64 const start = s64(GetGlobalTimeNs().count()); // wait began here; s64 nanoseconds span about 292 years + for (s64 rem = s64(time.count()); !is_set.load() && rem > 0;) { Common::Windows::SleepForOneTick(); - if (IsSet()) + rem = s64(time.count()) - (s64(GetGlobalTimeNs().count()) - start); // requested duration minus elapsed time, so rem counts down to the timeout + } + if (is_set.load()) Reset(); return true; #else diff --git a/src/common/thread.h b/src/common/thread.h index 93688bb455..5b3682f266 100644 --- a/src/common/thread.h +++ b/src/common/thread.h @@ -35,10 +35,10 @@ public: is_set = false; } - bool WaitFor(const std::chrono::nanoseconds& time); + bool WaitFor(const std::chrono::nanoseconds time); template - bool WaitUntil(const std::chrono::time_point& time) { + bool WaitUntil(const std::chrono::time_point time) { std::unique_lock lk{mutex}; if (!condvar.wait_until(lk, time, [this] { return is_set.load(); })) return false; diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp index 3ec9a30643..0498df5c1b 100644 --- a/src/common/wall_clock.cpp +++ b/src/common/wall_clock.cpp @@ -183,7 +183,7 @@ bool WallClock::IsNative() const { WallClock CreateOptimalClock() noexcept { #if defined(ARCHITECTURE_x86_64) - auto const& caps = GetCPUCaps(); + auto const& caps = Common::g_cpu_caps; return WallClock(caps.invariant_tsc && 
caps.tsc_frequency >= std::nano::den, caps.tsc_frequency); #elif defined(HAS_NCE) return WallClock(false, 1); diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp index d613954eb5..4fae186bd5 100644 --- a/src/common/x64/cpu_detect.cpp +++ b/src/common/x64/cpu_detect.cpp @@ -76,13 +76,12 @@ CPUCaps::Manufacturer CPUCaps::ParseManufacturer(std::string_view brand_string) return Manufacturer::Unknown; } -// Detects the various CPU features -static CPUCaps Detect() { +/// @brief Detects the various CPU features +const CPUCaps g_cpu_caps = [] { CPUCaps caps = {}; // Assumes the CPU supports the CPUID instruction. Those that don't would likely not support // yuzu at all anyway - int cpu_id[4]; // Detect CPU's CPUID capabilities and grab manufacturer string @@ -197,14 +196,8 @@ static CPUCaps Detect() { caps.max_frequency = cpu_id[1]; caps.bus_frequency = cpu_id[2]; } - return caps; -} - -const CPUCaps& GetCPUCaps() { - static CPUCaps caps = Detect(); return caps; -} +}(); std::optional GetProcessorCount() { #if defined(_WIN32) diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h index af93c36e2c..bf60be4ef2 100644 --- a/src/common/x64/cpu_detect.h +++ b/src/common/x64/cpu_detect.h @@ -70,11 +70,8 @@ struct CPUCaps { bool waitpkg : 1; }; -/** - * Gets the supported capabilities of the host CPU - * @return Reference to a CPUCaps struct with the detected host CPU capabilities - */ -const CPUCaps& GetCPUCaps(); +/// @brief Supported capabilities of the host CPU, detected once during static initialization +extern const CPUCaps g_cpu_caps; /// Detects CPU core count std::optional GetProcessorCount(); diff --git a/src/qt_common/qt_common.cpp b/src/qt_common/qt_common.cpp index 753c537736..216551835d 100644 --- a/src/qt_common/qt_common.cpp +++ b/src/qt_common/qt_common.cpp @@ -214,7 +214,7 @@ void Init(QWidget* root) { LOG_INFO(Frontend, "Eden Version: {}", yuzu_build_version); LogRuntimes(); #ifdef ARCHITECTURE_x86_64 - const auto& caps = Common::GetCPUCaps(); + const auto& caps = Common::g_cpu_caps; 
std::string cpu_string = caps.cpu_string; if (caps.avx || caps.avx2 || caps.avx512f) { cpu_string += " | AVX"; diff --git a/src/video_core/host1x/vic.cpp b/src/video_core/host1x/vic.cpp index 906714cc16..c808489214 100644 --- a/src/video_core/host1x/vic.cpp +++ b/src/video_core/host1x/vic.cpp @@ -55,9 +55,8 @@ namespace Tegra::Host1x { namespace { static bool HasSSE41() { -#if defined(ARCHITECTURE_x86_64) - static bool has_sse41 = Common::GetCPUCaps().sse4_1; - return has_sse41; +#ifdef ARCHITECTURE_x86_64 + return Common::g_cpu_caps.sse4_1; #else return false; #endif diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index 5f5633d4d1..29a79810cd 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -254,11 +254,12 @@ std::optional GenericEnvironment::TryFindSize() { static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; + code.resize(MAXIMUM_SIZE / INST_SIZE); + GPUVAddr guest_addr{program_base + start_address}; size_t offset{0}; size_t size{BLOCK_SIZE}; while (size <= MAXIMUM_SIZE) { - code.resize(size / INST_SIZE); u64* const data = code.data() + offset / INST_SIZE; gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE); for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) { diff --git a/src/yuzu/main_window.cpp b/src/yuzu/main_window.cpp index 5d60bd3a8f..494fe657b7 100644 --- a/src/yuzu/main_window.cpp +++ b/src/yuzu/main_window.cpp @@ -3119,7 +3119,7 @@ void MainWindow::OnMenuReportCompatibility() { tr("Compatibility list reporting is currently disabled. Check back later!")); // #if defined(ARCHITECTURE_x86_64) && !defined(__APPLE__) - // const auto& caps = Common::GetCPUCaps(); + // const auto& caps = Common::g_cpu_caps; // const bool has_fma = caps.fma; // const auto processor_count = std::thread::hardware_concurrency(); // const bool has_4threads = processor_count == 0 || processor_count >= 4;