From bf175450d413c117a11278aa9502e8971d8d39e7 Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 23 May 2026 09:50:29 +0000 Subject: [PATCH] proper calcs for nanoseconds --- src/common/wall_clock.cpp | 32 ++++++++++++++--------------- src/common/wall_clock.h | 2 +- src/common/x64/cpu_wait.cpp | 41 ++++++++++++++++--------------------- src/common/x64/cpu_wait.h | 3 ++- src/core/core_timing.cpp | 9 ++++++-- 5 files changed, 43 insertions(+), 44 deletions(-) diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp index c06f9b6318..3ec9a30643 100644 --- a/src/common/wall_clock.cpp +++ b/src/common/wall_clock.cpp @@ -20,55 +20,53 @@ namespace Common { #if defined(ARCHITECTURE_x86_64) WallClock::WallClock(bool invariant_, u64 rdtsc_frequency_) noexcept - : invariant{invariant_} - , rdtsc_frequency{rdtsc_frequency_} - , ns_rdtsc_factor{invariant_ ? 0 : GetFixedPoint64Factor(NsRatio::den, rdtsc_frequency_)} - , us_rdtsc_factor{invariant_ ? 0 : GetFixedPoint64Factor(UsRatio::den, rdtsc_frequency_)} - , ms_rdtsc_factor{invariant_ ? 0 : GetFixedPoint64Factor(MsRatio::den, rdtsc_frequency_)} - , cntpct_rdtsc_factor{invariant_ ? 0 : GetFixedPoint64Factor(CNTFRQ, rdtsc_frequency_)} - , gputick_rdtsc_factor{invariant_ ? 0 : GetFixedPoint64Factor(GPUTickFreq, rdtsc_frequency_)} + : rdtsc_frequency{rdtsc_frequency_} + , ns_rdtsc_factor{invariant_ ? GetFixedPoint64Factor(NsRatio::den, rdtsc_frequency_) : 0} + , us_rdtsc_factor{invariant_ ? GetFixedPoint64Factor(UsRatio::den, rdtsc_frequency_) : 0} + , ms_rdtsc_factor{invariant_ ? GetFixedPoint64Factor(MsRatio::den, rdtsc_frequency_) : 0} + , cntpct_rdtsc_factor{invariant_ ? GetFixedPoint64Factor(CNTFRQ, rdtsc_frequency_) : 0} + , gputick_rdtsc_factor{invariant_ ? 
GetFixedPoint64Factor(GPUTickFreq, rdtsc_frequency_) : 0} + , invariant{invariant_} {} std::chrono::nanoseconds WallClock::GetTimeNS() const { - if (invariant) + if (!invariant) return std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()); return std::chrono::nanoseconds{MultiplyHigh(GetUptime(), ns_rdtsc_factor)}; } std::chrono::microseconds WallClock::GetTimeUS() const { - if (invariant) + if (!invariant) return std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()); return std::chrono::microseconds{MultiplyHigh(GetUptime(), us_rdtsc_factor)}; } std::chrono::milliseconds WallClock::GetTimeMS() const { - if (invariant) + if (!invariant) return std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()); return std::chrono::milliseconds{MultiplyHigh(GetUptime(), ms_rdtsc_factor)}; } s64 WallClock::GetCNTPCT() const { - if (invariant) + if (!invariant) return GetUptime() * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den; return MultiplyHigh(GetUptime(), cntpct_rdtsc_factor); } s64 WallClock::GetGPUTick() const { - if (invariant) + if (!invariant) return GetUptime() * NsToGPUTickRatio::num / NsToGPUTickRatio::den; return MultiplyHigh(GetUptime(), gputick_rdtsc_factor); } s64 WallClock::GetUptime() const { - if (invariant) + if (!invariant) return std::chrono::duration_cast(std::chrono::steady_clock::now().time_since_epoch()).count(); return s64(Common::X64::FencedRDTSC()); } bool WallClock::IsNative() const { - if (invariant) - return false; - return true; + return invariant; } #elif defined(HAS_NCE) namespace { @@ -186,7 +184,7 @@ bool WallClock::IsNative() const { WallClock CreateOptimalClock() noexcept { #if defined(ARCHITECTURE_x86_64) auto const& caps = GetCPUCaps(); - return WallClock(!(caps.invariant_tsc && caps.tsc_frequency >= std::nano::den), caps.tsc_frequency); + return WallClock(caps.invariant_tsc && caps.tsc_frequency >= std::nano::den, caps.tsc_frequency); #elif defined(HAS_NCE) return 
WallClock(false, 1); #else diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h index 6a6d56a610..73fea96967 100644 --- a/src/common/wall_clock.h +++ b/src/common/wall_clock.h @@ -87,13 +87,13 @@ protected: using CPUTickToGPUTickRatio = std::ratio; #if defined(ARCHITECTURE_x86_64) - bool invariant; u64 rdtsc_frequency; u64 ns_rdtsc_factor; u64 us_rdtsc_factor; u64 ms_rdtsc_factor; u64 cntpct_rdtsc_factor; u64 gputick_rdtsc_factor; + bool invariant; #elif defined(HAS_NCE) public: using FactorType = unsigned __int128; diff --git a/src/common/x64/cpu_wait.cpp b/src/common/x64/cpu_wait.cpp index f3ff4f264e..96971a1219 100644 --- a/src/common/x64/cpu_wait.cpp +++ b/src/common/x64/cpu_wait.cpp @@ -21,29 +21,24 @@ __attribute__((target("waitpkg,mwaitx"))) #pragma GCC target("waitpkg") #pragma GCC target("mwaitx") #endif -void MicroSleep(u64 rem) { - // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. - // For reference: - // At 1 GHz, 100K cycles is 100us - // At 2 GHz, 100K cycles is 50us - // At 4 GHz, 100K cycles is 25us - auto& caps = GetCPUCaps(); - u32 cycles = caps.invariant_tsc - ? rem * (caps.tsc_frequency / 1000000ULL) - : 1'000'000ULL; - if (caps.waitpkg) { - constexpr auto RequestC02State = 0U; - _tpause(RequestC02State, FencedRDTSC() + cycles); - } else if (caps.monitorx) { - constexpr auto EnableWaitTimeFlag = 1U << 1; - constexpr auto RequestC1State = 0U; - // monitor_var should be aligned to a cache line. 
-        alignas(64) static const u64 monitor_var{};
-        _mm_monitorx(const_cast<u64*>(std::addressof(monitor_var)), 0, 0);
-        _mm_mwaitx(EnableWaitTimeFlag, RequestC1State, cycles);
-    } else {
-        std::this_thread::yield();
-    }
+void MicroSleep(const CPUCaps& caps, u64 cycles) {
+    u64 const deadline = FencedRDTSC() + cycles; // absolute TSC target; a wait may end early or late
+    do {
+        if (caps.waitpkg) {
+            constexpr auto RequestC02State = 0U;
+            _tpause(RequestC02State, deadline);
+        } else if (caps.monitorx) {
+            constexpr auto EnableWaitTimeFlag = 1U << 1;
+            constexpr auto RequestC1State = 0U;
+            // monitor_var should be aligned to a cache line.
+            alignas(64) static const u64 monitor_var{};
+            _mm_monitorx(const_cast<u64*>(std::addressof(monitor_var)), 0, 0);
+            _mm_mwaitx(EnableWaitTimeFlag, RequestC1State, cycles);
+        } else {
+            std::this_thread::yield();
+        }
+        cycles = deadline - FencedRDTSC(); // ticks still owed; wraps around once the deadline has passed
+    } while (static_cast<s64>(cycles) > 0); // signed view: an overshoot ends the loop instead of underflowing
 }
 
 } // namespace Common::X64
diff --git a/src/common/x64/cpu_wait.h b/src/common/x64/cpu_wait.h
index 8bf47783ec..c445ac910a 100644
--- a/src/common/x64/cpu_wait.h
+++ b/src/common/x64/cpu_wait.h
@@ -4,9 +4,10 @@
 #pragma once
 
 #include "common/common_types.h"
+#include "common/x64/cpu_detect.h"
 
 namespace Common::X64 {
 
-void MicroSleep(u64 rem);
+void MicroSleep(const CPUCaps& caps, u64 cycles);
 
 } // namespace Common::X64
diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp
index d91361f2e7..c8f845fd04 100644
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -8,6 +8,7 @@
 #include
 #include
 #include
+#include "common/x64/cpu_detect.h"
 
 #ifdef _WIN32
 #include "common/windows/timer_resolution.h"
@@ -64,12 +65,16 @@ void CoreTiming::Initialize(std::function&& on_thread_init_) {
             Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
             on_thread_init();
             has_started = true;
+
+            // base frequency in MHz: 1ns (10^-9) = 1GHz (10^9)
+            auto const& caps = Common::GetCPUCaps();
+            [[maybe_unused]] u64 ns_scale = caps.base_frequency / 1'000;
             while (!stop_token.stop_requested()) {
                 while (!paused && !stop_token.stop_requested())
{ paused_set = false; if (auto const next_time = Advance(); next_time) { // There are more events left in the queue, wait until the next event. - auto wait_time = *next_time - GetGlobalTimeNs().count(); + auto const wait_time = *next_time - GetGlobalTimeNs().count(); if (wait_time > 0) { #ifdef _WIN32 while (!paused && !event.IsSet() && wait_time > 0) { @@ -78,7 +83,7 @@ void CoreTiming::Initialize(std::function&& on_thread_init_) { Common::Windows::SleepForOneTick(); } else { #ifdef ARCHITECTURE_x86_64 - Common::X64::MicroSleep(wait_time); + Common::X64::MicroSleep(caps, wait_time * ns_scale); #else std::this_thread::yield(); #endif