proper calcs for nanoseconds

This commit is contained in:
lizzie 2026-05-23 09:50:29 +00:00
parent c9c858a614
commit d04b9da170
5 changed files with 43 additions and 44 deletions

View file

@ -20,55 +20,53 @@ namespace Common {
#if defined(ARCHITECTURE_x86_64) #if defined(ARCHITECTURE_x86_64)
WallClock::WallClock(bool invariant_, u64 rdtsc_frequency_) noexcept WallClock::WallClock(bool invariant_, u64 rdtsc_frequency_) noexcept
: invariant{invariant_} : rdtsc_frequency{rdtsc_frequency_}
, rdtsc_frequency{rdtsc_frequency_} , ns_rdtsc_factor{invariant_ ? GetFixedPoint64Factor(NsRatio::den, rdtsc_frequency_) : 0}
, ns_rdtsc_factor{invariant_ ? 0 : GetFixedPoint64Factor(NsRatio::den, rdtsc_frequency_)} , us_rdtsc_factor{invariant_ ? GetFixedPoint64Factor(UsRatio::den, rdtsc_frequency_) : 0}
, us_rdtsc_factor{invariant_ ? 0 : GetFixedPoint64Factor(UsRatio::den, rdtsc_frequency_)} , ms_rdtsc_factor{invariant_ ? GetFixedPoint64Factor(MsRatio::den, rdtsc_frequency_) : 0}
, ms_rdtsc_factor{invariant_ ? 0 : GetFixedPoint64Factor(MsRatio::den, rdtsc_frequency_)} , cntpct_rdtsc_factor{invariant_ ? GetFixedPoint64Factor(CNTFRQ, rdtsc_frequency_) : 0}
, cntpct_rdtsc_factor{invariant_ ? 0 : GetFixedPoint64Factor(CNTFRQ, rdtsc_frequency_)} , gputick_rdtsc_factor{invariant_ ? GetFixedPoint64Factor(GPUTickFreq, rdtsc_frequency_) : 0}
, gputick_rdtsc_factor{invariant_ ? 0 : GetFixedPoint64Factor(GPUTickFreq, rdtsc_frequency_)} , invariant{invariant_}
{} {}
std::chrono::nanoseconds WallClock::GetTimeNS() const { std::chrono::nanoseconds WallClock::GetTimeNS() const {
if (invariant) if (!invariant)
return std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::system_clock::now().time_since_epoch()); return std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::system_clock::now().time_since_epoch());
return std::chrono::nanoseconds{MultiplyHigh(GetUptime(), ns_rdtsc_factor)}; return std::chrono::nanoseconds{MultiplyHigh(GetUptime(), ns_rdtsc_factor)};
} }
std::chrono::microseconds WallClock::GetTimeUS() const { std::chrono::microseconds WallClock::GetTimeUS() const {
if (invariant) if (!invariant)
return std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::system_clock::now().time_since_epoch()); return std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::system_clock::now().time_since_epoch());
return std::chrono::microseconds{MultiplyHigh(GetUptime(), us_rdtsc_factor)}; return std::chrono::microseconds{MultiplyHigh(GetUptime(), us_rdtsc_factor)};
} }
std::chrono::milliseconds WallClock::GetTimeMS() const { std::chrono::milliseconds WallClock::GetTimeMS() const {
if (invariant) if (!invariant)
return std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now().time_since_epoch()); return std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now().time_since_epoch());
return std::chrono::milliseconds{MultiplyHigh(GetUptime(), ms_rdtsc_factor)}; return std::chrono::milliseconds{MultiplyHigh(GetUptime(), ms_rdtsc_factor)};
} }
s64 WallClock::GetCNTPCT() const { s64 WallClock::GetCNTPCT() const {
if (invariant) if (!invariant)
return GetUptime() * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den; return GetUptime() * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den;
return MultiplyHigh(GetUptime(), cntpct_rdtsc_factor); return MultiplyHigh(GetUptime(), cntpct_rdtsc_factor);
} }
s64 WallClock::GetGPUTick() const { s64 WallClock::GetGPUTick() const {
if (invariant) if (!invariant)
return GetUptime() * NsToGPUTickRatio::num / NsToGPUTickRatio::den; return GetUptime() * NsToGPUTickRatio::num / NsToGPUTickRatio::den;
return MultiplyHigh(GetUptime(), gputick_rdtsc_factor); return MultiplyHigh(GetUptime(), gputick_rdtsc_factor);
} }
s64 WallClock::GetUptime() const { s64 WallClock::GetUptime() const {
if (invariant) if (!invariant)
return std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::steady_clock::now().time_since_epoch()).count(); return std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::steady_clock::now().time_since_epoch()).count();
return s64(Common::X64::FencedRDTSC()); return s64(Common::X64::FencedRDTSC());
} }
bool WallClock::IsNative() const { bool WallClock::IsNative() const {
if (invariant) return invariant;
return false;
return true;
} }
#elif defined(HAS_NCE) #elif defined(HAS_NCE)
namespace { namespace {
@ -186,7 +184,7 @@ bool WallClock::IsNative() const {
WallClock CreateOptimalClock() noexcept { WallClock CreateOptimalClock() noexcept {
#if defined(ARCHITECTURE_x86_64) #if defined(ARCHITECTURE_x86_64)
auto const& caps = GetCPUCaps(); auto const& caps = GetCPUCaps();
return WallClock(!(caps.invariant_tsc && caps.tsc_frequency >= std::nano::den), caps.tsc_frequency); return WallClock(caps.invariant_tsc && caps.tsc_frequency >= std::nano::den, caps.tsc_frequency);
#elif defined(HAS_NCE) #elif defined(HAS_NCE)
return WallClock(false, 1); return WallClock(false, 1);
#else #else

View file

@ -87,13 +87,13 @@ protected:
using CPUTickToGPUTickRatio = std::ratio<GPUTickFreq, CPUTickFreq>; using CPUTickToGPUTickRatio = std::ratio<GPUTickFreq, CPUTickFreq>;
#if defined(ARCHITECTURE_x86_64) #if defined(ARCHITECTURE_x86_64)
bool invariant;
u64 rdtsc_frequency; u64 rdtsc_frequency;
u64 ns_rdtsc_factor; u64 ns_rdtsc_factor;
u64 us_rdtsc_factor; u64 us_rdtsc_factor;
u64 ms_rdtsc_factor; u64 ms_rdtsc_factor;
u64 cntpct_rdtsc_factor; u64 cntpct_rdtsc_factor;
u64 gputick_rdtsc_factor; u64 gputick_rdtsc_factor;
bool invariant;
#elif defined(HAS_NCE) #elif defined(HAS_NCE)
public: public:
using FactorType = unsigned __int128; using FactorType = unsigned __int128;

View file

@ -21,29 +21,24 @@ __attribute__((target("waitpkg,mwaitx")))
#pragma GCC target("waitpkg") #pragma GCC target("waitpkg")
#pragma GCC target("mwaitx") #pragma GCC target("mwaitx")
#endif #endif
void MicroSleep(u64 rem) { void MicroSleep(const CPUCaps& caps, u64 cycles) {
// 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. do {
// For reference: u64 start = FencedRDTSC();
// At 1 GHz, 100K cycles is 100us if (caps.waitpkg) {
// At 2 GHz, 100K cycles is 50us constexpr auto RequestC02State = 0U;
// At 4 GHz, 100K cycles is 25us _tpause(RequestC02State, start + cycles);
auto& caps = GetCPUCaps(); } else if (caps.monitorx) {
u32 cycles = caps.invariant_tsc constexpr auto EnableWaitTimeFlag = 1U << 1;
? rem * (caps.tsc_frequency / 1000000ULL) constexpr auto RequestC1State = 0U;
: 1'000'000ULL; // monitor_var should be aligned to a cache line.
if (caps.waitpkg) { alignas(64) static const u64 monitor_var{};
constexpr auto RequestC02State = 0U; _mm_monitorx(const_cast<u64*>(std::addressof(monitor_var)), 0, 0);
_tpause(RequestC02State, FencedRDTSC() + cycles); _mm_mwaitx(EnableWaitTimeFlag, RequestC1State, cycles);
} else if (caps.monitorx) { } else {
constexpr auto EnableWaitTimeFlag = 1U << 1; std::this_thread::yield();
constexpr auto RequestC1State = 0U; }
// monitor_var should be aligned to a cache line. cycles -= FencedRDTSC() - start;
alignas(64) static const u64 monitor_var{}; } while (cycles > 0);
_mm_monitorx(const_cast<u64*>(std::addressof(monitor_var)), 0, 0);
_mm_mwaitx(EnableWaitTimeFlag, RequestC1State, cycles);
} else {
std::this_thread::yield();
}
} }
} // namespace Common::X64 } // namespace Common::X64

View file

@ -4,9 +4,10 @@
#pragma once #pragma once
#include "common/common_types.h" #include "common/common_types.h"
#include "common/x64/cpu_detect.h"
namespace Common::X64 { namespace Common::X64 {
void MicroSleep(u64 rem); void MicroSleep(const CPUCaps& caps, u64 cycles);
} // namespace Common::X64 } // namespace Common::X64

View file

@ -8,6 +8,7 @@
#include <mutex> #include <mutex>
#include <string> #include <string>
#include <tuple> #include <tuple>
#include "common/x64/cpu_detect.h"
#ifdef _WIN32 #ifdef _WIN32
#include "common/windows/timer_resolution.h" #include "common/windows/timer_resolution.h"
@ -64,12 +65,16 @@ void CoreTiming::Initialize(std::function<void()>&& on_thread_init_) {
Common::SetCurrentThreadPriority(Common::ThreadPriority::High); Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
on_thread_init(); on_thread_init();
has_started = true; has_started = true;
// base frequency in MHz: 1ns (10^-9) = 1GHz (10^9)
auto const& caps = Common::GetCPUCaps();
[[maybe_unused]] u64 ns_scale = caps.base_frequency / 1'000;
while (!stop_token.stop_requested()) { while (!stop_token.stop_requested()) {
while (!paused && !stop_token.stop_requested()) { while (!paused && !stop_token.stop_requested()) {
paused_set = false; paused_set = false;
if (auto const next_time = Advance(); next_time) { if (auto const next_time = Advance(); next_time) {
// There are more events left in the queue, wait until the next event. // There are more events left in the queue, wait until the next event.
auto wait_time = *next_time - GetGlobalTimeNs().count(); auto const wait_time = *next_time - GetGlobalTimeNs().count();
if (wait_time > 0) { if (wait_time > 0) {
#ifdef _WIN32 #ifdef _WIN32
while (!paused && !event.IsSet() && wait_time > 0) { while (!paused && !event.IsSet() && wait_time > 0) {
@ -78,7 +83,7 @@ void CoreTiming::Initialize(std::function<void()>&& on_thread_init_) {
Common::Windows::SleepForOneTick(); Common::Windows::SleepForOneTick();
} else { } else {
#ifdef ARCHITECTURE_x86_64 #ifdef ARCHITECTURE_x86_64
Common::X64::MicroSleep(wait_time); Common::X64::MicroSleep(caps, wait_time * ns_scale);
#else #else
std::this_thread::yield(); std::this_thread::yield();
#endif #endif