proper calcs for nanoseconds

This commit is contained in:
lizzie 2026-05-23 09:50:29 +00:00
parent c9c858a614
commit d04b9da170
5 changed files with 43 additions and 44 deletions

View file

@ -20,55 +20,53 @@ namespace Common {
#if defined(ARCHITECTURE_x86_64)
WallClock::WallClock(bool invariant_, u64 rdtsc_frequency_) noexcept
: invariant{invariant_}
, rdtsc_frequency{rdtsc_frequency_}
, ns_rdtsc_factor{invariant_ ? 0 : GetFixedPoint64Factor(NsRatio::den, rdtsc_frequency_)}
, us_rdtsc_factor{invariant_ ? 0 : GetFixedPoint64Factor(UsRatio::den, rdtsc_frequency_)}
, ms_rdtsc_factor{invariant_ ? 0 : GetFixedPoint64Factor(MsRatio::den, rdtsc_frequency_)}
, cntpct_rdtsc_factor{invariant_ ? 0 : GetFixedPoint64Factor(CNTFRQ, rdtsc_frequency_)}
, gputick_rdtsc_factor{invariant_ ? 0 : GetFixedPoint64Factor(GPUTickFreq, rdtsc_frequency_)}
: rdtsc_frequency{rdtsc_frequency_}
, ns_rdtsc_factor{invariant_ ? GetFixedPoint64Factor(NsRatio::den, rdtsc_frequency_) : 0}
, us_rdtsc_factor{invariant_ ? GetFixedPoint64Factor(UsRatio::den, rdtsc_frequency_) : 0}
, ms_rdtsc_factor{invariant_ ? GetFixedPoint64Factor(MsRatio::den, rdtsc_frequency_) : 0}
, cntpct_rdtsc_factor{invariant_ ? GetFixedPoint64Factor(CNTFRQ, rdtsc_frequency_) : 0}
, gputick_rdtsc_factor{invariant_ ? GetFixedPoint64Factor(GPUTickFreq, rdtsc_frequency_) : 0}
, invariant{invariant_}
{}
std::chrono::nanoseconds WallClock::GetTimeNS() const {
if (invariant)
if (!invariant)
return std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::system_clock::now().time_since_epoch());
return std::chrono::nanoseconds{MultiplyHigh(GetUptime(), ns_rdtsc_factor)};
}
std::chrono::microseconds WallClock::GetTimeUS() const {
if (invariant)
if (!invariant)
return std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::system_clock::now().time_since_epoch());
return std::chrono::microseconds{MultiplyHigh(GetUptime(), us_rdtsc_factor)};
}
std::chrono::milliseconds WallClock::GetTimeMS() const {
if (invariant)
if (!invariant)
return std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now().time_since_epoch());
return std::chrono::milliseconds{MultiplyHigh(GetUptime(), ms_rdtsc_factor)};
}
s64 WallClock::GetCNTPCT() const {
if (invariant)
if (!invariant)
return GetUptime() * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den;
return MultiplyHigh(GetUptime(), cntpct_rdtsc_factor);
}
s64 WallClock::GetGPUTick() const {
if (invariant)
if (!invariant)
return GetUptime() * NsToGPUTickRatio::num / NsToGPUTickRatio::den;
return MultiplyHigh(GetUptime(), gputick_rdtsc_factor);
}
s64 WallClock::GetUptime() const {
if (invariant)
if (!invariant)
return std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::steady_clock::now().time_since_epoch()).count();
return s64(Common::X64::FencedRDTSC());
}
bool WallClock::IsNative() const {
if (invariant)
return false;
return true;
return invariant;
}
#elif defined(HAS_NCE)
namespace {
@ -186,7 +184,7 @@ bool WallClock::IsNative() const {
WallClock CreateOptimalClock() noexcept {
#if defined(ARCHITECTURE_x86_64)
auto const& caps = GetCPUCaps();
return WallClock(!(caps.invariant_tsc && caps.tsc_frequency >= std::nano::den), caps.tsc_frequency);
return WallClock(caps.invariant_tsc && caps.tsc_frequency >= std::nano::den, caps.tsc_frequency);
#elif defined(HAS_NCE)
return WallClock(false, 1);
#else

View file

@ -87,13 +87,13 @@ protected:
using CPUTickToGPUTickRatio = std::ratio<GPUTickFreq, CPUTickFreq>;
#if defined(ARCHITECTURE_x86_64)
bool invariant;
u64 rdtsc_frequency;
u64 ns_rdtsc_factor;
u64 us_rdtsc_factor;
u64 ms_rdtsc_factor;
u64 cntpct_rdtsc_factor;
u64 gputick_rdtsc_factor;
bool invariant;
#elif defined(HAS_NCE)
public:
using FactorType = unsigned __int128;

View file

@ -21,29 +21,24 @@ __attribute__((target("waitpkg,mwaitx")))
#pragma GCC target("waitpkg")
#pragma GCC target("mwaitx")
#endif
void MicroSleep(u64 rem) {
// 100,000 cycles is a reasonable amount of time to wait to save on CPU resources.
// For reference:
// At 1 GHz, 100K cycles is 100us
// At 2 GHz, 100K cycles is 50us
// At 4 GHz, 100K cycles is 25us
auto& caps = GetCPUCaps();
u32 cycles = caps.invariant_tsc
? rem * (caps.tsc_frequency / 1000000ULL)
: 1'000'000ULL;
if (caps.waitpkg) {
constexpr auto RequestC02State = 0U;
_tpause(RequestC02State, FencedRDTSC() + cycles);
} else if (caps.monitorx) {
constexpr auto EnableWaitTimeFlag = 1U << 1;
constexpr auto RequestC1State = 0U;
// monitor_var should be aligned to a cache line.
alignas(64) static const u64 monitor_var{};
_mm_monitorx(const_cast<u64*>(std::addressof(monitor_var)), 0, 0);
_mm_mwaitx(EnableWaitTimeFlag, RequestC1State, cycles);
} else {
std::this_thread::yield();
}
void MicroSleep(const CPUCaps& caps, u64 cycles) {
do {
u64 start = FencedRDTSC();
if (caps.waitpkg) {
constexpr auto RequestC02State = 0U;
_tpause(RequestC02State, start + cycles);
} else if (caps.monitorx) {
constexpr auto EnableWaitTimeFlag = 1U << 1;
constexpr auto RequestC1State = 0U;
// monitor_var should be aligned to a cache line.
alignas(64) static const u64 monitor_var{};
_mm_monitorx(const_cast<u64*>(std::addressof(monitor_var)), 0, 0);
_mm_mwaitx(EnableWaitTimeFlag, RequestC1State, cycles);
} else {
std::this_thread::yield();
}
cycles -= FencedRDTSC() - start;
} while (cycles > 0);
}
} // namespace Common::X64

View file

@ -4,9 +4,10 @@
#pragma once
#include "common/common_types.h"
#include "common/x64/cpu_detect.h"
namespace Common::X64 {
void MicroSleep(u64 rem);
/// Pauses the calling thread for roughly `cycles` TSC ticks.
/// @param caps   Detected CPU capabilities; selects between TPAUSE (`waitpkg`),
///               MONITORX/MWAITX (`monitorx`) and a plain thread yield.
/// @param cycles Wait budget in TSC ticks (not nanoseconds or microseconds).
void MicroSleep(const CPUCaps& caps, u64 cycles);
} // namespace Common::X64

View file

@ -8,6 +8,7 @@
#include <mutex>
#include <string>
#include <tuple>
#include "common/x64/cpu_detect.h"
#ifdef _WIN32
#include "common/windows/timer_resolution.h"
@ -64,12 +65,16 @@ void CoreTiming::Initialize(std::function<void()>&& on_thread_init_) {
Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
on_thread_init();
has_started = true;
// base frequency in MHz: 1ns (10^-9) = 1GHz (10^9)
auto const& caps = Common::GetCPUCaps();
[[maybe_unused]] u64 ns_scale = caps.base_frequency / 1'000;
while (!stop_token.stop_requested()) {
while (!paused && !stop_token.stop_requested()) {
paused_set = false;
if (auto const next_time = Advance(); next_time) {
// There are more events left in the queue, wait until the next event.
auto wait_time = *next_time - GetGlobalTimeNs().count();
auto const wait_time = *next_time - GetGlobalTimeNs().count();
if (wait_time > 0) {
#ifdef _WIN32
while (!paused && !event.IsSet() && wait_time > 0) {
@ -78,7 +83,7 @@ void CoreTiming::Initialize(std::function<void()>&& on_thread_init_) {
Common::Windows::SleepForOneTick();
} else {
#ifdef ARCHITECTURE_x86_64
Common::X64::MicroSleep(wait_time);
Common::X64::MicroSleep(caps, wait_time * ns_scale);
#else
std::this_thread::yield();
#endif