From 94ffe4cf7ce2bc03aca638b6a8c727e57e9330d7 Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 23 May 2026 22:00:21 +0000 Subject: [PATCH] fix mwaitx timer --- src/common/thread.cpp | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/common/thread.cpp b/src/common/thread.cpp index f6a2c176e4..e5d7d51aef 100644 --- a/src/common/thread.cpp +++ b/src/common/thread.cpp @@ -171,19 +171,27 @@ __attribute__((target("waitpkg,mwaitx"))) #endif bool Event::WaitFor(const std::chrono::nanoseconds time) { auto const start = Common::X64::FencedRDTSC(); - auto const& caps = Common::g_cpu_caps; auto const ns_ratio = std::max(1, caps.base_frequency / 1'000); auto const end = start + time.count() * ns_ratio; if (caps.monitorx) { while (true) { + // Armed monitor, as per manual, MWAITX must be conditional if the condition isn't satisfied + // to prevent a race condition. _mm_monitorx(reinterpret_cast(std::addressof(is_set)), 0, 0); if (!is_set.load()) { - constexpr auto EnableWaitTimeFlag = 1U << 1; - constexpr auto RequestC1State = 0U; - _mm_mwaitx(EnableWaitTimeFlag, RequestC1State, end); - if (!is_set.load()) - return false; + // RDTSC may be fenced here due to atomic load + s32 const cycles = s64(_rdtsc()) - s64(start); + if (cycles > 0) { + // See here: https://github.com/torvalds/linux/blob/948a64995aca6820abefd17f1a4258f5835c5ad9/arch/x86/lib/delay.c#L93 + // MWAITX accepts a 32-bit input timer which determines the total number of cycles to wait for + // NOT THE TOTAL ABSOLUTE TSC VALUE, it's just a delta + // BIT[1] = use a timer + // Hint = 0: Use C1 state when sleepy (means faster wakeup but less power saving) + _mm_mwaitx(1 << 1, 0u, cycles); + if (!is_set.load()) + return false; + } } bool expected = true; if (is_set.compare_exchange_weak(expected, false, std::memory_order_release))