From a75e8645f5390218f29b98b3113d922518ed4fc5 Mon Sep 17 00:00:00 2001
From: lizzie
Date: Sun, 24 May 2026 04:33:49 +0000
Subject: [PATCH] better mwaitx

---
 src/common/thread.cpp | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/src/common/thread.cpp b/src/common/thread.cpp
index fa052a7dd7..19dd8db58e 100644
--- a/src/common/thread.cpp
+++ b/src/common/thread.cpp
@@ -183,19 +183,15 @@ bool Event::WaitFor(const std::chrono::nanoseconds time) {
 _mm_monitorx(reinterpret_cast(std::addressof(is_set)), 0, 0);
 if (!is_set.load()) {
 // RDTSC may be fenced here due to atomic load
- s32 const cycles = std::min(std::numeric_limits::max(), s64(_rdtsc()) - s64(start));
- if (cycles > 0) {
- // See here: https://github.com/torvalds/linux/blob/948a64995aca6820abefd17f1a4258f5835c5ad9/arch/x86/lib/delay.c#L93
- // MWAITX accepts a 32-bit input timer which determines the total number of cycles to wait for
- // NOT THE TOTAL ABSOLUTE TSC VALUE, it's just a delta
- // BIT[1] = use a timer
- // Hint = 0: Use C1 state when sleepy (means faster wakeup but less power saving)
- _mm_mwaitx(1 << 1, 0u, cycles);
- if (!is_set.load())
- return false;
- } else {
+ u32 const cycles = std::min(std::numeric_limits::max(), s64(end) - s64(start));
+ // See here: https://github.com/torvalds/linux/blob/948a64995aca6820abefd17f1a4258f5835c5ad9/arch/x86/lib/delay.c#L93
+ // MWAITX accepts a 32-bit input timer which determines the total number of cycles to wait for
+ // NOT THE TOTAL ABSOLUTE TSC VALUE, it's just a delta
+ // BIT[1] = use a timer
+ // Hint = 0: Use C1 state when sleepy (means faster wakeup but less power saving)
+ _mm_mwaitx(1 << 1, 0u, cycles);
+ if (!is_set.load())
 return false;
- }
 }
 bool expected = true;
 if (is_set.compare_exchange_weak(expected, false, std::memory_order_release))