mirror of
https://git.eden-emu.dev/eden-emu/eden
synced 2026-05-26 19:37:01 +02:00
coalesced event wait for with waitpkg/mwaitx/umonitorrrrrrr
This commit is contained in:
parent
bf175450d4
commit
60db19dca9
3 changed files with 70 additions and 26 deletions
|
|
@ -11,6 +11,7 @@
|
|||
#include "common/logging.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/thread.h"
|
||||
#include "common/x64/cpu_detect.h"
|
||||
#ifdef __APPLE__
|
||||
#include <mach/mach.h>
|
||||
#elif defined(__HAIKU__)
|
||||
|
|
@ -33,6 +34,12 @@
|
|||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h>
|
||||
#else
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef __FreeBSD__
|
||||
# define cpu_set_t cpuset_t
|
||||
#endif
|
||||
|
|
@ -144,4 +151,63 @@ void PinCurrentThreadToPerformanceCore(size_t core_id) {
|
|||
}
|
||||
}
|
||||
|
||||
// On Linux and UNIX systems, a futex would nominally be used to cover the costs
|
||||
// the idea is that it's intuitivelly cheaper to use a direct instruction as opposed to a full futex call
|
||||
// the underlying libc++ implementation uses pthread_cond_timedwait which MAY invoke a futex
|
||||
// Let's pretend the OS is too expensive to jump into, and avoid ANY context switches
|
||||
// this should *IN THEORY* lower CPU usage while just waiting for stuff effectively
|
||||
// For windows the minimal quanta resolution is about 500us, and normal CRT cond var is 1.5ms(?)
|
||||
// so may as well avoid that too
|
||||
// Let's just give ALL platforms the same mechanisms (almost) for when they have umonitor OR waitpkg
|
||||
#ifdef __clang__
|
||||
__attribute__((target("waitpkg,mwaitx")))
|
||||
#elif defined(__GNUC__)
|
||||
#pragma GCC target("waitpkg")
|
||||
#pragma GCC target("mwaitx")
|
||||
#endif
|
||||
bool Event::WaitFor(const std::chrono::nanoseconds& time) {
|
||||
auto const& caps = Common::GetCPUCaps();
|
||||
auto const ns_ratio = std::max<u64>(1, caps.max_frequency / 1'000);
|
||||
auto const target_tsc = Common::X64::FencedRDTSC() + time.count() * ns_ratio;
|
||||
if (caps.monitorx) {
|
||||
while (true) {
|
||||
_mm_monitorx(reinterpret_cast<u64*>(std::addressof(is_set)), 0, 0);
|
||||
if (!IsSet()) {
|
||||
constexpr auto EnableWaitTimeFlag = 1U << 1;
|
||||
constexpr auto RequestC1State = 0U;
|
||||
_mm_mwaitx(EnableWaitTimeFlag, RequestC1State, target_tsc);
|
||||
if (!is_set.load())
|
||||
return false;
|
||||
}
|
||||
bool expected = true;
|
||||
if (is_set.compare_exchange_weak(expected, false, std::memory_order_release))
|
||||
return true;
|
||||
}
|
||||
} else if (caps.waitpkg) {
|
||||
// #UD If CPUID.7.0:ECX.WAITPKG[bit 5]=0.
|
||||
while (true) {
|
||||
_umonitor(std::addressof(is_set));
|
||||
if (!IsSet() && !_umwait(0, target_tsc))
|
||||
return false;
|
||||
bool expected = true;
|
||||
if (is_set.compare_exchange_weak(expected, false, std::memory_order_release))
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
#ifdef _WIN32
|
||||
while (!IsSet() && _rdtsc() < target_tsc)
|
||||
Common::Windows::SleepForOneTick();
|
||||
if (event.IsSet())
|
||||
event.Reset();
|
||||
return true;
|
||||
#else
|
||||
std::unique_lock lk{mutex};
|
||||
if (!condvar.wait_for(lk, time, [this] { return is_set.load(); }))
|
||||
return false;
|
||||
is_set = false;
|
||||
return true;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Common
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@
|
|||
#include <thread>
|
||||
#include "common/common_types.h"
|
||||
#include "common/polyfill_thread.h"
|
||||
#include "common/x64/rdtsc.h"
|
||||
|
||||
namespace Common {
|
||||
|
||||
|
|
@ -34,15 +35,9 @@ public:
|
|||
is_set = false;
|
||||
}
|
||||
|
||||
/// Blocks on `condvar` (holding `mutex`) until `is_set` reads true or `time` has elapsed.
/// @param time Maximum duration to wait.
/// @return false if the wait timed out with `is_set` still false; otherwise `is_set` is
///         cleared and true is returned.
bool WaitFor(const std::chrono::nanoseconds& time) {
|
||||
std::unique_lock lk{mutex};
|
||||
if (!condvar.wait_for(lk, time, [this] { return is_set.load(); }))
|
||||
return false;
|
||||
is_set = false;
|
||||
return true;
|
||||
}
|
||||
bool WaitFor(const std::chrono::nanoseconds& time);
|
||||
|
||||
template <class Clock, class Duration>
|
||||
template<class Clock, class Duration>
|
||||
bool WaitUntil(const std::chrono::time_point<Clock, Duration>& time) {
|
||||
std::unique_lock lk{mutex};
|
||||
if (!condvar.wait_until(lk, time, [this] { return is_set.load(); }))
|
||||
|
|
@ -63,9 +58,9 @@ public:
|
|||
}
|
||||
|
||||
private:
|
||||
alignas(64) std::atomic<bool> is_set{false};
|
||||
std::condition_variable condvar;
|
||||
std::mutex mutex;
|
||||
std::atomic_bool is_set{false};
|
||||
};
|
||||
|
||||
class Barrier {
|
||||
|
|
|
|||
|
|
@ -76,24 +76,7 @@ void CoreTiming::Initialize(std::function<void()>&& on_thread_init_) {
|
|||
// There are more events left in the queue, wait until the next event.
|
||||
auto const wait_time = *next_time - GetGlobalTimeNs().count();
|
||||
if (wait_time > 0) {
|
||||
#ifdef _WIN32
|
||||
while (!paused && !event.IsSet() && wait_time > 0) {
|
||||
wait_time = *next_time - GetGlobalTimeNs().count();
|
||||
if (wait_time >= timer_resolution_ns) {
|
||||
Common::Windows::SleepForOneTick();
|
||||
} else {
|
||||
#ifdef ARCHITECTURE_x86_64
|
||||
Common::X64::MicroSleep(caps, wait_time * ns_scale);
|
||||
#else
|
||||
std::this_thread::yield();
|
||||
#endif
|
||||
}
|
||||
}
|
||||
if (event.IsSet())
|
||||
event.Reset();
|
||||
#else
|
||||
event.WaitFor(std::chrono::nanoseconds(wait_time));
|
||||
#endif
|
||||
}
|
||||
} else {
|
||||
// Queue is empty, wait until another event is scheduled and signals us to
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue