mirror of
https://git.eden-emu.dev/eden-emu/eden
synced 2026-05-26 23:57:03 +02:00
coalesced Event::WaitFor with waitpkg (umonitor/umwait) and monitorx/mwaitx
This commit is contained in:
parent
bf175450d4
commit
60db19dca9
3 changed files with 70 additions and 26 deletions
|
|
@ -11,6 +11,7 @@
|
||||||
#include "common/logging.h"
|
#include "common/logging.h"
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/thread.h"
|
#include "common/thread.h"
|
||||||
|
#include "common/x64/cpu_detect.h"
|
||||||
#ifdef __APPLE__
|
#ifdef __APPLE__
|
||||||
#include <mach/mach.h>
|
#include <mach/mach.h>
|
||||||
#elif defined(__HAIKU__)
|
#elif defined(__HAIKU__)
|
||||||
|
|
@ -33,6 +34,12 @@
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#include <intrin.h>
|
||||||
|
#else
|
||||||
|
#include <x86intrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef __FreeBSD__
|
#ifdef __FreeBSD__
|
||||||
# define cpu_set_t cpuset_t
|
# define cpu_set_t cpuset_t
|
||||||
#endif
|
#endif
|
||||||
|
|
@ -144,4 +151,63 @@ void PinCurrentThreadToPerformanceCore(size_t core_id) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// On Linux and UNIX systems, a futex would normally be used for this kind of timed wait;
|
||||||
|
// the idea is that it's intuitively cheaper to use a direct instruction as opposed to a full futex call
|
||||||
|
// the underlying libc++ implementation uses pthread_cond_timedwait which MAY invoke a futex
|
||||||
|
// Let's pretend the OS is too expensive to jump into, and avoid ANY context switches
|
||||||
|
// this should *IN THEORY* lower CPU usage while just waiting for stuff effectively
|
||||||
|
// On Windows the minimum scheduler quantum resolution is about 500us, and the normal CRT condition variable is about 1.5ms(?)
|
||||||
|
// so may as well avoid that too
|
||||||
|
// Let's just give ALL platforms (almost) the same mechanism whenever they have monitorx OR waitpkg
|
||||||
|
#ifdef __clang__
|
||||||
|
__attribute__((target("waitpkg,mwaitx")))
|
||||||
|
#elif defined(__GNUC__)
|
||||||
|
#pragma GCC target("waitpkg")
|
||||||
|
#pragma GCC target("mwaitx")
|
||||||
|
#endif
|
||||||
|
bool Event::WaitFor(const std::chrono::nanoseconds& time) {
|
||||||
|
auto const& caps = Common::GetCPUCaps();
|
||||||
|
auto const ns_ratio = std::max<u64>(1, caps.max_frequency / 1'000);
|
||||||
|
auto const target_tsc = Common::X64::FencedRDTSC() + time.count() * ns_ratio;
|
||||||
|
if (caps.monitorx) {
|
||||||
|
while (true) {
|
||||||
|
_mm_monitorx(reinterpret_cast<u64*>(std::addressof(is_set)), 0, 0);
|
||||||
|
if (!IsSet()) {
|
||||||
|
constexpr auto EnableWaitTimeFlag = 1U << 1;
|
||||||
|
constexpr auto RequestC1State = 0U;
|
||||||
|
_mm_mwaitx(EnableWaitTimeFlag, RequestC1State, target_tsc);
|
||||||
|
if (!is_set.load())
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
bool expected = true;
|
||||||
|
if (is_set.compare_exchange_weak(expected, false, std::memory_order_release))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
} else if (caps.waitpkg) {
|
||||||
|
// #UD If CPUID.7.0:ECX.WAITPKG[bit 5]=0.
|
||||||
|
while (true) {
|
||||||
|
_umonitor(std::addressof(is_set));
|
||||||
|
if (!IsSet() && !_umwait(0, target_tsc))
|
||||||
|
return false;
|
||||||
|
bool expected = true;
|
||||||
|
if (is_set.compare_exchange_weak(expected, false, std::memory_order_release))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
#ifdef _WIN32
|
||||||
|
while (!IsSet() && _rdtsc() < target_tsc)
|
||||||
|
Common::Windows::SleepForOneTick();
|
||||||
|
if (event.IsSet())
|
||||||
|
event.Reset();
|
||||||
|
return true;
|
||||||
|
#else
|
||||||
|
std::unique_lock lk{mutex};
|
||||||
|
if (!condvar.wait_for(lk, time, [this] { return is_set.load(); }))
|
||||||
|
return false;
|
||||||
|
is_set = false;
|
||||||
|
return true;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace Common
|
} // namespace Common
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,7 @@
|
||||||
#include <thread>
|
#include <thread>
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "common/polyfill_thread.h"
|
#include "common/polyfill_thread.h"
|
||||||
|
#include "common/x64/rdtsc.h"
|
||||||
|
|
||||||
namespace Common {
|
namespace Common {
|
||||||
|
|
||||||
|
|
@ -34,15 +35,9 @@ public:
|
||||||
is_set = false;
|
is_set = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool WaitFor(const std::chrono::nanoseconds& time) {
|
bool WaitFor(const std::chrono::nanoseconds& time);
|
||||||
std::unique_lock lk{mutex};
|
|
||||||
if (!condvar.wait_for(lk, time, [this] { return is_set.load(); }))
|
|
||||||
return false;
|
|
||||||
is_set = false;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <class Clock, class Duration>
|
template<class Clock, class Duration>
|
||||||
bool WaitUntil(const std::chrono::time_point<Clock, Duration>& time) {
|
bool WaitUntil(const std::chrono::time_point<Clock, Duration>& time) {
|
||||||
std::unique_lock lk{mutex};
|
std::unique_lock lk{mutex};
|
||||||
if (!condvar.wait_until(lk, time, [this] { return is_set.load(); }))
|
if (!condvar.wait_until(lk, time, [this] { return is_set.load(); }))
|
||||||
|
|
@ -63,9 +58,9 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
alignas(64) std::atomic<bool> is_set{false};
|
||||||
std::condition_variable condvar;
|
std::condition_variable condvar;
|
||||||
std::mutex mutex;
|
std::mutex mutex;
|
||||||
std::atomic_bool is_set{false};
|
|
||||||
};
|
};
|
||||||
|
|
||||||
class Barrier {
|
class Barrier {
|
||||||
|
|
|
||||||
|
|
@ -76,24 +76,7 @@ void CoreTiming::Initialize(std::function<void()>&& on_thread_init_) {
|
||||||
// There are more events left in the queue, wait until the next event.
|
// There are more events left in the queue, wait until the next event.
|
||||||
auto const wait_time = *next_time - GetGlobalTimeNs().count();
|
auto const wait_time = *next_time - GetGlobalTimeNs().count();
|
||||||
if (wait_time > 0) {
|
if (wait_time > 0) {
|
||||||
#ifdef _WIN32
|
|
||||||
while (!paused && !event.IsSet() && wait_time > 0) {
|
|
||||||
wait_time = *next_time - GetGlobalTimeNs().count();
|
|
||||||
if (wait_time >= timer_resolution_ns) {
|
|
||||||
Common::Windows::SleepForOneTick();
|
|
||||||
} else {
|
|
||||||
#ifdef ARCHITECTURE_x86_64
|
|
||||||
Common::X64::MicroSleep(caps, wait_time * ns_scale);
|
|
||||||
#else
|
|
||||||
std::this_thread::yield();
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (event.IsSet())
|
|
||||||
event.Reset();
|
|
||||||
#else
|
|
||||||
event.WaitFor(std::chrono::nanoseconds(wait_time));
|
event.WaitFor(std::chrono::nanoseconds(wait_time));
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Queue is empty, wait until another event is scheduled and signals us to
|
// Queue is empty, wait until another event is scheduled and signals us to
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue