mirror of
https://git.eden-emu.dev/eden-emu/eden
synced 2026-06-03 20:27:08 +02:00
[core/core_timing] better MWAITX and WAITPKG delays
Signed-off-by: lizzie <lizzie@eden-emu.dev>
This commit is contained in:
parent
9d55875377
commit
394578d038
3 changed files with 32 additions and 55 deletions
|
|
@ -5,6 +5,8 @@
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
#include <intrin.h>
|
#include <intrin.h>
|
||||||
|
#else
|
||||||
|
#include <x86intrin.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "common/x64/cpu_detect.h"
|
#include "common/x64/cpu_detect.h"
|
||||||
|
|
@ -13,60 +15,33 @@
|
||||||
|
|
||||||
namespace Common::X64 {
|
namespace Common::X64 {
|
||||||
|
|
||||||
namespace {
|
#ifdef __clang__
|
||||||
|
__attribute__((target("waitpkg")))
|
||||||
// 100,000 cycles is a reasonable amount of time to wait to save on CPU resources.
|
__attribute__((target("mwaitx")))
|
||||||
// For reference:
|
#elif defined(__GNUC__)
|
||||||
// At 1 GHz, 100K cycles is 100us
|
#pragma GCC target("waitpkg")
|
||||||
// At 2 GHz, 100K cycles is 50us
|
#pragma GCC target("mwaitx")
|
||||||
// At 4 GHz, 100K cycles is 25us
|
|
||||||
constexpr auto PauseCycles = 100'000U;
|
|
||||||
|
|
||||||
} // Anonymous namespace
|
|
||||||
|
|
||||||
#if defined(_MSC_VER) && !defined(__clang__)
|
|
||||||
__forceinline static void TPAUSE() {
|
|
||||||
static constexpr auto RequestC02State = 0U;
|
|
||||||
_tpause(RequestC02State, FencedRDTSC() + PauseCycles);
|
|
||||||
}
|
|
||||||
|
|
||||||
__forceinline static void MWAITX() {
|
|
||||||
static constexpr auto EnableWaitTimeFlag = 1U << 1;
|
|
||||||
static constexpr auto RequestC1State = 0U;
|
|
||||||
|
|
||||||
// monitor_var should be aligned to a cache line.
|
|
||||||
alignas(64) u64 monitor_var{};
|
|
||||||
_mm_monitorx(&monitor_var, 0, 0);
|
|
||||||
_mm_mwaitx(EnableWaitTimeFlag, RequestC1State, PauseCycles);
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
static void TPAUSE() {
|
|
||||||
static constexpr auto RequestC02State = 0U;
|
|
||||||
const auto tsc = FencedRDTSC() + PauseCycles;
|
|
||||||
const auto eax = static_cast<u32>(tsc & 0xFFFFFFFF);
|
|
||||||
const auto edx = static_cast<u32>(tsc >> 32);
|
|
||||||
asm volatile("tpause %0" : : "r"(RequestC02State), "d"(edx), "a"(eax));
|
|
||||||
}
|
|
||||||
|
|
||||||
static void MWAITX() {
|
|
||||||
static constexpr auto EnableWaitTimeFlag = 1U << 1;
|
|
||||||
static constexpr auto RequestC1State = 0U;
|
|
||||||
|
|
||||||
// monitor_var should be aligned to a cache line.
|
|
||||||
alignas(64) u64 monitor_var{};
|
|
||||||
asm volatile("monitorx" : : "a"(&monitor_var), "c"(0), "d"(0));
|
|
||||||
asm volatile("mwaitx" : : "a"(RequestC1State), "b"(PauseCycles), "c"(EnableWaitTimeFlag));
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
|
void MicroSleep(u64 rem) {
|
||||||
void MicroSleep() {
|
// 100,000 cycles is a reasonable amount of time to wait to save on CPU resources.
|
||||||
static const bool has_waitpkg = GetCPUCaps().waitpkg;
|
// For reference:
|
||||||
static const bool has_monitorx = GetCPUCaps().monitorx;
|
// At 1 GHz, 100K cycles is 100us
|
||||||
|
// At 2 GHz, 100K cycles is 50us
|
||||||
if (has_waitpkg) {
|
// At 4 GHz, 100K cycles is 25us
|
||||||
TPAUSE();
|
auto& caps = GetCPUCaps();
|
||||||
} else if (has_monitorx) {
|
u32 cycles = caps.invariant_tsc
|
||||||
MWAITX();
|
? 1'000'000U
|
||||||
|
: rem * (caps.tsc_frequency / 1000000ULL);
|
||||||
|
if (caps.waitpkg) {
|
||||||
|
constexpr auto RequestC02State = 0U;
|
||||||
|
_tpause(RequestC02State, FencedRDTSC() + cycles);
|
||||||
|
} else if (caps.monitorx) {
|
||||||
|
constexpr auto EnableWaitTimeFlag = 1U << 1;
|
||||||
|
constexpr auto RequestC1State = 0U;
|
||||||
|
// monitor_var should be aligned to a cache line.
|
||||||
|
alignas(64) u64 monitor_var{};
|
||||||
|
_mm_monitorx(&monitor_var, 0, 0);
|
||||||
|
_mm_mwaitx(EnableWaitTimeFlag, RequestC1State, cycles);
|
||||||
} else {
|
} else {
|
||||||
std::this_thread::yield();
|
std::this_thread::yield();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,8 +3,10 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include "common/common_types.h"
|
||||||
|
|
||||||
namespace Common::X64 {
|
namespace Common::X64 {
|
||||||
|
|
||||||
void MicroSleep();
|
void MicroSleep(u64 rem);
|
||||||
|
|
||||||
} // namespace Common::X64
|
} // namespace Common::X64
|
||||||
|
|
|
||||||
|
|
@ -78,7 +78,7 @@ void CoreTiming::Initialize(std::function<void()>&& on_thread_init_) {
|
||||||
Common::Windows::SleepForOneTick();
|
Common::Windows::SleepForOneTick();
|
||||||
} else {
|
} else {
|
||||||
#ifdef ARCHITECTURE_x86_64
|
#ifdef ARCHITECTURE_x86_64
|
||||||
Common::X64::MicroSleep();
|
Common::X64::MicroSleep(wait_time);
|
||||||
#else
|
#else
|
||||||
std::this_thread::yield();
|
std::this_thread::yield();
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue