[core/core_timing] better MWAITX and WAITPKG delays (#3984)

This adds support for waiting via the MWAITX and WAITPKG extensions (umonitor, mwait) on CPUs that support them.

Reduces wait times and bypasses the OS timing facilities, which are slow (notably on Windows). Generally it should respond within 0.2 to 0.5 microseconds (since most requests wait for that long).

Also includes a general rework of static constructors and related code.

Signed-off-by: lizzie <lizzie@eden-emu.dev>
Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3984
Reviewed-by: MaranBr <maranbr@eden-emu.dev>
Reviewed-by: crueter <crueter@eden-emu.dev>
This commit is contained in:
lizzie 2026-05-30 21:59:10 +02:00 committed by crueter
parent ff7bbaea7d
commit 7c32cf03a1
No known key found for this signature in database
GPG key ID: 425ACD2D4830EBC6
19 changed files with 477 additions and 533 deletions

View file

@ -3,7 +3,7 @@
#include <numeric>
#include <bit>
#include "common/wall_clock.h"
#include "common/cpu_features.h"
#include "common/alignment.h"
#include "common/literals.h"
#include "core/arm/nce/arm_nce.h"

View file

@ -8,15 +8,13 @@
#include <mutex>
#include <string>
#include <tuple>
#include "common/cpu_features.h"
#include "common/cpu_features.h"
#ifdef _WIN32
#include "common/windows/timer_resolution.h"
#endif
#ifdef ARCHITECTURE_x86_64
#include "common/x64/cpu_wait.h"
#endif
#include "common/settings.h"
#include "core/core_timing.h"
#include "core/hardware_properties.h"
@ -47,8 +45,7 @@ struct CoreTiming::Event {
}
};
CoreTiming::CoreTiming() : clock{Common::CreateOptimalClock()} {}
CoreTiming::CoreTiming() = default;
CoreTiming::~CoreTiming() {
Reset();
}
@ -64,31 +61,16 @@ void CoreTiming::Initialize(std::function<void()>&& on_thread_init_) {
Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
on_thread_init();
has_started = true;
// base frequency in MHz: 1ns (10^-9) = 1GHz (10^9)
while (!stop_token.stop_requested()) {
while (!paused && !stop_token.stop_requested()) {
paused_set = false;
if (auto const next_time = Advance(); next_time) {
// There are more events left in the queue, wait until the next event.
auto wait_time = *next_time - GetGlobalTimeNs().count();
auto const wait_time = *next_time - GetGlobalTimeNs().count();
if (wait_time > 0) {
#ifdef _WIN32
while (!paused && !event.IsSet() && wait_time > 0) {
wait_time = *next_time - GetGlobalTimeNs().count();
if (wait_time >= timer_resolution_ns) {
Common::Windows::SleepForOneTick();
} else {
#ifdef ARCHITECTURE_x86_64
Common::X64::MicroSleep();
#else
std::this_thread::yield();
#endif
}
}
if (event.IsSet())
event.Reset();
#else
event.WaitFor(std::chrono::nanoseconds(wait_time));
#endif
}
} else {
// Queue is empty, wait until another event is scheduled and signals us to
@ -226,7 +208,7 @@ void CoreTiming::ResetTicks() {
}
u64 CoreTiming::GetClockTicks() const {
u64 fres = is_multicore ? clock.GetCNTPCT() : Common::WallClock::CPUTickToCNTPCT(cpu_ticks);
u64 fres = is_multicore ? Common::g_wall_clock.GetCNTPCT() : Common::WallClock::CPUTickToCNTPCT(cpu_ticks);
if (auto const overclock = Settings::values.fast_cpu_time.GetValue(); overclock != Settings::CpuClock::Off) {
fres = u64(f64(fres) * (1.7 + 0.3 * u32(overclock)));
}
@ -240,7 +222,7 @@ u64 CoreTiming::GetClockTicks() const {
u64 CoreTiming::GetGPUTicks() const {
return is_multicore
? clock.GetGPUTick()
? Common::g_wall_clock.GetGPUTick()
: Common::WallClock::CPUTickToGPUTick(cpu_ticks);
}
@ -317,14 +299,14 @@ void CoreTiming::Reset() {
/// @brief Returns current time in nanoseconds.
std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const noexcept {
return is_multicore
? clock.GetTimeNS()
? Common::g_wall_clock.GetTimeNS()
: std::chrono::nanoseconds{Common::WallClock::CPUTickToNS(cpu_ticks)};
}
/// @brief Returns current time in microseconds.
std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const noexcept {
return is_multicore
? clock.GetTimeUS()
? Common::g_wall_clock.GetTimeUS()
: std::chrono::microseconds{Common::WallClock::CPUTickToUS(cpu_ticks)};
}

View file

@ -19,7 +19,7 @@
#include "common/common_types.h"
#include "common/thread.h"
#include "common/wall_clock.h"
#include "common/cpu_features.h"
namespace Core::Timing {
@ -142,37 +142,28 @@ public:
void Reset();
Common::WallClock clock;
using heap_t = boost::heap::fibonacci_heap<CoreTiming::Event, boost::heap::compare<std::greater<>>>;
heap_t event_queue;
s64 global_timer = 0;
#ifdef _WIN32
s64 timer_resolution_ns;
#endif
using heap_t =
boost::heap::fibonacci_heap<CoreTiming::Event, boost::heap::compare<std::greater<>>>;
heap_t event_queue;
u64 event_fifo_id = 0;
s64 pause_end_time{};
/// Cycle timing
u64 cpu_ticks{};
s64 downcount{};
Common::Event event{};
Common::Event pause_event{};
std::function<void()> on_thread_init{};
std::jthread timer_thread;
mutable std::mutex basic_lock;
std::mutex advance_lock;
std::jthread timer_thread;
std::atomic<bool> paused{};
std::atomic<bool> paused_set{};
std::atomic<bool> wait_set{};
std::atomic<bool> has_started{};
std::function<void()> on_thread_init{};
bool is_multicore{};
s64 pause_end_time{};
/// Cycle timing
u64 cpu_ticks{};
s64 downcount{};
};
/// Creates a core timing event with the given name and callback.

View file

@ -10,6 +10,7 @@
#include "common/assert.h"
#include "common/logging.h"
#include "common/settings.h"
#include "common/cpu_features.h"
#include "core/hle/kernel/k_event.h"
#include "core/hle/kernel/k_readable_event.h"
#include "core/hle/kernel/kernel.h"
@ -28,7 +29,6 @@ BufferQueueProducer::BufferQueueProducer(Service::KernelHelpers::ServiceContext&
Service::Nvidia::NvCore::NvMap& nvmap_)
: service_context{service_context_}, core{std::move(buffer_queue_core_)}
, slots(core->slots)
, clock{Common::CreateOptimalClock()}
, nvmap(nvmap_)
{
buffer_wait_event = service_context.CreateEvent("BufferQueue:WaitEvent");
@ -488,7 +488,7 @@ Status BufferQueueProducer::QueueBuffer(s32 slot, const QueueBufferInput& input,
slots[slot].buffer_state = BufferState::Queued;
slots[slot].frame_number = core->frame_counter;
slots[slot].queue_time = timestamp;
slots[slot].presentation_time = clock.GetTimeNS().count();
slots[slot].presentation_time = Common::g_wall_clock.GetTimeNS().count();
slots[slot].fence = fence;
item.slot = slot;

View file

@ -14,7 +14,7 @@
#include <mutex>
#include "common/common_funcs.h"
#include "common/wall_clock.h"
#include "common/cpu_features.h"
#include "core/hle/service/nvdrv/nvdata.h"
#include "core/hle/service/nvnflinger/binder.h"
#include "core/hle/service/nvnflinger/buffer_queue_defs.h"
@ -89,7 +89,6 @@ private:
s32 next_callback_ticket{};
s32 current_callback_ticket{};
std::condition_variable_any callback_condition;
Common::WallClock clock;
Service::Nvidia::NvCore::NvMap& nvmap;
};

View file

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
@ -13,7 +13,7 @@
#include "common/common_types.h"
#include "common/intrusive_list.h"
#include "common/uuid.h"
#include "common/wall_clock.h"
#include "common/cpu_features.h"
#include "core/hle/kernel/k_event.h"
#include "core/hle/service/kernel_helpers.h"
#include "core/hle/service/psc/time/errors.h"