[core/core_timing] better MWAITX and WAITPKG delays (#3984)

This implements MWAITX and WAITPKG extensions (umonitor, mwait) for CPUs that support them.

Reduces wait times and bypasses the OS timing facilities, which are slow (notably on Windows). Generally it should respond within 0.2 to 0.5 microseconds (since most requests wait for that long).

Also includes a general rework of static constructors and related code.

Signed-off-by: lizzie <lizzie@eden-emu.dev>
Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3984
Reviewed-by: MaranBr <maranbr@eden-emu.dev>
Reviewed-by: crueter <crueter@eden-emu.dev>
This commit is contained in:
lizzie 2026-05-30 21:59:10 +02:00 committed by crueter
parent ff7bbaea7d
commit 7c32cf03a1
No known key found for this signature in database
GPG key ID: 425ACD2D4830EBC6
19 changed files with 477 additions and 533 deletions

View file

@ -39,7 +39,7 @@ extern "C" {
#include "video_core/textures/decoders.h"
#if defined(ARCHITECTURE_x86_64)
#include "common/x64/cpu_detect.h"
#include "common/cpu_features.h"
#endif
#if defined(ARCHITECTURE_x86_64) \
@ -55,9 +55,8 @@ namespace Tegra::Host1x {
namespace {
static bool HasSSE41() {
#if defined(ARCHITECTURE_x86_64)
static bool has_sse41 = Common::GetCPUCaps().sse4_1;
return has_sse41;
#ifdef ARCHITECTURE_x86_64
return Common::g_cpu_caps.sse4_1;
#else
return false;
#endif

View file

@ -254,11 +254,12 @@ std::optional<u64> GenericEnvironment::TryFindSize() {
static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL;
static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL;
code.resize(MAXIMUM_SIZE / INST_SIZE);
GPUVAddr guest_addr{program_base + start_address};
size_t offset{0};
size_t size{BLOCK_SIZE};
while (size <= MAXIMUM_SIZE) {
code.resize(size / INST_SIZE);
u64* const data = code.data() + offset / INST_SIZE;
gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE);
for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) {