mirror of
https://git.eden-emu.dev/eden-emu/eden
synced 2026-05-27 15:07:06 +02:00
243 lines
8.9 KiB
C++
243 lines
8.9 KiB
C++
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
|
// SPDX-FileCopyrightText: 2013 Dolphin Emulator Project
|
|
// SPDX-FileCopyrightText: 2014 Citra Emulator Project
|
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
|
|
#include <chrono>
|
|
#include <string>
|
|
#include <thread>
|
|
|
|
#include "common/error.h"
|
|
#include "common/logging.h"
|
|
#include "common/assert.h"
|
|
#include "common/thread.h"
|
|
#ifdef __APPLE__
|
|
#include <mach/mach.h>
|
|
#elif defined(__HAIKU__)
|
|
#include <kernel/OS.h>
|
|
#elif defined(_WIN32)
|
|
#include <windows.h>
|
|
#include "common/string_util.h"
|
|
#include "common/windows/timer_resolution.h"
|
|
#else
|
|
#if defined(__FreeBSD__)
|
|
#include <sys/cpuset.h>
|
|
#include <sys/_cpuset.h>
|
|
#include <pthread_np.h>
|
|
// Compatibility with CPUset
|
|
#define cpu_set_t cpuset_t
|
|
#elif defined(__DragonFly__) || defined(__OpenBSD__) || defined(__Bitrig__)
|
|
#include <pthread_np.h>
|
|
#endif
|
|
#include <pthread.h>
|
|
#include <sched.h>
|
|
#endif
|
|
|
|
#ifndef _WIN32
|
|
#include <unistd.h>
|
|
#endif
|
|
|
|
#ifdef ARCHITECTURE_x86_64
|
|
#ifdef _MSC_VER
|
|
#include <intrin.h>
|
|
#else
|
|
#include <x86intrin.h>
|
|
#endif
|
|
#include "common/x64/cpu_detect.h"
|
|
#include "common/x64/rdtsc.h"
|
|
#endif
|
|
#include "core/core_timing.h"
|
|
|
|
namespace Common {
|
|
|
|
void SetCurrentThreadPriority(ThreadPriority new_priority) {
|
|
#ifdef _WIN32
|
|
int windows_priority = [&]() {
|
|
switch (new_priority) {
|
|
case ThreadPriority::Low: return THREAD_PRIORITY_BELOW_NORMAL;
|
|
case ThreadPriority::Normal: return THREAD_PRIORITY_NORMAL;
|
|
case ThreadPriority::High: return THREAD_PRIORITY_ABOVE_NORMAL;
|
|
case ThreadPriority::VeryHigh: return THREAD_PRIORITY_HIGHEST;
|
|
case ThreadPriority::Critical: return THREAD_PRIORITY_TIME_CRITICAL;
|
|
default: return THREAD_PRIORITY_NORMAL;
|
|
}
|
|
}();
|
|
SetThreadPriority(GetCurrentThread(), windows_priority);
|
|
#elif defined(__HAIKU__)
|
|
// TODO: We have priorities for 3D rendering applications - may help lavapipe?
|
|
int priority = [&]() {
|
|
switch (new_priority) {
|
|
case ThreadPriority::Low: return B_LOW_PRIORITY;
|
|
case ThreadPriority::Normal: return B_NORMAL_PRIORITY;
|
|
case ThreadPriority::High: return B_DISPLAY_PRIORITY;
|
|
case ThreadPriority::VeryHigh: return B_URGENT_DISPLAY_PRIORITY;
|
|
case ThreadPriority::Critical: return B_URGENT_PRIORITY;
|
|
default: return B_NORMAL_PRIORITY;
|
|
}
|
|
}();
|
|
set_thread_priority(find_thread(NULL), priority);
|
|
#else
|
|
pthread_t this_thread = pthread_self();
|
|
const auto scheduling_type = SCHED_OTHER;
|
|
s32 max_prio = sched_get_priority_max(scheduling_type);
|
|
s32 min_prio = sched_get_priority_min(scheduling_type);
|
|
u32 level = (std::max)(u32(new_priority) + 1, 4U);
|
|
|
|
struct sched_param params;
|
|
if (max_prio > min_prio) {
|
|
params.sched_priority = min_prio + ((max_prio - min_prio) * level) / 4;
|
|
} else {
|
|
params.sched_priority = min_prio - ((min_prio - max_prio) * level) / 4;
|
|
}
|
|
|
|
pthread_setschedparam(this_thread, scheduling_type, ¶ms);
|
|
#endif
|
|
}
|
|
|
|
void SetCurrentThreadName(const char* name) {
|
|
#ifdef _MSC_VER
|
|
// Sets the debugger-visible name of the current thread.
|
|
if (auto pf = (decltype(&SetThreadDescription))(void*)GetProcAddress(GetModuleHandle(TEXT("KernelBase.dll")), "SetThreadDescription"); pf)
|
|
pf(GetCurrentThread(), UTF8ToUTF16W(name).data()); // Windows 10+
|
|
else
|
|
; // No-op
|
|
#elif defined(__APPLE__)
|
|
pthread_setname_np(name);
|
|
#elif defined(__HAIKU__)
|
|
rename_thread(find_thread(NULL), name);
|
|
#elif defined(__Bitrig__) || defined(__DragonFly__) || defined(__FreeBSD__) || defined(__OpenBSD__)
|
|
pthread_set_name_np(pthread_self(), name);
|
|
#elif defined(__NetBSD__)
|
|
pthread_setname_np(pthread_self(), "%s", (void*)name);
|
|
#elif defined(__linux__) || defined(__CYGWIN__) || defined(__sun__) || defined(__glibc__) || defined(__managarm__)
|
|
int ret = pthread_setname_np(pthread_self(), name);
|
|
if (ret == ERANGE) {
|
|
// Linux limits thread names to 15 characters and will outright reject any
|
|
// attempt to set a longer name with ERANGE.
|
|
char buf[16];
|
|
size_t const len = std::min<size_t>(std::strlen(name), sizeof(buf) - 1);
|
|
std::memcpy(buf, name, len);
|
|
buf[len] = '\0';
|
|
pthread_setname_np(pthread_self(), buf);
|
|
}
|
|
#elif defined(_WIN32)
|
|
// MinGW with the POSIX threading model does not support pthread_setname_np
|
|
// See for reference
|
|
// https://gitlab.freedesktop.org/mesa/mesa/-/blame/main/src/util/u_thread.c?ref_type=heads#L75
|
|
(void)name;
|
|
#else
|
|
pthread_setname_np(pthread_self(), name);
|
|
#endif
|
|
}
|
|
|
|
void PinCurrentThreadToPerformanceCore(size_t core_id) {
|
|
ASSERT(core_id < 4);
|
|
// If we set a flag for a CPU that doesn't exist, the thread may not be allowed to
|
|
// run in ANY processor!
|
|
auto const total_cores = std::thread::hardware_concurrency();
|
|
if (core_id < total_cores) {
|
|
#if defined(__ANDROID__)
|
|
cpu_set_t set;
|
|
CPU_ZERO(&set);
|
|
CPU_SET(core_id, &set);
|
|
sched_setaffinity(pthread_self(), sizeof(set), &set);
|
|
#elif defined(__linux__) || defined(__FreeBSD__)
|
|
cpu_set_t set;
|
|
CPU_ZERO(&set);
|
|
CPU_SET(core_id, &set);
|
|
pthread_setaffinity_np(pthread_self(), sizeof(set), &set);
|
|
#elif defined(_WIN32)
|
|
DWORD set = 1UL << core_id;
|
|
SetThreadAffinityMask(GetCurrentThread(), set);
|
|
#else
|
|
// No pin functionality implemented
|
|
#endif
|
|
}
|
|
}
|
|
|
|
#ifdef ARCHITECTURE_x86_64
|
|
// On Linux and UNIX systems, a futex would nominally be used to cover the costs
|
|
// the idea is that it's intuitivelly cheaper to use a direct instruction as opposed to a full futex call
|
|
// the underlying libc++ implementation uses pthread_cond_timedwait which MAY invoke a futex
|
|
// Let's pretend the OS is too expensive to jump into, and avoid ANY context switches
|
|
// this should *IN THEORY* lower CPU usage while just waiting for stuff effectively
|
|
// For windows the minimal quanta resolution is about 500us, and normal CRT cond var is 1.5ms(?)
|
|
// so may as well avoid that too
|
|
// Let's just give ALL platforms the same mechanisms (almost) for when they have umonitor OR waitpkg
|
|
#ifdef __clang__
|
|
__attribute__((target("waitpkg,mwaitx")))
|
|
#elif defined(__GNUC__)
|
|
#pragma GCC target("waitpkg")
|
|
#pragma GCC target("mwaitx")
|
|
#endif
|
|
bool Event::WaitFor(const std::chrono::nanoseconds time) {
|
|
auto const start = Common::X64::FencedRDTSC();
|
|
auto const& caps = Common::g_cpu_caps;
|
|
auto const ns_ratio = std::max<u64>(1, caps.tsc_frequency / 1'000);
|
|
[[maybe_unused]] auto const end = start + time.count() * ns_ratio;
|
|
if (caps.monitorx) {
|
|
while (true) {
|
|
// Armed monitor, as per manual, MWAITX must be conditional if the condition isn't satisfied
|
|
// to prevent a race condition.
|
|
_mm_monitorx(reinterpret_cast<u64*>(std::addressof(is_set)), 0, 0);
|
|
if (!is_set.load()) {
|
|
// RDTSC may be fenced here due to atomic load
|
|
s32 const cycles = s64(_rdtsc()) - s64(start);
|
|
if (cycles > 0) {
|
|
// See here: https://github.com/torvalds/linux/blob/948a64995aca6820abefd17f1a4258f5835c5ad9/arch/x86/lib/delay.c#L93
|
|
// MWAITX accepts a 32-bit input timer which determines the total number of cycles to wait for
|
|
// NOT THE TOTAL ABSOLUTE TSC VALUE, it's just a delta
|
|
// BIT[1] = use a timer
|
|
// Hint = 0: Use C1 state when sleepy (means faster wakeup but less power saving)
|
|
_mm_mwaitx(1 << 1, 0u, cycles);
|
|
if (!is_set.load())
|
|
return false;
|
|
} else {
|
|
return false;
|
|
}
|
|
}
|
|
bool expected = true;
|
|
if (is_set.compare_exchange_weak(expected, false, std::memory_order_release))
|
|
return true;
|
|
}
|
|
} else if (caps.waitpkg) {
|
|
// #UD If CPUID.7.0:ECX.WAITPKG[bit 5]=0.
|
|
while (true) {
|
|
_umonitor(std::addressof(is_set));
|
|
if (!is_set.load()) {
|
|
s32 const cycles = s64(_rdtsc()) - s64(start);
|
|
if (!_umwait(1, cycles))
|
|
return false;
|
|
}
|
|
bool expected = true;
|
|
if (is_set.compare_exchange_weak(expected, false, std::memory_order_release))
|
|
return true;
|
|
}
|
|
} else {
|
|
#ifdef _WIN32
|
|
while (!is_set.load() && end > _rdtsc())
|
|
Common::Windows::SleepForOneTick();
|
|
if (is_set.load())
|
|
Reset();
|
|
return true;
|
|
#else
|
|
std::unique_lock lk{mutex};
|
|
if (!condvar.wait_for(lk, time, [this] { return is_set.load(); }))
|
|
return false;
|
|
is_set = false;
|
|
return true;
|
|
#endif
|
|
}
|
|
}
|
|
#else
|
|
bool Event::WaitFor(const std::chrono::nanoseconds time) {
|
|
std::unique_lock lk{mutex};
|
|
if (!condvar.wait_for(lk, time, [this] { return is_set.load(); }))
|
|
return false;
|
|
is_set = false;
|
|
return true;
|
|
}
|
|
#endif
|
|
|
|
} // namespace Common
|