// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: 2013 Dolphin Emulator Project // SPDX-FileCopyrightText: 2014 Citra Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #include #include #include #include "common/error.h" #include "common/logging.h" #include "common/assert.h" #include "common/thread.h" #ifdef __APPLE__ #include #elif defined(__HAIKU__) #include #elif defined(_WIN32) #include #include "common/string_util.h" #include "common/windows/timer_resolution.h" #else #if defined(__FreeBSD__) #include #include #include // Compatibility with CPUset #define cpu_set_t cpuset_t #elif defined(__DragonFly__) || defined(__OpenBSD__) || defined(__Bitrig__) #include #endif #include #include #endif #ifndef _WIN32 #include #endif #ifdef ARCHITECTURE_x86_64 #ifdef _MSC_VER #include #else #include #endif #include "common/x64/cpu_detect.h" #include "common/x64/rdtsc.h" #endif #include "core/core_timing.h" namespace Common { void SetCurrentThreadPriority(ThreadPriority new_priority) { #ifdef _WIN32 int windows_priority = [&]() { switch (new_priority) { case ThreadPriority::Low: return THREAD_PRIORITY_BELOW_NORMAL; case ThreadPriority::Normal: return THREAD_PRIORITY_NORMAL; case ThreadPriority::High: return THREAD_PRIORITY_ABOVE_NORMAL; case ThreadPriority::VeryHigh: return THREAD_PRIORITY_HIGHEST; case ThreadPriority::Critical: return THREAD_PRIORITY_TIME_CRITICAL; default: return THREAD_PRIORITY_NORMAL; } }(); SetThreadPriority(GetCurrentThread(), windows_priority); #elif defined(__HAIKU__) // TODO: We have priorities for 3D rendering applications - may help lavapipe? int priority = [&]() { switch (new_priority) { case ThreadPriority::Low: return B_LOW_PRIORITY; case ThreadPriority::Normal: return B_NORMAL_PRIORITY; case ThreadPriority::High: return B_DISPLAY_PRIORITY; case ThreadPriority::VeryHigh: return B_URGENT_DISPLAY_PRIORITY; case ThreadPriority::Critical: return B_URGENT_PRIORITY; default: return B_NORMAL_PRIORITY; } }(); set_thread_priority(find_thread(NULL), priority); #else pthread_t this_thread = pthread_self(); const auto scheduling_type = SCHED_OTHER; s32 max_prio = sched_get_priority_max(scheduling_type); s32 min_prio = sched_get_priority_min(scheduling_type); u32 level = (std::max)(u32(new_priority) + 1, 4U); struct sched_param params; if (max_prio > min_prio) { params.sched_priority = min_prio + ((max_prio - min_prio) * level) / 4; } else { params.sched_priority = min_prio - ((min_prio - max_prio) * level) / 4; } pthread_setschedparam(this_thread, scheduling_type, ¶ms); #endif } void SetCurrentThreadName(const char* name) { #ifdef _MSC_VER // Sets the debugger-visible name of the current thread. if (auto pf = (decltype(&SetThreadDescription))(void*)GetProcAddress(GetModuleHandle(TEXT("KernelBase.dll")), "SetThreadDescription"); pf) pf(GetCurrentThread(), UTF8ToUTF16W(name).data()); // Windows 10+ else ; // No-op #elif defined(__APPLE__) pthread_setname_np(name); #elif defined(__HAIKU__) rename_thread(find_thread(NULL), name); #elif defined(__Bitrig__) || defined(__DragonFly__) || defined(__FreeBSD__) || defined(__OpenBSD__) pthread_set_name_np(pthread_self(), name); #elif defined(__NetBSD__) pthread_setname_np(pthread_self(), "%s", (void*)name); #elif defined(__linux__) || defined(__CYGWIN__) || defined(__sun__) || defined(__glibc__) || defined(__managarm__) int ret = pthread_setname_np(pthread_self(), name); if (ret == ERANGE) { // Linux limits thread names to 15 characters and will outright reject any // attempt to set a longer name with ERANGE. char buf[16]; size_t const len = std::min(std::strlen(name), sizeof(buf) - 1); std::memcpy(buf, name, len); buf[len] = '\0'; pthread_setname_np(pthread_self(), buf); } #elif defined(_WIN32) // MinGW with the POSIX threading model does not support pthread_setname_np // See for reference // https://gitlab.freedesktop.org/mesa/mesa/-/blame/main/src/util/u_thread.c?ref_type=heads#L75 (void)name; #else pthread_setname_np(pthread_self(), name); #endif } void PinCurrentThreadToPerformanceCore(size_t core_id) { ASSERT(core_id < 4); // If we set a flag for a CPU that doesn't exist, the thread may not be allowed to // run in ANY processor! auto const total_cores = std::thread::hardware_concurrency(); if (core_id < total_cores) { #if defined(__ANDROID__) cpu_set_t set; CPU_ZERO(&set); CPU_SET(core_id, &set); sched_setaffinity(pthread_self(), sizeof(set), &set); #elif defined(__linux__) || defined(__FreeBSD__) cpu_set_t set; CPU_ZERO(&set); CPU_SET(core_id, &set); pthread_setaffinity_np(pthread_self(), sizeof(set), &set); #elif defined(_WIN32) DWORD set = 1UL << core_id; SetThreadAffinityMask(GetCurrentThread(), set); #else // No pin functionality implemented #endif } } #ifdef ARCHITECTURE_x86_64 // On Linux and UNIX systems, a futex would nominally be used to cover the costs // the idea is that it's intuitivelly cheaper to use a direct instruction as opposed to a full futex call // the underlying libc++ implementation uses pthread_cond_timedwait which MAY invoke a futex // Let's pretend the OS is too expensive to jump into, and avoid ANY context switches // this should *IN THEORY* lower CPU usage while just waiting for stuff effectively // For windows the minimal quanta resolution is about 500us, and normal CRT cond var is 1.5ms(?) // so may as well avoid that too // Let's just give ALL platforms the same mechanisms (almost) for when they have umonitor OR waitpkg #ifdef __clang__ __attribute__((target("waitpkg,mwaitx"))) #elif defined(__GNUC__) #pragma GCC target("waitpkg") #pragma GCC target("mwaitx") #endif bool Event::WaitFor(const std::chrono::nanoseconds time) { auto const start = Common::X64::FencedRDTSC(); auto const& caps = Common::g_cpu_caps; auto const ns_ratio = std::max(1, caps.tsc_frequency / 1'000); [[maybe_unused]] auto const end = start + time.count() * ns_ratio; if (caps.monitorx) { while (true) { // Armed monitor, as per manual, MWAITX must be conditional if the condition isn't satisfied // to prevent a race condition. _mm_monitorx(reinterpret_cast(std::addressof(is_set)), 0, 0); if (!is_set.load()) { // RDTSC may be fenced here due to atomic load s32 const cycles = s64(_rdtsc()) - s64(start); if (cycles > 0) { // See here: https://github.com/torvalds/linux/blob/948a64995aca6820abefd17f1a4258f5835c5ad9/arch/x86/lib/delay.c#L93 // MWAITX accepts a 32-bit input timer which determines the total number of cycles to wait for // NOT THE TOTAL ABSOLUTE TSC VALUE, it's just a delta // BIT[1] = use a timer // Hint = 0: Use C1 state when sleepy (means faster wakeup but less power saving) _mm_mwaitx(1 << 1, 0u, cycles); if (!is_set.load()) return false; } else { return false; } } bool expected = true; if (is_set.compare_exchange_weak(expected, false, std::memory_order_release)) return true; } } else if (caps.waitpkg) { // #UD If CPUID.7.0:ECX.WAITPKG[bit 5]=0. while (true) { _umonitor(std::addressof(is_set)); if (!is_set.load()) { s32 const cycles = s64(_rdtsc()) - s64(start); if (!_umwait(1, cycles)) return false; } bool expected = true; if (is_set.compare_exchange_weak(expected, false, std::memory_order_release)) return true; } } else { #ifdef _WIN32 while (!is_set.load() && end > _rdtsc()) Common::Windows::SleepForOneTick(); if (is_set.load()) Reset(); return true; #else std::unique_lock lk{mutex}; if (!condvar.wait_for(lk, time, [this] { return is_set.load(); })) return false; is_set = false; return true; #endif } } #else bool Event::WaitFor(const std::chrono::nanoseconds time) { std::unique_lock lk{mutex}; if (!condvar.wait_for(lk, time, [this] { return is_set.load(); })) return false; is_set = false; return true; } #endif } // namespace Common