mirror of
https://git.eden-emu.dev/eden-emu/eden
synced 2026-04-18 10:39:00 +02:00
This should fix performance regressions in games that didn't need this and fix it to work with the games it is intended to be used for. Made based on recommendations by Discord Contributor. Signed-off-by: Shinmegumi <shinmegumi@eden-emu.dev> Co-authored-by: Gamer64 <76565986+Gamer64ytb@users.noreply.github.com> Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/175 Co-authored-by: Shinmegumi <shinmegumi@eden-emu.dev> Co-committed-by: Shinmegumi <shinmegumi@eden-emu.dev>
300 lines
9.6 KiB
C++
300 lines
9.6 KiB
C++
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
|
|
|
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
|
|
#pragma once
|
|
|
|
#include <algorithm>
|
|
#include <condition_variable>
|
|
#include <cstring>
|
|
#include <deque>
|
|
#include <functional>
|
|
#include <memory>
|
|
#include <mutex>
|
|
#include <thread>
|
|
#include <queue>
|
|
|
|
#include "common/common_types.h"
|
|
#include "common/microprofile.h"
|
|
#include "common/scope_exit.h"
|
|
#include "common/settings.h"
|
|
#include "common/thread.h"
|
|
#include "video_core/delayed_destruction_ring.h"
|
|
#include "video_core/gpu.h"
|
|
#include "video_core/host1x/host1x.h"
|
|
#include "video_core/host1x/syncpoint_manager.h"
|
|
#include "video_core/rasterizer_interface.h"
|
|
|
|
namespace VideoCommon {
|
|
|
|
/// Minimal base for fence objects handled by FenceManager.
///
/// It only records whether the fence is "stubbed". FenceManager's release
/// thread checks IsStubbed() and skips WaitFence() for stubbed fences.
class FenceBase {
public:
    /// @param is_stubbed_ true when this fence is a stub (see class comment).
    explicit FenceBase(bool is_stubbed_) : is_stubbed{is_stubbed_} {}

    /// Returns the stubbed flag given at construction.
    bool IsStubbed() const {
        return is_stubbed;
    }

protected:
    bool is_stubbed;
};
|
|
|
|
template <typename Traits>
|
|
class FenceManager {
|
|
// Backend fence handle type. Used with operator-> in ReleaseThreadFunc, so it is pointer-like.
using TFence = typename Traits::FenceType;
|
|
// Cache types supplied by the backend traits; each exposes the *AsyncFlushes() calls used below.
using TTextureCache = typename Traits::TextureCacheType;
|
|
using TBufferCache = typename Traits::BufferCacheType;
|
|
using TQueryCache = typename Traits::QueryCacheType;
|
|
// When true, the constructor starts a dedicated thread (ReleaseThreadFunc) that waits on fences.
static constexpr bool can_async_check = Traits::HAS_ASYNC_CHECK;
|
|
|
|
public:
|
|
/// Notify the fence manager about a new frame
|
|
void TickFrame() {
|
|
std::unique_lock lock(ring_guard);
|
|
delayed_destruction_ring.Tick();
|
|
}
|
|
|
|
// Unlike other fences, this one doesn't
|
|
void SignalOrdering() {
|
|
if constexpr (!can_async_check) {
|
|
TryReleasePendingFences<false>();
|
|
}
|
|
std::scoped_lock lock{buffer_cache.mutex};
|
|
buffer_cache.AccumulateFlushes();
|
|
}
|
|
|
|
/// Signals a fence that carries no work of its own (an empty callback).
void SignalReference() {
    SignalFence(std::function<void()>{[] {}});
}
|
|
|
|
/// Appends `func` to the operations not yet attached to a fence.
/// The next SignalFence call moves the whole batch into `pending_operations`.
void SyncOperation(std::function<void()>&& func) {
    uncommitted_operations.push_back(std::move(func));
}
|
|
|
|
void SignalFence(std::function<void()>&& func) {
|
|
bool delay_fence = Settings::IsGPULevelHigh();
|
|
#ifdef __ANDROID__
|
|
if (!delay_fence && !Settings::values.early_release_fences.GetValue()) {
|
|
TryReleasePendingFences<false>();
|
|
}
|
|
#else
|
|
if constexpr (!can_async_check) {
|
|
TryReleasePendingFences<false>();
|
|
}
|
|
#endif
|
|
const bool should_flush = ShouldFlush();
|
|
CommitAsyncFlushes();
|
|
TFence new_fence = CreateFence(!should_flush);
|
|
#ifdef __ANDROID__
|
|
if (delay_fence && !Settings::values.early_release_fences.GetValue()) {
|
|
guard.lock();
|
|
}
|
|
#else
|
|
if constexpr (can_async_check) {
|
|
guard.lock();
|
|
}
|
|
#endif
|
|
if (delay_fence) {
|
|
uncommitted_operations.emplace_back(std::move(func));
|
|
}
|
|
pending_operations.emplace_back(std::move(uncommitted_operations));
|
|
QueueFence(new_fence);
|
|
if (!delay_fence) {
|
|
func();
|
|
}
|
|
fences.push(std::move(new_fence));
|
|
if (should_flush) {
|
|
rasterizer.FlushCommands();
|
|
}
|
|
#ifdef __ANDROID__
|
|
if (delay_fence && !Settings::values.early_release_fences.GetValue()) {
|
|
guard.unlock();
|
|
cv.notify_all();
|
|
}
|
|
#else
|
|
if constexpr (can_async_check) {
|
|
guard.unlock();
|
|
cv.notify_all();
|
|
}
|
|
#endif
|
|
rasterizer.InvalidateGPUCache();
|
|
}
|
|
|
|
/// Increments the guest side of syncpoint `value` right away, and signals a fence
/// whose callback increments the host side of the same syncpoint.
void SignalSyncPoint(u32 value) {
    syncpoint_manager.IncrementGuest(value);
    SignalFence(std::function<void()>{[this, value] { syncpoint_manager.IncrementHost(value); }});
}
|
|
|
|
void WaitPendingFences([[maybe_unused]] bool force) {
|
|
if constexpr (!can_async_check) {
|
|
TryReleasePendingFences<true>();
|
|
} else {
|
|
if (!force) {
|
|
return;
|
|
}
|
|
std::mutex wait_mutex;
|
|
std::condition_variable wait_cv;
|
|
std::atomic<bool> wait_finished{};
|
|
std::function<void()> func([&] {
|
|
std::scoped_lock lk(wait_mutex);
|
|
wait_finished.store(true, std::memory_order_relaxed);
|
|
wait_cv.notify_all();
|
|
});
|
|
SignalFence(std::move(func));
|
|
std::unique_lock lk(wait_mutex);
|
|
wait_cv.wait(
|
|
lk, [&wait_finished] { return wait_finished.load(std::memory_order_relaxed); });
|
|
}
|
|
}
|
|
|
|
protected:
|
|
/// Stores references to the rasterizer, GPU and caches, and looks up the
/// syncpoint manager through the GPU's Host1x. When the backend supports
/// asynchronous fence checks, it also starts the fence release thread.
explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
                      TTextureCache& texture_cache_, TBufferCache& buffer_cache_,
                      TQueryCache& query_cache_)
    : rasterizer{rasterizer_},
      gpu{gpu_},
      syncpoint_manager{gpu_.Host1x().GetSyncpointManager()},
      texture_cache{texture_cache_},
      buffer_cache{buffer_cache_},
      query_cache{query_cache_} {
    if constexpr (can_async_check) {
        fence_thread = std::jthread{[this](std::stop_token stop) { ReleaseThreadFunc(stop); }};
    }
}
|
|
|
|
/// Stops and joins the fence release thread, if one was started.
virtual ~FenceManager() {
    if constexpr (can_async_check) {
        {
            // Issue the stop request while holding `guard`. ReleaseThreadFunc
            // evaluates its wait predicate under the same mutex, so the request
            // cannot slip in between the predicate check and the thread going
            // to sleep. Without the lock that window exists: the notification
            // is lost and join() below hangs.
            std::scoped_lock stop_lock{guard};
            fence_thread.request_stop();
        }
        cv.notify_all();
        // NOTE(review): the thread calls the pure virtual WaitFence(); derived
        // classes presumably must not have it mid-call once their own
        // destructor has run. Confirm against the backends.
        fence_thread.join();
    }
}
|
|
|
|
/// Creates a fence object; no backend fence is created when 'is_stubbed' is true.
|
|
/// SignalFence passes is_stubbed = true when there is nothing to flush.
|
|
virtual TFence CreateFence(bool is_stubbed) = 0;
|
|
/// Queues a fence into the backend if the fence isn't stubbed.
|
|
virtual void QueueFence(TFence& fence) = 0;
|
|
/// Returns whether the backend fence has been signaled/reached in host GPU.
|
|
virtual bool IsFenceSignaled(TFence& fence) const = 0;
|
|
/// Blocks until the fence has been signaled by the host GPU.
|
|
virtual void WaitFence(TFence& fence) = 0;
|
|
|
|
// Non-owning references bound in the constructor.
// Used here for FlushCommands() and InvalidateGPUCache() in SignalFence.
VideoCore::RasterizerInterface& rasterizer;
|
|
Tegra::GPU& gpu;
|
|
// Obtained from gpu.Host1x().GetSyncpointManager(); used by SignalSyncPoint.
Tegra::Host1x::SyncpointManager& syncpoint_manager;
|
|
// texture_cache.mutex and buffer_cache.mutex are taken together around the *AsyncFlushes() calls.
TTextureCache& texture_cache;
|
|
TBufferCache& buffer_cache;
|
|
// The query cache is called without either of those mutexes held here.
TQueryCache& query_cache;
|
|
|
|
private:
|
|
template <bool force_wait>
|
|
void TryReleasePendingFences() {
|
|
while (!fences.empty()) {
|
|
TFence& current_fence = fences.front();
|
|
if (ShouldWait() && !IsFenceSignaled(current_fence)) {
|
|
if constexpr (force_wait) {
|
|
WaitFence(current_fence);
|
|
} else {
|
|
return;
|
|
}
|
|
}
|
|
PopAsyncFlushes();
|
|
auto operations = std::move(pending_operations.front());
|
|
pending_operations.pop_front();
|
|
for (auto& operation : operations) {
|
|
operation();
|
|
}
|
|
{
|
|
std::unique_lock lock(ring_guard);
|
|
delayed_destruction_ring.Push(std::move(current_fence));
|
|
}
|
|
fences.pop();
|
|
}
|
|
}
|
|
|
|
/// Body of the fence release thread (started only when `can_async_check`).
///
/// Loops until a stop is requested: sleeps on `cv` until a fence is queued,
/// takes the oldest fence and its operation batch under `guard`, then, with
/// `guard` released, waits on the fence unless it is stubbed, pops the async
/// flushes, runs the batch and retires the fence into the delayed-destruction ring.
void ReleaseThreadFunc(std::stop_token stop_token) {
    const std::string thread_name = "GPUFencingThread";
    MicroProfileOnThreadCreate(thread_name.c_str());

    // Unregister from the profiler on every exit path.
    SCOPE_EXIT {
        MicroProfileOnThreadExit();
    };

    Common::SetCurrentThreadName(thread_name.c_str());
    Common::SetCurrentThreadPriority(Common::ThreadPriority::High);

    // Declared outside the loop: each iteration move-assigns into them.
    TFence active_fence;
    std::deque<std::function<void()>> active_operations;
    while (!stop_token.stop_requested()) {
        {
            std::unique_lock queue_lock(guard);
            cv.wait(queue_lock,
                    [&] { return stop_token.stop_requested() || !fences.empty(); });
            if (stop_token.stop_requested()) [[unlikely]] {
                return;
            }
            active_fence = std::move(fences.front());
            active_operations = std::move(pending_operations.front());
            fences.pop();
            pending_operations.pop_front();
        }
        const bool needs_wait = !active_fence->IsStubbed();
        if (needs_wait) {
            WaitFence(active_fence);
        }
        PopAsyncFlushes();
        for (auto& op : active_operations) {
            op();
        }
        {
            std::unique_lock ring_lock(ring_guard);
            delayed_destruction_ring.Push(std::move(active_fence));
        }
    }
}
|
|
|
|
bool ShouldWait() const {
|
|
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
|
return texture_cache.ShouldWaitAsyncFlushes() || buffer_cache.ShouldWaitAsyncFlushes() ||
|
|
query_cache.ShouldWaitAsyncFlushes();
|
|
}
|
|
|
|
bool ShouldFlush() const {
|
|
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
|
return texture_cache.HasUncommittedFlushes() || buffer_cache.HasUncommittedFlushes() ||
|
|
query_cache.HasUncommittedFlushes();
|
|
}
|
|
|
|
void PopAsyncFlushes() {
|
|
{
|
|
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
|
texture_cache.PopAsyncFlushes();
|
|
buffer_cache.PopAsyncFlushes();
|
|
}
|
|
query_cache.PopAsyncFlushes();
|
|
}
|
|
|
|
void CommitAsyncFlushes() {
|
|
{
|
|
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
|
texture_cache.CommitAsyncFlushes();
|
|
buffer_cache.CommitAsyncFlushes();
|
|
}
|
|
query_cache.CommitAsyncFlushes();
|
|
}
|
|
|
|
// Fences queued by SignalFence and not yet released, oldest first.
std::queue<TFence> fences;
|
|
// Operations added by SyncOperation that are not yet attached to a fence.
std::deque<std::function<void()>> uncommitted_operations;
|
|
// One batch of operations per queued fence; pushed and popped in step with `fences`.
std::deque<std::deque<std::function<void()>>> pending_operations;
|
|
|
|
// Held by ReleaseThreadFunc while it pops from `fences`/`pending_operations`, and by
// SignalFence while it pushes to them when work is handed to the fence thread.
std::mutex guard;
|
|
// Protects `delayed_destruction_ring`.
std::mutex ring_guard;
|
|
// Waited on by ReleaseThreadFunc; notified by SignalFence and by the destructor.
std::condition_variable cv;
|
|
|
|
// Started in the constructor only when `can_async_check` is true.
std::jthread fence_thread;
|
|
|
|
// Released fences are pushed here instead of being destroyed immediately; TickFrame advances it.
// NOTE(review): the template argument 8 is presumably the ring depth in ticks; confirm in
// delayed_destruction_ring.h.
DelayedDestructionRing<TFence, 8> delayed_destruction_ring;
|
|
};
|
|
|
|
} // namespace VideoCommon
|