From 8765b4951201a94673d5b561f6ba5094b2af0fa5 Mon Sep 17 00:00:00 2001
From: lizzie <lizzie@eden-emu.dev>
Date: Wed, 29 Apr 2026 19:23:20 +0200
Subject: [PATCH] [video_core] fix H264 and jthread() causing spurious errors
 (#3907)

Fixes a regression introduced by #3878.

Signed-off-by: lizzie <lizzie@eden-emu.dev>

Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3907
Reviewed-by: crueter <crueter@eden-emu.dev>
Reviewed-by: MaranBr <maranbr@eden-emu.dev>
---
 src/video_core/cdma_pusher.cpp        | 157 ++++++++++++--------------
 src/video_core/cdma_pusher.h          |   9 +-
 src/video_core/gpu_thread.cpp         |  73 ++++++------
 src/video_core/host1x/codecs/h264.cpp |   6 +-
 4 files changed, 112 insertions(+), 133 deletions(-)
diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp
index 7c342dbefa..1f0f8b5a38 100644
--- a/src/video_core/cdma_pusher.cpp
+++ b/src/video_core/cdma_pusher.cpp
@@ -20,110 +20,101 @@
 namespace Tegra {
 
 CDmaPusher::CDmaPusher(Host1x::Host1x& host1x_, s32 id)
-    : host_processor{std::make_unique<Host1x::Control>(host1x_)}
+    : host_processor(host1x_)
     , host1x{host1x_}
     , current_class{ChClassId(id)}
 {
-    thread = std::jthread([this](std::stop_token stop_token) { ProcessEntries(stop_token); });
+    thread = std::jthread([this](std::stop_token stop_token) {
+        Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
+        ChCommandHeaderList command_list{host1x.System().ApplicationMemory(), 0, 0};
+        u32 count{};
+        u32 method_offset{};
+        u32 mask{};
+        bool incrementing{};
+        while (!stop_token.stop_requested()) {
+            {
+                std::unique_lock l{command_mutex};
+                command_cv.wait(l, stop_token, [this]() { return command_lists.size() > 0; });
+                if (stop_token.stop_requested()) {
+                    return;
+                }
+
+                command_list = std::move(command_lists.front());
+                command_lists.pop_front();
+            }
+
+            size_t i = 0;
+            for (const auto value : command_list) {
+                i++;
+                if (mask != 0) {
+                    const auto lbs = static_cast<u32>(std::countr_zero(mask));
+                    mask &= ~(1U << lbs);
+                    ExecuteCommand(method_offset + lbs, value.raw);
+                    continue;
+                } else if (count != 0) {
+                    --count;
+                    ExecuteCommand(method_offset, value.raw);
+                    if (incrementing) {
+                        ++method_offset;
+                    }
+                    continue;
+                }
+                const auto mode = value.submission_mode.Value();
+                switch (mode) {
+                case ChSubmissionMode::SetClass: {
+                    mask = value.value & 0x3f;
+                    method_offset = value.method_offset;
+                    current_class = ChClassId((value.value >> 6) & 0x3ff);
+                    break;
+                }
+                case ChSubmissionMode::Incrementing:
+                case ChSubmissionMode::NonIncrementing:
+                    count = value.value;
+                    method_offset = value.method_offset;
+                    incrementing = mode == ChSubmissionMode::Incrementing;
+                    break;
+                case ChSubmissionMode::Mask:
+                    mask = value.value;
+                    method_offset = value.method_offset;
+                    break;
+                case ChSubmissionMode::Immediate: {
+                    const u32 data = value.value & 0xfff;
+                    method_offset = value.method_offset;
+                    ExecuteCommand(method_offset, data);
+                    break;
+                }
+                default:
+                    LOG_ERROR(HW_GPU, "Bad command at index {} (bytes {:#X}), buffer size {}", i - 1, (i - 1) * sizeof(u32), command_list.size());
+                    UNIMPLEMENTED_MSG("ChSubmission mode {} is not implemented!", u32(mode));
+                    break;
+                }
+            }
+        }
+    });
 }
 
 CDmaPusher::~CDmaPusher() = default;
 
-void CDmaPusher::ProcessEntries(std::stop_token stop_token) {
-    Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
-    ChCommandHeaderList command_list{host1x.System().ApplicationMemory(), 0, 0};
-    u32 count{};
-    u32 method_offset{};
-    u32 mask{};
-    bool incrementing{};
-
-    while (!stop_token.stop_requested()) {
-        {
-            std::unique_lock l{command_mutex};
-            command_cv.wait(l, stop_token,
-                            [this]() { return command_lists.size() > 0; });
-            if (stop_token.stop_requested()) {
-                return;
-            }
-
-            command_list = std::move(command_lists.front());
-            command_lists.pop_front();
-        }
-
-        size_t i = 0;
-        for (const auto value : command_list) {
-            i++;
-            if (mask != 0) {
-                const auto lbs = static_cast<u32>(std::countr_zero(mask));
-                mask &= ~(1U << lbs);
-                ExecuteCommand(method_offset + lbs, value.raw);
-                continue;
-            } else if (count != 0) {
-                --count;
-                ExecuteCommand(method_offset, value.raw);
-                if (incrementing) {
-                    ++method_offset;
-                }
-                continue;
-            }
-            const auto mode = value.submission_mode.Value();
-            switch (mode) {
-            case ChSubmissionMode::SetClass: {
-                mask = value.value & 0x3f;
-                method_offset = value.method_offset;
-                current_class = static_cast<ChClassId>((value.value >> 6) & 0x3ff);
-                break;
-            }
-            case ChSubmissionMode::Incrementing:
-            case ChSubmissionMode::NonIncrementing:
-                count = value.value;
-                method_offset = value.method_offset;
-                incrementing = mode == ChSubmissionMode::Incrementing;
-                break;
-            case ChSubmissionMode::Mask:
-                mask = value.value;
-                method_offset = value.method_offset;
-                break;
-            case ChSubmissionMode::Immediate: {
-                const u32 data = value.value & 0xfff;
-                method_offset = value.method_offset;
-                ExecuteCommand(method_offset, data);
-                break;
-            }
-            default:
-                LOG_ERROR(HW_GPU, "Bad command at index {} (bytes {:#X}), buffer size {}", i - 1,
-                          (i - 1) * sizeof(u32), command_list.size());
-                UNIMPLEMENTED_MSG("ChSubmission mode {} is not implemented!",
-                                  static_cast<u32>(mode));
-                break;
-            }
-        }
-    }
-}
-
 void CDmaPusher::ExecuteCommand(u32 method, u32 arg) {
     switch (current_class) {
     case ChClassId::Control:
-        LOG_TRACE(Service_NVDRV, "Class {} method {:#X} arg 0x{:X}",
-                  static_cast<u32>(current_class), method, arg);
-        host_processor->ProcessMethod(static_cast<Host1x::Control::Method>(method), arg);
+        LOG_TRACE(Service_NVDRV, "Class {} method {:#X} arg 0x{:X}", u32(current_class), method, arg);
+        host_processor.ProcessMethod(Host1x::Control::Method(method), arg);
         break;
     default:
         thi_regs.reg_array[method] = arg;
         switch (static_cast<ThiMethod>(method)) {
         case ThiMethod::IncSyncpt: {
-            const auto syncpoint_id = static_cast<u32>(arg & 0xFF);
-            [[maybe_unused]] const auto cond = static_cast<u32>((arg >> 8) & 0xFF);
-            LOG_TRACE(Service_NVDRV, "Class {} IncSyncpt Method, syncpt {} cond {}",
-                      static_cast<u32>(current_class), syncpoint_id, cond);
+            const auto syncpoint_id = u32(arg & 0xFF);
+            [[maybe_unused]] const auto cond = u32((arg >> 8) & 0xFF);
+            LOG_TRACE(Service_NVDRV, "Class {} IncSyncpt Method, syncpt {} cond {}", u32(current_class), syncpoint_id, cond);
             auto& syncpoint_manager = host1x.GetSyncpointManager();
             syncpoint_manager.IncrementGuest(syncpoint_id);
             syncpoint_manager.IncrementHost(syncpoint_id);
             break;
         }
         case ThiMethod::SetMethod1:
-            LOG_TRACE(Service_NVDRV, "Class {} method {:#X} arg 0x{:X}",
-                      static_cast<u32>(current_class), static_cast<u32>(thi_regs.method_0), arg);
+            LOG_TRACE(Service_NVDRV, "Class {} method {:#X} arg 0x{:X}", u32(current_class), u32(thi_regs.method_0), arg);
             ProcessMethod(thi_regs.method_0, arg);
             break;
         default:
diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h
index 3186498070..2ce827cdfe 100644
--- a/src/video_core/cdma_pusher.h
+++ b/src/video_core/cdma_pusher.h
@@ -18,11 +18,11 @@
 #include "common/common_types.h"
 #include "common/polyfill_thread.h"
 #include "core/memory.h"
+#include "video_core/host1x/control.h"
 
 namespace Tegra {
 
 namespace Host1x {
-class Control;
 class Host1x;
 class Nvdec;
 class SyncptIncrManager;
@@ -121,9 +121,6 @@ protected:
     virtual void ProcessMethod(u32 method, u32 arg) = 0;
 
 private:
-    /// Process the command entry
-    void ProcessEntries(std::stop_token stop_token);
-
     /// Invoke command class devices to execute the command based on the current state
     void ExecuteCommand(u32 state_offset, u32 data);
 
@@ -131,11 +128,11 @@ protected:
     ThiRegisters thi_regs{};
     std::deque<ChCommandHeaderList> command_lists;
     std::condition_variable_any command_cv;
-    std::jthread thread;
-    std::unique_ptr<Host1x::Control> host_processor;
+    Host1x::Control host_processor;
     std::mutex command_mutex;
     Host1x::Host1x& host1x;
     ChClassId current_class;
+    std::jthread thread;
 };
 
 } // namespace Tegra
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 68f2530668..d7c8ac391c 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -19,45 +19,6 @@
 
 namespace VideoCommon::GPUThread {
 
-/// Runs the GPU thread
-static void RunThread(std::stop_token stop_token, Core::System& system,
-                      VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context,
-                      Tegra::Control::Scheduler& scheduler, SynchState& state) {
-    Common::SetCurrentThreadName("GPU");
-    Common::SetCurrentThreadPriority(Common::ThreadPriority::Critical);
-    system.RegisterHostThread();
-
-    auto current_context = context.Acquire();
-    VideoCore::RasterizerInterface* const rasterizer = renderer.ReadRasterizer();
-
-    CommandDataContainer next;
-
-    while (!stop_token.stop_requested()) {
-        state.queue.PopWait(next, stop_token);
-        if (stop_token.stop_requested()) {
-            break;
-        }
-        if (auto* submit_list = std::get_if<SubmitListCommand>(&next.data)) {
-            scheduler.Push(submit_list->channel, std::move(submit_list->entries));
-        } else if (std::holds_alternative<GPUTickCommand>(next.data)) {
-            system.GPU().TickWork();
-        } else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) {
-            rasterizer->FlushRegion(flush->addr, flush->size);
-        } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) {
-            rasterizer->OnCacheInvalidation(invalidate->addr, invalidate->size);
-        } else {
-            ASSERT(false);
-        }
-        state.signaled_fence.store(next.fence);
-        if (next.block) {
-            // We have to lock the write_lock to ensure that the condition_variable wait not get a
-            // race between the check and the lock itself.
-            std::scoped_lock lk{state.write_lock};
-            state.cv.notify_all();
-        }
-    }
-}
-
 ThreadManager::ThreadManager(Core::System& system_, bool is_async_)
     : system{system_}, is_async{is_async_} {}
 
@@ -65,8 +26,38 @@ ThreadManager::~ThreadManager() = default;
 
 void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context, Tegra::Control::Scheduler& scheduler) {
     rasterizer = renderer.ReadRasterizer();
-    thread = std::jthread(RunThread, std::ref(system), std::ref(renderer), std::ref(context),
-                          std::ref(scheduler), std::ref(state));
+    thread = std::jthread([&](std::stop_token stop_token) {
+        Common::SetCurrentThreadName("GPU");
+        Common::SetCurrentThreadPriority(Common::ThreadPriority::Critical);
+        system.RegisterHostThread();
+
+        auto current_context = context.Acquire();
+        CommandDataContainer next;
+        while (!stop_token.stop_requested()) {
+            state.queue.PopWait(next, stop_token);
+            if (stop_token.stop_requested()) {
+                break;
+            }
+            if (auto* submit_list = std::get_if<SubmitListCommand>(&next.data)) {
+                scheduler.Push(submit_list->channel, std::move(submit_list->entries));
+            } else if (std::holds_alternative<GPUTickCommand>(next.data)) {
+                system.GPU().TickWork();
+            } else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) {
+                renderer.ReadRasterizer()->FlushRegion(flush->addr, flush->size);
+            } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) {
+                renderer.ReadRasterizer()->OnCacheInvalidation(invalidate->addr, invalidate->size);
+            } else {
+                ASSERT(false);
+            }
+            state.signaled_fence.store(next.fence);
+            if (next.block) {
+                // We have to lock the write_lock to ensure that the condition_variable wait not get a
+                // race between the check and the lock itself.
+                std::scoped_lock lk{state.write_lock};
+                state.cv.notify_all();
+            }
+        }
+    });
 }
 
 void ThreadManager::SubmitList(s32 channel, Tegra::CommandList&& entries) {
diff --git a/src/video_core/host1x/codecs/h264.cpp b/src/video_core/host1x/codecs/h264.cpp
index 4a37e1d1d3..f439ac3828 100644
--- a/src/video_core/host1x/codecs/h264.cpp
+++ b/src/video_core/host1x/codecs/h264.cpp
@@ -51,11 +51,11 @@ bool H264::IsInterlaced() {
 }
 
 std::span<const u8> H264::ComposeFrame() {
-    host1x.memory_manager.ReadBlock(regs.picture_info_offset.Address(), &current_context, sizeof(H264DecoderContext));
+    host1x.gmmu_manager.ReadBlock(regs.picture_info_offset.Address(), &current_context, sizeof(H264DecoderContext));
     const s64 frame_number = current_context.h264_parameter_set.frame_number.Value();
     if (!is_first_frame && frame_number != 0) {
         frame_scratch.resize_destructive(current_context.stream_len);
-        host1x.memory_manager.ReadBlock(regs.frame_bitstream_offset.Address(), frame_scratch.data(), frame_scratch.size());
+        host1x.gmmu_manager.ReadBlock(regs.frame_bitstream_offset.Address(), frame_scratch.data(), frame_scratch.size());
         return frame_scratch;
     }
 
@@ -177,7 +177,7 @@ std::span<const u8> H264::ComposeFrame() {
     const auto& encoded_header = writer.GetByteArray();
     frame_scratch.resize(encoded_header.size() + current_context.stream_len);
     std::memcpy(frame_scratch.data(), encoded_header.data(), encoded_header.size());
-    host1x.memory_manager.ReadBlock(regs.frame_bitstream_offset.Address(), frame_scratch.data() + encoded_header.size(), current_context.stream_len);
+    host1x.gmmu_manager.ReadBlock(regs.frame_bitstream_offset.Address(), frame_scratch.data() + encoded_header.size(), current_context.stream_len);
     return frame_scratch;
 }