From 8765b4951201a94673d5b561f6ba5094b2af0fa5 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 29 Apr 2026 19:23:20 +0200 Subject: [PATCH] [video_core] fix H264 and jthread() causing spurious errors (#3907) fixes regression by #3878 Signed-off-by: lizzie Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3907 Reviewed-by: crueter Reviewed-by: MaranBr --- src/video_core/cdma_pusher.cpp | 157 ++++++++++++-------------- src/video_core/cdma_pusher.h | 9 +- src/video_core/gpu_thread.cpp | 73 ++++++------ src/video_core/host1x/codecs/h264.cpp | 6 +- 4 files changed, 112 insertions(+), 133 deletions(-) diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp index 7c342dbefa..1f0f8b5a38 100644 --- a/src/video_core/cdma_pusher.cpp +++ b/src/video_core/cdma_pusher.cpp @@ -20,110 +20,101 @@ namespace Tegra { CDmaPusher::CDmaPusher(Host1x::Host1x& host1x_, s32 id) - : host_processor{std::make_unique(host1x_)} + : host_processor(host1x_) , host1x{host1x_} , current_class{ChClassId(id)} { - thread = std::jthread([this](std::stop_token stop_token) { ProcessEntries(stop_token); }); + thread = std::jthread([this](std::stop_token stop_token) { + Common::SetCurrentThreadPriority(Common::ThreadPriority::High); + ChCommandHeaderList command_list{host1x.System().ApplicationMemory(), 0, 0}; + u32 count{}; + u32 method_offset{}; + u32 mask{}; + bool incrementing{}; + while (!stop_token.stop_requested()) { + { + std::unique_lock l{command_mutex}; + command_cv.wait(l, stop_token, [this]() { return command_lists.size() > 0; }); + if (stop_token.stop_requested()) { + return; + } + + command_list = std::move(command_lists.front()); + command_lists.pop_front(); + } + + size_t i = 0; + for (const auto value : command_list) { + i++; + if (mask != 0) { + const auto lbs = static_cast(std::countr_zero(mask)); + mask &= ~(1U << lbs); + ExecuteCommand(method_offset + lbs, value.raw); + continue; + } else if (count != 0) { + --count; + ExecuteCommand(method_offset, 
value.raw); + if (incrementing) { + ++method_offset; + } + continue; + } + const auto mode = value.submission_mode.Value(); + switch (mode) { + case ChSubmissionMode::SetClass: { + mask = value.value & 0x3f; + method_offset = value.method_offset; + current_class = ChClassId((value.value >> 6) & 0x3ff); + break; + } + case ChSubmissionMode::Incrementing: + case ChSubmissionMode::NonIncrementing: + count = value.value; + method_offset = value.method_offset; + incrementing = mode == ChSubmissionMode::Incrementing; + break; + case ChSubmissionMode::Mask: + mask = value.value; + method_offset = value.method_offset; + break; + case ChSubmissionMode::Immediate: { + const u32 data = value.value & 0xfff; + method_offset = value.method_offset; + ExecuteCommand(method_offset, data); + break; + } + default: + LOG_ERROR(HW_GPU, "Bad command at index {} (bytes {:#X}), buffer size {}", i - 1, (i - 1) * sizeof(u32), command_list.size()); + UNIMPLEMENTED_MSG("ChSubmission mode {} is not implemented!", u32(mode)); + break; + } + } + } + }); } CDmaPusher::~CDmaPusher() = default; -void CDmaPusher::ProcessEntries(std::stop_token stop_token) { - Common::SetCurrentThreadPriority(Common::ThreadPriority::High); - ChCommandHeaderList command_list{host1x.System().ApplicationMemory(), 0, 0}; - u32 count{}; - u32 method_offset{}; - u32 mask{}; - bool incrementing{}; - - while (!stop_token.stop_requested()) { - { - std::unique_lock l{command_mutex}; - command_cv.wait(l, stop_token, - [this]() { return command_lists.size() > 0; }); - if (stop_token.stop_requested()) { - return; - } - - command_list = std::move(command_lists.front()); - command_lists.pop_front(); - } - - size_t i = 0; - for (const auto value : command_list) { - i++; - if (mask != 0) { - const auto lbs = static_cast(std::countr_zero(mask)); - mask &= ~(1U << lbs); - ExecuteCommand(method_offset + lbs, value.raw); - continue; - } else if (count != 0) { - --count; - ExecuteCommand(method_offset, value.raw); - if (incrementing) { - 
++method_offset; - } - continue; - } - const auto mode = value.submission_mode.Value(); - switch (mode) { - case ChSubmissionMode::SetClass: { - mask = value.value & 0x3f; - method_offset = value.method_offset; - current_class = static_cast((value.value >> 6) & 0x3ff); - break; - } - case ChSubmissionMode::Incrementing: - case ChSubmissionMode::NonIncrementing: - count = value.value; - method_offset = value.method_offset; - incrementing = mode == ChSubmissionMode::Incrementing; - break; - case ChSubmissionMode::Mask: - mask = value.value; - method_offset = value.method_offset; - break; - case ChSubmissionMode::Immediate: { - const u32 data = value.value & 0xfff; - method_offset = value.method_offset; - ExecuteCommand(method_offset, data); - break; - } - default: - LOG_ERROR(HW_GPU, "Bad command at index {} (bytes {:#X}), buffer size {}", i - 1, - (i - 1) * sizeof(u32), command_list.size()); - UNIMPLEMENTED_MSG("ChSubmission mode {} is not implemented!", - static_cast(mode)); - break; - } - } - } -} - void CDmaPusher::ExecuteCommand(u32 method, u32 arg) { switch (current_class) { case ChClassId::Control: - LOG_TRACE(Service_NVDRV, "Class {} method {:#X} arg 0x{:X}", - static_cast(current_class), method, arg); - host_processor->ProcessMethod(static_cast(method), arg); + LOG_TRACE(Service_NVDRV, "Class {} method {:#X} arg 0x{:X}", u32(current_class), method, arg); + host_processor.ProcessMethod(Host1x::Control::Method(method), arg); break; default: thi_regs.reg_array[method] = arg; switch (static_cast(method)) { case ThiMethod::IncSyncpt: { - const auto syncpoint_id = static_cast(arg & 0xFF); - [[maybe_unused]] const auto cond = static_cast((arg >> 8) & 0xFF); - LOG_TRACE(Service_NVDRV, "Class {} IncSyncpt Method, syncpt {} cond {}", - static_cast(current_class), syncpoint_id, cond); + const auto syncpoint_id = u32(arg & 0xFF); + [[maybe_unused]] const auto cond = u32((arg >> 8) & 0xFF); + LOG_TRACE(Service_NVDRV, "Class {} IncSyncpt Method, syncpt {} cond {}", 
u32(current_class), syncpoint_id, cond); auto& syncpoint_manager = host1x.GetSyncpointManager(); syncpoint_manager.IncrementGuest(syncpoint_id); syncpoint_manager.IncrementHost(syncpoint_id); break; } case ThiMethod::SetMethod1: - LOG_TRACE(Service_NVDRV, "Class {} method {:#X} arg 0x{:X}", - static_cast(current_class), static_cast(thi_regs.method_0), arg); + LOG_TRACE(Service_NVDRV, "Class {} method {:#X} arg 0x{:X}", u32(current_class), u32(thi_regs.method_0), arg); ProcessMethod(thi_regs.method_0, arg); break; default: diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h index 3186498070..2ce827cdfe 100644 --- a/src/video_core/cdma_pusher.h +++ b/src/video_core/cdma_pusher.h @@ -18,11 +18,11 @@ #include "common/common_types.h" #include "common/polyfill_thread.h" #include "core/memory.h" +#include "video_core/host1x/control.h" namespace Tegra { namespace Host1x { -class Control; class Host1x; class Nvdec; class SyncptIncrManager; @@ -121,9 +121,6 @@ protected: virtual void ProcessMethod(u32 method, u32 arg) = 0; private: - /// Process the command entry - void ProcessEntries(std::stop_token stop_token); - /// Invoke command class devices to execute the command based on the current state void ExecuteCommand(u32 state_offset, u32 data); @@ -131,11 +128,11 @@ protected: ThiRegisters thi_regs{}; std::deque command_lists; std::condition_variable_any command_cv; - std::jthread thread; - std::unique_ptr host_processor; + Host1x::Control host_processor; std::mutex command_mutex; Host1x::Host1x& host1x; ChClassId current_class; + std::jthread thread; }; } // namespace Tegra diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 68f2530668..d7c8ac391c 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -19,45 +19,6 @@ namespace VideoCommon::GPUThread { -/// Runs the GPU thread -static void RunThread(std::stop_token stop_token, Core::System& system, - VideoCore::RendererBase& renderer, 
Core::Frontend::GraphicsContext& context, - Tegra::Control::Scheduler& scheduler, SynchState& state) { - Common::SetCurrentThreadName("GPU"); - Common::SetCurrentThreadPriority(Common::ThreadPriority::Critical); - system.RegisterHostThread(); - - auto current_context = context.Acquire(); - VideoCore::RasterizerInterface* const rasterizer = renderer.ReadRasterizer(); - - CommandDataContainer next; - - while (!stop_token.stop_requested()) { - state.queue.PopWait(next, stop_token); - if (stop_token.stop_requested()) { - break; - } - if (auto* submit_list = std::get_if(&next.data)) { - scheduler.Push(submit_list->channel, std::move(submit_list->entries)); - } else if (std::holds_alternative(next.data)) { - system.GPU().TickWork(); - } else if (const auto* flush = std::get_if(&next.data)) { - rasterizer->FlushRegion(flush->addr, flush->size); - } else if (const auto* invalidate = std::get_if(&next.data)) { - rasterizer->OnCacheInvalidation(invalidate->addr, invalidate->size); - } else { - ASSERT(false); - } - state.signaled_fence.store(next.fence); - if (next.block) { - // We have to lock the write_lock to ensure that the condition_variable wait not get a - // race between the check and the lock itself. 
- std::scoped_lock lk{state.write_lock}; - state.cv.notify_all(); - } - } -} - ThreadManager::ThreadManager(Core::System& system_, bool is_async_) : system{system_}, is_async{is_async_} {} @@ -65,8 +26,38 @@ ThreadManager::~ThreadManager() = default; void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context, Tegra::Control::Scheduler& scheduler) { rasterizer = renderer.ReadRasterizer(); - thread = std::jthread(RunThread, std::ref(system), std::ref(renderer), std::ref(context), - std::ref(scheduler), std::ref(state)); + thread = std::jthread([&](std::stop_token stop_token) { + Common::SetCurrentThreadName("GPU"); + Common::SetCurrentThreadPriority(Common::ThreadPriority::Critical); + system.RegisterHostThread(); + + auto current_context = context.Acquire(); + CommandDataContainer next; + while (!stop_token.stop_requested()) { + state.queue.PopWait(next, stop_token); + if (stop_token.stop_requested()) { + break; + } + if (auto* submit_list = std::get_if(&next.data)) { + scheduler.Push(submit_list->channel, std::move(submit_list->entries)); + } else if (std::holds_alternative(next.data)) { + system.GPU().TickWork(); + } else if (const auto* flush = std::get_if(&next.data)) { + renderer.ReadRasterizer()->FlushRegion(flush->addr, flush->size); + } else if (const auto* invalidate = std::get_if(&next.data)) { + renderer.ReadRasterizer()->OnCacheInvalidation(invalidate->addr, invalidate->size); + } else { + ASSERT(false); + } + state.signaled_fence.store(next.fence); + if (next.block) { + // We have to lock the write_lock to ensure that the condition_variable wait not get a + // race between the check and the lock itself. 
+ std::scoped_lock lk{state.write_lock}; + state.cv.notify_all(); + } + } + }); } void ThreadManager::SubmitList(s32 channel, Tegra::CommandList&& entries) { diff --git a/src/video_core/host1x/codecs/h264.cpp b/src/video_core/host1x/codecs/h264.cpp index 4a37e1d1d3..f439ac3828 100644 --- a/src/video_core/host1x/codecs/h264.cpp +++ b/src/video_core/host1x/codecs/h264.cpp @@ -51,11 +51,11 @@ bool H264::IsInterlaced() { } std::span H264::ComposeFrame() { - host1x.memory_manager.ReadBlock(regs.picture_info_offset.Address(), &current_context, sizeof(H264DecoderContext)); + host1x.gmmu_manager.ReadBlock(regs.picture_info_offset.Address(), &current_context, sizeof(H264DecoderContext)); const s64 frame_number = current_context.h264_parameter_set.frame_number.Value(); if (!is_first_frame && frame_number != 0) { frame_scratch.resize_destructive(current_context.stream_len); - host1x.memory_manager.ReadBlock(regs.frame_bitstream_offset.Address(), frame_scratch.data(), frame_scratch.size()); + host1x.gmmu_manager.ReadBlock(regs.frame_bitstream_offset.Address(), frame_scratch.data(), frame_scratch.size()); return frame_scratch; } @@ -177,7 +177,7 @@ std::span H264::ComposeFrame() { const auto& encoded_header = writer.GetByteArray(); frame_scratch.resize(encoded_header.size() + current_context.stream_len); std::memcpy(frame_scratch.data(), encoded_header.data(), encoded_header.size()); - host1x.memory_manager.ReadBlock(regs.frame_bitstream_offset.Address(), frame_scratch.data() + encoded_header.size(), current_context.stream_len); + host1x.gmmu_manager.ReadBlock(regs.frame_bitstream_offset.Address(), frame_scratch.data() + encoded_header.size(), current_context.stream_len); return frame_scratch; }