[video_core] fix H264 and jthread() causing spurious errors (#3907)
Some checks are pending
tx-src / sources (push) Waiting to run
Check Strings / check-strings (push) Waiting to run

Fixes a regression introduced by #3878.

Signed-off-by: lizzie <lizzie@eden-emu.dev>

Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3907
Reviewed-by: crueter <crueter@eden-emu.dev>
Reviewed-by: MaranBr <maranbr@eden-emu.dev>
This commit is contained in:
lizzie 2026-04-29 19:23:20 +02:00 committed by crueter
parent a587b7dc3a
commit 8765b49512
No known key found for this signature in database
GPG key ID: 425ACD2D4830EBC6
4 changed files with 112 additions and 133 deletions

View file

@ -20,110 +20,101 @@
namespace Tegra {
// Builds the pusher for the class id `id` and launches the worker thread that
// drains `command_lists` and decodes every queued command word.
//
// NOTE(review): this listing looks like a diff rendered without +/- markers.
// The two `host_processor` initializers and the two `thread = ...` assignments
// below appear to be the before/after forms of the same line; only one of each
// pair can be live in the real file. Confirm against the repository.
CDmaPusher::CDmaPusher(Host1x::Host1x& host1x_, s32 id)
: host_processor{std::make_unique<Host1x::Control>(host1x_)}
: host_processor(host1x_)
, host1x{host1x_}
, current_class{ChClassId(id)}
{
// The thread is assigned in the constructor body, i.e. after the member
// initializers above have run.
thread = std::jthread([this](std::stop_token stop_token) { ProcessEntries(stop_token); });
thread = std::jthread([this](std::stop_token stop_token) {
Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
ChCommandHeaderList command_list{host1x.System().ApplicationMemory(), 0, 0};
// Decoder state is declared outside the loop, so a header at the end of one
// command list keeps applying to the first words of the next list.
u32 count{};
u32 method_offset{};
u32 mask{};
bool incrementing{};
while (!stop_token.stop_requested()) {
{
// Hold the queue lock only long enough to take one list off the front.
std::unique_lock l{command_mutex};
command_cv.wait(l, stop_token, [this]() { return command_lists.size() > 0; });
// Leave before touching the queue if a stop has been requested.
if (stop_token.stop_requested()) {
return;
}
command_list = std::move(command_lists.front());
command_lists.pop_front();
}
// `i` is incremented at the top of each iteration, so `i - 1` is the
// zero-based index of the word being decoded (used only for logging).
size_t i = 0;
for (const auto value : command_list) {
i++;
if (mask != 0) {
// A mask is pending: this word is data for the method selected by the
// lowest set bit, which is then cleared.
const auto lbs = static_cast<u32>(std::countr_zero(mask));
mask &= ~(1U << lbs);
ExecuteCommand(method_offset + lbs, value.raw);
continue;
} else if (count != 0) {
// A counted run is pending: this word is data; the target method only
// advances when the run was started in incrementing mode.
--count;
ExecuteCommand(method_offset, value.raw);
if (incrementing) {
++method_offset;
}
continue;
}
// No data pending: the word is a header whose submission mode selects how
// the following words are interpreted.
const auto mode = value.submission_mode.Value();
switch (mode) {
case ChSubmissionMode::SetClass: {
// Low 6 bits seed the mask; the 10 bits above them select the class.
mask = value.value & 0x3f;
method_offset = value.method_offset;
current_class = ChClassId((value.value >> 6) & 0x3ff);
break;
}
case ChSubmissionMode::Incrementing:
case ChSubmissionMode::NonIncrementing:
// The header's value field is the number of data words that follow.
count = value.value;
method_offset = value.method_offset;
incrementing = mode == ChSubmissionMode::Incrementing;
break;
case ChSubmissionMode::Mask:
mask = value.value;
method_offset = value.method_offset;
break;
case ChSubmissionMode::Immediate: {
// The argument is carried in the header itself (low 12 bits).
const u32 data = value.value & 0xfff;
method_offset = value.method_offset;
ExecuteCommand(method_offset, data);
break;
}
default:
LOG_ERROR(HW_GPU, "Bad command at index {} (bytes {:#X}), buffer size {}", i - 1, (i - 1) * sizeof(u32), command_list.size());
UNIMPLEMENTED_MSG("ChSubmission mode {} is not implemented!", u32(mode));
break;
}
}
}
});
}
// Defaulted: teardown is left entirely to the members' own destructors.
CDmaPusher::~CDmaPusher() = default;
/// Worker loop: takes queued command lists one at a time and decodes every word in them
/// until a stop is requested through `stop_token`.
void CDmaPusher::ProcessEntries(std::stop_token stop_token) {
    Common::SetCurrentThreadPriority(Common::ThreadPriority::High);

    // Decoder state is kept outside the loop so it carries over from one list to the next.
    ChCommandHeaderList pending{host1x.System().ApplicationMemory(), 0, 0};
    u32 words_left{};
    u32 target_method{};
    u32 pending_mask{};
    bool auto_increment{};

    while (!stop_token.stop_requested()) {
        {
            // Hold the queue lock only while taking one list off the front.
            std::unique_lock queue_lock{command_mutex};
            command_cv.wait(queue_lock, stop_token, [this]() { return !command_lists.empty(); });
            if (stop_token.stop_requested()) {
                return;
            }
            pending = std::move(command_lists.front());
            command_lists.pop_front();
        }

        // Zero-based position of the word being decoded; only used for diagnostics.
        size_t word_index = 0;
        for (const auto word : pending) {
            if (pending_mask != 0) {
                // Data word for the method selected by the lowest set mask bit.
                const auto bit = static_cast<u32>(std::countr_zero(pending_mask));
                pending_mask &= ~(1U << bit);
                ExecuteCommand(target_method + bit, word.raw);
            } else if (words_left != 0) {
                // Data word belonging to a counted run.
                --words_left;
                ExecuteCommand(target_method, word.raw);
                if (auto_increment) {
                    ++target_method;
                }
            } else {
                // Header word: the submission mode decides how following words are read.
                const auto mode = word.submission_mode.Value();
                switch (mode) {
                case ChSubmissionMode::SetClass:
                    pending_mask = word.value & 0x3f;
                    target_method = word.method_offset;
                    current_class = static_cast<ChClassId>((word.value >> 6) & 0x3ff);
                    break;
                case ChSubmissionMode::Incrementing:
                case ChSubmissionMode::NonIncrementing:
                    words_left = word.value;
                    target_method = word.method_offset;
                    auto_increment = (mode == ChSubmissionMode::Incrementing);
                    break;
                case ChSubmissionMode::Mask:
                    pending_mask = word.value;
                    target_method = word.method_offset;
                    break;
                case ChSubmissionMode::Immediate: {
                    // The argument travels in the header itself (low 12 bits).
                    const u32 immediate = word.value & 0xfff;
                    target_method = word.method_offset;
                    ExecuteCommand(target_method, immediate);
                    break;
                }
                default:
                    LOG_ERROR(HW_GPU, "Bad command at index {} (bytes {:#X}), buffer size {}",
                              word_index, word_index * sizeof(u32), pending.size());
                    UNIMPLEMENTED_MSG("ChSubmission mode {} is not implemented!",
                                      static_cast<u32>(mode));
                    break;
                }
            }
            ++word_index;
        }
    }
}
void CDmaPusher::ExecuteCommand(u32 method, u32 arg) {
switch (current_class) {
case ChClassId::Control:
LOG_TRACE(Service_NVDRV, "Class {} method {:#X} arg 0x{:X}",
static_cast<u32>(current_class), method, arg);
host_processor->ProcessMethod(static_cast<Host1x::Control::Method>(method), arg);
LOG_TRACE(Service_NVDRV, "Class {} method {:#X} arg 0x{:X}", u32(current_class), method, arg);
host_processor.ProcessMethod(Host1x::Control::Method(method), arg);
break;
default:
thi_regs.reg_array[method] = arg;
switch (static_cast<ThiMethod>(method)) {
case ThiMethod::IncSyncpt: {
const auto syncpoint_id = static_cast<u32>(arg & 0xFF);
[[maybe_unused]] const auto cond = static_cast<u32>((arg >> 8) & 0xFF);
LOG_TRACE(Service_NVDRV, "Class {} IncSyncpt Method, syncpt {} cond {}",
static_cast<u32>(current_class), syncpoint_id, cond);
const auto syncpoint_id = u32(arg & 0xFF);
[[maybe_unused]] const auto cond = u32((arg >> 8) & 0xFF);
LOG_TRACE(Service_NVDRV, "Class {} IncSyncpt Method, syncpt {} cond {}", u32(current_class), syncpoint_id, cond);
auto& syncpoint_manager = host1x.GetSyncpointManager();
syncpoint_manager.IncrementGuest(syncpoint_id);
syncpoint_manager.IncrementHost(syncpoint_id);
break;
}
case ThiMethod::SetMethod1:
LOG_TRACE(Service_NVDRV, "Class {} method {:#X} arg 0x{:X}",
static_cast<u32>(current_class), static_cast<u32>(thi_regs.method_0), arg);
LOG_TRACE(Service_NVDRV, "Class {} method {:#X} arg 0x{:X}", u32(current_class), u32(thi_regs.method_0), arg);
ProcessMethod(thi_regs.method_0, arg);
break;
default:

View file

@ -18,11 +18,11 @@
#include "common/common_types.h"
#include "common/polyfill_thread.h"
#include "core/memory.h"
#include "video_core/host1x/control.h"
namespace Tegra {
namespace Host1x {
class Control;
class Host1x;
class Nvdec;
class SyncptIncrManager;
@ -121,9 +121,6 @@ protected:
virtual void ProcessMethod(u32 method, u32 arg) = 0;
private:
/// Process the command entry
void ProcessEntries(std::stop_token stop_token);
/// Invoke command class devices to execute the command based on the current state
void ExecuteCommand(u32 state_offset, u32 data);
@ -131,11 +128,11 @@ protected:
// NOTE(review): this member list looks like a diff rendered without +/- markers:
// `thread` and `host_processor` each appear twice (presumably the before/after
// forms). Only one of each can exist in the real class; confirm in the repository.
ThiRegisters thi_regs{};
// Lists waiting to be decoded by the worker; accessed under `command_mutex`.
std::deque<ChCommandHeaderList> command_lists;
// Signalled when work is queued; the worker waits on it with a stop token.
std::condition_variable_any command_cv;
std::jthread thread;
std::unique_ptr<Host1x::Control> host_processor;
Host1x::Control host_processor;
std::mutex command_mutex;
Host1x::Host1x& host1x;
ChClassId current_class;
// Members are destroyed in reverse declaration order, so a `thread` declared
// last is torn down before the queue, mutex and processor its lambda uses.
std::jthread thread;
};
} // namespace Tegra

View file

@ -19,45 +19,6 @@
namespace VideoCommon::GPUThread {
/// Entry point of the GPU thread: names and prioritises the thread, acquires the graphics
/// context, then services queued commands until a stop is requested.
static void RunThread(std::stop_token stop_token, Core::System& system,
                      VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context,
                      Tegra::Control::Scheduler& scheduler, SynchState& state) {
    Common::SetCurrentThreadName("GPU");
    Common::SetCurrentThreadPriority(Common::ThreadPriority::Critical);
    system.RegisterHostThread();

    // Kept alive for the whole lifetime of the loop below.
    auto acquired_context = context.Acquire();
    // The rasterizer pointer is fetched once, after the context has been acquired.
    VideoCore::RasterizerInterface* const rasterizer = renderer.ReadRasterizer();

    CommandDataContainer command;
    while (!stop_token.stop_requested()) {
        state.queue.PopWait(command, stop_token);
        if (stop_token.stop_requested()) {
            break;
        }

        // Dispatch on whichever alternative the command payload currently holds.
        auto& payload = command.data;
        if (auto* submit = std::get_if<SubmitListCommand>(&payload)) {
            scheduler.Push(submit->channel, std::move(submit->entries));
        } else if (std::holds_alternative<GPUTickCommand>(payload)) {
            system.GPU().TickWork();
        } else if (const auto* flush_cmd = std::get_if<FlushRegionCommand>(&payload)) {
            rasterizer->FlushRegion(flush_cmd->addr, flush_cmd->size);
        } else if (const auto* invalidate_cmd = std::get_if<InvalidateRegionCommand>(&payload)) {
            rasterizer->OnCacheInvalidation(invalidate_cmd->addr, invalidate_cmd->size);
        } else {
            ASSERT(false);
        }

        // Publish the fence of the command that has just been handled.
        state.signaled_fence.store(command.fence);
        if (command.block) {
            // Take write_lock before notifying so that a waiter cannot race between
            // checking its condition and going to sleep on the condition variable.
            std::scoped_lock notify_guard{state.write_lock};
            state.cv.notify_all();
        }
    }
}
/// Stores the system reference and the `is_async_` flag; no thread is started here.
ThreadManager::ThreadManager(Core::System& system_, bool is_async_)
    : system{system_}
    , is_async{is_async_} {
}
@ -65,8 +26,38 @@ ThreadManager::~ThreadManager() = default;
// Caches the renderer's rasterizer and launches the GPU worker thread.
//
// NOTE(review): this listing looks like a diff rendered without +/- markers.
// The `RunThread` launch and the lambda launch below appear to be the
// before/after forms of the same statement; confirm which one is live.
void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context, Tegra::Control::Scheduler& scheduler) {
rasterizer = renderer.ReadRasterizer();
thread = std::jthread(RunThread, std::ref(system), std::ref(renderer), std::ref(context),
std::ref(scheduler), std::ref(state));
// The lambda captures by reference, so `renderer`, `context`, `scheduler` and
// this object must stay alive for as long as the thread runs.
thread = std::jthread([&](std::stop_token stop_token) {
Common::SetCurrentThreadName("GPU");
Common::SetCurrentThreadPriority(Common::ThreadPriority::Critical);
system.RegisterHostThread();
// Held in a local so it lives until the thread function returns.
auto current_context = context.Acquire();
CommandDataContainer next;
while (!stop_token.stop_requested()) {
// Presumably blocks until a command is available or a stop is requested;
// confirm against the queue implementation.
state.queue.PopWait(next, stop_token);
if (stop_token.stop_requested()) {
break;
}
// Dispatch on whichever alternative the command payload currently holds.
if (auto* submit_list = std::get_if<SubmitListCommand>(&next.data)) {
scheduler.Push(submit_list->channel, std::move(submit_list->entries));
} else if (std::holds_alternative<GPUTickCommand>(next.data)) {
system.GPU().TickWork();
} else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) {
// The rasterizer is re-read from the renderer for each flush/invalidate
// instead of being cached in a local before the loop.
renderer.ReadRasterizer()->FlushRegion(flush->addr, flush->size);
} else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) {
renderer.ReadRasterizer()->OnCacheInvalidation(invalidate->addr, invalidate->size);
} else {
ASSERT(false);
}
// Publish the fence of the command that has just been handled.
state.signaled_fence.store(next.fence);
if (next.block) {
// We have to lock the write_lock to ensure that the condition_variable wait not get a
// race between the check and the lock itself.
std::scoped_lock lk{state.write_lock};
state.cv.notify_all();
}
}
});
}
void ThreadManager::SubmitList(s32 channel, Tegra::CommandList&& entries) {

View file

@ -51,11 +51,11 @@ bool H264::IsInterlaced() {
}
std::span<const u8> H264::ComposeFrame() {
host1x.memory_manager.ReadBlock(regs.picture_info_offset.Address(), &current_context, sizeof(H264DecoderContext));
host1x.gmmu_manager.ReadBlock(regs.picture_info_offset.Address(), &current_context, sizeof(H264DecoderContext));
const s64 frame_number = current_context.h264_parameter_set.frame_number.Value();
if (!is_first_frame && frame_number != 0) {
frame_scratch.resize_destructive(current_context.stream_len);
host1x.memory_manager.ReadBlock(regs.frame_bitstream_offset.Address(), frame_scratch.data(), frame_scratch.size());
host1x.gmmu_manager.ReadBlock(regs.frame_bitstream_offset.Address(), frame_scratch.data(), frame_scratch.size());
return frame_scratch;
}
@ -177,7 +177,7 @@ std::span<const u8> H264::ComposeFrame() {
const auto& encoded_header = writer.GetByteArray();
frame_scratch.resize(encoded_header.size() + current_context.stream_len);
std::memcpy(frame_scratch.data(), encoded_header.data(), encoded_header.size());
host1x.memory_manager.ReadBlock(regs.frame_bitstream_offset.Address(), frame_scratch.data() + encoded_header.size(), current_context.stream_len);
host1x.gmmu_manager.ReadBlock(regs.frame_bitstream_offset.Address(), frame_scratch.data() + encoded_header.size(), current_context.stream_len);
return frame_scratch;
}