From 90515bc6a2c0534fab99e9a10b6ab31c9279fad0 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 29 Apr 2026 16:41:25 +0200 Subject: [PATCH] [host1x] fix ffmpeg not having va-api on freebsd, inline nvenc (#3878) - fix va-api not being used on freebsd small things that don't affect much: - removes some pointer indirection (why save pointer to GMMU if it's accessible via host1x) - use std::variant<> for decoder - miscellaneous vp9/vp8/h264 opts Signed-off-by: lizzie Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3878 Reviewed-by: crueter Reviewed-by: MaranBr --- src/core/hle/service/nvdrv/core/nvmap.cpp | 10 +- src/video_core/CMakeLists.txt | 8 +- src/video_core/cdma_pusher.cpp | 9 +- src/video_core/cdma_pusher.h | 20 +- src/video_core/gpu.cpp | 2 +- .../{codecs/vp9_types.h => codec_types.h} | 239 +++++++++++++++++- src/video_core/host1x/codecs/codec.cpp | 36 ++- src/video_core/host1x/codecs/codec.h | 24 +- src/video_core/host1x/codecs/decoder.cpp | 23 +- src/video_core/host1x/codecs/decoder.h | 20 +- src/video_core/host1x/codecs/h264.cpp | 97 +++---- src/video_core/host1x/codecs/h264.h | 196 +------------- src/video_core/host1x/codecs/vp8.cpp | 49 ++-- src/video_core/host1x/codecs/vp8.h | 52 +--- src/video_core/host1x/codecs/vp9.cpp | 83 ++---- src/video_core/host1x/codecs/vp9.h | 11 +- src/video_core/host1x/{ffmpeg => }/ffmpeg.cpp | 23 +- src/video_core/host1x/{ffmpeg => }/ffmpeg.h | 2 +- src/video_core/host1x/host1x.cpp | 15 +- src/video_core/host1x/host1x.h | 15 +- src/video_core/host1x/nvdec.cpp | 61 ++--- src/video_core/host1x/nvdec.h | 19 +- src/video_core/host1x/vic.cpp | 25 +- src/video_core/host1x/vic.h | 14 +- 24 files changed, 489 insertions(+), 564 deletions(-) rename src/video_core/host1x/{codecs/vp9_types.h => codec_types.h} (56%) mode change 100755 => 100644 src/video_core/host1x/codecs/decoder.cpp rename src/video_core/host1x/{ffmpeg => }/ffmpeg.cpp (95%) rename src/video_core/host1x/{ffmpeg => }/ffmpeg.h (98%) diff --git 
a/src/core/hle/service/nvdrv/core/nvmap.cpp b/src/core/hle/service/nvdrv/core/nvmap.cpp index a8c8f2f714..e2bf49da62 100644 --- a/src/core/hle/service/nvdrv/core/nvmap.cpp +++ b/src/core/hle/service/nvdrv/core/nvmap.cpp @@ -89,7 +89,7 @@ void NvMap::UnmapHandle(Handle& handle_description) { // Free and unmap the handle from Host1x GMMU if (handle_description.pin_virt_address) { - host1x.GMMU().Unmap(static_cast(handle_description.pin_virt_address), + host1x.gmmu_manager.Unmap(static_cast(handle_description.pin_virt_address), handle_description.aligned_size); host1x.Allocator().Free(handle_description.pin_virt_address, static_cast(handle_description.aligned_size)); @@ -169,12 +169,8 @@ DAddr NvMap::PinHandle(NvMap::Handle::Id handle, bool low_area_pin) { std::scoped_lock lock(handle_description->mutex); const auto map_low_area = [&] { if (handle_description->pin_virt_address == 0) { - auto& gmmu_allocator = host1x.Allocator(); - auto& gmmu = host1x.GMMU(); - u32 address = - gmmu_allocator.Allocate(static_cast(handle_description->aligned_size)); - gmmu.Map(static_cast(address), handle_description->d_address, - handle_description->aligned_size); + u32 address = host1x.Allocator().Allocate(u32(handle_description->aligned_size)); + host1x.gmmu_manager.Map(GPUVAddr(address), handle_description->d_address, handle_description->aligned_size); handle_description->pin_virt_address = address; } }; diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index f999592a67..25eb7846b4 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -8,7 +8,7 @@ add_subdirectory(host_shaders) add_subdirectory(gpu_logging) if(LIBVA_FOUND) - set_source_files_properties(host1x/ffmpeg/ffmpeg.cpp + set_source_files_properties(host1x/ffmpeg.cpp PROPERTIES COMPILE_DEFINITIONS LIBVA_FOUND=1) list(APPEND FFmpeg_LIBRARIES ${LIBVA_LIBRARIES}) endif() @@ -71,9 +71,9 @@ add_library(video_core STATIC host1x/codecs/vp8.h host1x/codecs/vp9.cpp 
host1x/codecs/vp9.h - host1x/codecs/vp9_types.h - host1x/ffmpeg/ffmpeg.cpp - host1x/ffmpeg/ffmpeg.h + host1x/codec_types.h + host1x/ffmpeg.cpp + host1x/ffmpeg.h host1x/control.cpp host1x/control.h host1x/gpu_device_memory_manager.cpp diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp index b9140d9335..7c342dbefa 100644 --- a/src/video_core/cdma_pusher.cpp +++ b/src/video_core/cdma_pusher.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Ryujinx Team and Contributors @@ -20,9 +20,10 @@ namespace Tegra { CDmaPusher::CDmaPusher(Host1x::Host1x& host1x_, s32 id) - : host1x{host1x_}, memory_manager{host1x.GMMU()}, - host_processor{std::make_unique(host1x_)}, current_class{ - static_cast(id)} { + : host_processor{std::make_unique(host1x_)} + , host1x{host1x_} + , current_class{ChClassId(id)} +{ thread = std::jthread([this](std::stop_token stop_token) { ProcessEntries(stop_token); }); } diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h index becbccef18..3186498070 100644 --- a/src/video_core/cdma_pusher.h +++ b/src/video_core/cdma_pusher.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -117,9 +120,6 @@ protected: virtual void ProcessMethod(u32 method, u32 arg) = 0; - Host1x::Host1x& host1x; - Tegra::MemoryManager& memory_manager; - private: /// Process the command entry void ProcessEntries(std::stop_token stop_token); @@ -127,14 +127,14 @@ private: /// Invoke command class devices to execute the command based on the current state void ExecuteCommand(u32 state_offset, u32 data); - std::unique_ptr host_processor; - - std::mutex command_mutex; - 
std::condition_variable_any command_cv; - std::deque command_lists; - std::jthread thread; - +protected: ThiRegisters thi_regs{}; + std::deque command_lists; + std::condition_variable_any command_cv; + std::jthread thread; + std::unique_ptr host_processor; + std::mutex command_mutex; + Host1x::Host1x& host1x; ChClassId current_class; }; diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index d65658323e..5f4054212f 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -89,7 +89,7 @@ struct GPU::Impl { renderer = std::move(renderer_); rasterizer = renderer->ReadRasterizer(); host1x.MemoryManager().BindInterface(rasterizer); - host1x.GMMU().BindRasterizer(rasterizer); + host1x.gmmu_manager.BindRasterizer(rasterizer); } /// Flush all current written commands into the host GPU for execution. diff --git a/src/video_core/host1x/codecs/vp9_types.h b/src/video_core/host1x/codec_types.h similarity index 56% rename from src/video_core/host1x/codecs/vp9_types.h rename to src/video_core/host1x/codec_types.h index 77535d5f66..3a10afa8df 100644 --- a/src/video_core/host1x/codecs/vp9_types.h +++ b/src/video_core/host1x/codec_types.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -6,12 +9,192 @@ #include #include +#include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" namespace Tegra { namespace Decoders { + +struct Offset { + constexpr u32 Address() const noexcept { + return offset << 8; + } + +private: + u32 offset; +}; +static_assert(std::is_trivial_v, "Offset must be trivial"); +static_assert(sizeof(Offset) == 0x4, "Offset has the wrong size!"); + +struct H264ParameterSet { + s32 log2_max_pic_order_cnt_lsb_minus4; ///< 0x00 + s32 delta_pic_order_always_zero_flag; ///< 0x04 + s32 frame_mbs_only_flag; ///< 0x08 + u32 
pic_width_in_mbs; ///< 0x0C + u32 frame_height_in_mbs; ///< 0x10 + union { ///< 0x14 + BitField<0, 2, u32> tile_format; + BitField<2, 3, u32> gob_height; + BitField<5, 27, u32> reserved_surface_format; + }; + u32 entropy_coding_mode_flag; ///< 0x18 + s32 pic_order_present_flag; ///< 0x1C + s32 num_refidx_l0_default_active; ///< 0x20 + s32 num_refidx_l1_default_active; ///< 0x24 + s32 deblocking_filter_control_present_flag; ///< 0x28 + s32 redundant_pic_cnt_present_flag; ///< 0x2C + u32 transform_8x8_mode_flag; ///< 0x30 + u32 pitch_luma; ///< 0x34 + u32 pitch_chroma; ///< 0x38 + Offset luma_top_offset; ///< 0x3C + Offset luma_bot_offset; ///< 0x40 + Offset luma_frame_offset; ///< 0x44 + Offset chroma_top_offset; ///< 0x48 + Offset chroma_bot_offset; ///< 0x4C + Offset chroma_frame_offset; ///< 0x50 + u32 hist_buffer_size; ///< 0x54 + union { ///< 0x58 + union { + BitField<0, 1, u64> mbaff_frame; + BitField<1, 1, u64> direct_8x8_inference; + BitField<2, 1, u64> weighted_pred; + BitField<3, 1, u64> constrained_intra_pred; + BitField<4, 1, u64> ref_pic; + BitField<5, 1, u64> field_pic; + BitField<6, 1, u64> bottom_field; + BitField<7, 1, u64> second_field; + } flags; + BitField<8, 4, u64> log2_max_frame_num_minus4; + BitField<12, 2, u64> chroma_format_idc; + BitField<14, 2, u64> pic_order_cnt_type; + BitField<16, 6, s64> pic_init_qp_minus26; + BitField<22, 5, s64> chroma_qp_index_offset; + BitField<27, 5, s64> second_chroma_qp_index_offset; + BitField<32, 2, u64> weighted_bipred_idc; + BitField<34, 7, u64> curr_pic_idx; + BitField<41, 5, u64> curr_col_idx; + BitField<46, 16, u64> frame_number; + BitField<62, 1, u64> frame_surfaces; + BitField<63, 1, u64> output_memory_layout; + }; +}; +static_assert(sizeof(H264ParameterSet) == 0x60, "H264ParameterSet is an invalid size"); + +#define ASSERT_POSITION(field_name, position) \ + static_assert(offsetof(H264ParameterSet, field_name) == position, \ + "Field " #field_name " has invalid position") + 
+ASSERT_POSITION(log2_max_pic_order_cnt_lsb_minus4, 0x00); +ASSERT_POSITION(delta_pic_order_always_zero_flag, 0x04); +ASSERT_POSITION(frame_mbs_only_flag, 0x08); +ASSERT_POSITION(pic_width_in_mbs, 0x0C); +ASSERT_POSITION(frame_height_in_mbs, 0x10); +ASSERT_POSITION(tile_format, 0x14); +ASSERT_POSITION(entropy_coding_mode_flag, 0x18); +ASSERT_POSITION(pic_order_present_flag, 0x1C); +ASSERT_POSITION(num_refidx_l0_default_active, 0x20); +ASSERT_POSITION(num_refidx_l1_default_active, 0x24); +ASSERT_POSITION(deblocking_filter_control_present_flag, 0x28); +ASSERT_POSITION(redundant_pic_cnt_present_flag, 0x2C); +ASSERT_POSITION(transform_8x8_mode_flag, 0x30); +ASSERT_POSITION(pitch_luma, 0x34); +ASSERT_POSITION(pitch_chroma, 0x38); +ASSERT_POSITION(luma_top_offset, 0x3C); +ASSERT_POSITION(luma_bot_offset, 0x40); +ASSERT_POSITION(luma_frame_offset, 0x44); +ASSERT_POSITION(chroma_top_offset, 0x48); +ASSERT_POSITION(chroma_bot_offset, 0x4C); +ASSERT_POSITION(chroma_frame_offset, 0x50); +ASSERT_POSITION(hist_buffer_size, 0x54); +ASSERT_POSITION(flags, 0x58); +#undef ASSERT_POSITION + +struct DpbEntry { + union { + BitField<0, 7, u32> index; + BitField<7, 5, u32> col_idx; + BitField<12, 2, u32> state; + BitField<14, 1, u32> is_long_term; + BitField<15, 1, u32> non_existing; + BitField<16, 1, u32> is_field; + BitField<17, 4, u32> top_field_marking; + BitField<21, 4, u32> bottom_field_marking; + BitField<25, 1, u32> output_memory_layout; + BitField<26, 6, u32> reserved; + } flags; + std::array field_order_cnt; + u32 frame_idx; +}; +static_assert(sizeof(DpbEntry) == 0x10, "DpbEntry has the wrong size!"); + +struct DisplayParam { + union { + BitField<0, 1, u32> enable_tf_output; + BitField<1, 1, u32> vc1_map_y_flag; + BitField<2, 3, u32> map_y_value; + BitField<5, 1, u32> vc1_map_uv_flag; + BitField<6, 3, u32> map_uv_value; + BitField<9, 8, u32> out_stride; + BitField<17, 3, u32> tiling_format; + BitField<20, 1, u32> output_structure; // 0=frame, 1=field + BitField<21, 11, u32> 
reserved0; + }; + std::array output_top; + std::array output_bottom; + union { + BitField<0, 1, u32> enable_histogram; + BitField<1, 12, u32> histogram_start_x; + BitField<13, 12, u32> histogram_start_y; + BitField<25, 7, u32> reserved1; + }; + union { + BitField<0, 12, u32> histogram_end_x; + BitField<12, 12, u32> histogram_end_y; + BitField<24, 8, u32> reserved2; + }; +}; +static_assert(sizeof(DisplayParam) == 0x1C, "DisplayParam has the wrong size!"); + +struct H264DecoderContext { + INSERT_PADDING_WORDS_NOINIT(13); ///< 0x0000 + std::array eos; ///< 0x0034 + u8 explicit_eos_present_flag; ///< 0x0044 + u8 hint_dump_en; ///< 0x0045 + INSERT_PADDING_BYTES_NOINIT(2); ///< 0x0046 + u32 stream_len; ///< 0x0048 + u32 slice_count; ///< 0x004C + u32 mbhist_buffer_size; ///< 0x0050 + u32 gptimer_timeout_value; ///< 0x0054 + H264ParameterSet h264_parameter_set; ///< 0x0058 + std::array curr_field_order_cnt; ///< 0x00B8 + std::array dpb; ///< 0x00C0 + std::array weight_scale_4x4; ///< 0x01C0 + std::array weight_scale_8x8; ///< 0x0220 + std::array num_inter_view_refs_lX; ///< 0x02A0 + std::array reserved2; ///< 0x02A2 + std::array, 2> inter_view_refidx_lX; ///< 0x02B0 + union { ///< 0x02D0 + BitField<0, 1, u32> lossless_ipred8x8_filter_enable; + BitField<1, 1, u32> qpprime_y_zero_transform_bypass_flag; + BitField<2, 30, u32> reserved3; + }; + DisplayParam display_param; ///< 0x02D4 + std::array reserved4; ///< 0x02F0 +}; +static_assert(sizeof(H264DecoderContext) == 0x2FC, "H264DecoderContext is an invalid size"); + +#define ASSERT_POSITION(field_name, position) \ + static_assert(offsetof(H264DecoderContext, field_name) == position, \ + "Field " #field_name " has invalid position") + +ASSERT_POSITION(stream_len, 0x48); +ASSERT_POSITION(h264_parameter_set, 0x58); +ASSERT_POSITION(dpb, 0xC0); +ASSERT_POSITION(weight_scale_4x4, 0x1C0); +#undef ASSERT_POSITION + enum class Vp9SurfaceIndex : u32 { Last = 0, Golden = 1, @@ -287,10 +470,7 @@ struct RefPoolElement { bool refresh{}; 
}; -#define ASSERT_POSITION(field_name, position) \ - static_assert(offsetof(Vp9EntropyProbs, field_name) == position, \ - "Field " #field_name " has invalid position") - +#define ASSERT_POSITION(field_name, position) static_assert(offsetof(Vp9EntropyProbs, field_name) == position) ASSERT_POSITION(partition_prob, 0x0024); ASSERT_POSITION(switchable_interp_prob, 0x0724); ASSERT_POSITION(sign, 0x0772); @@ -298,10 +478,7 @@ ASSERT_POSITION(class_0_fr, 0x079E); ASSERT_POSITION(high_precision, 0x07B2); #undef ASSERT_POSITION -#define ASSERT_POSITION(field_name, position) \ - static_assert(offsetof(PictureInfo, field_name) == position, \ - "Field " #field_name " has invalid position") - +#define ASSERT_POSITION(field_name, position) static_assert(offsetof(PictureInfo, field_name) == position) ASSERT_POSITION(bitstream_size, 0x30); ASSERT_POSITION(last_frame_size, 0x48); ASSERT_POSITION(first_level, 0x70); @@ -309,9 +486,7 @@ ASSERT_POSITION(segmentation, 0x80); ASSERT_POSITION(loop_filter, 0xE4); #undef ASSERT_POSITION -#define ASSERT_POSITION(field_name, position) \ - static_assert(offsetof(EntropyProbs, field_name) == position, \ - "Field " #field_name " has invalid position") +#define ASSERT_POSITION(field_name, position) static_assert(offsetof(EntropyProbs, field_name) == position) ASSERT_POSITION(inter_mode_prob, 0x400); ASSERT_POSITION(tx_8x8_prob, 0x470); @@ -321,5 +496,47 @@ ASSERT_POSITION(class_0_fr, 0x560); ASSERT_POSITION(coef_probs, 0x5A0); #undef ASSERT_POSITION +struct VP8PictureInfo { + INSERT_PADDING_WORDS_NOINIT(14); + u16 frame_width; // actual frame width + u16 frame_height; // actual frame height + u8 key_frame; + u8 version; + union { + u8 raw; + BitField<0, 2, u8> tile_format; + BitField<2, 3, u8> gob_height; + BitField<5, 3, u8> reserved_surface_format; + }; + u8 error_conceal_on; // 1: error conceal on; 0: off + u32 first_part_size; // the size of first partition(frame header and mb header partition) + u32 hist_buffer_size; // in units of 256 + 
u32 vld_buffer_size; // in units of 1 + // Current frame buffers + std::array frame_stride; // [y_c] + u32 luma_top_offset; // offset of luma top field in units of 256 + u32 luma_bot_offset; // offset of luma bottom field in units of 256 + u32 luma_frame_offset; // offset of luma frame in units of 256 + u32 chroma_top_offset; // offset of chroma top field in units of 256 + u32 chroma_bot_offset; // offset of chroma bottom field in units of 256 + u32 chroma_frame_offset; // offset of chroma frame in units of 256 + + INSERT_PADDING_BYTES_NOINIT(0x1c); // NvdecDisplayParams + + // Decode picture buffer related + s8 current_output_memory_layout; + // output NV12/NV24 setting. index 0: golden; 1: altref; 2: last + std::array output_memory_layout; + + u8 segmentation_feature_data_update; + INSERT_PADDING_BYTES_NOINIT(3); + + // ucode return result + u32 result_value; + std::array partition_offset; + INSERT_PADDING_WORDS_NOINIT(3); +}; +static_assert(sizeof(VP8PictureInfo) == 0xc0, "PictureInfo is an invalid size"); + }; // namespace Decoders }; // namespace Tegra diff --git a/src/video_core/host1x/codecs/codec.cpp b/src/video_core/host1x/codecs/codec.cpp index 1030db6819..56332866e5 100644 --- a/src/video_core/host1x/codecs/codec.cpp +++ b/src/video_core/host1x/codecs/codec.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -13,9 +16,12 @@ namespace Tegra { Codec::Codec(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs) - : host1x(host1x_), state{regs}, h264_decoder(std::make_unique(host1x)), - vp8_decoder(std::make_unique(host1x)), - vp9_decoder(std::make_unique(host1x)) {} + : host1x(host1x_) + , state{regs} + , h264_decoder(host1x_) + , vp8_decoder(host1x_) + , vp9_decoder(host1x_) +{} Codec::~Codec() = default; @@ -32,13 +38,11 @@ void 
Codec::SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec) { void Codec::Decode() { const bool is_first_frame = !initialized; - if (is_first_frame) { + if (is_first_frame) Initialize(); - } - if (!initialized) { + if (!initialized) return; - } // Assemble bitstream. bool vp9_hidden_frame = false; @@ -46,13 +50,13 @@ void Codec::Decode() { const auto packet_data = [&]() { switch (current_codec) { case Tegra::Host1x::NvdecCommon::VideoCodec::H264: - return h264_decoder->ComposeFrame(state, &configuration_size, is_first_frame); + return h264_decoder.ComposeFrame(state, &configuration_size, is_first_frame); case Tegra::Host1x::NvdecCommon::VideoCodec::VP8: - return vp8_decoder->ComposeFrame(state); + return vp8_decoder.ComposeFrame(state); case Tegra::Host1x::NvdecCommon::VideoCodec::VP9: - vp9_decoder->ComposeFrame(state); - vp9_hidden_frame = vp9_decoder->WasFrameHidden(); - return vp9_decoder->GetFrameBytes(); + vp9_decoder.ComposeFrame(state); + vp9_hidden_frame = vp9_decoder.WasFrameHidden(); + return vp9_decoder.GetFrameBytes(); default: ASSERT(false); return std::span{}; @@ -81,19 +85,13 @@ void Codec::Decode() { std::unique_ptr Codec::GetCurrentFrame() { // Sometimes VIC will request more frames than have been decoded. // in this case, return a blank frame and don't overwrite previous data. 
- if (frames.empty()) { + if (frames.empty()) return {}; - } - auto frame = std::move(frames.front()); frames.pop(); return frame; } -Host1x::NvdecCommon::VideoCodec Codec::GetCurrentCodec() const { - return current_codec; -} - std::string_view Codec::GetCurrentCodecName() const { switch (current_codec) { case Host1x::NvdecCommon::VideoCodec::None: diff --git a/src/video_core/host1x/codecs/codec.h b/src/video_core/host1x/codecs/codec.h index f700ae1293..0ad8c19882 100644 --- a/src/video_core/host1x/codecs/codec.h +++ b/src/video_core/host1x/codecs/codec.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -8,17 +11,14 @@ #include #include #include "common/common_types.h" -#include "video_core/host1x/ffmpeg/ffmpeg.h" +#include "video_core/host1x/codecs/h264.h" +#include "video_core/host1x/codecs/vp8.h" +#include "video_core/host1x/codecs/vp9.h" +#include "video_core/host1x/ffmpeg.h" #include "video_core/host1x/nvdec_common.h" namespace Tegra { -namespace Decoder { -class H264; -class VP8; -class VP9; -} // namespace Decoder - namespace Host1x { class Host1x; } // namespace Host1x @@ -40,9 +40,6 @@ public: /// Returns next decoded frame [[nodiscard]] std::unique_ptr GetCurrentFrame(); - /// Returns the value of current_codec - [[nodiscard]] Host1x::NvdecCommon::VideoCodec GetCurrentCodec() const; - /// Return name of the current codec [[nodiscard]] std::string_view GetCurrentCodecName() const; @@ -53,10 +50,9 @@ private: Host1x::Host1x& host1x; const Host1x::NvdecCommon::NvdecRegisters& state; - std::unique_ptr h264_decoder; - std::unique_ptr vp8_decoder; - std::unique_ptr vp9_decoder; - + Decoders::H264 h264_decoder; + Decoders::VP8 vp8_decoder; + Decoders::VP9 vp9_decoder; std::queue> frames{}; }; diff --git a/src/video_core/host1x/codecs/decoder.cpp 
b/src/video_core/host1x/codecs/decoder.cpp old mode 100755 new mode 100644 index 887eb28c8c..c75059db6f --- a/src/video_core/host1x/codecs/decoder.cpp +++ b/src/video_core/host1x/codecs/decoder.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project @@ -12,10 +12,11 @@ namespace Tegra { -Decoder::Decoder(Host1x::Host1x& host1x_, s32 id_, const Host1x::NvdecCommon::NvdecRegisters& regs_, - Host1x::FrameQueue& frame_queue_) - : host1x(host1x_), memory_manager{host1x.GMMU()}, regs{regs_}, id{id_}, frame_queue{ - frame_queue_} {} +Decoder::Decoder(Host1x::Host1x& host1x_, s32 id_, const Host1x::NvdecCommon::NvdecRegisters& regs_) + : host1x(host1x_) + , regs{regs_} + , id{id_} +{} Decoder::~Decoder() = default; @@ -53,11 +54,11 @@ void Decoder::Decode() { } if (UsingDecodeOrder()) { - frame_queue.PushDecodeOrder(id, luma_top, std::move(frame)); - frame_queue.PushDecodeOrder(id, luma_bottom, std::move(frame_copy)); + host1x.frame_queue.PushDecodeOrder(id, luma_top, std::move(frame)); + host1x.frame_queue.PushDecodeOrder(id, luma_bottom, std::move(frame_copy)); } else { - frame_queue.PushPresentOrder(id, luma_top, std::move(frame)); - frame_queue.PushPresentOrder(id, luma_bottom, std::move(frame_copy)); + host1x.frame_queue.PushPresentOrder(id, luma_top, std::move(frame)); + host1x.frame_queue.PushPresentOrder(id, luma_bottom, std::move(frame_copy)); } } else { auto [luma_offset, chroma_offset] = GetProgressiveOffsets(); @@ -68,9 +69,9 @@ void Decoder::Decode() { } if (UsingDecodeOrder()) { - frame_queue.PushDecodeOrder(id, luma_offset, std::move(frame)); + host1x.frame_queue.PushDecodeOrder(id, luma_offset, std::move(frame)); } else { - frame_queue.PushPresentOrder(id, luma_offset, std::move(frame)); + host1x.frame_queue.PushPresentOrder(id, luma_offset, 
std::move(frame)); } } } diff --git a/src/video_core/host1x/codecs/decoder.h b/src/video_core/host1x/codecs/decoder.h index d25da81fd4..9fca89aa40 100644 --- a/src/video_core/host1x/codecs/decoder.h +++ b/src/video_core/host1x/codecs/decoder.h @@ -14,7 +14,7 @@ #include #include "common/common_types.h" -#include "video_core/host1x/ffmpeg/ffmpeg.h" +#include "video_core/host1x/ffmpeg.h" #include "video_core/host1x/nvdec_common.h" namespace Tegra { @@ -35,33 +35,23 @@ public: return decode_api.UsingDecodeOrder(); } - /// Returns the value of current_codec - [[nodiscard]] Host1x::NvdecCommon::VideoCodec GetCurrentCodec() const { - return codec; - } - /// Return name of the current codec [[nodiscard]] virtual std::string_view GetCurrentCodecName() const = 0; protected: - explicit Decoder(Host1x::Host1x& host1x, s32 id, - const Host1x::NvdecCommon::NvdecRegisters& regs, - Host1x::FrameQueue& frame_queue); + explicit Decoder(Host1x::Host1x& host1x, s32 id, const Host1x::NvdecCommon::NvdecRegisters& regs); virtual std::span ComposeFrame() = 0; virtual std::tuple GetProgressiveOffsets() = 0; virtual std::tuple GetInterlacedOffsets() = 0; virtual bool IsInterlaced() = 0; + FFmpeg::DecodeApi decode_api; Host1x::Host1x& host1x; - Tegra::MemoryManager& memory_manager; const Host1x::NvdecCommon::NvdecRegisters& regs; s32 id; - Host1x::FrameQueue& frame_queue; - Host1x::NvdecCommon::VideoCodec codec; - FFmpeg::DecodeApi decode_api; - bool initialized{}; - bool vp9_hidden_frame{}; + bool initialized : 1 = false; + bool vp9_hidden_frame : 1 = false; }; } // namespace Tegra diff --git a/src/video_core/host1x/codecs/h264.cpp b/src/video_core/host1x/codecs/h264.cpp index 0896fa6001..4a37e1d1d3 100644 --- a/src/video_core/host1x/codecs/h264.cpp +++ b/src/video_core/host1x/codecs/h264.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // 
SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project @@ -14,25 +14,11 @@ #include "video_core/memory_manager.h" namespace Tegra::Decoders { -namespace { -// ZigZag LUTs from libavcodec. -constexpr std::array zig_zag_direct{ - 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48, - 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, - 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63, -}; -constexpr std::array zig_zag_scan{ - 0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4, 1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4, - 1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4, 3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4, -}; -} // Anonymous namespace - -H264::H264(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs_, s32 id_, - Host1x::FrameQueue& frame_queue_) - : Decoder{host1x_, id_, regs_, frame_queue_} { - codec = Host1x::NvdecCommon::VideoCodec::H264; - initialized = decode_api.Initialize(codec); +H264::H264(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs_, s32 id_) + : Decoder{host1x_, id_, regs_} +{ + initialized = decode_api.Initialize(Host1x::NvdecCommon::VideoCodec::H264); } H264::~H264() = default; @@ -65,14 +51,11 @@ bool H264::IsInterlaced() { } std::span H264::ComposeFrame() { - memory_manager.ReadBlock(regs.picture_info_offset.Address(), ¤t_context, - sizeof(H264DecoderContext)); - + host1x.memory_manager.ReadBlock(regs.picture_info_offset.Address(), ¤t_context, sizeof(H264DecoderContext)); const s64 frame_number = current_context.h264_parameter_set.frame_number.Value(); if (!is_first_frame && frame_number != 0) { frame_scratch.resize_destructive(current_context.stream_len); - memory_manager.ReadBlock(regs.frame_bitstream_offset.Address(), frame_scratch.data(), - frame_scratch.size()); + host1x.memory_manager.ReadBlock(regs.frame_bitstream_offset.Address(), frame_scratch.data(), frame_scratch.size()); return frame_scratch; } @@ 
-174,15 +157,13 @@ std::span H264::ComposeFrame() { for (s32 index = 0; index < 6; index++) { writer.WriteBit(true); - std::span matrix{current_context.weight_scale_4x4}; - writer.WriteScalingList(scan_scratch, matrix, index * 16, 16); + writer.WriteScalingList(current_context.weight_scale_4x4, index * 16, 16); } if (current_context.h264_parameter_set.transform_8x8_mode_flag) { for (s32 index = 0; index < 2; index++) { writer.WriteBit(true); - std::span matrix{current_context.weight_scale_8x8}; - writer.WriteScalingList(scan_scratch, matrix, index * 64, 64); + writer.WriteScalingList(current_context.weight_scale_8x8, index * 64, 64); } } @@ -196,11 +177,7 @@ std::span H264::ComposeFrame() { const auto& encoded_header = writer.GetByteArray(); frame_scratch.resize(encoded_header.size() + current_context.stream_len); std::memcpy(frame_scratch.data(), encoded_header.data(), encoded_header.size()); - - memory_manager.ReadBlock(regs.frame_bitstream_offset.Address(), - frame_scratch.data() + encoded_header.size(), - current_context.stream_len); - + host1x.memory_manager.ReadBlock(regs.frame_bitstream_offset.Address(), frame_scratch.data() + encoded_header.size(), current_context.stream_len); return frame_scratch; } @@ -229,23 +206,37 @@ void H264BitWriter::WriteBit(bool state) { WriteBits(state ? 
1 : 0, 1); } -void H264BitWriter::WriteScalingList(Common::ScratchBuffer& scan, std::span list, - s32 start, s32 count) { - scan.resize_destructive(count); +void H264BitWriter::WriteScalingList(std::span list, s32 start, s32 count) { if (count == 16) { - std::memcpy(scan.data(), zig_zag_scan.data(), scan.size()); + u8 last_scale = 8; + for (s32 index = 0; index < count; index++) { + // libavcodec has a zig zag LUT, but we dont need it, just use a magic + // constant which is a packing of 4 bits for each component of the table + const u8 value = list[start + ((0xfeb7adc963258410 >> (index * 4)) & 0xf)]; + const s32 delta_scale = s32(value - last_scale); + WriteSe(delta_scale); + last_scale = value; + } } else { - std::memcpy(scan.data(), zig_zag_direct.data(), scan.size()); - } - u8 last_scale = 8; - - for (s32 index = 0; index < count; index++) { - const u8 value = list[start + scan[index]]; - const s32 delta_scale = static_cast(value - last_scale); - - WriteSe(delta_scale); - - last_scale = value; + // ZigZag LUTs from libavcodec: this is the famous zigzag pattern found in the ffmpeg logo itself! + static constexpr std::array scan{ + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, + 5, 12, 19, 26, 33, 40, 48, + 41, 34, 27, 20, 13, 6, 7, + 14, 21, 28, 35, 42, 49, 56, + 57, 50, 43, 36, 29, 22, 15, + 23, 30, 37, 44, 51, 58, 59, + 52, 45, 38, 31, 39, 46, 53, + 60, 61, 54, 47, 55, 62, 63, + }; + u8 last_scale = 8; + for (s32 index = 0; index < count; index++) { + const u8 value = list[start + scan[index]]; + const s32 delta_scale = s32(value - last_scale); + WriteSe(delta_scale); + last_scale = value; + } } } @@ -286,19 +277,15 @@ void H264BitWriter::WriteBits(s32 value, s32 bit_count) { void H264BitWriter::WriteExpGolombCodedInt(s32 value) { const s32 sign = value <= 0 ? 
0 : 1; - if (value < 0) { - value = -value; - } - value = (value << 1) - sign; - WriteExpGolombCodedUInt(value); + if (!sign) value = -value; + WriteExpGolombCodedUInt((value << 1) - sign); } void H264BitWriter::WriteExpGolombCodedUInt(u32 value) { const s32 size = 32 - std::countl_zero(value + 1); WriteBits(1, size); - value -= (1U << (size - 1)) - 1; - WriteBits(static_cast(value), size - 1); + WriteBits(s32(value), size - 1); } s32 H264BitWriter::GetFreeBufferBits() { diff --git a/src/video_core/host1x/codecs/h264.h b/src/video_core/host1x/codecs/h264.h index d946c6937d..1e5576291c 100644 --- a/src/video_core/host1x/codecs/h264.h +++ b/src/video_core/host1x/codecs/h264.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -6,11 +9,10 @@ #include #include -#include "common/bit_field.h" -#include "common/common_funcs.h" #include "common/common_types.h" #include "common/scratch_buffer.h" #include "video_core/host1x/codecs/decoder.h" +#include "video_core/host1x/codec_types.h" #include "video_core/host1x/nvdec_common.h" namespace Tegra { @@ -40,8 +42,7 @@ public: /// Based on section 7.3.2.1.1.1 and Table 7-4 in the H.264 specification /// Writes the scaling matrices of the sream - void WriteScalingList(Common::ScratchBuffer& scan, std::span list, s32 start, - s32 count); + void WriteScalingList(std::span list, s32 start, s32 count); /// Return the bitstream as a vector. 
[[nodiscard]] std::vector& GetByteArray(); @@ -61,188 +62,9 @@ private: std::vector byte_array; }; -struct Offset { - constexpr u32 Address() const noexcept { - return offset << 8; - } - -private: - u32 offset; -}; -static_assert(std::is_trivial_v, "Offset must be trivial"); -static_assert(sizeof(Offset) == 0x4, "Offset has the wrong size!"); - -struct H264ParameterSet { - s32 log2_max_pic_order_cnt_lsb_minus4; ///< 0x00 - s32 delta_pic_order_always_zero_flag; ///< 0x04 - s32 frame_mbs_only_flag; ///< 0x08 - u32 pic_width_in_mbs; ///< 0x0C - u32 frame_height_in_mbs; ///< 0x10 - union { ///< 0x14 - BitField<0, 2, u32> tile_format; - BitField<2, 3, u32> gob_height; - BitField<5, 27, u32> reserved_surface_format; - }; - u32 entropy_coding_mode_flag; ///< 0x18 - s32 pic_order_present_flag; ///< 0x1C - s32 num_refidx_l0_default_active; ///< 0x20 - s32 num_refidx_l1_default_active; ///< 0x24 - s32 deblocking_filter_control_present_flag; ///< 0x28 - s32 redundant_pic_cnt_present_flag; ///< 0x2C - u32 transform_8x8_mode_flag; ///< 0x30 - u32 pitch_luma; ///< 0x34 - u32 pitch_chroma; ///< 0x38 - Offset luma_top_offset; ///< 0x3C - Offset luma_bot_offset; ///< 0x40 - Offset luma_frame_offset; ///< 0x44 - Offset chroma_top_offset; ///< 0x48 - Offset chroma_bot_offset; ///< 0x4C - Offset chroma_frame_offset; ///< 0x50 - u32 hist_buffer_size; ///< 0x54 - union { ///< 0x58 - union { - BitField<0, 1, u64> mbaff_frame; - BitField<1, 1, u64> direct_8x8_inference; - BitField<2, 1, u64> weighted_pred; - BitField<3, 1, u64> constrained_intra_pred; - BitField<4, 1, u64> ref_pic; - BitField<5, 1, u64> field_pic; - BitField<6, 1, u64> bottom_field; - BitField<7, 1, u64> second_field; - } flags; - BitField<8, 4, u64> log2_max_frame_num_minus4; - BitField<12, 2, u64> chroma_format_idc; - BitField<14, 2, u64> pic_order_cnt_type; - BitField<16, 6, s64> pic_init_qp_minus26; - BitField<22, 5, s64> chroma_qp_index_offset; - BitField<27, 5, s64> second_chroma_qp_index_offset; - BitField<32, 2, 
u64> weighted_bipred_idc; - BitField<34, 7, u64> curr_pic_idx; - BitField<41, 5, u64> curr_col_idx; - BitField<46, 16, u64> frame_number; - BitField<62, 1, u64> frame_surfaces; - BitField<63, 1, u64> output_memory_layout; - }; -}; -static_assert(sizeof(H264ParameterSet) == 0x60, "H264ParameterSet is an invalid size"); - -#define ASSERT_POSITION(field_name, position) \ - static_assert(offsetof(H264ParameterSet, field_name) == position, \ - "Field " #field_name " has invalid position") - -ASSERT_POSITION(log2_max_pic_order_cnt_lsb_minus4, 0x00); -ASSERT_POSITION(delta_pic_order_always_zero_flag, 0x04); -ASSERT_POSITION(frame_mbs_only_flag, 0x08); -ASSERT_POSITION(pic_width_in_mbs, 0x0C); -ASSERT_POSITION(frame_height_in_mbs, 0x10); -ASSERT_POSITION(tile_format, 0x14); -ASSERT_POSITION(entropy_coding_mode_flag, 0x18); -ASSERT_POSITION(pic_order_present_flag, 0x1C); -ASSERT_POSITION(num_refidx_l0_default_active, 0x20); -ASSERT_POSITION(num_refidx_l1_default_active, 0x24); -ASSERT_POSITION(deblocking_filter_control_present_flag, 0x28); -ASSERT_POSITION(redundant_pic_cnt_present_flag, 0x2C); -ASSERT_POSITION(transform_8x8_mode_flag, 0x30); -ASSERT_POSITION(pitch_luma, 0x34); -ASSERT_POSITION(pitch_chroma, 0x38); -ASSERT_POSITION(luma_top_offset, 0x3C); -ASSERT_POSITION(luma_bot_offset, 0x40); -ASSERT_POSITION(luma_frame_offset, 0x44); -ASSERT_POSITION(chroma_top_offset, 0x48); -ASSERT_POSITION(chroma_bot_offset, 0x4C); -ASSERT_POSITION(chroma_frame_offset, 0x50); -ASSERT_POSITION(hist_buffer_size, 0x54); -ASSERT_POSITION(flags, 0x58); -#undef ASSERT_POSITION - -struct DpbEntry { - union { - BitField<0, 7, u32> index; - BitField<7, 5, u32> col_idx; - BitField<12, 2, u32> state; - BitField<14, 1, u32> is_long_term; - BitField<15, 1, u32> non_existing; - BitField<16, 1, u32> is_field; - BitField<17, 4, u32> top_field_marking; - BitField<21, 4, u32> bottom_field_marking; - BitField<25, 1, u32> output_memory_layout; - BitField<26, 6, u32> reserved; - } flags; - std::array 
field_order_cnt; - u32 frame_idx; -}; -static_assert(sizeof(DpbEntry) == 0x10, "DpbEntry has the wrong size!"); - -struct DisplayParam { - union { - BitField<0, 1, u32> enable_tf_output; - BitField<1, 1, u32> vc1_map_y_flag; - BitField<2, 3, u32> map_y_value; - BitField<5, 1, u32> vc1_map_uv_flag; - BitField<6, 3, u32> map_uv_value; - BitField<9, 8, u32> out_stride; - BitField<17, 3, u32> tiling_format; - BitField<20, 1, u32> output_structure; // 0=frame, 1=field - BitField<21, 11, u32> reserved0; - }; - std::array output_top; - std::array output_bottom; - union { - BitField<0, 1, u32> enable_histogram; - BitField<1, 12, u32> histogram_start_x; - BitField<13, 12, u32> histogram_start_y; - BitField<25, 7, u32> reserved1; - }; - union { - BitField<0, 12, u32> histogram_end_x; - BitField<12, 12, u32> histogram_end_y; - BitField<24, 8, u32> reserved2; - }; -}; -static_assert(sizeof(DisplayParam) == 0x1C, "DisplayParam has the wrong size!"); - -struct H264DecoderContext { - INSERT_PADDING_WORDS_NOINIT(13); ///< 0x0000 - std::array eos; ///< 0x0034 - u8 explicit_eos_present_flag; ///< 0x0044 - u8 hint_dump_en; ///< 0x0045 - INSERT_PADDING_BYTES_NOINIT(2); ///< 0x0046 - u32 stream_len; ///< 0x0048 - u32 slice_count; ///< 0x004C - u32 mbhist_buffer_size; ///< 0x0050 - u32 gptimer_timeout_value; ///< 0x0054 - H264ParameterSet h264_parameter_set; ///< 0x0058 - std::array curr_field_order_cnt; ///< 0x00B8 - std::array dpb; ///< 0x00C0 - std::array weight_scale_4x4; ///< 0x01C0 - std::array weight_scale_8x8; ///< 0x0220 - std::array num_inter_view_refs_lX; ///< 0x02A0 - std::array reserved2; ///< 0x02A2 - std::array, 2> inter_view_refidx_lX; ///< 0x02B0 - union { ///< 0x02D0 - BitField<0, 1, u32> lossless_ipred8x8_filter_enable; - BitField<1, 1, u32> qpprime_y_zero_transform_bypass_flag; - BitField<2, 30, u32> reserved3; - }; - DisplayParam display_param; ///< 0x02D4 - std::array reserved4; ///< 0x02F0 -}; -static_assert(sizeof(H264DecoderContext) == 0x2FC, "H264DecoderContext 
is an invalid size"); - -#define ASSERT_POSITION(field_name, position) \ - static_assert(offsetof(H264DecoderContext, field_name) == position, \ - "Field " #field_name " has invalid position") - -ASSERT_POSITION(stream_len, 0x48); -ASSERT_POSITION(h264_parameter_set, 0x58); -ASSERT_POSITION(dpb, 0xC0); -ASSERT_POSITION(weight_scale_4x4, 0x1C0); -#undef ASSERT_POSITION - class H264 final : public Decoder { public: - explicit H264(Host1x::Host1x& host1x, const Host1x::NvdecCommon::NvdecRegisters& regs, s32 id, - Host1x::FrameQueue& frame_queue); + explicit H264(Host1x::Host1x& host1x, const Host1x::NvdecCommon::NvdecRegisters& regs, s32 id); ~H264() override; H264(const H264&) = delete; @@ -263,10 +85,10 @@ public: } private: - bool is_first_frame{true}; - Common::ScratchBuffer frame_scratch; - Common::ScratchBuffer scan_scratch; H264DecoderContext current_context{}; + std::array scan_scratch; + Common::ScratchBuffer frame_scratch; + bool is_first_frame{true}; }; } // namespace Decoders diff --git a/src/video_core/host1x/codecs/vp8.cpp b/src/video_core/host1x/codecs/vp8.cpp index 6094f16e0e..00fe6e4499 100644 --- a/src/video_core/host1x/codecs/vp8.cpp +++ b/src/video_core/host1x/codecs/vp8.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -8,11 +11,10 @@ #include "video_core/memory_manager.h" namespace Tegra::Decoders { -VP8::VP8(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs_, s32 id_, - Host1x::FrameQueue& frame_queue_) - : Decoder{host1x_, id_, regs_, frame_queue_} { - codec = Host1x::NvdecCommon::VideoCodec::VP8; - initialized = decode_api.Initialize(codec); +VP8::VP8(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs_, s32 id_) + : Decoder{host1x_, id_, regs_} +{ + initialized = 
decode_api.Initialize(Host1x::NvdecCommon::VideoCodec::VP8); } VP8::~VP8() = default; @@ -25,35 +27,30 @@ std::tuple VP8::GetProgressiveOffsets() { std::tuple VP8::GetInterlacedOffsets() { auto luma_top{regs.surface_luma_offsets[static_cast(Vp8SurfaceIndex::Current)].Address()}; - auto luma_bottom{ - regs.surface_luma_offsets[static_cast(Vp8SurfaceIndex::Current)].Address()}; - auto chroma_top{ - regs.surface_chroma_offsets[static_cast(Vp8SurfaceIndex::Current)].Address()}; - auto chroma_bottom{ - regs.surface_chroma_offsets[static_cast(Vp8SurfaceIndex::Current)].Address()}; + auto luma_bottom = regs.surface_luma_offsets[u32(Vp8SurfaceIndex::Current)].Address(); + auto chroma_top = regs.surface_chroma_offsets[u32(Vp8SurfaceIndex::Current)].Address(); + auto chroma_bottom = regs.surface_chroma_offsets[u32(Vp8SurfaceIndex::Current)].Address(); return {luma_top, luma_bottom, chroma_top, chroma_bottom}; } std::span VP8::ComposeFrame() { - memory_manager.ReadBlock(regs.picture_info_offset.Address(), ¤t_context, - sizeof(VP8PictureInfo)); + host1x.gmmu_manager.ReadBlock(regs.picture_info_offset.Address(), ¤t_context, sizeof(VP8PictureInfo)); const bool is_key_frame = current_context.key_frame == 1u; - const auto bitstream_size = static_cast(current_context.vld_buffer_size); + const auto bitstream_size = size_t(current_context.vld_buffer_size); const size_t header_size = is_key_frame ? 10u : 3u; frame_scratch.resize(header_size + bitstream_size); // Based on page 30 of the VP8 specification. // https://datatracker.ietf.org/doc/rfc6386/ frame_scratch[0] = is_key_frame ? 0u : 1u; // 1-bit frame type (0: keyframe, 1: interframes). 
- frame_scratch[0] |= - static_cast((current_context.version & 7u) << 1u); // 3-bit version number - frame_scratch[0] |= static_cast(1u << 4u); // 1-bit show_frame flag + frame_scratch[0] |= u8((current_context.version & 7u) << 1u); // 3-bit version number + frame_scratch[0] |= u8(1u << 4u); // 1-bit show_frame flag // The next 19-bits are the first partition size - frame_scratch[0] |= static_cast((current_context.first_part_size & 7u) << 5u); - frame_scratch[1] = static_cast((current_context.first_part_size & 0x7f8u) >> 3u); - frame_scratch[2] = static_cast((current_context.first_part_size & 0x7f800u) >> 11u); + frame_scratch[0] |= u8((current_context.first_part_size & 7u) << 5u); + frame_scratch[1] = u8((current_context.first_part_size & 0x7f8u) >> 3u); + frame_scratch[2] = u8((current_context.first_part_size & 0x7f800u) >> 11u); if (is_key_frame) { frame_scratch[3] = 0x9du; @@ -61,15 +58,15 @@ std::span VP8::ComposeFrame() { frame_scratch[5] = 0x2au; // TODO(ameerj): Horizontal/Vertical Scale // 16 bits: (2 bits Horizontal Scale << 14) | Width (14 bits) - frame_scratch[6] = static_cast(current_context.frame_width & 0xff); - frame_scratch[7] = static_cast(((current_context.frame_width >> 8) & 0x3f)); + frame_scratch[6] = u8(current_context.frame_width & 0xff); + frame_scratch[7] = u8(((current_context.frame_width >> 8) & 0x3f)); // 16 bits:(2 bits Vertical Scale << 14) | Height (14 bits) - frame_scratch[8] = static_cast(current_context.frame_height & 0xff); - frame_scratch[9] = static_cast(((current_context.frame_height >> 8) & 0x3f)); + frame_scratch[8] = u8(current_context.frame_height & 0xff); + frame_scratch[9] = u8(((current_context.frame_height >> 8) & 0x3f)); } - const u64 bitstream_offset = regs.frame_bitstream_offset.Address(); - memory_manager.ReadBlock(bitstream_offset, frame_scratch.data() + header_size, bitstream_size); + const u64 bitstream_offset = regs.frame_bitstream_offset.Address(); + host1x.gmmu_manager.ReadBlock(bitstream_offset, 
frame_scratch.data() + header_size, bitstream_size); return frame_scratch; } diff --git a/src/video_core/host1x/codecs/vp8.h b/src/video_core/host1x/codecs/vp8.h index 74800281d8..8dae74f058 100644 --- a/src/video_core/host1x/codecs/vp8.h +++ b/src/video_core/host1x/codecs/vp8.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -11,6 +14,7 @@ #include "common/scratch_buffer.h" #include "video_core/host1x/codecs/decoder.h" #include "video_core/host1x/nvdec_common.h" +#include "video_core/host1x/codec_types.h" namespace Tegra { @@ -28,8 +32,7 @@ enum class Vp8SurfaceIndex : u32 { class VP8 final : public Decoder { public: - explicit VP8(Host1x::Host1x& host1x, const Host1x::NvdecCommon::NvdecRegisters& regs, s32 id, - Host1x::FrameQueue& frame_queue); + explicit VP8(Host1x::Host1x& host1x, const Host1x::NvdecCommon::NvdecRegisters& regs, s32 id); ~VP8() override; VP8(const VP8&) = delete; @@ -52,51 +55,8 @@ public: } private: - Common::ScratchBuffer frame_scratch; - - struct VP8PictureInfo { - INSERT_PADDING_WORDS_NOINIT(14); - u16 frame_width; // actual frame width - u16 frame_height; // actual frame height - u8 key_frame; - u8 version; - union { - u8 raw; - BitField<0, 2, u8> tile_format; - BitField<2, 3, u8> gob_height; - BitField<5, 3, u8> reserved_surface_format; - }; - u8 error_conceal_on; // 1: error conceal on; 0: off - u32 first_part_size; // the size of first partition(frame header and mb header partition) - u32 hist_buffer_size; // in units of 256 - u32 vld_buffer_size; // in units of 1 - // Current frame buffers - std::array frame_stride; // [y_c] - u32 luma_top_offset; // offset of luma top field in units of 256 - u32 luma_bot_offset; // offset of luma bottom field in units of 256 - u32 luma_frame_offset; // offset of luma frame in units of 256 - u32 
chroma_top_offset; // offset of chroma top field in units of 256 - u32 chroma_bot_offset; // offset of chroma bottom field in units of 256 - u32 chroma_frame_offset; // offset of chroma frame in units of 256 - - INSERT_PADDING_BYTES_NOINIT(0x1c); // NvdecDisplayParams - - // Decode picture buffer related - s8 current_output_memory_layout; - // output NV12/NV24 setting. index 0: golden; 1: altref; 2: last - std::array output_memory_layout; - - u8 segmentation_feature_data_update; - INSERT_PADDING_BYTES_NOINIT(3); - - // ucode return result - u32 result_value; - std::array partition_offset; - INSERT_PADDING_WORDS_NOINIT(3); - }; - static_assert(sizeof(VP8PictureInfo) == 0xc0, "PictureInfo is an invalid size"); - VP8PictureInfo current_context{}; + Common::ScratchBuffer frame_scratch; }; } // namespace Decoders diff --git a/src/video_core/host1x/codecs/vp9.cpp b/src/video_core/host1x/codecs/vp9.cpp index f80709d785..1085e08939 100644 --- a/src/video_core/host1x/codecs/vp9.cpp +++ b/src/video_core/host1x/codecs/vp9.cpp @@ -1,10 +1,11 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #include // for std::copy +#include #include #include "common/alignment.h" @@ -15,11 +16,11 @@ namespace Tegra::Decoders { namespace { -constexpr u32 diff_update_probability = 252; -constexpr u32 frame_sync_code = 0x498342; +static constexpr u32 diff_update_probability = 252; +static constexpr u32 frame_sync_code = 0x498342; // Default compressed header probabilities once frame context resets -constexpr Vp9EntropyProbs default_probs{ +static constexpr Vp9EntropyProbs default_probs{ .y_mode_prob{ 65, 32, 18, 144, 162, 194, 41, 51, 98, 132, 68, 18, 165, 217, 196, 45, 40, 78, 173, 80, 19, 176, 240, 193, 64, 35, 46, 221, 135, 38, 194, 248, 121, 96, 85, 29, @@ 
-158,34 +159,6 @@ constexpr Vp9EntropyProbs default_probs{ .high_precision{128, 128}, }; -constexpr std::array norm_lut{ - 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -constexpr std::array map_lut{ - 20, 21, 22, 23, 24, 25, 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, - 1, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 2, 50, 51, 52, 53, 54, - 55, 56, 57, 58, 59, 60, 61, 3, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, - 73, 4, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 5, 86, 87, 88, 89, - 90, 91, 92, 93, 94, 95, 96, 97, 6, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, - 108, 109, 7, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 8, 122, 123, 124, - 125, 126, 127, 128, 129, 130, 131, 132, 133, 9, 134, 135, 136, 137, 138, 139, 140, 141, 142, - 143, 144, 145, 10, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 11, 158, 159, - 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 12, 170, 171, 172, 173, 174, 175, 176, 177, - 178, 179, 180, 181, 13, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 14, 194, - 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 15, 206, 207, 208, 209, 210, 211, 212, - 213, 214, 215, 216, 217, 16, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 17, - 
230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 18, 242, 243, 244, 245, 246, 247, - 248, 249, 250, 251, 252, 253, 19, -}; - // 6.2.14 Tile size calculation [[nodiscard]] s32 CalcMinLog2TileCols(s32 frame_width) { @@ -227,25 +200,17 @@ constexpr std::array map_lut{ [[nodiscard]] s32 RemapProbability(s32 new_prob, s32 old_prob) { new_prob--; old_prob--; - - std::size_t index{}; - - if (old_prob * 2 <= 0xff) { - index = static_cast((std::max)(0, RecenterNonNeg(new_prob, old_prob) - 1)); - } else { - index = static_cast( - (std::max)(0, RecenterNonNeg(0xff - 1 - new_prob, 0xff - 1 - old_prob) - 1)); - } - - return static_cast(map_lut[index]); + u8 i = old_prob * 2 <= 0xff + ? u8((std::max)(0, RecenterNonNeg(new_prob, old_prob) - 1)) + : u8((std::max)(0, RecenterNonNeg(0xff - 1 - new_prob, 0xff - 1 - old_prob) - 1)); + return s32((i + 7) % 13 == 0 ? (i + 7) / 13 - 1 : i + 20 - (i + 7) / 13); } } // Anonymous namespace -VP9::VP9(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs_, s32 id_, - Host1x::FrameQueue& frame_queue_) - : Decoder{host1x_, id_, regs_, frame_queue_} { - codec = Host1x::NvdecCommon::VideoCodec::VP9; - initialized = decode_api.Initialize(codec); +VP9::VP9(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs_, s32 id_) + : Decoder{host1x_, id_, regs_} +{ + initialized = decode_api.Initialize(Host1x::NvdecCommon::VideoCodec::VP9); } VP9::~VP9() = default; @@ -377,8 +342,7 @@ void VP9::WriteSegmentation(VpxBitStreamWriter& writer) { if (update_map) { EntropyProbs entropy_probs{}; - memory_manager.ReadBlock(regs.vp9_prob_tab_buffer_offset.Address(), &entropy_probs, - sizeof(entropy_probs)); + host1x.gmmu_manager.ReadBlock(regs.vp9_prob_tab_buffer_offset.Address(), &entropy_probs, sizeof(entropy_probs)); auto WriteProb = [&](u8 prob) { bool coded = prob != 255; @@ -442,8 +406,7 @@ void VP9::WriteSegmentation(VpxBitStreamWriter& writer) { } Vp9PictureInfo VP9::GetVp9PictureInfo() { - 
memory_manager.ReadBlock(regs.picture_info_offset.Address(), ¤t_picture_info, - sizeof(PictureInfo)); + host1x.gmmu_manager.ReadBlock(regs.picture_info_offset.Address(), ¤t_picture_info, sizeof(PictureInfo)); Vp9PictureInfo vp9_info = current_picture_info.Convert(); InsertEntropy(regs.vp9_prob_tab_buffer_offset.Address(), vp9_info.entropy); @@ -459,7 +422,7 @@ Vp9PictureInfo VP9::GetVp9PictureInfo() { void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) { EntropyProbs entropy; - memory_manager.ReadBlock(offset, &entropy, sizeof(EntropyProbs)); + host1x.gmmu_manager.ReadBlock(offset, &entropy, sizeof(EntropyProbs)); entropy.Convert(dst); } @@ -469,9 +432,7 @@ Vp9FrameContainer VP9::GetCurrentFrame() { // gpu.SyncGuestHost(); epic, why? current_frame.info = GetVp9PictureInfo(); current_frame.bit_stream.resize(current_frame.info.bitstream_size); - memory_manager.ReadBlock(regs.frame_bitstream_offset.Address(), - current_frame.bit_stream.data(), - current_frame.info.bitstream_size); + host1x.gmmu_manager.ReadBlock(regs.frame_bitstream_offset.Address(), current_frame.bit_stream.data(), current_frame.info.bitstream_size); } if (!next_frame.bit_stream.empty()) { Vp9FrameContainer temp{ @@ -893,13 +854,9 @@ std::span VP9::ComposeFrame() { // Write headers and frame to buffer frame_scratch.resize(uncompressed_header.size() + compressed_header.size() + bitstream.size()); std::copy(uncompressed_header.begin(), uncompressed_header.end(), frame_scratch.begin()); - std::copy(compressed_header.begin(), compressed_header.end(), - frame_scratch.begin() + uncompressed_header.size()); - std::copy(bitstream.begin(), bitstream.end(), - frame_scratch.begin() + uncompressed_header.size() + compressed_header.size()); - + std::copy(compressed_header.begin(), compressed_header.end(), frame_scratch.begin() + uncompressed_header.size()); + std::copy(bitstream.begin(), bitstream.end(), frame_scratch.begin() + uncompressed_header.size() + compressed_header.size()); vp9_hidden_frame = 
WasFrameHidden(); - return GetFrameBytes(); } @@ -929,7 +886,7 @@ void VpxRangeEncoder::Write(bool bit, s32 probability) { local_range = range - split; } - s32 shift = static_cast(norm_lut[local_range]); + s32 shift = s32(local_range == 0 ? 0 : (std::countl_zero(local_range) - 24)); local_range <<= shift; count += shift; diff --git a/src/video_core/host1x/codecs/vp9.h b/src/video_core/host1x/codecs/vp9.h index 9d42033cb3..f0265669b8 100644 --- a/src/video_core/host1x/codecs/vp9.h +++ b/src/video_core/host1x/codecs/vp9.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -11,7 +14,7 @@ #include "common/scratch_buffer.h" #include "common/stream.h" #include "video_core/host1x/codecs/decoder.h" -#include "video_core/host1x/codecs/vp9_types.h" +#include "video_core/host1x/codec_types.h" #include "video_core/host1x/nvdec_common.h" namespace Tegra { @@ -113,8 +116,7 @@ private: class VP9 final : public Decoder { public: - explicit VP9(Host1x::Host1x& host1x, const Host1x::NvdecCommon::NvdecRegisters& regs, s32 id, - Host1x::FrameQueue& frame_queue); + explicit VP9(Host1x::Host1x& host1x, const Host1x::NvdecCommon::NvdecRegisters& regs, s32 id); ~VP9() override; VP9(const VP9&) = delete; @@ -193,11 +195,10 @@ private: [[nodiscard]] std::vector ComposeCompressedHeader(); [[nodiscard]] VpxBitStreamWriter ComposeUncompressedHeader(); - Common::ScratchBuffer frame_scratch; - std::array loop_filter_ref_deltas{}; std::array loop_filter_mode_deltas{}; + Common::ScratchBuffer frame_scratch; Vp9FrameContainer next_frame{}; std::array frame_ctxs{}; bool swap_ref_indices{}; diff --git a/src/video_core/host1x/ffmpeg/ffmpeg.cpp b/src/video_core/host1x/ffmpeg.cpp similarity index 95% rename from src/video_core/host1x/ffmpeg/ffmpeg.cpp rename to src/video_core/host1x/ffmpeg.cpp index 
486fd9365c..80e48692f7 100644 --- a/src/video_core/host1x/ffmpeg/ffmpeg.cpp +++ b/src/video_core/host1x/ffmpeg.cpp @@ -9,7 +9,7 @@ #include "common/scope_exit.h" #include "common/settings.h" #include "core/memory.h" -#include "video_core/host1x/ffmpeg/ffmpeg.h" +#include "video_core/host1x/ffmpeg.h" #include "video_core/memory_manager.h" extern "C" { @@ -34,7 +34,9 @@ constexpr std::array PreferredGpuDecoders = { AV_HWDEVICE_TYPE_DXVA2, AV_HWDEVICE_TYPE_D3D12VA, #elif defined(__FreeBSD__) + AV_HWDEVICE_TYPE_VAAPI, AV_HWDEVICE_TYPE_VDPAU, + AV_HWDEVICE_TYPE_DRM, #elif defined(__APPLE__) AV_HWDEVICE_TYPE_VIDEOTOOLBOX, #elif defined(ANDROID) @@ -105,18 +107,17 @@ Frame::~Frame() { Decoder::Decoder(Tegra::Host1x::NvdecCommon::VideoCodec codec) { const AVCodecID av_codec = [&] { switch (codec) { - case Tegra::Host1x::NvdecCommon::VideoCodec::H264: - return AV_CODEC_ID_H264; - case Tegra::Host1x::NvdecCommon::VideoCodec::VP8: - return AV_CODEC_ID_VP8; - case Tegra::Host1x::NvdecCommon::VideoCodec::VP9: - return AV_CODEC_ID_VP9; - default: - UNIMPLEMENTED_MSG("Unknown codec {}", codec); - return AV_CODEC_ID_NONE; + case Tegra::Host1x::NvdecCommon::VideoCodec::H264: + return AV_CODEC_ID_H264; + case Tegra::Host1x::NvdecCommon::VideoCodec::VP8: + return AV_CODEC_ID_VP8; + case Tegra::Host1x::NvdecCommon::VideoCodec::VP9: + return AV_CODEC_ID_VP9; + default: + UNIMPLEMENTED_MSG("Unknown codec {}", codec); + return AV_CODEC_ID_NONE; } }(); - m_codec = avcodec_find_decoder(av_codec); } diff --git a/src/video_core/host1x/ffmpeg/ffmpeg.h b/src/video_core/host1x/ffmpeg.h similarity index 98% rename from src/video_core/host1x/ffmpeg/ffmpeg.h rename to src/video_core/host1x/ffmpeg.h index d60a8ac4a7..fdb6908bb6 100644 --- a/src/video_core/host1x/ffmpeg/ffmpeg.h +++ b/src/video_core/host1x/ffmpeg.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: 
GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project diff --git a/src/video_core/host1x/host1x.cpp b/src/video_core/host1x/host1x.cpp index cec5104144..b4e4a38c80 100644 --- a/src/video_core/host1x/host1x.cpp +++ b/src/video_core/host1x/host1x.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: 2021 yuzu Emulator Project @@ -12,9 +12,12 @@ namespace Tegra::Host1x { Host1x::Host1x(Core::System& system_) - : system{system_}, syncpoint_manager{}, - memory_manager(system.DeviceMemory()), gmmu_manager{system, memory_manager, 32, 0, 12}, - allocator{std::make_unique>(1 << 12)} {} + : system{system_} + , syncpoint_manager{} + , memory_manager(system.DeviceMemory()) + , gmmu_manager{system, memory_manager, 32, 0, 12} + , allocator{1 << 12} +{} Host1x::~Host1x() = default; @@ -24,13 +27,13 @@ void Host1x::StartDevice(s32 fd, ChannelType type, u32 syncpt) { #ifdef YUZU_LEGACY std::call_once(nvdec_first_init, []() {std::this_thread::sleep_for(std::chrono::milliseconds{500});}); // HACK: For Astroneer #endif - devices[fd] = std::make_unique(*this, fd, syncpt, frame_queue); + devices[fd] = std::make_unique(*this, fd, syncpt); break; case ChannelType::VIC: #ifdef YUZU_LEGACY std::call_once(vic_first_init, []() {std::this_thread::sleep_for(std::chrono::milliseconds{500});}); // HACK: For Astroneer #endif - devices[fd] = std::make_unique(*this, fd, syncpt, frame_queue); + devices[fd] = std::make_unique(*this, fd, syncpt); break; default: LOG_ERROR(HW_GPU, "Unimplemented host1x device {}", static_cast(type)); diff --git a/src/video_core/host1x/host1x.h b/src/video_core/host1x/host1x.h index d7534da23a..838e749072 100644 --- a/src/video_core/host1x/host1x.h +++ b/src/video_core/host1x/host1x.h @@ -184,20 +184,12 @@ public: return memory_manager; } - Tegra::MemoryManager& GMMU() { - return 
gmmu_manager; - } - - const Tegra::MemoryManager& GMMU() const { - return gmmu_manager; - } - Common::FlatAllocator& Allocator() { - return *allocator; + return allocator; } const Common::FlatAllocator& Allocator() const { - return *allocator; + return allocator; } void StartDevice(s32 fd, ChannelType type, u32 syncpt); @@ -211,12 +203,11 @@ public: it->second->PushEntries(std::move(entries)); } -private: Core::System& system; SyncpointManager syncpoint_manager; Tegra::MaxwellDeviceMemoryManager memory_manager; Tegra::MemoryManager gmmu_manager; - std::unique_ptr> allocator; + Common::FlatAllocator allocator; FrameQueue frame_queue; ankerl::unordered_dense::map> devices; #ifdef YUZU_LEGACY diff --git a/src/video_core/host1x/nvdec.cpp b/src/video_core/host1x/nvdec.cpp index 1882ccb100..f2e5c358d8 100644 --- a/src/video_core/host1x/nvdec.cpp +++ b/src/video_core/host1x/nvdec.cpp @@ -1,9 +1,10 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include "common/assert.h" #include "common/polyfill_thread.h" @@ -19,10 +20,13 @@ namespace Tegra::Host1x { #define NVDEC_REG_INDEX(field_name) \ (offsetof(NvdecCommon::NvdecRegisters, field_name) / sizeof(u64)) -Nvdec::Nvdec(Host1x& host1x_, s32 id_, u32 syncpt, FrameQueue& frame_queue_) - : CDmaPusher{host1x_, id_}, id{id_}, syncpoint{syncpt}, frame_queue{frame_queue_} { +Nvdec::Nvdec(Host1x& host1x_, s32 id_, u32 syncpt) + : CDmaPusher{host1x_, id_} + , id{id_} + , syncpoint{syncpt} +{ LOG_INFO(HW_GPU, "Created nvdec {}", id); - frame_queue.Open(id); + host1x.frame_queue.Open(id); } Nvdec::~Nvdec() { @@ -43,24 +47,22 @@ void Nvdec::ProcessMethod(u32 method, u32 argument) { } void Nvdec::CreateDecoder(NvdecCommon::VideoCodec codec) { - if (decoder.get()) { - return; + if 
(std::holds_alternative(decoder)) { + switch (codec) { + case NvdecCommon::VideoCodec::H264: + decoder.emplace(host1x, regs, id); + break; + case NvdecCommon::VideoCodec::VP8: + decoder.emplace(host1x, regs, id); + break; + case NvdecCommon::VideoCodec::VP9: + decoder.emplace(host1x, regs, id); + break; + default: + break; + } + LOG_INFO(HW_GPU, "Created decoder {} for id {}", codec, id); } - switch (codec) { - case NvdecCommon::VideoCodec::H264: - decoder = std::make_unique(host1x, regs, id, frame_queue); - break; - case NvdecCommon::VideoCodec::VP8: - decoder = std::make_unique(host1x, regs, id, frame_queue); - break; - case NvdecCommon::VideoCodec::VP9: - decoder = std::make_unique(host1x, regs, id, frame_queue); - break; - default: - UNIMPLEMENTED_MSG("Codec {}", decoder->GetCurrentCodecName()); - break; - } - LOG_INFO(HW_GPU, "Created decoder {} for id {}", decoder->GetCurrentCodecName(), id); } void Nvdec::Execute() { @@ -70,15 +72,14 @@ void Nvdec::Execute() { std::this_thread::sleep_for(std::chrono::milliseconds(8)); return; } - switch (decoder->GetCurrentCodec()) { - case NvdecCommon::VideoCodec::H264: - case NvdecCommon::VideoCodec::VP8: - case NvdecCommon::VideoCodec::VP9: - decoder->Decode(); - break; - default: - UNIMPLEMENTED_MSG("Codec {}", decoder->GetCurrentCodecName()); - break; + if (auto* h264 = std::get_if(&decoder)) { + h264->Decode(); + } else if (auto* vp8 = std::get_if(&decoder)) { + vp8->Decode(); + } else if (auto* vp9 = std::get_if(&decoder)) { + vp9->Decode(); + } else { + LOG_ERROR(HW_GPU, "Unrecognized codec executed?"); } } diff --git a/src/video_core/host1x/nvdec.h b/src/video_core/host1x/nvdec.h index 8fa91c744b..b89e57f357 100644 --- a/src/video_core/host1x/nvdec.h +++ b/src/video_core/host1x/nvdec.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2020 
yuzu Emulator Project @@ -8,10 +8,14 @@ #include #include +#include #include "common/common_types.h" #include "video_core/cdma_pusher.h" #include "video_core/host1x/codecs/decoder.h" +#include "video_core/host1x/codecs/h264.h" +#include "video_core/host1x/codecs/vp8.h" +#include "video_core/host1x/codecs/vp9.h" namespace Tegra { @@ -21,7 +25,7 @@ class FrameQueue; class Nvdec final : public CDmaPusher { public: - explicit Nvdec(Host1x& host1x, s32 id, u32 syncpt, FrameQueue& frame_queue_); + explicit Nvdec(Host1x& host1x, s32 id, u32 syncpt); ~Nvdec(); /// Writes the method into the state, Invoke Execute() if encountered @@ -38,12 +42,15 @@ private: /// Invoke codec to decode a frame void Execute(); + NvdecCommon::NvdecRegisters regs{}; + std::variant< + Decoders::H264, + Decoders::VP8, + Decoders::VP9, + std::monostate + > decoder = std::monostate{}; s32 id; u32 syncpoint; - FrameQueue& frame_queue; - - NvdecCommon::NvdecRegisters regs{}; - std::unique_ptr decoder; }; } // namespace Host1x diff --git a/src/video_core/host1x/vic.cpp b/src/video_core/host1x/vic.cpp index 37972c045f..906714cc16 100644 --- a/src/video_core/host1x/vic.cpp +++ b/src/video_core/host1x/vic.cpp @@ -85,18 +85,17 @@ void SwizzleSurface(std::span output, u32 out_stride, std::span in } // namespace -Vic::Vic(Host1x& host1x_, s32 id_, u32 syncpt, FrameQueue& frame_queue_) noexcept : - CDmaPusher{host1x_, id_} +Vic::Vic(Host1x& host1x_, s32 id_, u32 syncpt) noexcept + : CDmaPusher{host1x_, id_} , id{id_} , syncpoint{syncpt} - , frame_queue{frame_queue_} { LOG_INFO(HW_GPU, "Created vic {}", id); } Vic::~Vic() noexcept { LOG_INFO(HW_GPU, "Destroying vic {}", id); - frame_queue.Close(id); + host1x.frame_queue.Close(id); } void Vic::ProcessMethod(u32 method, u32 arg) noexcept { @@ -113,7 +112,7 @@ void Vic::ProcessMethod(u32 method, u32 arg) noexcept { void Vic::Execute() noexcept { ConfigStruct config{}; - memory_manager.ReadBlock(regs.config_struct_offset.Address(), &config, sizeof(ConfigStruct)); 
+ host1x.gmmu_manager.ReadBlock(regs.config_struct_offset.Address(), &config, sizeof(ConfigStruct)); auto output_width = config.output_surface_config.out_surface_width + 1; auto output_height = config.output_surface_config.out_surface_height + 1; @@ -124,8 +123,8 @@ void Vic::Execute() noexcept { if (auto& slot_config = config.slot_structs[i]; slot_config.config.slot_enable) { auto const luma_offset = regs.surfaces[i][SurfaceIndex::Current].luma.Address(); if (nvdec_id == -1) - nvdec_id = frame_queue.VicFindNvdecFdFromOffset(luma_offset); - if (auto frame = frame_queue.GetFrame(nvdec_id, luma_offset); frame.get()) { + nvdec_id = host1x.frame_queue.VicFindNvdecFdFromOffset(luma_offset); + if (auto frame = host1x.frame_queue.GetFrame(nvdec_id, luma_offset); frame.get()) { switch (frame->GetPixelFormat()) { case AV_PIX_FMT_YUV420P: ReadY8__V8U8_N420(slot_config, regs.surfaces[i], std::move(frame), true); @@ -881,8 +880,8 @@ void Vic::WriteY8__V8U8_N420(const OutputSurfaceConfig& output_surface_config) n chroma_scratch.resize_destructive(out_chroma_size); Decode(luma_scratch.data(), chroma_scratch.data()); - Tegra::Memory::GpuGuestMemoryScoped out_luma(memory_manager, regs.output_surface.luma.Address(), out_luma_swizzle_size, &swizzle_scratch); - Tegra::Memory::GpuGuestMemoryScoped out_chroma(memory_manager, regs.output_surface.chroma_u.Address(), out_chroma_swizzle_size, &swizzle_scratch); + Tegra::Memory::GpuGuestMemoryScoped out_luma(host1x.gmmu_manager, regs.output_surface.luma.Address(), out_luma_swizzle_size, &swizzle_scratch); + Tegra::Memory::GpuGuestMemoryScoped out_chroma(host1x.gmmu_manager, regs.output_surface.chroma_u.Address(), out_chroma_swizzle_size, &swizzle_scratch); if (block_height == 1) { SwizzleSurface(out_luma, out_luma_stride, luma_scratch, out_luma_stride, out_luma_height); SwizzleSurface(out_chroma, out_chroma_stride, chroma_scratch, out_chroma_stride, out_chroma_height); @@ -910,8 +909,8 @@ void Vic::WriteY8__V8U8_N420(const 
OutputSurfaceConfig& output_surface_config) n luma_scratch.resize_destructive(out_luma_size); chroma_scratch.resize_destructive(out_chroma_size); Decode(luma_scratch.data(), chroma_scratch.data()); - memory_manager.WriteBlock(regs.output_surface.luma.Address(), luma_scratch.data(), out_luma_size); - memory_manager.WriteBlock(regs.output_surface.chroma_u.Address(), chroma_scratch.data(), out_chroma_size); + host1x.gmmu_manager.WriteBlock(regs.output_surface.luma.Address(), luma_scratch.data(), out_luma_size); + host1x.gmmu_manager.WriteBlock(regs.output_surface.chroma_u.Address(), chroma_scratch.data(), out_chroma_size); } break; default: UNREACHABLE(); @@ -1046,7 +1045,7 @@ void Vic::WriteABGR(const OutputSurfaceConfig& output_surface_config, VideoPixel luma_scratch.resize_destructive(out_luma_size); Decode(luma_scratch.data(), output_surface.data()); - Tegra::Memory::GpuGuestMemoryScoped out_luma(memory_manager, regs.output_surface.luma.Address(), out_swizzle_size, &swizzle_scratch); + Tegra::Memory::GpuGuestMemoryScoped out_luma(host1x.gmmu_manager, regs.output_surface.luma.Address(), out_swizzle_size, &swizzle_scratch); if (block_height == 1) { SwizzleSurface(out_luma, out_luma_stride, luma_scratch, out_luma_stride, out_luma_height); } else { @@ -1061,7 +1060,7 @@ void Vic::WriteABGR(const OutputSurfaceConfig& output_surface_config, VideoPixel surface_stride * surface_height * BytesPerPixel, out_luma_width, out_luma_height, out_luma_stride, out_luma_size); luma_scratch.resize_destructive(out_luma_size); - Tegra::Memory::GpuGuestMemoryScoped out_luma(memory_manager, regs.output_surface.luma.Address(), out_luma_size, &luma_scratch); + Tegra::Memory::GpuGuestMemoryScoped out_luma(host1x.gmmu_manager, regs.output_surface.luma.Address(), out_luma_size, &luma_scratch); Decode(out_luma.data(), output_surface.data()); } break; default: diff --git a/src/video_core/host1x/vic.h b/src/video_core/host1x/vic.h index f6e1d2fa4d..d728b38f91 100644 --- 
a/src/video_core/host1x/vic.h +++ b/src/video_core/host1x/vic.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project @@ -15,6 +15,7 @@ #include "common/common_types.h" #include "common/scratch_buffer.h" #include "video_core/cdma_pusher.h" +#include "video_core/host1x/host1x.h" namespace Tegra::Host1x { class Host1x; @@ -605,7 +606,7 @@ public: SetOutputSurfaceChromaUnusedOffset = offsetof(VicRegisters, output_surface.chroma_v) }; - explicit Vic(Host1x& host1x, s32 id, u32 syncpt, FrameQueue& frame_queue) noexcept; + explicit Vic(Host1x& host1x, s32 id, u32 syncpt) noexcept; ~Vic() noexcept; /// Write to the device state. @@ -620,18 +621,17 @@ private: void WriteY8__V8U8_N420(const OutputSurfaceConfig& output_surface_config) noexcept; void WriteABGR(const OutputSurfaceConfig& output_surface_config, VideoPixelFormat format) noexcept; - s32 id; - s32 nvdec_id{-1}; - u32 syncpoint; - VicRegisters regs{}; - FrameQueue& frame_queue; Common::ScratchBuffer swizzle_scratch; Common::ScratchBuffer output_surface; Common::ScratchBuffer slot_surface; Common::ScratchBuffer luma_scratch; Common::ScratchBuffer chroma_scratch; + + s32 id; + s32 nvdec_id{-1}; + u32 syncpoint; }; } // namespace Tegra::Host1x