[video_core, hle] remove redundant parent references in system structs (#3908)

Reworked a bit to remove stored references to parent objects and instead pass the parent as an argument to the methods that need it, to prevent useless reloads.

Signed-off-by: lizzie <lizzie@eden-emu.dev>
Co-authored-by: maufeat <sahyno1996@gmail.com>
Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3908
Reviewed-by: Maufeat <sahyno1996@gmail.com>
Reviewed-by: crueter <crueter@eden-emu.dev>
This commit is contained in:
lizzie 2026-06-23 06:31:25 +02:00 committed by crueter
parent f8facda35f
commit 3aa0d46259
No known key found for this signature in database
GPG key ID: 425ACD2D4830EBC6
307 changed files with 4419 additions and 4477 deletions

View file

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
@ -10,26 +10,22 @@
namespace Tegra::Host1x {
Control::Control(Host1x& host1x_) : host1x(host1x_) {}
Control::~Control() = default;
/// Dispatches a Host1x control method.
/// LoadSyncptPayload32 stores `argument` in syncpoint_value; WaitSyncpt and WaitSyncpt32 call
/// Execute() with `argument`; any other method is reported through UNIMPLEMENTED_MSG.
void Control::ProcessMethod(Method method, u32 argument) {
void Control::ProcessMethod(Host1x& host1x, Method method, u32 argument) {
switch (method) {
case Method::LoadSyncptPayload32:
// Remember the payload; Execute() reads it back from syncpoint_value.
syncpoint_value = argument;
break;
case Method::WaitSyncpt:
case Method::WaitSyncpt32:
// Both wait variants share the same handling.
Execute(argument);
Execute(host1x, argument);
break;
default:
UNIMPLEMENTED_MSG("Control method {:#X}", static_cast<u32>(method));
UNIMPLEMENTED_MSG("Control method {:#X}", u32(method));
break;
}
}
/// Logs the wait, then calls WaitHost(data, syncpoint_value) on the Host1x syncpoint manager.
/// `data` is logged as the syncpoint and syncpoint_value as the value to wait for.
void Control::Execute(u32 data) {
void Control::Execute(Host1x& host1x, u32 data) {
LOG_TRACE(Service_NVDRV, "Control wait syncpt {} value {}", data, syncpoint_value);
host1x.GetSyncpointManager().WaitHost(data, syncpoint_value);
}

View file

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
// SPDX-License-Identifier: GPL-3.0-or-later
@ -19,17 +22,11 @@ public:
WaitSyncpt32 = 0x50,
};
explicit Control(Host1x& host1x);
~Control();
/// Writes the method argument into the state, or invokes Execute() when a wait-syncpoint method is encountered
void ProcessMethod(Method method, u32 argument);
private:
void ProcessMethod(Host1x& host1x, Method method, u32 argument);
/// For Host1x, execute is waiting on a syncpoint previously written into the state
void Execute(u32 data);
void Execute(Host1x& host1x, u32 data);
Host1x& host1x;
u32 syncpoint_value{};
};

View file

@ -27,22 +27,22 @@ void Host1x::StartDevice(s32 fd, ChannelType type, u32 syncpt) {
#ifdef YUZU_LEGACY
std::call_once(nvdec_first_init, []() {std::this_thread::sleep_for(std::chrono::milliseconds{500});}); // HACK: For Astroneer
#endif
devices[fd] = std::make_unique<Tegra::Host1x::Nvdec>(*this, fd, syncpt);
devices[fd].emplace<Tegra::Host1x::Nvdec>(*this, fd, syncpt);
break;
case ChannelType::VIC:
#ifdef YUZU_LEGACY
std::call_once(vic_first_init, []() {std::this_thread::sleep_for(std::chrono::milliseconds{500});}); // HACK: For Astroneer
#endif
devices[fd] = std::make_unique<Tegra::Host1x::Vic>(*this, fd, syncpt);
devices[fd].emplace<Tegra::Host1x::Vic>(*this, fd, syncpt);
break;
default:
LOG_ERROR(HW_GPU, "Unimplemented host1x device {}", static_cast<u32>(type));
LOG_ERROR(HW_GPU, "Unimplemented host1x device {}", u32(type));
break;
}
}
/// Releases the device associated with fd; `type` is not used in the visible body.
// NOTE(review): the emplace<std::monostate>() line indexes `devices` directly with fd, and
// `devices` is declared as a fixed std::array of 1024 variants. std::array::operator[] is
// unchecked, so a negative fd or fd >= 1024 would be out of bounds - confirm callers
// guarantee the range.
void Host1x::StopDevice(s32 fd, ChannelType type) {
devices.erase(fd);
devices[fd].emplace<std::monostate>();
}
} // namespace Tegra::Host1x

View file

@ -8,10 +8,14 @@
#include <ankerl/unordered_dense.h>
#include <unordered_map>
#include <queue>
#include <variant>
#include "common/common_types.h"
// Complete Nvdec/Vic definitions are needed here: Host1x::devices stores them by value in a std::variant
#include "video_core/host1x/nvdec.h"
#include "video_core/host1x/vic.h"
#include "common/address_space.h"
#include "video_core/cdma_pusher.h"
#include "video_core/host1x/gpu_device_memory_manager.h"
@ -31,118 +35,90 @@ class Nvdec;
class FrameQueue {
public:
struct FrameDevice {
std::deque<std::pair<u64, std::shared_ptr<FFmpeg::Frame>>> m_presentation_order;
std::unordered_map<u64, std::shared_ptr<FFmpeg::Frame>> m_decode_order;
};
/// Registers empty frame queues for fd while holding m_mutex.
// NOTE(review): the insert() lines leave an already-present entry for fd untouched, while
// insert_or_assign() replaces it with a fresh FrameDevice, dropping any frames still queued
// for that fd - confirm this difference is intended.
void Open(s32 fd) {
std::scoped_lock l{m_mutex};
m_presentation_order.insert({fd, {}});
m_decode_order.insert({fd, {}});
m_frame_devices.insert_or_assign(fd, FrameDevice{});
}
/// Erases the per-fd queue entries (and the frame references they hold) while holding m_mutex.
void Close(s32 fd) {
std::scoped_lock l{m_mutex};
m_presentation_order.erase(fd);
m_decode_order.erase(fd);
m_frame_devices.erase(fd);
}
/// Returns the fd of the first registered device that holds a frame whose offset equals
/// search_offset, or -1 when no queue contains it.
/// Every queue is scanned linearly while m_mutex is held.
s32 VicFindNvdecFdFromOffset(u64 search_offset) {
std::scoped_lock l{m_mutex};
for (auto& map : m_presentation_order) {
for (auto& [offset, frame] : map.second) {
if (offset == search_offset) {
return map.first;
}
}
}
for (auto& map : m_decode_order) {
for (auto& [offset, frame] : map.second) {
if (offset == search_offset) {
return map.first;
}
}
for (auto const& [fd, dev] : m_frame_devices) {
// Per device: the presentation queue is checked before the decode map.
for (auto const& [offset, frame] : dev.m_presentation_order)
if (offset == search_offset)
return fd;
for (auto const& [offset, frame] : dev.m_decode_order)
if (offset == search_offset)
return fd;
}
return -1;
}
/// Appends (offset, frame) to the back of fd's presentation queue while holding m_mutex.
/// When the queue already holds MAX_PRESENT_QUEUE entries the front entry is dropped first.
/// An fd that was never opened is ignored and `frame` is left untouched.
void PushPresentOrder(s32 fd, u64 offset, std::shared_ptr<FFmpeg::Frame>&& frame) {
std::scoped_lock l{m_mutex};
auto map = m_presentation_order.find(fd);
if (map == m_presentation_order.end()) {
return;
if (auto const it = m_frame_devices.find(fd); it != m_frame_devices.end()) {
if (it->second.m_presentation_order.size() >= MAX_PRESENT_QUEUE)
it->second.m_presentation_order.pop_front();
it->second.m_presentation_order.emplace_back(offset, std::move(frame));
}
if (map->second.size() >= MAX_PRESENT_QUEUE) {
map->second.pop_front();
}
map->second.emplace_back(offset, std::move(frame));
}
/// Stores `frame` under `offset` in fd's decode map (overwriting an existing entry for that
/// offset) while holding m_mutex, then trims the map back down to MAX_DECODE_MAP entries.
/// An fd that was never opened is ignored.
// NOTE(review): the trim erases the first (size - MAX_DECODE_MAP) elements starting at begin().
// FrameDevice::m_decode_order is a std::unordered_map, whose iteration order is unspecified,
// so the evicted entries are not necessarily the oldest - the frame just inserted could be
// among them. Confirm this is acceptable.
void PushDecodeOrder(s32 fd, u64 offset, std::shared_ptr<FFmpeg::Frame>&& frame) {
std::scoped_lock l{m_mutex};
auto map = m_decode_order.find(fd);
if (map == m_decode_order.end()) {
return;
}
map->second.insert_or_assign(offset, std::move(frame));
if (map->second.size() > MAX_DECODE_MAP) {
auto it = map->second.begin();
std::advance(it, map->second.size() - MAX_DECODE_MAP);
map->second.erase(map->second.begin(), it);
if (auto const it = m_frame_devices.find(fd); it != m_frame_devices.end()) {
it->second.m_decode_order.insert_or_assign(offset, std::move(frame));
if (it->second.m_decode_order.size() > MAX_DECODE_MAP) {
auto it2 = it->second.m_decode_order.begin();
std::advance(it2, it->second.m_decode_order.size() - MAX_DECODE_MAP);
it->second.m_decode_order.erase(it->second.m_decode_order.begin(), it2);
}
}
}
/// Fetches a frame for fd while holding m_mutex.
/// A non-empty presentation queue takes priority and yields its front frame (offset is not
/// consulted); otherwise a non-empty decode map is searched for `offset`.
/// Returns an empty pointer when fd is -1, fd is not registered, or no frame is available.
std::shared_ptr<FFmpeg::Frame> GetFrame(s32 fd, u64 offset) {
if (fd == -1) {
return {};
if (fd != -1) {
std::scoped_lock l{m_mutex};
if (auto const it = m_frame_devices.find(fd); it != m_frame_devices.end()) {
// The emptiness checks here are what make front() in GetPresentOrderLocked safe.
if (it->second.m_presentation_order.size() > 0)
return GetPresentOrderLocked(fd);
if (it->second.m_decode_order.size() > 0)
return GetDecodeOrderLocked(fd, offset);
}
}
std::scoped_lock l{m_mutex};
auto present_map = m_presentation_order.find(fd);
if (present_map != m_presentation_order.end() && !present_map->second.empty()) {
return GetPresentOrderLocked(fd);
}
auto decode_map = m_decode_order.find(fd);
if (decode_map != m_decode_order.end() && !decode_map->second.empty()) {
return GetDecodeOrderLocked(fd, offset);
}
return {};
}
private:
/// Pops and returns the front frame of fd's presentation queue; returns an empty pointer when
/// fd is not registered. Does not lock m_mutex itself - the visible caller, GetFrame, holds it.
std::shared_ptr<FFmpeg::Frame> GetPresentOrderLocked(s32 fd) {
auto map = m_presentation_order.find(fd);
if (map == m_presentation_order.end() || map->second.empty()) {
return {};
if (auto const it = m_frame_devices.find(fd); it != m_frame_devices.end()) {
// NOTE(review): this branch calls front() without testing for an empty deque (the
// find()/empty() guard above does test it). GetFrame checks size() > 0 before calling,
// but any other caller would hit undefined behaviour on an empty queue.
auto frame = std::move(it->second.m_presentation_order.front().second);
it->second.m_presentation_order.pop_front();
return frame;
}
auto frame = std::move(map->second.front().second);
map->second.pop_front();
return frame;
return {};
}
/// Removes the entry stored under `offset` from fd's decode map and returns its frame.
/// Returns an empty pointer when fd is not registered or no entry exists for `offset`.
/// Does not lock m_mutex itself - the visible caller, GetFrame, holds it.
std::shared_ptr<FFmpeg::Frame> GetDecodeOrderLocked(s32 fd, u64 offset) {
auto map = m_decode_order.find(fd);
if (map == m_decode_order.end() || map->second.empty()) {
return {};
if (auto const it = m_frame_devices.find(fd); it != m_frame_devices.end()) {
if (auto const it2 = it->second.m_decode_order.find(offset); it2 != it->second.m_decode_order.end()) {
// TODO: this "mapped" prevents us from fully embracing ankerl
// extract() detaches the node from the map; the frame is then moved out of that node.
return std::move(it->second.m_decode_order.extract(it2).mapped());
}
}
auto it = map->second.find(offset);
if (it == map->second.end()) {
return {};
}
// TODO: this "mapped" prevents us from fully embracing ankerl
return std::move(map->second.extract(it).mapped());
return {};
}
using FramePtr = std::shared_ptr<FFmpeg::Frame>;
std::mutex m_mutex{};
ankerl::unordered_dense::map<s32, std::deque<std::pair<u64, FramePtr>>> m_presentation_order;
ankerl::unordered_dense::map<s32, std::unordered_map<u64, FramePtr>> m_decode_order;
ankerl::unordered_dense::map<s32, FrameDevice> m_frame_devices;
static constexpr size_t MAX_PRESENT_QUEUE = 100;
static constexpr size_t MAX_DECODE_MAP = 200;
@ -196,11 +172,11 @@ public:
void StopDevice(s32 fd, ChannelType type);
/// Forwards a command list to the device registered for fd.
/// With the variant storage, Nvdec and Vic are tried in turn via std::get_if; a slot holding
/// std::monostate matches neither and the entries are silently dropped.
// NOTE(review): devices[fd] indexes a fixed std::array of 1024 variants without a bounds
// check, whereas the find()-based lines tolerate any fd value. A negative fd or fd >= 1024
// would be out of bounds - confirm callers guarantee the range.
void PushEntries(s32 fd, ChCommandHeaderList&& entries) {
auto it = devices.find(fd);
if (it == devices.end()) {
return;
if (auto const nvdec = std::get_if<Tegra::Host1x::Nvdec>(&devices[fd])) {
nvdec->PushEntries(std::move(entries));
} else if (auto const vic = std::get_if<Tegra::Host1x::Vic>(&devices[fd])) {
vic->PushEntries(std::move(entries));
}
it->second->PushEntries(std::move(entries));
}
Core::System& system;
@ -209,7 +185,11 @@ public:
Tegra::MemoryManager gmmu_manager;
Common::FlatAllocator<u32, 0, 32> allocator;
FrameQueue frame_queue;
ankerl::unordered_dense::map<s32, std::unique_ptr<CDmaPusher>> devices;
std::array<std::variant<
std::monostate,
Tegra::Host1x::Nvdec,
Tegra::Host1x::Vic
>, 1024> devices;
#ifdef YUZU_LEGACY
std::once_flag nvdec_first_init;
std::once_flag vic_first_init;

View file

@ -407,15 +407,15 @@ void Vic::ReadInterlacedY8__V8U8_N420(const SlotStruct& slot, std::span<const Pl
};
switch (slot.config.deinterlace_mode) {
case DXVAHD_DEINTERLACE_MODE_PRIVATE::WEAVE:
case DxvhadDeinterlaceModePrivate::Weave:
// Due to the fact that we do not write to memory in nvdec, we cannot use Weave as it
// relies on the previous frame.
DecodeBobField();
break;
case DXVAHD_DEINTERLACE_MODE_PRIVATE::BOB_FIELD:
case DxvhadDeinterlaceModePrivate::BobField:
DecodeBobField();
break;
case DXVAHD_DEINTERLACE_MODE_PRIVATE::DISI1:
case DxvhadDeinterlaceModePrivate::Disi1:
// Due to the fact that we do not write to memory in nvdec, we cannot use DISI1 as it
// relies on previous/next frames.
DecodeBobField();
@ -431,13 +431,13 @@ void Vic::ReadInterlacedY8__V8U8_N420(const SlotStruct& slot, std::span<const Pl
void Vic::ReadY8__V8U8_N420(const SlotStruct& slot, std::span<const PlaneOffsets> offsets, std::shared_ptr<const FFmpeg::Frame> frame, bool planar) noexcept {
switch (slot.config.frame_format) {
case DXVAHD_FRAME_FORMAT::PROGRESSIVE:
case DxvhadFrameFormat::Progressive:
ReadProgressiveY8__V8U8_N420(slot, offsets, std::move(frame), planar, false);
break;
case DXVAHD_FRAME_FORMAT::TOP_FIELD:
case DxvhadFrameFormat::TopField:
ReadInterlacedY8__V8U8_N420(slot, offsets, std::move(frame), planar, true);
break;
case DXVAHD_FRAME_FORMAT::BOTTOM_FIELD:
case DxvhadFrameFormat::BottomField:
ReadInterlacedY8__V8U8_N420(slot, offsets, std::move(frame), planar, false);
break;
default:
@ -860,7 +860,7 @@ void Vic::WriteY8__V8U8_N420(const OutputSurfaceConfig& output_surface_config) n
};
switch (output_surface_config.out_block_kind) {
case BLK_KIND::GENERIC_16Bx2: {
case BlkKind::Generic_16Bx2: {
u32 const block_height = u32(output_surface_config.out_block_height);
auto const out_luma_swizzle_size = Texture::CalculateSize(true, BytesPerPixel, out_luma_width, out_luma_height, 1, block_height, 0);
auto const out_chroma_swizzle_size = Texture::CalculateSize(true, BytesPerPixel * 2, out_chroma_width, out_chroma_height, 1, block_height, 0);
@ -889,7 +889,7 @@ void Vic::WriteY8__V8U8_N420(const OutputSurfaceConfig& output_surface_config) n
Texture::SwizzleTexture(out_chroma, chroma_scratch, BytesPerPixel, out_chroma_width, out_chroma_height, 1, block_height, 0, 1);
}
} break;
case BLK_KIND::PITCH: {
case BlkKind::Pitch: {
LOG_TRACE(HW_GPU, "Writing Y8__V8U8_N420 swizzled frame\n"
"\tinput surface {}x{} stride {} size {:#X}\n"
"\toutput luma {}x{} stride {} size {:#X} block height {} swizzled size 0x{:X}\n",
@ -1032,7 +1032,7 @@ void Vic::WriteABGR(const OutputSurfaceConfig& output_surface_config, VideoPixel
};
switch (output_surface_config.out_block_kind) {
case BLK_KIND::GENERIC_16Bx2: {
case BlkKind::Generic_16Bx2: {
const u32 block_height = u32(output_surface_config.out_block_height);
auto const out_swizzle_size = Texture::CalculateSize(true, BytesPerPixel, out_luma_width, out_luma_height, 1, block_height, 0);
LOG_TRACE(HW_GPU, "Writing ABGR swizzled frame\n"
@ -1051,7 +1051,7 @@ void Vic::WriteABGR(const OutputSurfaceConfig& output_surface_config, VideoPixel
Texture::SwizzleTexture(out_luma, luma_scratch, BytesPerPixel, out_luma_width, out_luma_height, 1, block_height, 0, 1);
}
} break;
case BLK_KIND::PITCH: {
case BlkKind::Pitch: {
LOG_TRACE(HW_GPU, "Writing ABGR pitch frame\n"
"\tinput surface {}x{} stride {} size {:#X}"
"\toutput surface {}x{} stride {} size {:#X}",

View file

@ -6,16 +6,12 @@
#pragma once
#include <condition_variable>
#include <functional>
#include <memory>
#include <mutex>
#include <thread>
#include "common/common_types.h"
#include "common/scratch_buffer.h"
#include "video_core/cdma_pusher.h"
#include "video_core/host1x/host1x.h"
#include "video_core/host1x/ffmpeg.h"
namespace Tegra::Host1x {
class Host1x;
@ -138,52 +134,53 @@ enum SurfaceIndex : u32 {
CombinedMotion = 7,
};
/// Alpha fill mode; stored in the 3-bit alpha_fill_mode field of OutputConfig.
enum class DXVAHD_ALPHA_FILL_MODE : u32 {
OPAQUE = 0,
BACKGROUND = 1,
DESTINATION = 2,
SOURCE_STREAM = 3,
COMPOSITED = 4,
SOURCE_ALPHA = 5,
// Note: these will inevitably collide with Win32 defines if you use their UPPER_SNAKE_CASE naming
// NOTE(review): "Dxvhad" looks like a transposition of the "DXVAHD" prefix used by the
// UPPER_SNAKE_CASE name ("Dxvahd" expected) - confirm whether the spelling is intentional.
enum class DxvhadAlphaFillMode : u32 {
Opaque = 0,
Background = 1,
Destination = 2,
SourceStream = 3,
Composited = 4,
SourceAlpha = 5,
};
/// Frame format of an input slot; stored in the 4-bit frame_format field of SlotConfig.
/// Vic::ReadY8__V8U8_N420 switches on Progressive, TopField and BottomField.
enum class DXVAHD_FRAME_FORMAT : u64 {
PROGRESSIVE = 0,
INTERLACED_TOP_FIELD_FIRST = 1,
INTERLACED_BOTTOM_FIELD_FIRST = 2,
TOP_FIELD = 3,
BOTTOM_FIELD = 4,
SUBPIC_PROGRESSIVE = 5,
SUBPIC_INTERLACED_TOP_FIELD_FIRST = 6,
SUBPIC_INTERLACED_BOTTOM_FIELD_FIRST = 7,
SUBPIC_TOP_FIELD = 8,
SUBPIC_BOTTOM_FIELD = 9,
TOP_FIELD_CHROMA_BOTTOM = 10,
BOTTOM_FIELD_CHROMA_TOP = 11,
SUBPIC_TOP_FIELD_CHROMA_BOTTOM = 12,
SUBPIC_BOTTOM_FIELD_CHROMA_TOP = 13,
enum class DxvhadFrameFormat : u64 {
Progressive = 0,
InterlacedTopFieldFirst = 1,
InterlacedBottomFieldFirst = 2,
TopField = 3,
BottomField = 4,
SubpicProgressive = 5,
SubpicInterlacedTopFieldFirst = 6,
SubpicInterlacedBottomFieldFirst = 7,
SubpicTopField = 8,
SubpicBottomField = 9,
TopFieldChromaBottom = 10,
BottomFieldChromaTop = 11,
SubpicTopFieldChromaBottom = 12,
SubpicBottomFieldChromaTop = 13,
};
/// Deinterlace mode of an input slot; stored in the 4-bit deinterlace_mode field of SlotConfig.
/// Vic::ReadInterlacedY8__V8U8_N420 handles Weave, BobField and Disi1 by decoding a bob field.
enum class DXVAHD_DEINTERLACE_MODE_PRIVATE : u64 {
WEAVE = 0,
BOB_FIELD = 1,
BOB = 2,
NEWBOB = 3,
DISI1 = 4,
WEAVE_LUMA_BOB_FIELD_CHROMA = 5,
MAX = 0xF,
enum class DxvhadDeinterlaceModePrivate : u64 {
Weave = 0,
BobField = 1,
Bob = 2,
Newbob = 3,
Disi1 = 4,
WeaveLumaBobFieldChroma = 5,
Max = 0xF,
};
/// Memory layout kind of the output surface; stored in the 4-bit out_block_kind field of
/// OutputSurfaceConfig. The Vic write paths switch on Pitch and Generic_16Bx2.
enum class BLK_KIND {
PITCH = 0,
GENERIC_16Bx2 = 1,
enum class BlkKind {
Pitch = 0,
Generic_16Bx2 = 1,
// These are unsupported in the vic
BL_NAIVE = 2,
BL_KEPLER_XBAR_RAW = 3,
VP2_TILED = 15,
BlNaive = 2,
BlKeplerXbarRaw = 3,
Vp2Tiled = 15,
};
enum class BLEND_SRCFACTC : u32 {
enum class BlendSrcFactC : u32 {
K1 = 0,
K1_TIMES_DST = 1,
NEG_K1_TIMES_DST = 2,
@ -191,7 +188,7 @@ enum class BLEND_SRCFACTC : u32 {
ZERO = 4,
};
enum class BLEND_DSTFACTC : u32 {
enum class BlendDstFactC : u32 {
K1 = 0,
K2 = 1,
K1_TIMES_DST = 2,
@ -201,7 +198,7 @@ enum class BLEND_DSTFACTC : u32 {
ONE = 6,
};
enum class BLEND_SRCFACTA : u32 {
enum class BlendSrcFactA : u32 {
K1 = 0,
K2 = 1,
NEG_K1_TIMES_DST = 2,
@ -209,7 +206,7 @@ enum class BLEND_SRCFACTA : u32 {
MAX = 7,
};
enum class BLEND_DSTFACTA : u32 {
enum class BlendDstFactA : u32 {
K2 = 0,
NEG_K1_TIMES_SRC = 1,
ZERO = 2,
@ -232,7 +229,7 @@ static_assert(sizeof(PipeConfig) == 0x10, "PipeConfig has the wrong size!");
struct OutputConfig {
union {
BitField<0, 3, DXVAHD_ALPHA_FILL_MODE> alpha_fill_mode;
BitField<0, 3, DxvhadAlphaFillMode> alpha_fill_mode;
BitField<3, 3, u64> alpha_fill_slot;
BitField<6, 10, u64> background_a;
BitField<16, 10, u64> background_r;
@ -265,7 +262,7 @@ struct OutputSurfaceConfig {
BitField<0, 7, VideoPixelFormat> out_pixel_format;
BitField<7, 2, u32> out_chroma_loc_horiz;
BitField<9, 2, u32> out_chroma_loc_vert;
BitField<11, 4, BLK_KIND> out_block_kind;
BitField<11, 4, BlkKind> out_block_kind;
BitField<15, 4, u32> out_block_height; // in gobs, log2
BitField<19, 3, u32> reserved0;
BitField<22, 10, u32> reserved1;
@ -365,7 +362,7 @@ struct SlotConfig {
BitField<14, 1, u64> prev_prev_motion_field_enable;
BitField<15, 1, u64> combined_motion_field_enable;
BitField<16, 4, DXVAHD_FRAME_FORMAT> frame_format;
BitField<16, 4, DxvhadFrameFormat> frame_format;
BitField<20, 2, u64> filter_length_y; // 0: 1-tap, 1: 2-tap, 2: 5-tap, 3: 10-tap
BitField<22, 2, u64> filter_length_x;
BitField<24, 12, u64> panoramic;
@ -377,7 +374,7 @@ struct SlotConfig {
BitField<10, 10, u64> filter_detail;
BitField<20, 10, u64> chroma_noise;
BitField<30, 10, u64> chroma_detail;
BitField<40, 4, DXVAHD_DEINTERLACE_MODE_PRIVATE> deinterlace_mode;
BitField<40, 4, DxvhadDeinterlaceModePrivate> deinterlace_mode;
BitField<44, 3, u64> motion_accumulation_weight;
BitField<47, 11, u64> noise_iir;
BitField<58, 4, u64> light_level;
@ -484,13 +481,13 @@ struct BlendingSlotStruct {
BitField<26, 6, u32> reserved1;
};
union {
BitField<0, 3, BLEND_SRCFACTC> src_factor_color_match_select;
BitField<0, 3, BlendSrcFactC> src_factor_color_match_select;
BitField<3, 1, u32> reserved2;
BitField<4, 3, BLEND_DSTFACTC> dst_factor_color_match_select;
BitField<4, 3, BlendDstFactC> dst_factor_color_match_select;
BitField<7, 1, u32> reserved3;
BitField<8, 3, BLEND_SRCFACTA> src_factor_a_match_select;
BitField<8, 3, BlendSrcFactA> src_factor_a_match_select;
BitField<11, 1, u32> reserved4;
BitField<12, 3, BLEND_DSTFACTA> dst_factor_a_match_select;
BitField<12, 3, BlendDstFactA> dst_factor_a_match_select;
BitField<15, 1, u32> reserved5;
BitField<16, 4, u32> reserved6;
BitField<20, 4, u32> reserved7;
@ -624,8 +621,8 @@ private:
VicRegisters regs{};
Common::ScratchBuffer<u8> swizzle_scratch;
Common::ScratchBuffer<Pixel> output_surface;
Common::ScratchBuffer<Pixel> slot_surface;
Common::ScratchBuffer<Tegra::Host1x::Pixel> output_surface;
Common::ScratchBuffer<Tegra::Host1x::Pixel> slot_surface;
Common::ScratchBuffer<u8> luma_scratch;
Common::ScratchBuffer<u8> chroma_scratch;