From 4755ec7a59072fedc6043976e7d70238ea6d00ef Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Tue, 10 Mar 2026 02:46:35 -0400 Subject: [PATCH 01/13] [vulkan] simplify numeric type determination --- src/shader_recompiler/ir_opt/texture_pass.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index a1405b225f..45149a7e80 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -39,9 +39,7 @@ NumericType GetNumericType(TexturePixelFormat format) { if (!VideoCore::Surface::IsPixelFormatInteger(pixel_format)) { return NumericType::Float; } - return VideoCore::Surface::IsPixelFormatSignedInteger(pixel_format) - ? NumericType::SignedInt - : NumericType::UnsignedInt; + return NumericType::UnsignedInt; } IR::Opcode IndexedInstruction(const IR::Inst& inst) { @@ -450,7 +448,9 @@ public: u32 Add(const ImageBufferDescriptor& desc) { const u32 index{Add(image_buffer_descriptors, desc, [&desc](const auto& existing) { - return desc.format == existing.format && desc.cbuf_index == existing.cbuf_index && + return desc.format == existing.format && + desc.numeric_type == existing.numeric_type && + desc.cbuf_index == existing.cbuf_index && desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count && desc.size_shift == existing.size_shift; })}; @@ -480,6 +480,7 @@ public: u32 Add(const ImageDescriptor& desc) { const u32 index{Add(image_descriptors, desc, [&desc](const auto& existing) { return desc.type == existing.type && desc.format == existing.format && + desc.numeric_type == existing.numeric_type && desc.cbuf_index == existing.cbuf_index && desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count && desc.size_shift == existing.size_shift; From b88ca5b6357c207a22b3559d19911a3bcf112fae Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Tue, 10 Mar 2026 02:58:57 -0400 Subject: 
[PATCH 02/13] Revert "[nce] Added dual channel handling for guest access faults" --- src/core/arm/nce/arm_nce.cpp | 44 +----------------------------------- 1 file changed, 1 insertion(+), 43 deletions(-) diff --git a/src/core/arm/nce/arm_nce.cpp b/src/core/arm/nce/arm_nce.cpp index 2a00729338..a3c7abbb99 100644 --- a/src/core/arm/nce/arm_nce.cpp +++ b/src/core/arm/nce/arm_nce.cpp @@ -48,35 +48,6 @@ constexpr u64 SplitPageAccessWindow = 64; constexpr size_t MaxPreciseAccessPages = 256; constexpr u8 MaxPreciseAccessPageWeight = 4; -[[nodiscard]] constexpr u64 AlignDownPage(u64 addr) { - return addr & ~u64{Memory::YUZU_PAGEMASK}; -} - -[[nodiscard]] bool IsNearPageBoundary(u64 addr) { - const u64 page_offset = addr & Memory::YUZU_PAGEMASK; - return page_offset < SplitPageAccessWindow || - page_offset + SplitPageAccessWindow > Memory::YUZU_PAGESIZE; -} - -[[nodiscard]] bool IsNearTlsWindow(u64 tls_base, u64 fault_addr) { - if (tls_base == 0) { - return false; - } - - const u64 tls_first_page = AlignDownPage(tls_base); - const u64 tls_last_byte = tls_base + Kernel::Svc::ThreadLocalRegionSize - 1; - const u64 tls_last_page = AlignDownPage(tls_last_byte); - const u64 fault_page = AlignDownPage(fault_addr); - - return fault_page + Memory::YUZU_PAGESIZE >= tls_first_page && - fault_page <= tls_last_page + Memory::YUZU_PAGESIZE; -} - -[[nodiscard]] bool ShouldUsePreciseAccessChannel(const GuestContext* guest_ctx, u64 fault_addr) { - return IsNearPageBoundary(fault_addr) || IsNearTlsWindow(guest_ctx->tpidrro_el0, fault_addr) || - IsNearTlsWindow(guest_ctx->tpidr_el0, fault_addr); -} - } // namespace void* ArmNce::RestoreGuestContext(void* raw_context) { @@ -199,20 +170,7 @@ bool ArmNce::HandleGuestAccessFault(GuestContext* guest_ctx, void* raw_info, voi const u64 fault_addr = reinterpret_cast(info->si_addr); const Common::ProcessAddress addr = fault_addr & ~Memory::YUZU_PAGEMASK; const u64 page_offset = fault_addr & Memory::YUZU_PAGEMASK; - auto& memory = 
parent->m_running_thread->GetOwnerProcess()->GetMemory(); - const bool rasterizer_cached = memory.IsRasterizerCached(addr); - const bool prefer_precise_channel = ShouldUsePreciseAccessChannel(guest_ctx, fault_addr) || - parent->IsPreciseAccessPage(fault_addr) || - rasterizer_cached; - - if (prefer_precise_channel) { - if (auto next_pc = MatchAndExecuteOneInstruction(memory, &host_ctx, fpctx); next_pc) { - parent->MarkPreciseAccessFaultWindow(fault_addr); - host_ctx.pc = *next_pc; - return true; - } - } - + auto& memory = guest_ctx->parent->m_running_thread->GetOwnerProcess()->GetMemory(); bool handled = memory.InvalidateNCE(addr, Memory::YUZU_PAGESIZE); if (page_offset < SplitPageAccessWindow && addr >= Memory::YUZU_PAGESIZE) { From 8c077fc4cd6d433412eb6d92700812e0240417a9 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Tue, 10 Mar 2026 03:01:02 -0400 Subject: [PATCH 03/13] Revert "[nce] Added case for access fault handling to manage page edge cases" --- src/core/arm/nce/arm_nce.cpp | 30 +++++------------------------- 1 file changed, 5 insertions(+), 25 deletions(-) diff --git a/src/core/arm/nce/arm_nce.cpp b/src/core/arm/nce/arm_nce.cpp index a3c7abbb99..0e285b4e75 100644 --- a/src/core/arm/nce/arm_nce.cpp +++ b/src/core/arm/nce/arm_nce.cpp @@ -43,10 +43,6 @@ fpsimd_context* GetFloatingPointState(mcontext_t& host_ctx) { using namespace Common::Literals; constexpr u32 StackSize = 128_KiB; -constexpr u64 CacheLineSize = 64; -constexpr u64 SplitPageAccessWindow = 64; -constexpr size_t MaxPreciseAccessPages = 256; -constexpr u8 MaxPreciseAccessPageWeight = 4; } // namespace @@ -162,35 +158,19 @@ bool ArmNce::HandleGuestAlignmentFault(GuestContext* guest_ctx, void* raw_info, } bool ArmNce::HandleGuestAccessFault(GuestContext* guest_ctx, void* raw_info, void* raw_context) { - auto& host_ctx = static_cast(raw_context)->uc_mcontext; - auto* fpctx = GetFloatingPointState(host_ctx); auto* info = static_cast(raw_info); auto* parent = guest_ctx->parent; - const u64 
fault_addr = reinterpret_cast(info->si_addr); - const Common::ProcessAddress addr = fault_addr & ~Memory::YUZU_PAGEMASK; - const u64 page_offset = fault_addr & Memory::YUZU_PAGEMASK; + // Try to handle an invalid access. + // TODO: handle accesses which split a page? + const Common::ProcessAddress addr = + (reinterpret_cast(info->si_addr) & ~Memory::YUZU_PAGEMASK); auto& memory = guest_ctx->parent->m_running_thread->GetOwnerProcess()->GetMemory(); - bool handled = memory.InvalidateNCE(addr, Memory::YUZU_PAGESIZE); - - if (page_offset < SplitPageAccessWindow && addr >= Memory::YUZU_PAGESIZE) { - handled |= memory.InvalidateNCE(addr - Memory::YUZU_PAGESIZE, Memory::YUZU_PAGESIZE); - } - if (page_offset + SplitPageAccessWindow > Memory::YUZU_PAGESIZE) { - handled |= memory.InvalidateNCE(addr + Memory::YUZU_PAGESIZE, Memory::YUZU_PAGESIZE); - } - - if (handled) { + if (memory.InvalidateNCE(addr, Memory::YUZU_PAGESIZE)) { // We handled the access successfully and are returning to guest code. return true; } - if (auto next_pc = MatchAndExecuteOneInstruction(memory, &host_ctx, fpctx); next_pc) { - parent->MarkPreciseAccessFaultWindow(fault_addr); - host_ctx.pc = *next_pc; - return true; - } - // We couldn't handle the access. 
return HandleFailedGuestFault(guest_ctx, raw_info, raw_context); } From ce2f2187bd9a126f1d65953e2c3c2016e4a2ea14 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Tue, 10 Mar 2026 03:09:30 -0400 Subject: [PATCH 04/13] Revert "[nce] Adjusted precise access fault window handling + decay mechanism" --- src/core/arm/nce/arm_nce.cpp | 40 +++--------------------------------- src/core/arm/nce/arm_nce.h | 6 ++---- 2 files changed, 5 insertions(+), 41 deletions(-) diff --git a/src/core/arm/nce/arm_nce.cpp b/src/core/arm/nce/arm_nce.cpp index 0e285b4e75..57f19e169d 100644 --- a/src/core/arm/nce/arm_nce.cpp +++ b/src/core/arm/nce/arm_nce.cpp @@ -190,44 +190,10 @@ bool ArmNce::IsPreciseAccessPage(u64 addr) const { void ArmNce::MarkPreciseAccessPage(u64 addr) { const std::scoped_lock lk{m_precise_pages_guard}; - const u64 page = AlignDownPage(addr); - if (auto it = m_precise_pages.find(page); it != m_precise_pages.end()) { - it->second = std::min(MaxPreciseAccessPageWeight, static_cast(it->second + 1)); - return; - } - - while (m_precise_pages.size() >= MaxPreciseAccessPages) { - DecayPreciseAccessPagesLocked(); - } - - m_precise_pages.emplace(page, 1); -} - -void ArmNce::MarkPreciseAccessFaultWindow(u64 addr) { - MarkPreciseAccessPage(addr); - - if (!IsNearPageBoundary(addr)) { - return; - } - - const u64 page_offset = addr & Memory::YUZU_PAGEMASK; - if (page_offset < SplitPageAccessWindow && addr >= Memory::YUZU_PAGESIZE) { - MarkPreciseAccessPage(addr - Memory::YUZU_PAGESIZE); - } - if (page_offset + SplitPageAccessWindow > Memory::YUZU_PAGESIZE) { - MarkPreciseAccessPage(addr + Memory::YUZU_PAGESIZE); - } -} - -void ArmNce::DecayPreciseAccessPagesLocked() { - for (auto it = m_precise_pages.begin(); it != m_precise_pages.end();) { - if (it->second > 1) { - --it->second; - ++it; - } else { - it = m_precise_pages.erase(it); - } + if (m_precise_pages.size() >= MaxPreciseAccessPages) { + m_precise_pages.clear(); } + m_precise_pages.insert(AlignDownPage(addr)); } void 
ArmNce::LockThread(Kernel::KThread* thread) { diff --git a/src/core/arm/nce/arm_nce.h b/src/core/arm/nce/arm_nce.h index 4c0ffb6517..32772d9694 100644 --- a/src/core/arm/nce/arm_nce.h +++ b/src/core/arm/nce/arm_nce.h @@ -7,7 +7,7 @@ #pragma once #include -#include +#include #include "core/arm/arm_interface.h" #include "core/arm/nce/guest_context.h" @@ -84,8 +84,6 @@ private: bool IsPreciseAccessPage(u64 addr) const; void MarkPreciseAccessPage(u64 addr); - void MarkPreciseAccessFaultWindow(u64 addr); - void DecayPreciseAccessPagesLocked(); public: Core::System& m_system; @@ -99,7 +97,7 @@ public: Kernel::KThread* m_running_thread{}; mutable std::mutex m_precise_pages_guard{}; - std::unordered_map m_precise_pages{}; + std::unordered_set m_precise_pages{}; // Stack for signal processing. std::unique_ptr m_stack{}; From b2b07abbc84bac7070f720d059cfd5bcd02c1e05 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Tue, 10 Mar 2026 03:10:37 -0400 Subject: [PATCH 05/13] Revert "[nce] Added "tainted" page fault handling inside dual channel" --- src/core/arm/nce/arm_nce.cpp | 14 -------------- src/core/arm/nce/arm_nce.h | 10 ---------- 2 files changed, 24 deletions(-) diff --git a/src/core/arm/nce/arm_nce.cpp b/src/core/arm/nce/arm_nce.cpp index 57f19e169d..4b1200887f 100644 --- a/src/core/arm/nce/arm_nce.cpp +++ b/src/core/arm/nce/arm_nce.cpp @@ -159,7 +159,6 @@ bool ArmNce::HandleGuestAlignmentFault(GuestContext* guest_ctx, void* raw_info, bool ArmNce::HandleGuestAccessFault(GuestContext* guest_ctx, void* raw_info, void* raw_context) { auto* info = static_cast(raw_info); - auto* parent = guest_ctx->parent; // Try to handle an invalid access. // TODO: handle accesses which split a page? 
@@ -183,19 +182,6 @@ void ArmNce::HandleHostAccessFault(int sig, void* raw_info, void* raw_context) { return g_orig_segv_action.sa_sigaction(sig, static_cast(raw_info), raw_context); } -bool ArmNce::IsPreciseAccessPage(u64 addr) const { - const std::scoped_lock lk{m_precise_pages_guard}; - return m_precise_pages.contains(AlignDownPage(addr)); -} - -void ArmNce::MarkPreciseAccessPage(u64 addr) { - const std::scoped_lock lk{m_precise_pages_guard}; - if (m_precise_pages.size() >= MaxPreciseAccessPages) { - m_precise_pages.clear(); - } - m_precise_pages.insert(AlignDownPage(addr)); -} - void ArmNce::LockThread(Kernel::KThread* thread) { auto* thread_params = &thread->GetNativeExecutionParameters(); LockThreadParameters(thread_params); diff --git a/src/core/arm/nce/arm_nce.h b/src/core/arm/nce/arm_nce.h index 32772d9694..98dffd57d6 100644 --- a/src/core/arm/nce/arm_nce.h +++ b/src/core/arm/nce/arm_nce.h @@ -1,13 +1,9 @@ -// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #pragma once #include -#include #include "core/arm/arm_interface.h" #include "core/arm/nce/guest_context.h" @@ -82,9 +78,6 @@ private: static void HandleHostAlignmentFault(int sig, void* info, void* raw_context); static void HandleHostAccessFault(int sig, void* info, void* raw_context); - bool IsPreciseAccessPage(u64 addr) const; - void MarkPreciseAccessPage(u64 addr); - public: Core::System& m_system; @@ -96,9 +89,6 @@ public: GuestContext m_guest_ctx{}; Kernel::KThread* m_running_thread{}; - mutable std::mutex m_precise_pages_guard{}; - std::unordered_set m_precise_pages{}; - // Stack for signal processing. 
std::unique_ptr m_stack{}; }; From 852b8e176f6461963304ea892d5ddd9d2133d658 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Tue, 10 Mar 2026 03:12:19 -0400 Subject: [PATCH 06/13] Revert "[nce] Added rasterizer memory handling by nce page faults + intercepted memory access in nce with cached rasterizer data" --- src/core/arm/arm_interface.h | 15 ---- src/core/arm/dynarmic/arm_dynarmic.h | 4 +- src/core/arm/nce/arm_nce.cpp | 35 -------- src/core/arm/nce/arm_nce.h | 1 - src/core/arm/nce/guest_context.h | 6 -- src/core/arm/nce/patcher.cpp | 124 -------------------------- src/core/arm/nce/patcher.h | 10 --- src/core/hle/kernel/physical_core.cpp | 8 +- 8 files changed, 2 insertions(+), 201 deletions(-) diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h index e6aebe002b..495963eefd 100644 --- a/src/core/arm/arm_interface.h +++ b/src/core/arm/arm_interface.h @@ -1,6 +1,3 @@ -// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - // SPDX-FileCopyrightText: 2014 Citra Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -34,7 +31,6 @@ using WatchpointArray = std::array args) const = 0; virtual void SetSvcArguments(std::span args) = 0; virtual u32 GetSvcNumber() const = 0; - virtual bool HandleCacheOperation(Kernel::KThread* thread) { - return false; - } void SetWatchpointArray(const WatchpointArray* watchpoints) { m_watchpoints = watchpoints; diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic.h index 79125bb2dc..46384f7e6d 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.h +++ b/src/core/arm/dynarmic/arm_dynarmic.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project @@ -11,7 +11,6 @@ namespace Core { constexpr Dynarmic::HaltReason StepThread = 
Dynarmic::HaltReason::Step; -constexpr Dynarmic::HaltReason CacheInvalidation = Dynarmic::HaltReason::CacheInvalidation; constexpr Dynarmic::HaltReason DataAbort = Dynarmic::HaltReason::MemoryAbort; constexpr Dynarmic::HaltReason BreakLoop = Dynarmic::HaltReason::UserDefined2; constexpr Dynarmic::HaltReason SupervisorCall = Dynarmic::HaltReason::UserDefined3; @@ -20,7 +19,6 @@ constexpr Dynarmic::HaltReason PrefetchAbort = Dynarmic::HaltReason::UserDefined constexpr HaltReason TranslateHaltReason(Dynarmic::HaltReason hr) { static_assert(u64(HaltReason::StepThread) == u64(StepThread)); - static_assert(u64(HaltReason::CacheInvalidation) == u64(CacheInvalidation)); static_assert(u64(HaltReason::DataAbort) == u64(DataAbort)); static_assert(u64(HaltReason::BreakLoop) == u64(BreakLoop)); static_assert(u64(HaltReason::SupervisorCall) == u64(SupervisorCall)); diff --git a/src/core/arm/nce/arm_nce.cpp b/src/core/arm/nce/arm_nce.cpp index 4b1200887f..bbff9f2829 100644 --- a/src/core/arm/nce/arm_nce.cpp +++ b/src/core/arm/nce/arm_nce.cpp @@ -269,41 +269,6 @@ void ArmNce::SetSvcArguments(std::span args) { } } -bool ArmNce::HandleCacheOperation(Kernel::KThread* thread) { - const auto op = static_cast(m_guest_ctx.cache_operation); - if (op == CacheOperationKind::None) { - return false; - } - - const u64 cache_line_start = m_guest_ctx.cache_operation_address & ~(CacheLineSize - 1); - auto& memory = thread->GetOwnerProcess()->GetMemory(); - - switch (op) { - case CacheOperationKind::DataCacheInvalidate: { - [[maybe_unused]] auto invalidate_result = - memory.InvalidateDataCache(cache_line_start, CacheLineSize); - break; - } - case CacheOperationKind::DataCacheStore: { - [[maybe_unused]] auto store_result = memory.StoreDataCache(cache_line_start, CacheLineSize); - break; - } - case CacheOperationKind::DataCacheFlush: { - [[maybe_unused]] auto flush_result = memory.FlushDataCache(cache_line_start, CacheLineSize); - break; - } - case CacheOperationKind::InstructionCacheInvalidate: - 
InvalidateCacheRange(cache_line_start, CacheLineSize); - break; - case CacheOperationKind::None: - break; - } - - m_guest_ctx.cache_operation = static_cast(CacheOperationKind::None); - m_guest_ctx.cache_operation_address = 0; - return true; -} - ArmNce::ArmNce(System& system, bool uses_wall_clock, std::size_t core_index) : ArmInterface{uses_wall_clock}, m_system{system}, m_core_index{core_index} { m_guest_ctx.system = &m_system; diff --git a/src/core/arm/nce/arm_nce.h b/src/core/arm/nce/arm_nce.h index 98dffd57d6..be9b304c4c 100644 --- a/src/core/arm/nce/arm_nce.h +++ b/src/core/arm/nce/arm_nce.h @@ -37,7 +37,6 @@ public: void GetSvcArguments(std::span args) const override; void SetSvcArguments(std::span args) override; u32 GetSvcNumber() const override; - bool HandleCacheOperation(Kernel::KThread* thread) override; void SignalInterrupt(Kernel::KThread* thread) override; void ClearInstructionCache() override; diff --git a/src/core/arm/nce/guest_context.h b/src/core/arm/nce/guest_context.h index 865e883f27..a7eadccce5 100644 --- a/src/core/arm/nce/guest_context.h +++ b/src/core/arm/nce/guest_context.h @@ -1,6 +1,3 @@ -// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -41,9 +38,6 @@ struct GuestContext { u32 svc{}; System* system{}; ArmNce* parent{}; - u32 cache_operation{}; - u32 cache_operation_reserved{}; - u64 cache_operation_address{}; }; // Verify assembly offsets. 
diff --git a/src/core/arm/nce/patcher.cpp b/src/core/arm/nce/patcher.cpp index d4c0023ed5..ea77166645 100644 --- a/src/core/arm/nce/patcher.cpp +++ b/src/core/arm/nce/patcher.cpp @@ -26,26 +26,6 @@ using NativeExecutionParameters = Kernel::KThread::NativeExecutionParameters; constexpr size_t MaxRelativeBranch = 128_MiB; constexpr u32 ModuleCodeIndex = 0x24 / sizeof(u32); -namespace { - -[[nodiscard]] std::optional DecodeCacheOperation(u32 inst) { - switch (inst & ~u32{0x1F}) { - case 0xD5087620: - return CacheOperationKind::DataCacheInvalidate; - case 0xD50B7A20: - case 0xD50B7B20: - return CacheOperationKind::DataCacheStore; - case 0xD50B7E20: - return CacheOperationKind::DataCacheFlush; - case 0xD50B7520: - return CacheOperationKind::InstructionCacheInvalidate; - default: - return std::nullopt; - } -} - -} // namespace - Patcher::Patcher() : c(m_patch_instructions), c_pre(m_patch_instructions_pre) { // The first word of the patch section is always a branch to the first instruction of the // module. 
@@ -180,20 +160,6 @@ bool Patcher::PatchText(std::span program_image, const Kernel::CodeSet continue; } - if (auto cache_op = DecodeCacheOperation(inst); cache_op.has_value()) { - bool pre_buffer = false; - auto ret = AddRelocations(pre_buffer); - const auto src_reg = oaknut::XReg{static_cast(inst & 0x1F)}; - if (pre_buffer) { - WriteCacheOperationTrampoline(ret, *cache_op, src_reg, c_pre, m_save_context_pre, - m_load_context_pre); - } else { - WriteCacheOperationTrampoline(ret, *cache_op, src_reg, c, m_save_context, - m_load_context); - } - continue; - } - if (auto exclusive = Exclusive{inst}; exclusive.Verify()) { curr_patch->m_exclusives.push_back(i); } @@ -576,96 +542,6 @@ void Patcher::WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id, oaknut this->WriteModulePc(module_dest); } -void Patcher::WriteCacheOperationTrampoline(ModuleDestLabel module_dest, - CacheOperationKind op_kind, oaknut::XReg src_reg, - oaknut::VectorCodeGenerator& cg, - oaknut::Label& save_ctx, - oaknut::Label& load_ctx) { - const bool is_pre = (&cg == &c_pre); - - this->LockContext(cg); - - cg.STR(X30, SP, PRE_INDEXED, -16); - cg.BL(save_ctx); - cg.LDR(X30, SP, POST_INDEXED, 16); - - oaknut::Label pc_after_cache_op; - cg.MRS(X1, oaknut::SystemReg::TPIDR_EL0); - cg.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context)); - cg.LDR(X2, pc_after_cache_op); - cg.STR(X2, X1, offsetof(GuestContext, pc)); - - cg.MOV(X2, static_cast(op_kind)); - cg.STR(W2, X1, offsetof(GuestContext, cache_operation)); - cg.STR(src_reg, X1, offsetof(GuestContext, cache_operation_address)); - - static_assert(std::is_same_v, u64>); - oaknut::Label retry; - cg.ADD(X2, X1, offsetof(GuestContext, esr_el1)); - cg.l(retry); - cg.LDAXR(X0, X2); - cg.STLXR(W3, XZR, X2); - cg.CBNZ(W3, retry); - cg.ORR(X0, X0, static_cast(HaltReason::CacheInvalidation)); - - cg.ADD(X1, X1, offsetof(GuestContext, host_ctx)); - - static_assert(offsetof(HostContext, host_sp) + 8 == offsetof(HostContext, host_tpidr_el0)); - 
cg.LDP(X2, X3, X1, offsetof(HostContext, host_sp)); - cg.MOV(SP, X2); - cg.MSR(oaknut::SystemReg::TPIDR_EL0, X3); - - static constexpr size_t HOST_REGS_OFF = offsetof(HostContext, host_saved_regs); - static constexpr size_t HOST_VREGS_OFF = offsetof(HostContext, host_saved_vregs); - cg.LDP(X19, X20, X1, HOST_REGS_OFF); - cg.LDP(X21, X22, X1, HOST_REGS_OFF + 2 * sizeof(u64)); - cg.LDP(X23, X24, X1, HOST_REGS_OFF + 4 * sizeof(u64)); - cg.LDP(X25, X26, X1, HOST_REGS_OFF + 6 * sizeof(u64)); - cg.LDP(X27, X28, X1, HOST_REGS_OFF + 8 * sizeof(u64)); - cg.LDP(X29, X30, X1, HOST_REGS_OFF + 10 * sizeof(u64)); - cg.LDP(Q8, Q9, X1, HOST_VREGS_OFF); - cg.LDP(Q10, Q11, X1, HOST_VREGS_OFF + 2 * sizeof(u128)); - cg.LDP(Q12, Q13, X1, HOST_VREGS_OFF + 4 * sizeof(u128)); - cg.LDP(Q14, Q15, X1, HOST_VREGS_OFF + 6 * sizeof(u128)); - cg.RET(); - - if (is_pre) { - curr_patch->m_trampolines_pre.push_back({cg.offset(), module_dest}); - } else { - curr_patch->m_trampolines.push_back({cg.offset(), module_dest}); - } - - cg.MRS(X2, oaknut::SystemReg::TPIDR_EL0); - cg.LDR(X2, X2, offsetof(NativeExecutionParameters, native_context)); - cg.ADD(X0, X2, offsetof(GuestContext, host_ctx)); - cg.STR(X30, X0, offsetof(HostContext, host_saved_regs) + 11 * sizeof(u64)); - - cg.STR(X30, SP, PRE_INDEXED, -16); - cg.BL(load_ctx); - cg.LDR(X30, SP, POST_INDEXED, 16); - - cg.STR(X1, SP, PRE_INDEXED, -16); - cg.MRS(X1, oaknut::SystemReg::TPIDR_EL0); - cg.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context)); - cg.LDR(X30, X1, offsetof(GuestContext, cpu_registers) + sizeof(u64) * 30); - cg.LDR(X1, SP, POST_INDEXED, 16); - - this->UnlockContext(cg); - - if (is_pre) { - this->BranchToModulePre(module_dest); - } else { - this->BranchToModule(module_dest); - } - - cg.l(pc_after_cache_op); - if (is_pre) { - this->WriteModulePcPre(module_dest); - } else { - this->WriteModulePc(module_dest); - } -} - void Patcher::WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg, oaknut::SystemReg 
src_reg, oaknut::VectorCodeGenerator& cg) { // Retrieve emulated TLS register from GuestContext. diff --git a/src/core/arm/nce/patcher.h b/src/core/arm/nce/patcher.h index 534e15119c..499c98c901 100644 --- a/src/core/arm/nce/patcher.h +++ b/src/core/arm/nce/patcher.h @@ -78,11 +78,6 @@ private: void LockContext(oaknut::VectorCodeGenerator& code); void UnlockContext(oaknut::VectorCodeGenerator& code); void WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id, oaknut::VectorCodeGenerator& code, oaknut::Label& save_ctx, oaknut::Label& load_ctx); - void WriteCacheOperationTrampoline(ModuleDestLabel module_dest, CacheOperationKind op_kind, - oaknut::XReg src_reg, - oaknut::VectorCodeGenerator& code, - oaknut::Label& save_ctx, - oaknut::Label& load_ctx); void WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg, oaknut::SystemReg src_reg, oaknut::VectorCodeGenerator& code); void WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg, oaknut::VectorCodeGenerator& code); void WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg, oaknut::VectorCodeGenerator& code); @@ -93,11 +88,6 @@ private: void LockContext() { LockContext(c); } void UnlockContext() { UnlockContext(c); } void WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id) { WriteSvcTrampoline(module_dest, svc_id, c, m_save_context, m_load_context); } - void WriteCacheOperationTrampoline(ModuleDestLabel module_dest, CacheOperationKind op_kind, - oaknut::XReg src_reg) { - WriteCacheOperationTrampoline(module_dest, op_kind, src_reg, c, m_save_context, - m_load_context); - } void WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg, oaknut::SystemReg src_reg) { WriteMrsHandler(module_dest, dest_reg, src_reg, c); } void WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg) { WriteMsrHandler(module_dest, src_reg, c); } void WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg) { WriteCntpctHandler(module_dest, dest_reg, c); 
} diff --git a/src/core/hle/kernel/physical_core.cpp b/src/core/hle/kernel/physical_core.cpp index 2078472e3d..77cdab76d7 100644 --- a/src/core/hle/kernel/physical_core.cpp +++ b/src/core/hle/kernel/physical_core.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project @@ -97,7 +97,6 @@ void PhysicalCore::RunThread(Kernel::KThread* thread) { } // Determine why we stopped. - const bool cache_invalidation = True(hr & Core::HaltReason::CacheInvalidation); const bool supervisor_call = True(hr & Core::HaltReason::SupervisorCall); const bool prefetch_abort = True(hr & Core::HaltReason::PrefetchAbort); const bool breakpoint = True(hr & Core::HaltReason::InstructionBreakpoint); @@ -152,11 +151,6 @@ void PhysicalCore::RunThread(Kernel::KThread* thread) { return; } - if (cache_invalidation) { - interface->HandleCacheOperation(thread); - continue; - } - // Handle external interrupt sources. 
if (interrupt || m_is_single_core) { return; From e715925d5279b5bc451bb398dcc4afaeaf296877 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Tue, 10 Mar 2026 03:13:21 -0400 Subject: [PATCH 07/13] Revert "[nce] Added rasterizer caching checks to memory management" --- src/core/memory.cpp | 13 ------------- src/core/memory.h | 4 +--- 2 files changed, 1 insertion(+), 16 deletions(-) diff --git a/src/core/memory.cpp b/src/core/memory.cpp index f7031d88b5..0ad360c3df 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -639,15 +639,6 @@ struct Memory::Impl { GetInteger(vaddr), []() {}, []() {}); } - [[nodiscard]] bool IsRasterizerCached(const Common::ProcessAddress vaddr) const { - const u64 addr = GetInteger(vaddr) & 0xffffffffffffULL; - if (!AddressSpaceContains(*current_page_table, addr, 1)) { - return false; - } - return current_page_table->entries[addr >> YUZU_PAGEBITS].ptr.Type() == - Common::PageType::RasterizerCachedMemory; - } - /// @brief Reads a particular data type out of memory at the given virtual address. /// @param vaddr The virtual address to read the data type from. /// @tparam T The data type to read out of memory. 
@@ -1045,10 +1036,6 @@ void Memory::RasterizerMarkRegionCached(Common::ProcessAddress vaddr, u64 size, impl->RasterizerMarkRegionCached(GetInteger(vaddr), size, cached); } -bool Memory::IsRasterizerCached(Common::ProcessAddress vaddr) const { - return impl->IsRasterizerCached(vaddr); -} - void Memory::MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug) { impl->MarkRegionDebug(GetInteger(vaddr), size, debug); } diff --git a/src/core/memory.h b/src/core/memory.h index dcf8ea9656..7167efbb84 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: 2014 Citra Emulator Project @@ -493,8 +493,6 @@ public: void SetGPUDirtyManagers(std::span managers); - [[nodiscard]] bool IsRasterizerCached(Common::ProcessAddress vaddr) const; - bool InvalidateNCE(Common::ProcessAddress vaddr, size_t size); bool InvalidateSeparateHeap(void* fault_address); From 74248bd35a3578300e7f9ba61b24fe9008877898 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Tue, 10 Mar 2026 03:20:05 -0400 Subject: [PATCH 08/13] Removing remnants of NCE changes to previous state --- src/core/arm/nce/interpreter_visitor.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/core/arm/nce/interpreter_visitor.cpp b/src/core/arm/nce/interpreter_visitor.cpp index b5f89510ca..be6fee8613 100644 --- a/src/core/arm/nce/interpreter_visitor.cpp +++ b/src/core/arm/nce/interpreter_visitor.cpp @@ -765,8 +765,8 @@ std::optional MatchAndExecuteOneInstruction(Core::Memory::Memory& memory, m fpsimd_context* fpsimd_context) { std::span regs(reinterpret_cast(context->regs), 31); std::span vregs(reinterpret_cast(fpsimd_context->vregs), 32); - u64 sp = context->sp; - const u64 pc = context->pc; + u64& sp = *reinterpret_cast(&context->sp); + const u64& pc = 
*reinterpret_cast(&context->pc); InterpreterVisitor visitor(memory, regs, vregs, sp, pc); u32 instruction = memory.Read32(pc); @@ -774,7 +774,6 @@ std::optional MatchAndExecuteOneInstruction(Core::Memory::Memory& memory, m auto decoder = Dynarmic::A64::Decode(instruction); was_executed = decoder.get().call(visitor, instruction); - context->sp = sp; return was_executed ? std::optional(pc + 4) : std::nullopt; } From 2dbca791f60c216ac99affd19e089f927514d4b4 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Tue, 10 Mar 2026 03:53:45 -0400 Subject: [PATCH 09/13] [vulkan] Adjusted synchronization handling in QueryCacheRuntime + ConditionalRendering setting bug with syncing --- .../renderer_vulkan/vk_query_cache.cpp | 77 ++++++++++++++++--- 1 file changed, 66 insertions(+), 11 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index e5f26263d5..56eb2906bf 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -203,6 +203,11 @@ public: } void SyncWrites() override { + if (!direct_sync_values.empty()) { + runtime.template SyncValues(direct_sync_values); + direct_sync_values.clear(); + } + if (sync_values_stash.empty()) { return; } @@ -223,8 +228,54 @@ public: const auto driver_id = device.GetDriverID(); if (driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || driver_id == VK_DRIVER_ID_ARM_PROPRIETARY || driver_id == VK_DRIVER_ID_MESA_TURNIP) { - pending_sync.clear(); + ApplyBanksWideOp( + pending_sync, + [](SamplesQueryBank* bank, size_t start, size_t amount) { bank->Sync(start, amount); }); + + direct_sync_values.clear(); + direct_sync_values.reserve(pending_sync.size()); + + bool has_multi_queries = accumulation_since_last_sync; + for (auto q : pending_sync) { + auto* query = GetQuery(q); + if (True(query->flags & VideoCommon::QueryFlagBits::IsRewritten)) { + continue; + } + if (True(query->flags & 
VideoCommon::QueryFlagBits::IsInvalidated)) { + continue; + } + + u64 total = 0; + ApplyBankOp(query, [&total](SamplesQueryBank* bank, size_t start, size_t amount) { + const auto& results = bank->GetResults(); + for (size_t i = 0; i < amount; i++) { + total += results[start + i]; + } + }); + + total += GetAmendValue(); + query->value = total; + query->flags |= VideoCommon::QueryFlagBits::IsHostSynced; + query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced; + direct_sync_values.emplace_back(VideoCommon::SyncValuesStruct{ + .address = query->guest_address, + .value = total, + .size = SamplesQueryBank::QUERY_SIZE, + }); + + has_multi_queries |= query->size_slots > 1; + } + + ReplicateCurrentQueryIfNeeded(); + std::function func([this] { amend_value = accumulation_value; }); + rasterizer->SyncOperation(std::move(func)); + AbandonCurrentQuery(); + num_slots_used = 0; + first_accumulation_checkpoint = (std::numeric_limits::max)(); + last_accumulation_checkpoint = 0; + accumulation_since_last_sync = has_multi_queries; sync_values_stash.clear(); + pending_sync.clear(); return; } sync_values_stash.clear(); @@ -570,6 +621,7 @@ private: std::array resolve_table{}; std::array intermediary_table{}; vk::Buffer accumulation_buffer; + std::vector direct_sync_values; std::deque> sync_values_stash; std::vector resolve_buffers; @@ -1423,13 +1475,6 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku return false; } - auto driver_id = impl->device.GetDriverID(); - const bool is_gpu_high = Settings::IsGPULevelHigh(); - - if ((!is_gpu_high && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) || driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || driver_id == VK_DRIVER_ID_ARM_PROPRIETARY || driver_id == VK_DRIVER_ID_MESA_TURNIP) { - return true; - } - for (size_t i = 0; i < 2; i++) { is_null[i] = !is_in_ac[i] && check_value(objects[i]->address); } @@ -1442,12 +1487,22 @@ bool 
QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku } } - if (!is_gpu_high) { - return true; + auto driver_id = impl->device.GetDriverID(); + const bool is_gpu_high = Settings::IsGPULevelHigh(); + const bool driver_blocks_pair_resolve = + ((!is_gpu_high && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) || + driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || + driver_id == VK_DRIVER_ID_ARM_PROPRIETARY || + driver_id == VK_DRIVER_ID_MESA_TURNIP); + + if (driver_blocks_pair_resolve || !is_gpu_high) { + EndHostConditionalRendering(); + return false; } if (!is_in_bc[0] && !is_in_bc[1]) { - return true; + EndHostConditionalRendering(); + return false; } HostConditionalRenderingCompareBCImpl(object_1.address, equal_check); return true; From da30efbc556e766d3f61d57816bd271d7a97351e Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Tue, 10 Mar 2026 04:14:07 -0400 Subject: [PATCH 10/13] [vulkan] Added primitive count calculation based on topology + patch vertices in PrimitivesSucceededStreamer --- .../renderer_vulkan/vk_query_cache.cpp | 75 ++++++++++++------- 1 file changed, 48 insertions(+), 27 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 56eb2906bf..2219a10fbd 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -1068,10 +1068,52 @@ public: u64 stride{}; DAddr dependant_address{}; Maxwell3D::Regs::PrimitiveTopology topology{Maxwell3D::Regs::PrimitiveTopology::Points}; + u32 patch_vertices{1}; size_t dependant_index{}; bool dependant_manage{}; }; +[[nodiscard]] constexpr u64 SaturatingSub(u64 value, u64 amount) { + return value > amount ? 
value - amount : 0; +} + +[[nodiscard]] constexpr u64 PrimitiveCountFromVertices( + Maxwell3D::Regs::PrimitiveTopology topology, u64 num_vertices, u32 patch_vertices) { + switch (topology) { + case Maxwell3D::Regs::PrimitiveTopology::Points: + return num_vertices; + case Maxwell3D::Regs::PrimitiveTopology::Lines: + return num_vertices / 2; + case Maxwell3D::Regs::PrimitiveTopology::LineLoop: + return num_vertices >= 2 ? num_vertices : 0; + case Maxwell3D::Regs::PrimitiveTopology::LineStrip: + return SaturatingSub(num_vertices, 1); + case Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency: + return num_vertices / 4; + case Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency: + return SaturatingSub(num_vertices, 3); + case Maxwell3D::Regs::PrimitiveTopology::Triangles: + return num_vertices / 3; + case Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency: + return num_vertices / 6; + case Maxwell3D::Regs::PrimitiveTopology::TriangleFan: + case Maxwell3D::Regs::PrimitiveTopology::TriangleStrip: + return SaturatingSub(num_vertices, 2); + case Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: + return num_vertices >= 6 ? (num_vertices - 4) / 2 : 0; + case Maxwell3D::Regs::PrimitiveTopology::Quads: + return num_vertices / 4; + case Maxwell3D::Regs::PrimitiveTopology::QuadStrip: + return num_vertices >= 4 ? (num_vertices / 2) - 1 : 0; + case Maxwell3D::Regs::PrimitiveTopology::Patches: + return patch_vertices != 0 ? num_vertices / patch_vertices : 0; + case Maxwell3D::Regs::PrimitiveTopology::Polygon: + return num_vertices != 0 ? 
1 : 0; + default: + return num_vertices; + } +} + class PrimitivesSucceededStreamer : public VideoCommon::SimpleStreamer { public: explicit PrimitivesSucceededStreamer(size_t id_, QueryCacheRuntime& runtime_, @@ -1100,7 +1142,10 @@ public: const size_t subreport = static_cast(*subreport_); auto dependant_address_opt = tfb_streamer.GetLastQueryStream(subreport); bool must_manage_dependance = false; - new_query->topology = tfb_streamer.GetOutputTopology(); + runtime.View3DRegs([new_query, this](Maxwell3D& maxwell3d) { + new_query->topology = tfb_streamer.GetOutputTopology(); + new_query->patch_vertices = std::max(maxwell3d.regs.patch_vertices, 1); + }); if (dependant_address_opt) { auto [dep_address, stride] = *dependant_address_opt; new_query->dependant_address = dep_address; @@ -1183,32 +1228,8 @@ public: num_vertices = static_cast(result) / safe_stride; } } - query->value = [&]() -> u64 { - switch (query->topology) { - case Maxwell3D::Regs::PrimitiveTopology::Points: - return num_vertices; - case Maxwell3D::Regs::PrimitiveTopology::Lines: - return num_vertices / 2; - case Maxwell3D::Regs::PrimitiveTopology::LineLoop: - return (num_vertices / 2) + 1; - case Maxwell3D::Regs::PrimitiveTopology::LineStrip: - return num_vertices - 1; - case Maxwell3D::Regs::PrimitiveTopology::Patches: - case Maxwell3D::Regs::PrimitiveTopology::Triangles: - case Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency: - return num_vertices / 3; - case Maxwell3D::Regs::PrimitiveTopology::TriangleFan: - case Maxwell3D::Regs::PrimitiveTopology::TriangleStrip: - case Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: - return num_vertices - 2; - case Maxwell3D::Regs::PrimitiveTopology::Quads: - return num_vertices / 4; - case Maxwell3D::Regs::PrimitiveTopology::Polygon: - return 1U; - default: - return num_vertices; - } - }(); + query->value = + PrimitiveCountFromVertices(query->topology, num_vertices, query->patch_vertices); } } From a134ad3fbd359ebaab8e72d4080eda96fed78497 Mon Sep 
17 00:00:00 2001 From: CamilleLaVey Date: Tue, 10 Mar 2026 04:58:11 -0400 Subject: [PATCH 11/13] [vulkan] Added Line loop + topology emulation accuracy increased by changing triangle assumption --- .../renderer_vulkan/maxwell_to_vk.cpp | 2 +- .../renderer_vulkan/vk_rasterizer.cpp | 229 ++++++++++++++++++ .../renderer_vulkan/vk_rasterizer.h | 6 + 3 files changed, 236 insertions(+), 1 deletion(-) diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 0538102e4a..8a77c8296c 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -323,7 +323,7 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const Device& device, case Maxwell::PrimitiveTopology::Lines: return VK_PRIMITIVE_TOPOLOGY_LINE_LIST; case Maxwell::PrimitiveTopology::LineLoop: - return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; case Maxwell::PrimitiveTopology::LineStrip: return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; case Maxwell::PrimitiveTopology::Triangles: diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 73d2d07c08..dde89f5574 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -6,6 +6,8 @@ #include #include +#include +#include #include #include @@ -24,6 +26,7 @@ #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/host1x/gpu_device_memory_manager.h" +#include "video_core/memory_manager.h" #include "video_core/renderer_vulkan/blit_image.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" @@ -61,6 +64,58 @@ struct DrawParams { bool is_indexed; }; +[[nodiscard]] bool IsLineLoop(Maxwell::PrimitiveTopology topology) { + return topology == Maxwell::PrimitiveTopology::LineLoop; +} + 
+[[nodiscard]] u32 PrimitiveRestartIndex(Maxwell::IndexFormat format) { + switch (format) { + case Maxwell::IndexFormat::UnsignedByte: + return std::numeric_limits::max(); + case Maxwell::IndexFormat::UnsignedShort: + return std::numeric_limits::max(); + case Maxwell::IndexFormat::UnsignedInt: + return std::numeric_limits::max(); + } + ASSERT(false); + return std::numeric_limits::max(); +} + +template +bool ReadGuestObject(Tegra::MemoryManager* gpu_memory, GPUVAddr address, T& value) { + if (gpu_memory == nullptr) { + return false; + } + gpu_memory->ReadBlockUnsafe(address, &value, sizeof(T)); + return true; +} + +bool ReadGuestIndex(Tegra::MemoryManager* gpu_memory, GPUVAddr address, Maxwell::IndexFormat format, + u32& value) { + switch (format) { + case Maxwell::IndexFormat::UnsignedByte: { + u8 result{}; + if (!ReadGuestObject(gpu_memory, address, result)) { + return false; + } + value = result; + return true; + } + case Maxwell::IndexFormat::UnsignedShort: { + u16 result{}; + if (!ReadGuestObject(gpu_memory, address, result)) { + return false; + } + value = result; + return true; + } + case Maxwell::IndexFormat::UnsignedInt: + return ReadGuestObject(gpu_memory, address, value); + } + ASSERT(false); + return false; +} + VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index, float scale) { const auto& src = regs.viewport_transform[index]; const auto conv = [scale](float value) { @@ -343,6 +398,21 @@ void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) { GPU::Logging::GPULogger::GetInstance().LogVulkanCall( is_indexed ? 
"vkCmdDrawIndexed" : "vkCmdDraw", params, VK_SUCCESS); } + + if (IsLineLoop(draw_state.topology) && draw_params.num_vertices >= 2) { + if (maxwell3d->regs.transform_feedback_enabled != 0) { + query_cache.CounterEnable(VideoCommon::QueryType::StreamingByteCount, false); + } + scheduler.Record([](vk::CommandBuffer cmdbuf) { + cmdbuf.SetPrimitiveTopologyEXT(VK_PRIMITIVE_TOPOLOGY_LINE_LIST); + }); + DrawLineLoopClosure(draw_state, draw_params.base_instance, draw_params.num_instances, + static_cast(draw_params.base_vertex), + draw_params.num_vertices, draw_params.is_indexed); + scheduler.Record([](vk::CommandBuffer cmdbuf) { + cmdbuf.SetPrimitiveTopologyEXT(VK_PRIMITIVE_TOPOLOGY_LINE_STRIP); + }); + } }); } @@ -350,6 +420,7 @@ void RasterizerVulkan::DrawIndirect() { const auto& params = maxwell3d->draw_manager->GetIndirectParams(); buffer_cache.SetDrawIndirect(¶ms); PrepareDraw(params.is_indexed, [this, ¶ms] { + const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); const auto indirect_buffer = buffer_cache.GetDrawIndirectBuffer(); const auto& buffer = indirect_buffer.first; const auto& offset = indirect_buffer.second; @@ -385,6 +456,9 @@ void RasterizerVulkan::DrawIndirect() { static_cast(params.stride)); } }); + if (IsLineLoop(draw_state.topology)) { + DrawIndirectLineLoopClosures(draw_state, params); + } return; } scheduler.Record([buffer_obj = buffer->Handle(), offset, params](vk::CommandBuffer cmdbuf) { @@ -407,10 +481,165 @@ void RasterizerVulkan::DrawIndirect() { params.is_indexed ? 
"vkCmdDrawIndexedIndirect" : "vkCmdDrawIndirect", log_params, VK_SUCCESS); } + + if (IsLineLoop(draw_state.topology)) { + DrawIndirectLineLoopClosures(draw_state, params); + } }); buffer_cache.SetDrawIndirect(nullptr); } +bool RasterizerVulkan::DrawLineLoopClosure(const MaxwellDrawState& draw_state, u32 base_instance, + u32 num_instances, s32 base_vertex, + u32 num_vertices, bool is_indexed) { + if (!IsLineLoop(draw_state.topology) || num_instances == 0 || num_vertices < 2) { + return false; + } + + std::array closure_indices{}; + if (!is_indexed) { + closure_indices = {num_vertices - 1, 0}; + } else if (!draw_state.inline_index_draw_indexes.empty()) { + const size_t last_offset = (static_cast(num_vertices) - 1) * sizeof(u32); + if (draw_state.inline_index_draw_indexes.size() < last_offset + sizeof(u32)) { + return false; + } + std::memcpy(&closure_indices[0], draw_state.inline_index_draw_indexes.data() + last_offset, + sizeof(u32)); + std::memcpy(&closure_indices[1], draw_state.inline_index_draw_indexes.data(), + sizeof(u32)); + } else { + const auto index_format = draw_state.index_buffer.format; + const size_t index_size = draw_state.index_buffer.FormatSizeInBytes(); + const GPUVAddr first_address = + draw_state.index_buffer.StartAddress() + + static_cast(draw_state.index_buffer.first) * index_size; + const GPUVAddr last_address = + first_address + static_cast(num_vertices - 1) * index_size; + if (!ReadGuestIndex(gpu_memory, last_address, index_format, closure_indices[0]) || + !ReadGuestIndex(gpu_memory, first_address, index_format, closure_indices[1])) { + return false; + } + if (maxwell3d->regs.primitive_restart.enabled != 0) { + const u32 restart_index = PrimitiveRestartIndex(index_format); + if (closure_indices[0] == restart_index || closure_indices[1] == restart_index) { + return false; + } + } + } + + const auto upload = staging_pool.Request(sizeof(closure_indices), MemoryUsage::Upload); + std::memcpy(upload.mapped_span.data(), closure_indices.data(), 
sizeof(closure_indices)); + + scheduler.Record([buffer = upload.buffer, offset = upload.offset](vk::CommandBuffer cmdbuf) { + cmdbuf.BindIndexBuffer(buffer, offset, VK_INDEX_TYPE_UINT32); + }); + scheduler.Record([base_instance, num_instances, base_vertex](vk::CommandBuffer cmdbuf) { + cmdbuf.DrawIndexed(2, num_instances, 0, base_vertex, base_instance); + }); + return true; +} + +void RasterizerVulkan::DrawIndirectLineLoopClosures( + const MaxwellDrawState& draw_state, const Tegra::Engines::DrawManager::IndirectParams& params) { + if (!IsLineLoop(draw_state.topology) || params.is_byte_count) { + return; + } + + u32 draw_count = static_cast(params.max_draw_counts); + if (params.include_count) { + gpu_memory->ReadBlockUnsafe(params.count_start_address, &draw_count, sizeof(draw_count)); + draw_count = std::min(draw_count, static_cast(params.max_draw_counts)); + } + if (draw_count == 0) { + return; + } + + bool emitted_closure = false; + if (params.is_indexed) { + const u32 command_stride = + params.stride != 0 ? 
static_cast(params.stride) : sizeof(VkDrawIndexedIndirectCommand); + for (u32 i = 0; i < draw_count; ++i) { + VkDrawIndexedIndirectCommand command{}; + gpu_memory->ReadBlockUnsafe(params.indirect_start_address + + static_cast(i) * command_stride, + &command, sizeof(command)); + if (command.indexCount < 2 || command.instanceCount == 0) { + continue; + } + + std::array closure_indices{}; + const auto index_format = draw_state.index_buffer.format; + const size_t index_size = draw_state.index_buffer.FormatSizeInBytes(); + const GPUVAddr first_address = draw_state.index_buffer.StartAddress() + + static_cast(command.firstIndex) * index_size; + const GPUVAddr last_address = + first_address + static_cast(command.indexCount - 1) * index_size; + if (!ReadGuestIndex(gpu_memory, last_address, index_format, closure_indices[0]) || + !ReadGuestIndex(gpu_memory, first_address, index_format, closure_indices[1])) { + continue; + } + if (maxwell3d->regs.primitive_restart.enabled != 0) { + const u32 restart_index = PrimitiveRestartIndex(index_format); + if (closure_indices[0] == restart_index || closure_indices[1] == restart_index) { + continue; + } + } + + if (!emitted_closure) { + if (maxwell3d->regs.transform_feedback_enabled != 0) { + query_cache.CounterEnable(VideoCommon::QueryType::StreamingByteCount, false); + } + scheduler.Record([](vk::CommandBuffer cmdbuf) { + cmdbuf.SetPrimitiveTopologyEXT(VK_PRIMITIVE_TOPOLOGY_LINE_LIST); + }); + emitted_closure = true; + } + + const auto upload = staging_pool.Request(sizeof(closure_indices), MemoryUsage::Upload); + std::memcpy(upload.mapped_span.data(), closure_indices.data(), sizeof(closure_indices)); + scheduler.Record( + [buffer = upload.buffer, offset = upload.offset](vk::CommandBuffer cmdbuf) { + cmdbuf.BindIndexBuffer(buffer, offset, VK_INDEX_TYPE_UINT32); + }); + scheduler.Record([command](vk::CommandBuffer cmdbuf) { + cmdbuf.DrawIndexed(2, command.instanceCount, 0, command.vertexOffset, + command.firstInstance); + }); + } + } else 
{ + const u32 command_stride = + params.stride != 0 ? static_cast(params.stride) : sizeof(VkDrawIndirectCommand); + for (u32 i = 0; i < draw_count; ++i) { + VkDrawIndirectCommand command{}; + gpu_memory->ReadBlockUnsafe(params.indirect_start_address + + static_cast(i) * command_stride, + &command, sizeof(command)); + if (command.vertexCount < 2 || command.instanceCount == 0) { + continue; + } + if (!emitted_closure) { + if (maxwell3d->regs.transform_feedback_enabled != 0) { + query_cache.CounterEnable(VideoCommon::QueryType::StreamingByteCount, false); + } + scheduler.Record([](vk::CommandBuffer cmdbuf) { + cmdbuf.SetPrimitiveTopologyEXT(VK_PRIMITIVE_TOPOLOGY_LINE_LIST); + }); + emitted_closure = true; + } + DrawLineLoopClosure(draw_state, command.firstInstance, command.instanceCount, + static_cast(command.firstVertex), command.vertexCount, + false); + } + } + + if (emitted_closure) { + scheduler.Record([](vk::CommandBuffer cmdbuf) { + cmdbuf.SetPrimitiveTopologyEXT(VK_PRIMITIVE_TOPOLOGY_LINE_STRIP); + }); + } +} + void RasterizerVulkan::DrawTexture() { SCOPE_EXIT { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index bdea6510c0..7de2039bb9 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -155,6 +155,12 @@ private: template void PrepareDraw(bool is_indexed, Func&&); + bool DrawLineLoopClosure(const MaxwellDrawState& draw_state, u32 base_instance, + u32 num_instances, s32 base_vertex, u32 num_vertices, + bool is_indexed); + void DrawIndirectLineLoopClosures(const MaxwellDrawState& draw_state, + const Tegra::Engines::DrawManager::IndirectParams& params); + void FlushWork(); void UpdateDynamicStates(); From 1240268048ee5ae4737d51ecc78dc00a4d85b6dd Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Tue, 10 Mar 2026 05:03:45 -0400 Subject: [PATCH 12/13] [vulkan] Fix primitive count calculation for Quads and QuadStrip in vk_query_cache --- 
src/video_core/renderer_vulkan/vk_query_cache.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 2219a10fbd..2909120025 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -1102,9 +1102,9 @@ public: case Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: return num_vertices >= 6 ? (num_vertices - 4) / 2 : 0; case Maxwell3D::Regs::PrimitiveTopology::Quads: - return num_vertices / 4; + return num_vertices / 6; case Maxwell3D::Regs::PrimitiveTopology::QuadStrip: - return num_vertices >= 4 ? (num_vertices / 2) - 1 : 0; + return num_vertices / 6; case Maxwell3D::Regs::PrimitiveTopology::Patches: return patch_vertices != 0 ? num_vertices / patch_vertices : 0; case Maxwell3D::Regs::PrimitiveTopology::Polygon: From 1845d5b222433201cd8e2c006eafc390ac129b27 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Tue, 10 Mar 2026 05:25:45 -0400 Subject: [PATCH 13/13] Fix build --- src/video_core/renderer_vulkan/vk_rasterizer.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 7de2039bb9..2f9175b533 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -155,10 +155,11 @@ private: template void PrepareDraw(bool is_indexed, Func&&); - bool DrawLineLoopClosure(const MaxwellDrawState& draw_state, u32 base_instance, + bool DrawLineLoopClosure(const Tegra::Engines::DrawManager::State& draw_state, + u32 base_instance, u32 num_instances, s32 base_vertex, u32 num_vertices, bool is_indexed); - void DrawIndirectLineLoopClosures(const MaxwellDrawState& draw_state, + void DrawIndirectLineLoopClosures(const Tegra::Engines::DrawManager::State& draw_state, const Tegra::Engines::DrawManager::IndirectParams& 
params); void FlushWork();