Compare commits

...

13 commits

Author SHA1 Message Date
CamilleLaVey
1845d5b222 Fix build 2026-03-10 05:25:45 -04:00
CamilleLaVey
1240268048 [vulkan] Fix primitive count calculation for Quads and QuadStrip in vk_query_cache 2026-03-10 05:03:45 -04:00
CamilleLaVey
a134ad3fbd [vulkan] Added Line loop + topology emulation accuracy increased by changing triangle assumption 2026-03-10 04:58:11 -04:00
CamilleLaVey
da30efbc55 [vulkan] Added primitive count calculation based on topology + patch vertices in PrimitivesSucceededStreamer 2026-03-10 04:14:07 -04:00
CamilleLaVey
2dbca791f6 [vulkan] Adjusted synchronization handling in QueryCacheRuntime + ConditionalRendering setting bug with syncing 2026-03-10 03:53:45 -04:00
CamilleLaVey
74248bd35a Removing remnants of NCE changes to previous state 2026-03-10 03:20:05 -04:00
CamilleLaVey
e715925d52 Revert "[nce] Added rasterizer caching checks to memory management" 2026-03-10 03:13:21 -04:00
CamilleLaVey
852b8e176f Revert "[nce] Added rasterizer memory handling by nce page faults + intercepted memory access in nce with cached rasterizer data" 2026-03-10 03:12:19 -04:00
CamilleLaVey
b2b07abbc8 Revert "[nce] Added "tainted" page fault handling inside dual channel" 2026-03-10 03:10:37 -04:00
CamilleLaVey
ce2f2187bd Revert "[nce] Adjusted precise access fault window handling + decay mechanism" 2026-03-10 03:09:30 -04:00
CamilleLaVey
8c077fc4cd Revert "[nce] Added case for access fault handling to manage page edge cases" 2026-03-10 03:01:02 -04:00
CamilleLaVey
b88ca5b635 Revert "[nce] Added dual channel handling for guest access faults" 2026-03-10 02:58:57 -04:00
CamilleLaVey
4755ec7a59 [vulkan] simplify numeric type determination 2026-03-10 02:46:35 -04:00
16 changed files with 367 additions and 391 deletions

View file

@ -1,6 +1,3 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: 2014 Citra Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@ -34,7 +31,6 @@ using WatchpointArray = std::array<Kernel::DebugWatchpoint, Core::Hardware::NUM_
// NOTE: these values match the HaltReason enum in Dynarmic
enum class HaltReason : u64 {
StepThread = 0x00000001,
CacheInvalidation = 0x00000002,
DataAbort = 0x00000004,
BreakLoop = 0x02000000,
SupervisorCall = 0x04000000,
@ -43,14 +39,6 @@ enum class HaltReason : u64 {
};
DECLARE_ENUM_FLAG_OPERATORS(HaltReason);
enum class CacheOperationKind : u32 {
None,
DataCacheInvalidate,
DataCacheStore,
DataCacheFlush,
InstructionCacheInvalidate,
};
enum class Architecture {
AArch64,
AArch32,
@ -97,9 +85,6 @@ public:
virtual void GetSvcArguments(std::span<uint64_t, 8> args) const = 0;
virtual void SetSvcArguments(std::span<const uint64_t, 8> args) = 0;
virtual u32 GetSvcNumber() const = 0;
virtual bool HandleCacheOperation(Kernel::KThread* thread) {
return false;
}
void SetWatchpointArray(const WatchpointArray* watchpoints) {
m_watchpoints = watchpoints;

View file

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
@ -11,7 +11,6 @@
namespace Core {
constexpr Dynarmic::HaltReason StepThread = Dynarmic::HaltReason::Step;
constexpr Dynarmic::HaltReason CacheInvalidation = Dynarmic::HaltReason::CacheInvalidation;
constexpr Dynarmic::HaltReason DataAbort = Dynarmic::HaltReason::MemoryAbort;
constexpr Dynarmic::HaltReason BreakLoop = Dynarmic::HaltReason::UserDefined2;
constexpr Dynarmic::HaltReason SupervisorCall = Dynarmic::HaltReason::UserDefined3;
@ -20,7 +19,6 @@ constexpr Dynarmic::HaltReason PrefetchAbort = Dynarmic::HaltReason::UserDefined
constexpr HaltReason TranslateHaltReason(Dynarmic::HaltReason hr) {
static_assert(u64(HaltReason::StepThread) == u64(StepThread));
static_assert(u64(HaltReason::CacheInvalidation) == u64(CacheInvalidation));
static_assert(u64(HaltReason::DataAbort) == u64(DataAbort));
static_assert(u64(HaltReason::BreakLoop) == u64(BreakLoop));
static_assert(u64(HaltReason::SupervisorCall) == u64(SupervisorCall));

View file

@ -43,39 +43,6 @@ fpsimd_context* GetFloatingPointState(mcontext_t& host_ctx) {
using namespace Common::Literals;
constexpr u32 StackSize = 128_KiB;
constexpr u64 CacheLineSize = 64;
constexpr u64 SplitPageAccessWindow = 64;
constexpr size_t MaxPreciseAccessPages = 256;
constexpr u8 MaxPreciseAccessPageWeight = 4;
[[nodiscard]] constexpr u64 AlignDownPage(u64 addr) {
return addr & ~u64{Memory::YUZU_PAGEMASK};
}
[[nodiscard]] bool IsNearPageBoundary(u64 addr) {
const u64 page_offset = addr & Memory::YUZU_PAGEMASK;
return page_offset < SplitPageAccessWindow ||
page_offset + SplitPageAccessWindow > Memory::YUZU_PAGESIZE;
}
[[nodiscard]] bool IsNearTlsWindow(u64 tls_base, u64 fault_addr) {
if (tls_base == 0) {
return false;
}
const u64 tls_first_page = AlignDownPage(tls_base);
const u64 tls_last_byte = tls_base + Kernel::Svc::ThreadLocalRegionSize - 1;
const u64 tls_last_page = AlignDownPage(tls_last_byte);
const u64 fault_page = AlignDownPage(fault_addr);
return fault_page + Memory::YUZU_PAGESIZE >= tls_first_page &&
fault_page <= tls_last_page + Memory::YUZU_PAGESIZE;
}
[[nodiscard]] bool ShouldUsePreciseAccessChannel(const GuestContext* guest_ctx, u64 fault_addr) {
return IsNearPageBoundary(fault_addr) || IsNearTlsWindow(guest_ctx->tpidrro_el0, fault_addr) ||
IsNearTlsWindow(guest_ctx->tpidr_el0, fault_addr);
}
} // namespace
@ -191,48 +158,18 @@ bool ArmNce::HandleGuestAlignmentFault(GuestContext* guest_ctx, void* raw_info,
}
bool ArmNce::HandleGuestAccessFault(GuestContext* guest_ctx, void* raw_info, void* raw_context) {
auto& host_ctx = static_cast<ucontext_t*>(raw_context)->uc_mcontext;
auto* fpctx = GetFloatingPointState(host_ctx);
auto* info = static_cast<siginfo_t*>(raw_info);
auto* parent = guest_ctx->parent;
const u64 fault_addr = reinterpret_cast<u64>(info->si_addr);
const Common::ProcessAddress addr = fault_addr & ~Memory::YUZU_PAGEMASK;
const u64 page_offset = fault_addr & Memory::YUZU_PAGEMASK;
auto& memory = parent->m_running_thread->GetOwnerProcess()->GetMemory();
const bool rasterizer_cached = memory.IsRasterizerCached(addr);
const bool prefer_precise_channel = ShouldUsePreciseAccessChannel(guest_ctx, fault_addr) ||
parent->IsPreciseAccessPage(fault_addr) ||
rasterizer_cached;
if (prefer_precise_channel) {
if (auto next_pc = MatchAndExecuteOneInstruction(memory, &host_ctx, fpctx); next_pc) {
parent->MarkPreciseAccessFaultWindow(fault_addr);
host_ctx.pc = *next_pc;
return true;
}
}
bool handled = memory.InvalidateNCE(addr, Memory::YUZU_PAGESIZE);
if (page_offset < SplitPageAccessWindow && addr >= Memory::YUZU_PAGESIZE) {
handled |= memory.InvalidateNCE(addr - Memory::YUZU_PAGESIZE, Memory::YUZU_PAGESIZE);
}
if (page_offset + SplitPageAccessWindow > Memory::YUZU_PAGESIZE) {
handled |= memory.InvalidateNCE(addr + Memory::YUZU_PAGESIZE, Memory::YUZU_PAGESIZE);
}
if (handled) {
// Try to handle an invalid access.
// TODO: handle accesses which split a page?
const Common::ProcessAddress addr =
(reinterpret_cast<u64>(info->si_addr) & ~Memory::YUZU_PAGEMASK);
auto& memory = guest_ctx->parent->m_running_thread->GetOwnerProcess()->GetMemory();
if (memory.InvalidateNCE(addr, Memory::YUZU_PAGESIZE)) {
// We handled the access successfully and are returning to guest code.
return true;
}
if (auto next_pc = MatchAndExecuteOneInstruction(memory, &host_ctx, fpctx); next_pc) {
parent->MarkPreciseAccessFaultWindow(fault_addr);
host_ctx.pc = *next_pc;
return true;
}
// We couldn't handle the access.
return HandleFailedGuestFault(guest_ctx, raw_info, raw_context);
}
@ -245,53 +182,6 @@ void ArmNce::HandleHostAccessFault(int sig, void* raw_info, void* raw_context) {
return g_orig_segv_action.sa_sigaction(sig, static_cast<siginfo_t*>(raw_info), raw_context);
}
bool ArmNce::IsPreciseAccessPage(u64 addr) const {
const std::scoped_lock lk{m_precise_pages_guard};
return m_precise_pages.contains(AlignDownPage(addr));
}
void ArmNce::MarkPreciseAccessPage(u64 addr) {
const std::scoped_lock lk{m_precise_pages_guard};
const u64 page = AlignDownPage(addr);
if (auto it = m_precise_pages.find(page); it != m_precise_pages.end()) {
it->second = std::min<u8>(MaxPreciseAccessPageWeight, static_cast<u8>(it->second + 1));
return;
}
while (m_precise_pages.size() >= MaxPreciseAccessPages) {
DecayPreciseAccessPagesLocked();
}
m_precise_pages.emplace(page, 1);
}
void ArmNce::MarkPreciseAccessFaultWindow(u64 addr) {
MarkPreciseAccessPage(addr);
if (!IsNearPageBoundary(addr)) {
return;
}
const u64 page_offset = addr & Memory::YUZU_PAGEMASK;
if (page_offset < SplitPageAccessWindow && addr >= Memory::YUZU_PAGESIZE) {
MarkPreciseAccessPage(addr - Memory::YUZU_PAGESIZE);
}
if (page_offset + SplitPageAccessWindow > Memory::YUZU_PAGESIZE) {
MarkPreciseAccessPage(addr + Memory::YUZU_PAGESIZE);
}
}
void ArmNce::DecayPreciseAccessPagesLocked() {
for (auto it = m_precise_pages.begin(); it != m_precise_pages.end();) {
if (it->second > 1) {
--it->second;
++it;
} else {
it = m_precise_pages.erase(it);
}
}
}
void ArmNce::LockThread(Kernel::KThread* thread) {
auto* thread_params = &thread->GetNativeExecutionParameters();
LockThreadParameters(thread_params);
@ -379,41 +269,6 @@ void ArmNce::SetSvcArguments(std::span<const uint64_t, 8> args) {
}
}
bool ArmNce::HandleCacheOperation(Kernel::KThread* thread) {
const auto op = static_cast<CacheOperationKind>(m_guest_ctx.cache_operation);
if (op == CacheOperationKind::None) {
return false;
}
const u64 cache_line_start = m_guest_ctx.cache_operation_address & ~(CacheLineSize - 1);
auto& memory = thread->GetOwnerProcess()->GetMemory();
switch (op) {
case CacheOperationKind::DataCacheInvalidate: {
[[maybe_unused]] auto invalidate_result =
memory.InvalidateDataCache(cache_line_start, CacheLineSize);
break;
}
case CacheOperationKind::DataCacheStore: {
[[maybe_unused]] auto store_result = memory.StoreDataCache(cache_line_start, CacheLineSize);
break;
}
case CacheOperationKind::DataCacheFlush: {
[[maybe_unused]] auto flush_result = memory.FlushDataCache(cache_line_start, CacheLineSize);
break;
}
case CacheOperationKind::InstructionCacheInvalidate:
InvalidateCacheRange(cache_line_start, CacheLineSize);
break;
case CacheOperationKind::None:
break;
}
m_guest_ctx.cache_operation = static_cast<u32>(CacheOperationKind::None);
m_guest_ctx.cache_operation_address = 0;
return true;
}
ArmNce::ArmNce(System& system, bool uses_wall_clock, std::size_t core_index)
: ArmInterface{uses_wall_clock}, m_system{system}, m_core_index{core_index} {
m_guest_ctx.system = &m_system;

View file

@ -1,13 +1,9 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <mutex>
#include <unordered_map>
#include "core/arm/arm_interface.h"
#include "core/arm/nce/guest_context.h"
@ -41,7 +37,6 @@ public:
void GetSvcArguments(std::span<uint64_t, 8> args) const override;
void SetSvcArguments(std::span<const uint64_t, 8> args) override;
u32 GetSvcNumber() const override;
bool HandleCacheOperation(Kernel::KThread* thread) override;
void SignalInterrupt(Kernel::KThread* thread) override;
void ClearInstructionCache() override;
@ -82,11 +77,6 @@ private:
static void HandleHostAlignmentFault(int sig, void* info, void* raw_context);
static void HandleHostAccessFault(int sig, void* info, void* raw_context);
bool IsPreciseAccessPage(u64 addr) const;
void MarkPreciseAccessPage(u64 addr);
void MarkPreciseAccessFaultWindow(u64 addr);
void DecayPreciseAccessPagesLocked();
public:
Core::System& m_system;
@ -98,9 +88,6 @@ public:
GuestContext m_guest_ctx{};
Kernel::KThread* m_running_thread{};
mutable std::mutex m_precise_pages_guard{};
std::unordered_map<u64, u8> m_precise_pages{};
// Stack for signal processing.
std::unique_ptr<u8[]> m_stack{};
};

View file

@ -1,6 +1,3 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@ -41,9 +38,6 @@ struct GuestContext {
u32 svc{};
System* system{};
ArmNce* parent{};
u32 cache_operation{};
u32 cache_operation_reserved{};
u64 cache_operation_address{};
};
// Verify assembly offsets.

View file

@ -765,8 +765,8 @@ std::optional<u64> MatchAndExecuteOneInstruction(Core::Memory::Memory& memory, m
fpsimd_context* fpsimd_context) {
std::span<u64, 31> regs(reinterpret_cast<u64*>(context->regs), 31);
std::span<u128, 32> vregs(reinterpret_cast<u128*>(fpsimd_context->vregs), 32);
u64 sp = context->sp;
const u64 pc = context->pc;
u64& sp = *reinterpret_cast<u64*>(&context->sp);
const u64& pc = *reinterpret_cast<u64*>(&context->pc);
InterpreterVisitor visitor(memory, regs, vregs, sp, pc);
u32 instruction = memory.Read32(pc);
@ -774,7 +774,6 @@ std::optional<u64> MatchAndExecuteOneInstruction(Core::Memory::Memory& memory, m
auto decoder = Dynarmic::A64::Decode<VisitorBase>(instruction);
was_executed = decoder.get().call(visitor, instruction);
context->sp = sp;
return was_executed ? std::optional<u64>(pc + 4) : std::nullopt;
}

View file

@ -26,26 +26,6 @@ using NativeExecutionParameters = Kernel::KThread::NativeExecutionParameters;
constexpr size_t MaxRelativeBranch = 128_MiB;
constexpr u32 ModuleCodeIndex = 0x24 / sizeof(u32);
namespace {
[[nodiscard]] std::optional<CacheOperationKind> DecodeCacheOperation(u32 inst) {
switch (inst & ~u32{0x1F}) {
case 0xD5087620:
return CacheOperationKind::DataCacheInvalidate;
case 0xD50B7A20:
case 0xD50B7B20:
return CacheOperationKind::DataCacheStore;
case 0xD50B7E20:
return CacheOperationKind::DataCacheFlush;
case 0xD50B7520:
return CacheOperationKind::InstructionCacheInvalidate;
default:
return std::nullopt;
}
}
} // namespace
Patcher::Patcher() : c(m_patch_instructions), c_pre(m_patch_instructions_pre) {
// The first word of the patch section is always a branch to the first instruction of the
// module.
@ -180,20 +160,6 @@ bool Patcher::PatchText(std::span<const u8> program_image, const Kernel::CodeSet
continue;
}
if (auto cache_op = DecodeCacheOperation(inst); cache_op.has_value()) {
bool pre_buffer = false;
auto ret = AddRelocations(pre_buffer);
const auto src_reg = oaknut::XReg{static_cast<int>(inst & 0x1F)};
if (pre_buffer) {
WriteCacheOperationTrampoline(ret, *cache_op, src_reg, c_pre, m_save_context_pre,
m_load_context_pre);
} else {
WriteCacheOperationTrampoline(ret, *cache_op, src_reg, c, m_save_context,
m_load_context);
}
continue;
}
if (auto exclusive = Exclusive{inst}; exclusive.Verify()) {
curr_patch->m_exclusives.push_back(i);
}
@ -576,96 +542,6 @@ void Patcher::WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id, oaknut
this->WriteModulePc(module_dest);
}
void Patcher::WriteCacheOperationTrampoline(ModuleDestLabel module_dest,
CacheOperationKind op_kind, oaknut::XReg src_reg,
oaknut::VectorCodeGenerator& cg,
oaknut::Label& save_ctx,
oaknut::Label& load_ctx) {
const bool is_pre = (&cg == &c_pre);
this->LockContext(cg);
cg.STR(X30, SP, PRE_INDEXED, -16);
cg.BL(save_ctx);
cg.LDR(X30, SP, POST_INDEXED, 16);
oaknut::Label pc_after_cache_op;
cg.MRS(X1, oaknut::SystemReg::TPIDR_EL0);
cg.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context));
cg.LDR(X2, pc_after_cache_op);
cg.STR(X2, X1, offsetof(GuestContext, pc));
cg.MOV(X2, static_cast<u32>(op_kind));
cg.STR(W2, X1, offsetof(GuestContext, cache_operation));
cg.STR(src_reg, X1, offsetof(GuestContext, cache_operation_address));
static_assert(std::is_same_v<std::underlying_type_t<HaltReason>, u64>);
oaknut::Label retry;
cg.ADD(X2, X1, offsetof(GuestContext, esr_el1));
cg.l(retry);
cg.LDAXR(X0, X2);
cg.STLXR(W3, XZR, X2);
cg.CBNZ(W3, retry);
cg.ORR(X0, X0, static_cast<u64>(HaltReason::CacheInvalidation));
cg.ADD(X1, X1, offsetof(GuestContext, host_ctx));
static_assert(offsetof(HostContext, host_sp) + 8 == offsetof(HostContext, host_tpidr_el0));
cg.LDP(X2, X3, X1, offsetof(HostContext, host_sp));
cg.MOV(SP, X2);
cg.MSR(oaknut::SystemReg::TPIDR_EL0, X3);
static constexpr size_t HOST_REGS_OFF = offsetof(HostContext, host_saved_regs);
static constexpr size_t HOST_VREGS_OFF = offsetof(HostContext, host_saved_vregs);
cg.LDP(X19, X20, X1, HOST_REGS_OFF);
cg.LDP(X21, X22, X1, HOST_REGS_OFF + 2 * sizeof(u64));
cg.LDP(X23, X24, X1, HOST_REGS_OFF + 4 * sizeof(u64));
cg.LDP(X25, X26, X1, HOST_REGS_OFF + 6 * sizeof(u64));
cg.LDP(X27, X28, X1, HOST_REGS_OFF + 8 * sizeof(u64));
cg.LDP(X29, X30, X1, HOST_REGS_OFF + 10 * sizeof(u64));
cg.LDP(Q8, Q9, X1, HOST_VREGS_OFF);
cg.LDP(Q10, Q11, X1, HOST_VREGS_OFF + 2 * sizeof(u128));
cg.LDP(Q12, Q13, X1, HOST_VREGS_OFF + 4 * sizeof(u128));
cg.LDP(Q14, Q15, X1, HOST_VREGS_OFF + 6 * sizeof(u128));
cg.RET();
if (is_pre) {
curr_patch->m_trampolines_pre.push_back({cg.offset(), module_dest});
} else {
curr_patch->m_trampolines.push_back({cg.offset(), module_dest});
}
cg.MRS(X2, oaknut::SystemReg::TPIDR_EL0);
cg.LDR(X2, X2, offsetof(NativeExecutionParameters, native_context));
cg.ADD(X0, X2, offsetof(GuestContext, host_ctx));
cg.STR(X30, X0, offsetof(HostContext, host_saved_regs) + 11 * sizeof(u64));
cg.STR(X30, SP, PRE_INDEXED, -16);
cg.BL(load_ctx);
cg.LDR(X30, SP, POST_INDEXED, 16);
cg.STR(X1, SP, PRE_INDEXED, -16);
cg.MRS(X1, oaknut::SystemReg::TPIDR_EL0);
cg.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context));
cg.LDR(X30, X1, offsetof(GuestContext, cpu_registers) + sizeof(u64) * 30);
cg.LDR(X1, SP, POST_INDEXED, 16);
this->UnlockContext(cg);
if (is_pre) {
this->BranchToModulePre(module_dest);
} else {
this->BranchToModule(module_dest);
}
cg.l(pc_after_cache_op);
if (is_pre) {
this->WriteModulePcPre(module_dest);
} else {
this->WriteModulePc(module_dest);
}
}
void Patcher::WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg,
oaknut::SystemReg src_reg, oaknut::VectorCodeGenerator& cg) {
// Retrieve emulated TLS register from GuestContext.

View file

@ -78,11 +78,6 @@ private:
void LockContext(oaknut::VectorCodeGenerator& code);
void UnlockContext(oaknut::VectorCodeGenerator& code);
void WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id, oaknut::VectorCodeGenerator& code, oaknut::Label& save_ctx, oaknut::Label& load_ctx);
void WriteCacheOperationTrampoline(ModuleDestLabel module_dest, CacheOperationKind op_kind,
oaknut::XReg src_reg,
oaknut::VectorCodeGenerator& code,
oaknut::Label& save_ctx,
oaknut::Label& load_ctx);
void WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg, oaknut::SystemReg src_reg, oaknut::VectorCodeGenerator& code);
void WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg, oaknut::VectorCodeGenerator& code);
void WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg, oaknut::VectorCodeGenerator& code);
@ -93,11 +88,6 @@ private:
void LockContext() { LockContext(c); }
void UnlockContext() { UnlockContext(c); }
void WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id) { WriteSvcTrampoline(module_dest, svc_id, c, m_save_context, m_load_context); }
void WriteCacheOperationTrampoline(ModuleDestLabel module_dest, CacheOperationKind op_kind,
oaknut::XReg src_reg) {
WriteCacheOperationTrampoline(module_dest, op_kind, src_reg, c, m_save_context,
m_load_context);
}
void WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg, oaknut::SystemReg src_reg) { WriteMrsHandler(module_dest, dest_reg, src_reg, c); }
void WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg) { WriteMsrHandler(module_dest, src_reg, c); }
void WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg) { WriteCntpctHandler(module_dest, dest_reg, c); }

View file

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
@ -97,7 +97,6 @@ void PhysicalCore::RunThread(Kernel::KThread* thread) {
}
// Determine why we stopped.
const bool cache_invalidation = True(hr & Core::HaltReason::CacheInvalidation);
const bool supervisor_call = True(hr & Core::HaltReason::SupervisorCall);
const bool prefetch_abort = True(hr & Core::HaltReason::PrefetchAbort);
const bool breakpoint = True(hr & Core::HaltReason::InstructionBreakpoint);
@ -152,11 +151,6 @@ void PhysicalCore::RunThread(Kernel::KThread* thread) {
return;
}
if (cache_invalidation) {
interface->HandleCacheOperation(thread);
continue;
}
// Handle external interrupt sources.
if (interrupt || m_is_single_core) {
return;

View file

@ -639,15 +639,6 @@ struct Memory::Impl {
GetInteger(vaddr), []() {}, []() {});
}
[[nodiscard]] bool IsRasterizerCached(const Common::ProcessAddress vaddr) const {
const u64 addr = GetInteger(vaddr) & 0xffffffffffffULL;
if (!AddressSpaceContains(*current_page_table, addr, 1)) {
return false;
}
return current_page_table->entries[addr >> YUZU_PAGEBITS].ptr.Type() ==
Common::PageType::RasterizerCachedMemory;
}
/// @brief Reads a particular data type out of memory at the given virtual address.
/// @param vaddr The virtual address to read the data type from.
/// @tparam T The data type to read out of memory.
@ -1045,10 +1036,6 @@ void Memory::RasterizerMarkRegionCached(Common::ProcessAddress vaddr, u64 size,
impl->RasterizerMarkRegionCached(GetInteger(vaddr), size, cached);
}
bool Memory::IsRasterizerCached(Common::ProcessAddress vaddr) const {
return impl->IsRasterizerCached(vaddr);
}
void Memory::MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug) {
impl->MarkRegionDebug(GetInteger(vaddr), size, debug);
}

View file

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: 2014 Citra Emulator Project
@ -493,8 +493,6 @@ public:
void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers);
[[nodiscard]] bool IsRasterizerCached(Common::ProcessAddress vaddr) const;
bool InvalidateNCE(Common::ProcessAddress vaddr, size_t size);
bool InvalidateSeparateHeap(void* fault_address);

View file

@ -39,9 +39,7 @@ NumericType GetNumericType(TexturePixelFormat format) {
if (!VideoCore::Surface::IsPixelFormatInteger(pixel_format)) {
return NumericType::Float;
}
return VideoCore::Surface::IsPixelFormatSignedInteger(pixel_format)
? NumericType::SignedInt
: NumericType::UnsignedInt;
return NumericType::UnsignedInt;
}
IR::Opcode IndexedInstruction(const IR::Inst& inst) {
@ -450,7 +448,9 @@ public:
u32 Add(const ImageBufferDescriptor& desc) {
const u32 index{Add(image_buffer_descriptors, desc, [&desc](const auto& existing) {
return desc.format == existing.format && desc.cbuf_index == existing.cbuf_index &&
return desc.format == existing.format &&
desc.numeric_type == existing.numeric_type &&
desc.cbuf_index == existing.cbuf_index &&
desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count &&
desc.size_shift == existing.size_shift;
})};
@ -480,6 +480,7 @@ public:
u32 Add(const ImageDescriptor& desc) {
const u32 index{Add(image_descriptors, desc, [&desc](const auto& existing) {
return desc.type == existing.type && desc.format == existing.format &&
desc.numeric_type == existing.numeric_type &&
desc.cbuf_index == existing.cbuf_index &&
desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count &&
desc.size_shift == existing.size_shift;

View file

@ -323,7 +323,7 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const Device& device,
case Maxwell::PrimitiveTopology::Lines:
return VK_PRIMITIVE_TOPOLOGY_LINE_LIST;
case Maxwell::PrimitiveTopology::LineLoop:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
case Maxwell::PrimitiveTopology::LineStrip:
return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
case Maxwell::PrimitiveTopology::Triangles:

View file

@ -203,6 +203,11 @@ public:
}
void SyncWrites() override {
if (!direct_sync_values.empty()) {
runtime.template SyncValues<VideoCommon::SyncValuesStruct>(direct_sync_values);
direct_sync_values.clear();
}
if (sync_values_stash.empty()) {
return;
}
@ -223,8 +228,54 @@ public:
const auto driver_id = device.GetDriverID();
if (driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_ARM_PROPRIETARY || driver_id == VK_DRIVER_ID_MESA_TURNIP) {
pending_sync.clear();
ApplyBanksWideOp<false>(
pending_sync,
[](SamplesQueryBank* bank, size_t start, size_t amount) { bank->Sync(start, amount); });
direct_sync_values.clear();
direct_sync_values.reserve(pending_sync.size());
bool has_multi_queries = accumulation_since_last_sync;
for (auto q : pending_sync) {
auto* query = GetQuery(q);
if (True(query->flags & VideoCommon::QueryFlagBits::IsRewritten)) {
continue;
}
if (True(query->flags & VideoCommon::QueryFlagBits::IsInvalidated)) {
continue;
}
u64 total = 0;
ApplyBankOp(query, [&total](SamplesQueryBank* bank, size_t start, size_t amount) {
const auto& results = bank->GetResults();
for (size_t i = 0; i < amount; i++) {
total += results[start + i];
}
});
total += GetAmendValue();
query->value = total;
query->flags |= VideoCommon::QueryFlagBits::IsHostSynced;
query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced;
direct_sync_values.emplace_back(VideoCommon::SyncValuesStruct{
.address = query->guest_address,
.value = total,
.size = SamplesQueryBank::QUERY_SIZE,
});
has_multi_queries |= query->size_slots > 1;
}
ReplicateCurrentQueryIfNeeded();
std::function<void()> func([this] { amend_value = accumulation_value; });
rasterizer->SyncOperation(std::move(func));
AbandonCurrentQuery();
num_slots_used = 0;
first_accumulation_checkpoint = (std::numeric_limits<size_t>::max)();
last_accumulation_checkpoint = 0;
accumulation_since_last_sync = has_multi_queries;
sync_values_stash.clear();
pending_sync.clear();
return;
}
sync_values_stash.clear();
@ -570,6 +621,7 @@ private:
std::array<size_t, 32> resolve_table{};
std::array<size_t, 32> intermediary_table{};
vk::Buffer accumulation_buffer;
std::vector<VideoCommon::SyncValuesStruct> direct_sync_values;
std::deque<std::vector<HostSyncValues>> sync_values_stash;
std::vector<size_t> resolve_buffers;
@ -1016,10 +1068,52 @@ public:
u64 stride{};
DAddr dependant_address{};
Maxwell3D::Regs::PrimitiveTopology topology{Maxwell3D::Regs::PrimitiveTopology::Points};
u32 patch_vertices{1};
size_t dependant_index{};
bool dependant_manage{};
};
[[nodiscard]] constexpr u64 SaturatingSub(u64 value, u64 amount) {
return value > amount ? value - amount : 0;
}
[[nodiscard]] constexpr u64 PrimitiveCountFromVertices(
Maxwell3D::Regs::PrimitiveTopology topology, u64 num_vertices, u32 patch_vertices) {
switch (topology) {
case Maxwell3D::Regs::PrimitiveTopology::Points:
return num_vertices;
case Maxwell3D::Regs::PrimitiveTopology::Lines:
return num_vertices / 2;
case Maxwell3D::Regs::PrimitiveTopology::LineLoop:
return num_vertices >= 2 ? num_vertices : 0;
case Maxwell3D::Regs::PrimitiveTopology::LineStrip:
return SaturatingSub(num_vertices, 1);
case Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency:
return num_vertices / 4;
case Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency:
return SaturatingSub(num_vertices, 3);
case Maxwell3D::Regs::PrimitiveTopology::Triangles:
return num_vertices / 3;
case Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency:
return num_vertices / 6;
case Maxwell3D::Regs::PrimitiveTopology::TriangleFan:
case Maxwell3D::Regs::PrimitiveTopology::TriangleStrip:
return SaturatingSub(num_vertices, 2);
case Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency:
return num_vertices >= 6 ? (num_vertices - 4) / 2 : 0;
case Maxwell3D::Regs::PrimitiveTopology::Quads:
return num_vertices / 6;
case Maxwell3D::Regs::PrimitiveTopology::QuadStrip:
return num_vertices / 6;
case Maxwell3D::Regs::PrimitiveTopology::Patches:
return patch_vertices != 0 ? num_vertices / patch_vertices : 0;
case Maxwell3D::Regs::PrimitiveTopology::Polygon:
return num_vertices != 0 ? 1 : 0;
default:
return num_vertices;
}
}
class PrimitivesSucceededStreamer : public VideoCommon::SimpleStreamer<PrimitivesQueryBase> {
public:
explicit PrimitivesSucceededStreamer(size_t id_, QueryCacheRuntime& runtime_,
@ -1048,7 +1142,10 @@ public:
const size_t subreport = static_cast<size_t>(*subreport_);
auto dependant_address_opt = tfb_streamer.GetLastQueryStream(subreport);
bool must_manage_dependance = false;
new_query->topology = tfb_streamer.GetOutputTopology();
runtime.View3DRegs([new_query, this](Maxwell3D& maxwell3d) {
new_query->topology = tfb_streamer.GetOutputTopology();
new_query->patch_vertices = std::max<u32>(maxwell3d.regs.patch_vertices, 1);
});
if (dependant_address_opt) {
auto [dep_address, stride] = *dependant_address_opt;
new_query->dependant_address = dep_address;
@ -1131,32 +1228,8 @@ public:
num_vertices = static_cast<u64>(result) / safe_stride;
}
}
query->value = [&]() -> u64 {
switch (query->topology) {
case Maxwell3D::Regs::PrimitiveTopology::Points:
return num_vertices;
case Maxwell3D::Regs::PrimitiveTopology::Lines:
return num_vertices / 2;
case Maxwell3D::Regs::PrimitiveTopology::LineLoop:
return (num_vertices / 2) + 1;
case Maxwell3D::Regs::PrimitiveTopology::LineStrip:
return num_vertices - 1;
case Maxwell3D::Regs::PrimitiveTopology::Patches:
case Maxwell3D::Regs::PrimitiveTopology::Triangles:
case Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency:
return num_vertices / 3;
case Maxwell3D::Regs::PrimitiveTopology::TriangleFan:
case Maxwell3D::Regs::PrimitiveTopology::TriangleStrip:
case Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency:
return num_vertices - 2;
case Maxwell3D::Regs::PrimitiveTopology::Quads:
return num_vertices / 4;
case Maxwell3D::Regs::PrimitiveTopology::Polygon:
return 1U;
default:
return num_vertices;
}
}();
query->value =
PrimitiveCountFromVertices(query->topology, num_vertices, query->patch_vertices);
}
}
@ -1423,13 +1496,6 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku
return false;
}
auto driver_id = impl->device.GetDriverID();
const bool is_gpu_high = Settings::IsGPULevelHigh();
if ((!is_gpu_high && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) || driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || driver_id == VK_DRIVER_ID_ARM_PROPRIETARY || driver_id == VK_DRIVER_ID_MESA_TURNIP) {
return true;
}
for (size_t i = 0; i < 2; i++) {
is_null[i] = !is_in_ac[i] && check_value(objects[i]->address);
}
@ -1442,12 +1508,22 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku
}
}
if (!is_gpu_high) {
return true;
auto driver_id = impl->device.GetDriverID();
const bool is_gpu_high = Settings::IsGPULevelHigh();
const bool driver_blocks_pair_resolve =
((!is_gpu_high && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) ||
driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_ARM_PROPRIETARY ||
driver_id == VK_DRIVER_ID_MESA_TURNIP);
if (driver_blocks_pair_resolve || !is_gpu_high) {
EndHostConditionalRendering();
return false;
}
if (!is_in_bc[0] && !is_in_bc[1]) {
return true;
EndHostConditionalRendering();
return false;
}
HostConditionalRenderingCompareBCImpl(object_1.address, equal_check);
return true;

View file

@ -6,6 +6,8 @@
#include <algorithm>
#include <array>
#include <cstring>
#include <limits>
#include <memory>
#include <mutex>
@ -24,6 +26,7 @@
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/host1x/gpu_device_memory_manager.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_vulkan/blit_image.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
@ -61,6 +64,58 @@ struct DrawParams {
bool is_indexed;
};
// Tells whether the guest topology is the line-loop primitive, which Vulkan
// lacks natively and therefore needs a closing-segment emulation draw.
[[nodiscard]] bool IsLineLoop(Maxwell::PrimitiveTopology topology) {
    switch (topology) {
    case Maxwell::PrimitiveTopology::LineLoop:
        return true;
    default:
        return false;
    }
}
// Maps a Maxwell index-buffer element format to the all-ones primitive-restart
// sentinel value for that element width.
[[nodiscard]] u32 PrimitiveRestartIndex(Maxwell::IndexFormat format) {
    if (format == Maxwell::IndexFormat::UnsignedByte) {
        return std::numeric_limits<u8>::max();
    }
    if (format == Maxwell::IndexFormat::UnsignedShort) {
        return std::numeric_limits<u16>::max();
    }
    if (format == Maxwell::IndexFormat::UnsignedInt) {
        return std::numeric_limits<u32>::max();
    }
    // Unhandled format: assert in debug builds and fall back to the widest sentinel.
    ASSERT(false);
    return std::numeric_limits<u32>::max();
}
// Copies one trivially-copyable object of type T from guest GPU memory into
// `value`. Returns false (leaving `value` untouched) when no memory manager is
// available.
template <typename T>
bool ReadGuestObject(Tegra::MemoryManager* gpu_memory, GPUVAddr address, T& value) {
    const bool has_memory = gpu_memory != nullptr;
    if (has_memory) {
        gpu_memory->ReadBlockUnsafe(address, &value, sizeof(T));
    }
    return has_memory;
}
// Reads a single index element of the given format from guest memory and
// widens it to 32 bits in `value`. Returns false when the read could not be
// performed; `value` is left untouched on failure.
bool ReadGuestIndex(Tegra::MemoryManager* gpu_memory, GPUVAddr address, Maxwell::IndexFormat format,
                    u32& value) {
    // Read into a narrow temporary of the format's width, then widen on success.
    const auto read_widened = [&](auto narrow) -> bool {
        if (!ReadGuestObject(gpu_memory, address, narrow)) {
            return false;
        }
        value = narrow;
        return true;
    };
    switch (format) {
    case Maxwell::IndexFormat::UnsignedByte:
        return read_widened(u8{});
    case Maxwell::IndexFormat::UnsignedShort:
        return read_widened(u16{});
    case Maxwell::IndexFormat::UnsignedInt:
        return read_widened(u32{});
    }
    // Unhandled format: assert in debug builds, report failure otherwise.
    ASSERT(false);
    return false;
}
VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index, float scale) {
const auto& src = regs.viewport_transform[index];
const auto conv = [scale](float value) {
@ -343,6 +398,21 @@ void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) {
GPU::Logging::GPULogger::GetInstance().LogVulkanCall(
is_indexed ? "vkCmdDrawIndexed" : "vkCmdDraw", params, VK_SUCCESS);
}
if (IsLineLoop(draw_state.topology) && draw_params.num_vertices >= 2) {
if (maxwell3d->regs.transform_feedback_enabled != 0) {
query_cache.CounterEnable(VideoCommon::QueryType::StreamingByteCount, false);
}
scheduler.Record([](vk::CommandBuffer cmdbuf) {
cmdbuf.SetPrimitiveTopologyEXT(VK_PRIMITIVE_TOPOLOGY_LINE_LIST);
});
DrawLineLoopClosure(draw_state, draw_params.base_instance, draw_params.num_instances,
static_cast<s32>(draw_params.base_vertex),
draw_params.num_vertices, draw_params.is_indexed);
scheduler.Record([](vk::CommandBuffer cmdbuf) {
cmdbuf.SetPrimitiveTopologyEXT(VK_PRIMITIVE_TOPOLOGY_LINE_STRIP);
});
}
});
}
@ -350,6 +420,7 @@ void RasterizerVulkan::DrawIndirect() {
const auto& params = maxwell3d->draw_manager->GetIndirectParams();
buffer_cache.SetDrawIndirect(&params);
PrepareDraw(params.is_indexed, [this, &params] {
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
const auto indirect_buffer = buffer_cache.GetDrawIndirectBuffer();
const auto& buffer = indirect_buffer.first;
const auto& offset = indirect_buffer.second;
@ -385,6 +456,9 @@ void RasterizerVulkan::DrawIndirect() {
static_cast<u32>(params.stride));
}
});
if (IsLineLoop(draw_state.topology)) {
DrawIndirectLineLoopClosures(draw_state, params);
}
return;
}
scheduler.Record([buffer_obj = buffer->Handle(), offset, params](vk::CommandBuffer cmdbuf) {
@ -407,10 +481,165 @@ void RasterizerVulkan::DrawIndirect() {
params.is_indexed ? "vkCmdDrawIndexedIndirect" : "vkCmdDrawIndirect",
log_params, VK_SUCCESS);
}
if (IsLineLoop(draw_state.topology)) {
DrawIndirectLineLoopClosures(draw_state, params);
}
});
buffer_cache.SetDrawIndirect(nullptr);
}
// Emulates the implicit closing segment of a LineLoop draw (a topology Vulkan
// does not support natively) by recording one extra two-index indexed draw
// that joins the loop's last vertex back to its first. The caller is
// responsible for switching the pipeline topology to LINE_LIST around this
// call and restoring it afterwards.
//
// Returns true when a closure draw was recorded; false when the draw shape
// makes a closure unnecessary or the closing indices cannot be determined.
bool RasterizerVulkan::DrawLineLoopClosure(const MaxwellDrawState& draw_state, u32 base_instance,
                                           u32 num_instances, s32 base_vertex,
                                           u32 num_vertices, bool is_indexed) {
    // Nothing to close for non-loop topologies, empty draws, or fewer than two vertices.
    if (!IsLineLoop(draw_state.topology) || num_instances == 0 || num_vertices < 2) {
        return false;
    }
    // closure_indices = {index of the loop's last vertex, index of its first vertex}.
    std::array<u32, 2> closure_indices{};
    if (!is_indexed) {
        // Non-indexed draw: vertices are sequential, so the closing segment is
        // simply (num_vertices - 1) -> 0; base_vertex is applied by the indexed
        // draw recorded below.
        closure_indices = {num_vertices - 1, 0};
    } else if (!draw_state.inline_index_draw_indexes.empty()) {
        // Inline index data kept CPU-side.
        // NOTE(review): this path assumes inline indexes are stored as 32-bit
        // elements regardless of the guest index format — confirm against the
        // draw manager's inline-index handling.
        const size_t last_offset = (static_cast<size_t>(num_vertices) - 1) * sizeof(u32);
        if (draw_state.inline_index_draw_indexes.size() < last_offset + sizeof(u32)) {
            // Inline buffer is too small for the requested vertex count; bail out.
            return false;
        }
        std::memcpy(&closure_indices[0], draw_state.inline_index_draw_indexes.data() + last_offset,
                    sizeof(u32));
        std::memcpy(&closure_indices[1], draw_state.inline_index_draw_indexes.data(),
                    sizeof(u32));
    } else {
        // Guest index buffer: read the first and last elements straight from
        // GPU memory in the buffer's native element width.
        const auto index_format = draw_state.index_buffer.format;
        const size_t index_size = draw_state.index_buffer.FormatSizeInBytes();
        const GPUVAddr first_address =
            draw_state.index_buffer.StartAddress() +
            static_cast<GPUVAddr>(draw_state.index_buffer.first) * index_size;
        const GPUVAddr last_address =
            first_address + static_cast<GPUVAddr>(num_vertices - 1) * index_size;
        if (!ReadGuestIndex(gpu_memory, last_address, index_format, closure_indices[0]) ||
            !ReadGuestIndex(gpu_memory, first_address, index_format, closure_indices[1])) {
            return false;
        }
        // If either endpoint is the primitive-restart sentinel, the loop was cut
        // there and no closing segment should be drawn.
        if (maxwell3d->regs.primitive_restart.enabled != 0) {
            const u32 restart_index = PrimitiveRestartIndex(index_format);
            if (closure_indices[0] == restart_index || closure_indices[1] == restart_index) {
                return false;
            }
        }
    }
    // Upload the two closing indices to a staging buffer, bind it as the index
    // buffer, and draw them as a single indexed line with the caller's
    // instance/vertex offsets. Record order (bind, then draw) must be preserved.
    const auto upload = staging_pool.Request(sizeof(closure_indices), MemoryUsage::Upload);
    std::memcpy(upload.mapped_span.data(), closure_indices.data(), sizeof(closure_indices));
    scheduler.Record([buffer = upload.buffer, offset = upload.offset](vk::CommandBuffer cmdbuf) {
        cmdbuf.BindIndexBuffer(buffer, offset, VK_INDEX_TYPE_UINT32);
    });
    scheduler.Record([base_instance, num_instances, base_vertex](vk::CommandBuffer cmdbuf) {
        cmdbuf.DrawIndexed(2, num_instances, 0, base_vertex, base_instance);
    });
    return true;
}
// Records the closing line segments for every sub-draw of an indirect
// LineLoop draw. Reads the indirect command structures back from guest memory
// (CPU-side) to discover each sub-draw's vertex/index ranges, then emits one
// two-index LINE_LIST draw per sub-draw that needs closing. The pipeline
// topology is switched to LINE_LIST lazily on the first emitted closure and
// restored to LINE_STRIP at the end.
void RasterizerVulkan::DrawIndirectLineLoopClosures(
    const MaxwellDrawState& draw_state, const Tegra::Engines::DrawManager::IndirectParams& params) {
    // Byte-count (transform-feedback-driven) indirect draws carry no command
    // structs to inspect; nothing to do for non-loop topologies either.
    if (!IsLineLoop(draw_state.topology) || params.is_byte_count) {
        return;
    }
    u32 draw_count = static_cast<u32>(params.max_draw_counts);
    if (params.include_count) {
        // The actual draw count lives in guest memory; clamp it to the declared maximum.
        gpu_memory->ReadBlockUnsafe(params.count_start_address, &draw_count, sizeof(draw_count));
        draw_count = std::min(draw_count, static_cast<u32>(params.max_draw_counts));
    }
    if (draw_count == 0) {
        return;
    }
    // Tracks whether the LINE_LIST topology switch (and transform-feedback
    // counter disable) has been recorded yet; done once, on first need.
    bool emitted_closure = false;
    if (params.is_indexed) {
        // A stride of 0 means tightly-packed commands.
        const u32 command_stride =
            params.stride != 0 ? static_cast<u32>(params.stride) : sizeof(VkDrawIndexedIndirectCommand);
        for (u32 i = 0; i < draw_count; ++i) {
            // Read this sub-draw's indirect command from guest memory.
            VkDrawIndexedIndirectCommand command{};
            gpu_memory->ReadBlockUnsafe(params.indirect_start_address +
                                            static_cast<GPUVAddr>(i) * command_stride,
                                        &command, sizeof(command));
            if (command.indexCount < 2 || command.instanceCount == 0) {
                continue; // Degenerate sub-draw: nothing to close.
            }
            // closure_indices = {last index of the loop, first index of the loop},
            // read from the guest index buffer in its native element width.
            std::array<u32, 2> closure_indices{};
            const auto index_format = draw_state.index_buffer.format;
            const size_t index_size = draw_state.index_buffer.FormatSizeInBytes();
            const GPUVAddr first_address = draw_state.index_buffer.StartAddress() +
                                           static_cast<GPUVAddr>(command.firstIndex) * index_size;
            const GPUVAddr last_address =
                first_address + static_cast<GPUVAddr>(command.indexCount - 1) * index_size;
            if (!ReadGuestIndex(gpu_memory, last_address, index_format, closure_indices[0]) ||
                !ReadGuestIndex(gpu_memory, first_address, index_format, closure_indices[1])) {
                continue;
            }
            // Loops cut by the primitive-restart sentinel get no closing segment.
            if (maxwell3d->regs.primitive_restart.enabled != 0) {
                const u32 restart_index = PrimitiveRestartIndex(index_format);
                if (closure_indices[0] == restart_index || closure_indices[1] == restart_index) {
                    continue;
                }
            }
            if (!emitted_closure) {
                if (maxwell3d->regs.transform_feedback_enabled != 0) {
                    // Keep the closure draws out of the streaming byte count.
                    query_cache.CounterEnable(VideoCommon::QueryType::StreamingByteCount, false);
                }
                scheduler.Record([](vk::CommandBuffer cmdbuf) {
                    cmdbuf.SetPrimitiveTopologyEXT(VK_PRIMITIVE_TOPOLOGY_LINE_LIST);
                });
                emitted_closure = true;
            }
            // Stage the two closing indices, bind them, and draw one line with
            // this sub-draw's instance/vertex offsets (record order matters).
            const auto upload = staging_pool.Request(sizeof(closure_indices), MemoryUsage::Upload);
            std::memcpy(upload.mapped_span.data(), closure_indices.data(), sizeof(closure_indices));
            scheduler.Record(
                [buffer = upload.buffer, offset = upload.offset](vk::CommandBuffer cmdbuf) {
                    cmdbuf.BindIndexBuffer(buffer, offset, VK_INDEX_TYPE_UINT32);
                });
            scheduler.Record([command](vk::CommandBuffer cmdbuf) {
                cmdbuf.DrawIndexed(2, command.instanceCount, 0, command.vertexOffset,
                                   command.firstInstance);
            });
        }
    } else {
        // A stride of 0 means tightly-packed commands.
        const u32 command_stride =
            params.stride != 0 ? static_cast<u32>(params.stride) : sizeof(VkDrawIndirectCommand);
        for (u32 i = 0; i < draw_count; ++i) {
            // Read this sub-draw's indirect command from guest memory.
            VkDrawIndirectCommand command{};
            gpu_memory->ReadBlockUnsafe(params.indirect_start_address +
                                            static_cast<GPUVAddr>(i) * command_stride,
                                        &command, sizeof(command));
            if (command.vertexCount < 2 || command.instanceCount == 0) {
                continue; // Degenerate sub-draw: nothing to close.
            }
            if (!emitted_closure) {
                if (maxwell3d->regs.transform_feedback_enabled != 0) {
                    // Keep the closure draws out of the streaming byte count.
                    query_cache.CounterEnable(VideoCommon::QueryType::StreamingByteCount, false);
                }
                scheduler.Record([](vk::CommandBuffer cmdbuf) {
                    cmdbuf.SetPrimitiveTopologyEXT(VK_PRIMITIVE_TOPOLOGY_LINE_LIST);
                });
                emitted_closure = true;
            }
            // Non-indexed sub-draws reuse the single-draw helper with
            // is_indexed == false (sequential-vertex closure path).
            DrawLineLoopClosure(draw_state, command.firstInstance, command.instanceCount,
                                static_cast<s32>(command.firstVertex), command.vertexCount,
                                false);
        }
    }
    if (emitted_closure) {
        // Restore the topology the pipeline was created with (LineLoop is
        // submitted to Vulkan as LINE_STRIP).
        scheduler.Record([](vk::CommandBuffer cmdbuf) {
            cmdbuf.SetPrimitiveTopologyEXT(VK_PRIMITIVE_TOPOLOGY_LINE_STRIP);
        });
    }
}
void RasterizerVulkan::DrawTexture() {
SCOPE_EXIT {

View file

@ -155,6 +155,13 @@ private:
template <typename Func>
void PrepareDraw(bool is_indexed, Func&&);
bool DrawLineLoopClosure(const Tegra::Engines::DrawManager::State& draw_state,
u32 base_instance,
u32 num_instances, s32 base_vertex, u32 num_vertices,
bool is_indexed);
void DrawIndirectLineLoopClosures(const Tegra::Engines::DrawManager::State& draw_state,
const Tegra::Engines::DrawManager::IndirectParams& params);
void FlushWork();
void UpdateDynamicStates();