mirror of
https://git.eden-emu.dev/eden-emu/eden
synced 2026-04-10 03:18:55 +02:00
Compare commits
13 commits
e20875d8fc
...
1845d5b222
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1845d5b222 | ||
|
|
1240268048 | ||
|
|
a134ad3fbd | ||
|
|
da30efbc55 | ||
|
|
2dbca791f6 | ||
|
|
74248bd35a | ||
|
|
e715925d52 | ||
|
|
852b8e176f | ||
|
|
b2b07abbc8 | ||
|
|
ce2f2187bd | ||
|
|
8c077fc4cd | ||
|
|
b88ca5b635 | ||
|
|
4755ec7a59 |
16 changed files with 367 additions and 391 deletions
|
|
@ -1,6 +1,3 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: 2014 Citra Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
|
|
@ -34,7 +31,6 @@ using WatchpointArray = std::array<Kernel::DebugWatchpoint, Core::Hardware::NUM_
|
|||
// NOTE: these values match the HaltReason enum in Dynarmic
|
||||
enum class HaltReason : u64 {
|
||||
StepThread = 0x00000001,
|
||||
CacheInvalidation = 0x00000002,
|
||||
DataAbort = 0x00000004,
|
||||
BreakLoop = 0x02000000,
|
||||
SupervisorCall = 0x04000000,
|
||||
|
|
@ -43,14 +39,6 @@ enum class HaltReason : u64 {
|
|||
};
|
||||
DECLARE_ENUM_FLAG_OPERATORS(HaltReason);
|
||||
|
||||
enum class CacheOperationKind : u32 {
|
||||
None,
|
||||
DataCacheInvalidate,
|
||||
DataCacheStore,
|
||||
DataCacheFlush,
|
||||
InstructionCacheInvalidate,
|
||||
};
|
||||
|
||||
enum class Architecture {
|
||||
AArch64,
|
||||
AArch32,
|
||||
|
|
@ -97,9 +85,6 @@ public:
|
|||
virtual void GetSvcArguments(std::span<uint64_t, 8> args) const = 0;
|
||||
virtual void SetSvcArguments(std::span<const uint64_t, 8> args) = 0;
|
||||
virtual u32 GetSvcNumber() const = 0;
|
||||
virtual bool HandleCacheOperation(Kernel::KThread* thread) {
|
||||
return false;
|
||||
}
|
||||
|
||||
void SetWatchpointArray(const WatchpointArray* watchpoints) {
|
||||
m_watchpoints = watchpoints;
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||
|
|
@ -11,7 +11,6 @@
|
|||
namespace Core {
|
||||
|
||||
constexpr Dynarmic::HaltReason StepThread = Dynarmic::HaltReason::Step;
|
||||
constexpr Dynarmic::HaltReason CacheInvalidation = Dynarmic::HaltReason::CacheInvalidation;
|
||||
constexpr Dynarmic::HaltReason DataAbort = Dynarmic::HaltReason::MemoryAbort;
|
||||
constexpr Dynarmic::HaltReason BreakLoop = Dynarmic::HaltReason::UserDefined2;
|
||||
constexpr Dynarmic::HaltReason SupervisorCall = Dynarmic::HaltReason::UserDefined3;
|
||||
|
|
@ -20,7 +19,6 @@ constexpr Dynarmic::HaltReason PrefetchAbort = Dynarmic::HaltReason::UserDefined
|
|||
|
||||
constexpr HaltReason TranslateHaltReason(Dynarmic::HaltReason hr) {
|
||||
static_assert(u64(HaltReason::StepThread) == u64(StepThread));
|
||||
static_assert(u64(HaltReason::CacheInvalidation) == u64(CacheInvalidation));
|
||||
static_assert(u64(HaltReason::DataAbort) == u64(DataAbort));
|
||||
static_assert(u64(HaltReason::BreakLoop) == u64(BreakLoop));
|
||||
static_assert(u64(HaltReason::SupervisorCall) == u64(SupervisorCall));
|
||||
|
|
|
|||
|
|
@ -43,39 +43,6 @@ fpsimd_context* GetFloatingPointState(mcontext_t& host_ctx) {
|
|||
|
||||
using namespace Common::Literals;
|
||||
constexpr u32 StackSize = 128_KiB;
|
||||
constexpr u64 CacheLineSize = 64;
|
||||
constexpr u64 SplitPageAccessWindow = 64;
|
||||
constexpr size_t MaxPreciseAccessPages = 256;
|
||||
constexpr u8 MaxPreciseAccessPageWeight = 4;
|
||||
|
||||
[[nodiscard]] constexpr u64 AlignDownPage(u64 addr) {
|
||||
return addr & ~u64{Memory::YUZU_PAGEMASK};
|
||||
}
|
||||
|
||||
[[nodiscard]] bool IsNearPageBoundary(u64 addr) {
|
||||
const u64 page_offset = addr & Memory::YUZU_PAGEMASK;
|
||||
return page_offset < SplitPageAccessWindow ||
|
||||
page_offset + SplitPageAccessWindow > Memory::YUZU_PAGESIZE;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool IsNearTlsWindow(u64 tls_base, u64 fault_addr) {
|
||||
if (tls_base == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const u64 tls_first_page = AlignDownPage(tls_base);
|
||||
const u64 tls_last_byte = tls_base + Kernel::Svc::ThreadLocalRegionSize - 1;
|
||||
const u64 tls_last_page = AlignDownPage(tls_last_byte);
|
||||
const u64 fault_page = AlignDownPage(fault_addr);
|
||||
|
||||
return fault_page + Memory::YUZU_PAGESIZE >= tls_first_page &&
|
||||
fault_page <= tls_last_page + Memory::YUZU_PAGESIZE;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool ShouldUsePreciseAccessChannel(const GuestContext* guest_ctx, u64 fault_addr) {
|
||||
return IsNearPageBoundary(fault_addr) || IsNearTlsWindow(guest_ctx->tpidrro_el0, fault_addr) ||
|
||||
IsNearTlsWindow(guest_ctx->tpidr_el0, fault_addr);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
|
|
@ -191,48 +158,18 @@ bool ArmNce::HandleGuestAlignmentFault(GuestContext* guest_ctx, void* raw_info,
|
|||
}
|
||||
|
||||
bool ArmNce::HandleGuestAccessFault(GuestContext* guest_ctx, void* raw_info, void* raw_context) {
|
||||
auto& host_ctx = static_cast<ucontext_t*>(raw_context)->uc_mcontext;
|
||||
auto* fpctx = GetFloatingPointState(host_ctx);
|
||||
auto* info = static_cast<siginfo_t*>(raw_info);
|
||||
auto* parent = guest_ctx->parent;
|
||||
|
||||
const u64 fault_addr = reinterpret_cast<u64>(info->si_addr);
|
||||
const Common::ProcessAddress addr = fault_addr & ~Memory::YUZU_PAGEMASK;
|
||||
const u64 page_offset = fault_addr & Memory::YUZU_PAGEMASK;
|
||||
auto& memory = parent->m_running_thread->GetOwnerProcess()->GetMemory();
|
||||
const bool rasterizer_cached = memory.IsRasterizerCached(addr);
|
||||
const bool prefer_precise_channel = ShouldUsePreciseAccessChannel(guest_ctx, fault_addr) ||
|
||||
parent->IsPreciseAccessPage(fault_addr) ||
|
||||
rasterizer_cached;
|
||||
|
||||
if (prefer_precise_channel) {
|
||||
if (auto next_pc = MatchAndExecuteOneInstruction(memory, &host_ctx, fpctx); next_pc) {
|
||||
parent->MarkPreciseAccessFaultWindow(fault_addr);
|
||||
host_ctx.pc = *next_pc;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
bool handled = memory.InvalidateNCE(addr, Memory::YUZU_PAGESIZE);
|
||||
|
||||
if (page_offset < SplitPageAccessWindow && addr >= Memory::YUZU_PAGESIZE) {
|
||||
handled |= memory.InvalidateNCE(addr - Memory::YUZU_PAGESIZE, Memory::YUZU_PAGESIZE);
|
||||
}
|
||||
if (page_offset + SplitPageAccessWindow > Memory::YUZU_PAGESIZE) {
|
||||
handled |= memory.InvalidateNCE(addr + Memory::YUZU_PAGESIZE, Memory::YUZU_PAGESIZE);
|
||||
}
|
||||
|
||||
if (handled) {
|
||||
// Try to handle an invalid access.
|
||||
// TODO: handle accesses which split a page?
|
||||
const Common::ProcessAddress addr =
|
||||
(reinterpret_cast<u64>(info->si_addr) & ~Memory::YUZU_PAGEMASK);
|
||||
auto& memory = guest_ctx->parent->m_running_thread->GetOwnerProcess()->GetMemory();
|
||||
if (memory.InvalidateNCE(addr, Memory::YUZU_PAGESIZE)) {
|
||||
// We handled the access successfully and are returning to guest code.
|
||||
return true;
|
||||
}
|
||||
|
||||
if (auto next_pc = MatchAndExecuteOneInstruction(memory, &host_ctx, fpctx); next_pc) {
|
||||
parent->MarkPreciseAccessFaultWindow(fault_addr);
|
||||
host_ctx.pc = *next_pc;
|
||||
return true;
|
||||
}
|
||||
|
||||
// We couldn't handle the access.
|
||||
return HandleFailedGuestFault(guest_ctx, raw_info, raw_context);
|
||||
}
|
||||
|
|
@ -245,53 +182,6 @@ void ArmNce::HandleHostAccessFault(int sig, void* raw_info, void* raw_context) {
|
|||
return g_orig_segv_action.sa_sigaction(sig, static_cast<siginfo_t*>(raw_info), raw_context);
|
||||
}
|
||||
|
||||
bool ArmNce::IsPreciseAccessPage(u64 addr) const {
|
||||
const std::scoped_lock lk{m_precise_pages_guard};
|
||||
return m_precise_pages.contains(AlignDownPage(addr));
|
||||
}
|
||||
|
||||
void ArmNce::MarkPreciseAccessPage(u64 addr) {
|
||||
const std::scoped_lock lk{m_precise_pages_guard};
|
||||
const u64 page = AlignDownPage(addr);
|
||||
if (auto it = m_precise_pages.find(page); it != m_precise_pages.end()) {
|
||||
it->second = std::min<u8>(MaxPreciseAccessPageWeight, static_cast<u8>(it->second + 1));
|
||||
return;
|
||||
}
|
||||
|
||||
while (m_precise_pages.size() >= MaxPreciseAccessPages) {
|
||||
DecayPreciseAccessPagesLocked();
|
||||
}
|
||||
|
||||
m_precise_pages.emplace(page, 1);
|
||||
}
|
||||
|
||||
void ArmNce::MarkPreciseAccessFaultWindow(u64 addr) {
|
||||
MarkPreciseAccessPage(addr);
|
||||
|
||||
if (!IsNearPageBoundary(addr)) {
|
||||
return;
|
||||
}
|
||||
|
||||
const u64 page_offset = addr & Memory::YUZU_PAGEMASK;
|
||||
if (page_offset < SplitPageAccessWindow && addr >= Memory::YUZU_PAGESIZE) {
|
||||
MarkPreciseAccessPage(addr - Memory::YUZU_PAGESIZE);
|
||||
}
|
||||
if (page_offset + SplitPageAccessWindow > Memory::YUZU_PAGESIZE) {
|
||||
MarkPreciseAccessPage(addr + Memory::YUZU_PAGESIZE);
|
||||
}
|
||||
}
|
||||
|
||||
void ArmNce::DecayPreciseAccessPagesLocked() {
|
||||
for (auto it = m_precise_pages.begin(); it != m_precise_pages.end();) {
|
||||
if (it->second > 1) {
|
||||
--it->second;
|
||||
++it;
|
||||
} else {
|
||||
it = m_precise_pages.erase(it);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ArmNce::LockThread(Kernel::KThread* thread) {
|
||||
auto* thread_params = &thread->GetNativeExecutionParameters();
|
||||
LockThreadParameters(thread_params);
|
||||
|
|
@ -379,41 +269,6 @@ void ArmNce::SetSvcArguments(std::span<const uint64_t, 8> args) {
|
|||
}
|
||||
}
|
||||
|
||||
bool ArmNce::HandleCacheOperation(Kernel::KThread* thread) {
|
||||
const auto op = static_cast<CacheOperationKind>(m_guest_ctx.cache_operation);
|
||||
if (op == CacheOperationKind::None) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const u64 cache_line_start = m_guest_ctx.cache_operation_address & ~(CacheLineSize - 1);
|
||||
auto& memory = thread->GetOwnerProcess()->GetMemory();
|
||||
|
||||
switch (op) {
|
||||
case CacheOperationKind::DataCacheInvalidate: {
|
||||
[[maybe_unused]] auto invalidate_result =
|
||||
memory.InvalidateDataCache(cache_line_start, CacheLineSize);
|
||||
break;
|
||||
}
|
||||
case CacheOperationKind::DataCacheStore: {
|
||||
[[maybe_unused]] auto store_result = memory.StoreDataCache(cache_line_start, CacheLineSize);
|
||||
break;
|
||||
}
|
||||
case CacheOperationKind::DataCacheFlush: {
|
||||
[[maybe_unused]] auto flush_result = memory.FlushDataCache(cache_line_start, CacheLineSize);
|
||||
break;
|
||||
}
|
||||
case CacheOperationKind::InstructionCacheInvalidate:
|
||||
InvalidateCacheRange(cache_line_start, CacheLineSize);
|
||||
break;
|
||||
case CacheOperationKind::None:
|
||||
break;
|
||||
}
|
||||
|
||||
m_guest_ctx.cache_operation = static_cast<u32>(CacheOperationKind::None);
|
||||
m_guest_ctx.cache_operation_address = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
ArmNce::ArmNce(System& system, bool uses_wall_clock, std::size_t core_index)
|
||||
: ArmInterface{uses_wall_clock}, m_system{system}, m_core_index{core_index} {
|
||||
m_guest_ctx.system = &m_system;
|
||||
|
|
|
|||
|
|
@ -1,13 +1,9 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <mutex>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "core/arm/arm_interface.h"
|
||||
#include "core/arm/nce/guest_context.h"
|
||||
|
|
@ -41,7 +37,6 @@ public:
|
|||
void GetSvcArguments(std::span<uint64_t, 8> args) const override;
|
||||
void SetSvcArguments(std::span<const uint64_t, 8> args) override;
|
||||
u32 GetSvcNumber() const override;
|
||||
bool HandleCacheOperation(Kernel::KThread* thread) override;
|
||||
|
||||
void SignalInterrupt(Kernel::KThread* thread) override;
|
||||
void ClearInstructionCache() override;
|
||||
|
|
@ -82,11 +77,6 @@ private:
|
|||
static void HandleHostAlignmentFault(int sig, void* info, void* raw_context);
|
||||
static void HandleHostAccessFault(int sig, void* info, void* raw_context);
|
||||
|
||||
bool IsPreciseAccessPage(u64 addr) const;
|
||||
void MarkPreciseAccessPage(u64 addr);
|
||||
void MarkPreciseAccessFaultWindow(u64 addr);
|
||||
void DecayPreciseAccessPagesLocked();
|
||||
|
||||
public:
|
||||
Core::System& m_system;
|
||||
|
||||
|
|
@ -98,9 +88,6 @@ public:
|
|||
GuestContext m_guest_ctx{};
|
||||
Kernel::KThread* m_running_thread{};
|
||||
|
||||
mutable std::mutex m_precise_pages_guard{};
|
||||
std::unordered_map<u64, u8> m_precise_pages{};
|
||||
|
||||
// Stack for signal processing.
|
||||
std::unique_ptr<u8[]> m_stack{};
|
||||
};
|
||||
|
|
|
|||
|
|
@ -1,6 +1,3 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
|
|
@ -41,9 +38,6 @@ struct GuestContext {
|
|||
u32 svc{};
|
||||
System* system{};
|
||||
ArmNce* parent{};
|
||||
u32 cache_operation{};
|
||||
u32 cache_operation_reserved{};
|
||||
u64 cache_operation_address{};
|
||||
};
|
||||
|
||||
// Verify assembly offsets.
|
||||
|
|
|
|||
|
|
@ -765,8 +765,8 @@ std::optional<u64> MatchAndExecuteOneInstruction(Core::Memory::Memory& memory, m
|
|||
fpsimd_context* fpsimd_context) {
|
||||
std::span<u64, 31> regs(reinterpret_cast<u64*>(context->regs), 31);
|
||||
std::span<u128, 32> vregs(reinterpret_cast<u128*>(fpsimd_context->vregs), 32);
|
||||
u64 sp = context->sp;
|
||||
const u64 pc = context->pc;
|
||||
u64& sp = *reinterpret_cast<u64*>(&context->sp);
|
||||
const u64& pc = *reinterpret_cast<u64*>(&context->pc);
|
||||
|
||||
InterpreterVisitor visitor(memory, regs, vregs, sp, pc);
|
||||
u32 instruction = memory.Read32(pc);
|
||||
|
|
@ -774,7 +774,6 @@ std::optional<u64> MatchAndExecuteOneInstruction(Core::Memory::Memory& memory, m
|
|||
|
||||
auto decoder = Dynarmic::A64::Decode<VisitorBase>(instruction);
|
||||
was_executed = decoder.get().call(visitor, instruction);
|
||||
context->sp = sp;
|
||||
return was_executed ? std::optional<u64>(pc + 4) : std::nullopt;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -26,26 +26,6 @@ using NativeExecutionParameters = Kernel::KThread::NativeExecutionParameters;
|
|||
constexpr size_t MaxRelativeBranch = 128_MiB;
|
||||
constexpr u32 ModuleCodeIndex = 0x24 / sizeof(u32);
|
||||
|
||||
namespace {
|
||||
|
||||
[[nodiscard]] std::optional<CacheOperationKind> DecodeCacheOperation(u32 inst) {
|
||||
switch (inst & ~u32{0x1F}) {
|
||||
case 0xD5087620:
|
||||
return CacheOperationKind::DataCacheInvalidate;
|
||||
case 0xD50B7A20:
|
||||
case 0xD50B7B20:
|
||||
return CacheOperationKind::DataCacheStore;
|
||||
case 0xD50B7E20:
|
||||
return CacheOperationKind::DataCacheFlush;
|
||||
case 0xD50B7520:
|
||||
return CacheOperationKind::InstructionCacheInvalidate;
|
||||
default:
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
Patcher::Patcher() : c(m_patch_instructions), c_pre(m_patch_instructions_pre) {
|
||||
// The first word of the patch section is always a branch to the first instruction of the
|
||||
// module.
|
||||
|
|
@ -180,20 +160,6 @@ bool Patcher::PatchText(std::span<const u8> program_image, const Kernel::CodeSet
|
|||
continue;
|
||||
}
|
||||
|
||||
if (auto cache_op = DecodeCacheOperation(inst); cache_op.has_value()) {
|
||||
bool pre_buffer = false;
|
||||
auto ret = AddRelocations(pre_buffer);
|
||||
const auto src_reg = oaknut::XReg{static_cast<int>(inst & 0x1F)};
|
||||
if (pre_buffer) {
|
||||
WriteCacheOperationTrampoline(ret, *cache_op, src_reg, c_pre, m_save_context_pre,
|
||||
m_load_context_pre);
|
||||
} else {
|
||||
WriteCacheOperationTrampoline(ret, *cache_op, src_reg, c, m_save_context,
|
||||
m_load_context);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (auto exclusive = Exclusive{inst}; exclusive.Verify()) {
|
||||
curr_patch->m_exclusives.push_back(i);
|
||||
}
|
||||
|
|
@ -576,96 +542,6 @@ void Patcher::WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id, oaknut
|
|||
this->WriteModulePc(module_dest);
|
||||
}
|
||||
|
||||
void Patcher::WriteCacheOperationTrampoline(ModuleDestLabel module_dest,
|
||||
CacheOperationKind op_kind, oaknut::XReg src_reg,
|
||||
oaknut::VectorCodeGenerator& cg,
|
||||
oaknut::Label& save_ctx,
|
||||
oaknut::Label& load_ctx) {
|
||||
const bool is_pre = (&cg == &c_pre);
|
||||
|
||||
this->LockContext(cg);
|
||||
|
||||
cg.STR(X30, SP, PRE_INDEXED, -16);
|
||||
cg.BL(save_ctx);
|
||||
cg.LDR(X30, SP, POST_INDEXED, 16);
|
||||
|
||||
oaknut::Label pc_after_cache_op;
|
||||
cg.MRS(X1, oaknut::SystemReg::TPIDR_EL0);
|
||||
cg.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context));
|
||||
cg.LDR(X2, pc_after_cache_op);
|
||||
cg.STR(X2, X1, offsetof(GuestContext, pc));
|
||||
|
||||
cg.MOV(X2, static_cast<u32>(op_kind));
|
||||
cg.STR(W2, X1, offsetof(GuestContext, cache_operation));
|
||||
cg.STR(src_reg, X1, offsetof(GuestContext, cache_operation_address));
|
||||
|
||||
static_assert(std::is_same_v<std::underlying_type_t<HaltReason>, u64>);
|
||||
oaknut::Label retry;
|
||||
cg.ADD(X2, X1, offsetof(GuestContext, esr_el1));
|
||||
cg.l(retry);
|
||||
cg.LDAXR(X0, X2);
|
||||
cg.STLXR(W3, XZR, X2);
|
||||
cg.CBNZ(W3, retry);
|
||||
cg.ORR(X0, X0, static_cast<u64>(HaltReason::CacheInvalidation));
|
||||
|
||||
cg.ADD(X1, X1, offsetof(GuestContext, host_ctx));
|
||||
|
||||
static_assert(offsetof(HostContext, host_sp) + 8 == offsetof(HostContext, host_tpidr_el0));
|
||||
cg.LDP(X2, X3, X1, offsetof(HostContext, host_sp));
|
||||
cg.MOV(SP, X2);
|
||||
cg.MSR(oaknut::SystemReg::TPIDR_EL0, X3);
|
||||
|
||||
static constexpr size_t HOST_REGS_OFF = offsetof(HostContext, host_saved_regs);
|
||||
static constexpr size_t HOST_VREGS_OFF = offsetof(HostContext, host_saved_vregs);
|
||||
cg.LDP(X19, X20, X1, HOST_REGS_OFF);
|
||||
cg.LDP(X21, X22, X1, HOST_REGS_OFF + 2 * sizeof(u64));
|
||||
cg.LDP(X23, X24, X1, HOST_REGS_OFF + 4 * sizeof(u64));
|
||||
cg.LDP(X25, X26, X1, HOST_REGS_OFF + 6 * sizeof(u64));
|
||||
cg.LDP(X27, X28, X1, HOST_REGS_OFF + 8 * sizeof(u64));
|
||||
cg.LDP(X29, X30, X1, HOST_REGS_OFF + 10 * sizeof(u64));
|
||||
cg.LDP(Q8, Q9, X1, HOST_VREGS_OFF);
|
||||
cg.LDP(Q10, Q11, X1, HOST_VREGS_OFF + 2 * sizeof(u128));
|
||||
cg.LDP(Q12, Q13, X1, HOST_VREGS_OFF + 4 * sizeof(u128));
|
||||
cg.LDP(Q14, Q15, X1, HOST_VREGS_OFF + 6 * sizeof(u128));
|
||||
cg.RET();
|
||||
|
||||
if (is_pre) {
|
||||
curr_patch->m_trampolines_pre.push_back({cg.offset(), module_dest});
|
||||
} else {
|
||||
curr_patch->m_trampolines.push_back({cg.offset(), module_dest});
|
||||
}
|
||||
|
||||
cg.MRS(X2, oaknut::SystemReg::TPIDR_EL0);
|
||||
cg.LDR(X2, X2, offsetof(NativeExecutionParameters, native_context));
|
||||
cg.ADD(X0, X2, offsetof(GuestContext, host_ctx));
|
||||
cg.STR(X30, X0, offsetof(HostContext, host_saved_regs) + 11 * sizeof(u64));
|
||||
|
||||
cg.STR(X30, SP, PRE_INDEXED, -16);
|
||||
cg.BL(load_ctx);
|
||||
cg.LDR(X30, SP, POST_INDEXED, 16);
|
||||
|
||||
cg.STR(X1, SP, PRE_INDEXED, -16);
|
||||
cg.MRS(X1, oaknut::SystemReg::TPIDR_EL0);
|
||||
cg.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context));
|
||||
cg.LDR(X30, X1, offsetof(GuestContext, cpu_registers) + sizeof(u64) * 30);
|
||||
cg.LDR(X1, SP, POST_INDEXED, 16);
|
||||
|
||||
this->UnlockContext(cg);
|
||||
|
||||
if (is_pre) {
|
||||
this->BranchToModulePre(module_dest);
|
||||
} else {
|
||||
this->BranchToModule(module_dest);
|
||||
}
|
||||
|
||||
cg.l(pc_after_cache_op);
|
||||
if (is_pre) {
|
||||
this->WriteModulePcPre(module_dest);
|
||||
} else {
|
||||
this->WriteModulePc(module_dest);
|
||||
}
|
||||
}
|
||||
|
||||
void Patcher::WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg,
|
||||
oaknut::SystemReg src_reg, oaknut::VectorCodeGenerator& cg) {
|
||||
// Retrieve emulated TLS register from GuestContext.
|
||||
|
|
|
|||
|
|
@ -78,11 +78,6 @@ private:
|
|||
void LockContext(oaknut::VectorCodeGenerator& code);
|
||||
void UnlockContext(oaknut::VectorCodeGenerator& code);
|
||||
void WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id, oaknut::VectorCodeGenerator& code, oaknut::Label& save_ctx, oaknut::Label& load_ctx);
|
||||
void WriteCacheOperationTrampoline(ModuleDestLabel module_dest, CacheOperationKind op_kind,
|
||||
oaknut::XReg src_reg,
|
||||
oaknut::VectorCodeGenerator& code,
|
||||
oaknut::Label& save_ctx,
|
||||
oaknut::Label& load_ctx);
|
||||
void WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg, oaknut::SystemReg src_reg, oaknut::VectorCodeGenerator& code);
|
||||
void WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg, oaknut::VectorCodeGenerator& code);
|
||||
void WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg, oaknut::VectorCodeGenerator& code);
|
||||
|
|
@ -93,11 +88,6 @@ private:
|
|||
void LockContext() { LockContext(c); }
|
||||
void UnlockContext() { UnlockContext(c); }
|
||||
void WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id) { WriteSvcTrampoline(module_dest, svc_id, c, m_save_context, m_load_context); }
|
||||
void WriteCacheOperationTrampoline(ModuleDestLabel module_dest, CacheOperationKind op_kind,
|
||||
oaknut::XReg src_reg) {
|
||||
WriteCacheOperationTrampoline(module_dest, op_kind, src_reg, c, m_save_context,
|
||||
m_load_context);
|
||||
}
|
||||
void WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg, oaknut::SystemReg src_reg) { WriteMrsHandler(module_dest, dest_reg, src_reg, c); }
|
||||
void WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg) { WriteMsrHandler(module_dest, src_reg, c); }
|
||||
void WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg) { WriteCntpctHandler(module_dest, dest_reg, c); }
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
|
|
@ -97,7 +97,6 @@ void PhysicalCore::RunThread(Kernel::KThread* thread) {
|
|||
}
|
||||
|
||||
// Determine why we stopped.
|
||||
const bool cache_invalidation = True(hr & Core::HaltReason::CacheInvalidation);
|
||||
const bool supervisor_call = True(hr & Core::HaltReason::SupervisorCall);
|
||||
const bool prefetch_abort = True(hr & Core::HaltReason::PrefetchAbort);
|
||||
const bool breakpoint = True(hr & Core::HaltReason::InstructionBreakpoint);
|
||||
|
|
@ -152,11 +151,6 @@ void PhysicalCore::RunThread(Kernel::KThread* thread) {
|
|||
return;
|
||||
}
|
||||
|
||||
if (cache_invalidation) {
|
||||
interface->HandleCacheOperation(thread);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Handle external interrupt sources.
|
||||
if (interrupt || m_is_single_core) {
|
||||
return;
|
||||
|
|
|
|||
|
|
@ -639,15 +639,6 @@ struct Memory::Impl {
|
|||
GetInteger(vaddr), []() {}, []() {});
|
||||
}
|
||||
|
||||
[[nodiscard]] bool IsRasterizerCached(const Common::ProcessAddress vaddr) const {
|
||||
const u64 addr = GetInteger(vaddr) & 0xffffffffffffULL;
|
||||
if (!AddressSpaceContains(*current_page_table, addr, 1)) {
|
||||
return false;
|
||||
}
|
||||
return current_page_table->entries[addr >> YUZU_PAGEBITS].ptr.Type() ==
|
||||
Common::PageType::RasterizerCachedMemory;
|
||||
}
|
||||
|
||||
/// @brief Reads a particular data type out of memory at the given virtual address.
|
||||
/// @param vaddr The virtual address to read the data type from.
|
||||
/// @tparam T The data type to read out of memory.
|
||||
|
|
@ -1045,10 +1036,6 @@ void Memory::RasterizerMarkRegionCached(Common::ProcessAddress vaddr, u64 size,
|
|||
impl->RasterizerMarkRegionCached(GetInteger(vaddr), size, cached);
|
||||
}
|
||||
|
||||
bool Memory::IsRasterizerCached(Common::ProcessAddress vaddr) const {
|
||||
return impl->IsRasterizerCached(vaddr);
|
||||
}
|
||||
|
||||
void Memory::MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug) {
|
||||
impl->MarkRegionDebug(GetInteger(vaddr), size, debug);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: 2014 Citra Emulator Project
|
||||
|
|
@ -493,8 +493,6 @@ public:
|
|||
|
||||
void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers);
|
||||
|
||||
[[nodiscard]] bool IsRasterizerCached(Common::ProcessAddress vaddr) const;
|
||||
|
||||
bool InvalidateNCE(Common::ProcessAddress vaddr, size_t size);
|
||||
|
||||
bool InvalidateSeparateHeap(void* fault_address);
|
||||
|
|
|
|||
|
|
@ -39,9 +39,7 @@ NumericType GetNumericType(TexturePixelFormat format) {
|
|||
if (!VideoCore::Surface::IsPixelFormatInteger(pixel_format)) {
|
||||
return NumericType::Float;
|
||||
}
|
||||
return VideoCore::Surface::IsPixelFormatSignedInteger(pixel_format)
|
||||
? NumericType::SignedInt
|
||||
: NumericType::UnsignedInt;
|
||||
return NumericType::UnsignedInt;
|
||||
}
|
||||
|
||||
IR::Opcode IndexedInstruction(const IR::Inst& inst) {
|
||||
|
|
@ -450,7 +448,9 @@ public:
|
|||
|
||||
u32 Add(const ImageBufferDescriptor& desc) {
|
||||
const u32 index{Add(image_buffer_descriptors, desc, [&desc](const auto& existing) {
|
||||
return desc.format == existing.format && desc.cbuf_index == existing.cbuf_index &&
|
||||
return desc.format == existing.format &&
|
||||
desc.numeric_type == existing.numeric_type &&
|
||||
desc.cbuf_index == existing.cbuf_index &&
|
||||
desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count &&
|
||||
desc.size_shift == existing.size_shift;
|
||||
})};
|
||||
|
|
@ -480,6 +480,7 @@ public:
|
|||
u32 Add(const ImageDescriptor& desc) {
|
||||
const u32 index{Add(image_descriptors, desc, [&desc](const auto& existing) {
|
||||
return desc.type == existing.type && desc.format == existing.format &&
|
||||
desc.numeric_type == existing.numeric_type &&
|
||||
desc.cbuf_index == existing.cbuf_index &&
|
||||
desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count &&
|
||||
desc.size_shift == existing.size_shift;
|
||||
|
|
|
|||
|
|
@ -323,7 +323,7 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const Device& device,
|
|||
case Maxwell::PrimitiveTopology::Lines:
|
||||
return VK_PRIMITIVE_TOPOLOGY_LINE_LIST;
|
||||
case Maxwell::PrimitiveTopology::LineLoop:
|
||||
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
|
||||
return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
|
||||
case Maxwell::PrimitiveTopology::LineStrip:
|
||||
return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
|
||||
case Maxwell::PrimitiveTopology::Triangles:
|
||||
|
|
|
|||
|
|
@ -203,6 +203,11 @@ public:
|
|||
}
|
||||
|
||||
void SyncWrites() override {
|
||||
if (!direct_sync_values.empty()) {
|
||||
runtime.template SyncValues<VideoCommon::SyncValuesStruct>(direct_sync_values);
|
||||
direct_sync_values.clear();
|
||||
}
|
||||
|
||||
if (sync_values_stash.empty()) {
|
||||
return;
|
||||
}
|
||||
|
|
@ -223,8 +228,54 @@ public:
|
|||
const auto driver_id = device.GetDriverID();
|
||||
if (driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
|
||||
driver_id == VK_DRIVER_ID_ARM_PROPRIETARY || driver_id == VK_DRIVER_ID_MESA_TURNIP) {
|
||||
pending_sync.clear();
|
||||
ApplyBanksWideOp<false>(
|
||||
pending_sync,
|
||||
[](SamplesQueryBank* bank, size_t start, size_t amount) { bank->Sync(start, amount); });
|
||||
|
||||
direct_sync_values.clear();
|
||||
direct_sync_values.reserve(pending_sync.size());
|
||||
|
||||
bool has_multi_queries = accumulation_since_last_sync;
|
||||
for (auto q : pending_sync) {
|
||||
auto* query = GetQuery(q);
|
||||
if (True(query->flags & VideoCommon::QueryFlagBits::IsRewritten)) {
|
||||
continue;
|
||||
}
|
||||
if (True(query->flags & VideoCommon::QueryFlagBits::IsInvalidated)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
u64 total = 0;
|
||||
ApplyBankOp(query, [&total](SamplesQueryBank* bank, size_t start, size_t amount) {
|
||||
const auto& results = bank->GetResults();
|
||||
for (size_t i = 0; i < amount; i++) {
|
||||
total += results[start + i];
|
||||
}
|
||||
});
|
||||
|
||||
total += GetAmendValue();
|
||||
query->value = total;
|
||||
query->flags |= VideoCommon::QueryFlagBits::IsHostSynced;
|
||||
query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced;
|
||||
direct_sync_values.emplace_back(VideoCommon::SyncValuesStruct{
|
||||
.address = query->guest_address,
|
||||
.value = total,
|
||||
.size = SamplesQueryBank::QUERY_SIZE,
|
||||
});
|
||||
|
||||
has_multi_queries |= query->size_slots > 1;
|
||||
}
|
||||
|
||||
ReplicateCurrentQueryIfNeeded();
|
||||
std::function<void()> func([this] { amend_value = accumulation_value; });
|
||||
rasterizer->SyncOperation(std::move(func));
|
||||
AbandonCurrentQuery();
|
||||
num_slots_used = 0;
|
||||
first_accumulation_checkpoint = (std::numeric_limits<size_t>::max)();
|
||||
last_accumulation_checkpoint = 0;
|
||||
accumulation_since_last_sync = has_multi_queries;
|
||||
sync_values_stash.clear();
|
||||
pending_sync.clear();
|
||||
return;
|
||||
}
|
||||
sync_values_stash.clear();
|
||||
|
|
@ -570,6 +621,7 @@ private:
|
|||
std::array<size_t, 32> resolve_table{};
|
||||
std::array<size_t, 32> intermediary_table{};
|
||||
vk::Buffer accumulation_buffer;
|
||||
std::vector<VideoCommon::SyncValuesStruct> direct_sync_values;
|
||||
std::deque<std::vector<HostSyncValues>> sync_values_stash;
|
||||
std::vector<size_t> resolve_buffers;
|
||||
|
||||
|
|
@ -1016,10 +1068,52 @@ public:
|
|||
u64 stride{};
|
||||
DAddr dependant_address{};
|
||||
Maxwell3D::Regs::PrimitiveTopology topology{Maxwell3D::Regs::PrimitiveTopology::Points};
|
||||
u32 patch_vertices{1};
|
||||
size_t dependant_index{};
|
||||
bool dependant_manage{};
|
||||
};
|
||||
|
||||
[[nodiscard]] constexpr u64 SaturatingSub(u64 value, u64 amount) {
|
||||
return value > amount ? value - amount : 0;
|
||||
}
|
||||
|
||||
[[nodiscard]] constexpr u64 PrimitiveCountFromVertices(
|
||||
Maxwell3D::Regs::PrimitiveTopology topology, u64 num_vertices, u32 patch_vertices) {
|
||||
switch (topology) {
|
||||
case Maxwell3D::Regs::PrimitiveTopology::Points:
|
||||
return num_vertices;
|
||||
case Maxwell3D::Regs::PrimitiveTopology::Lines:
|
||||
return num_vertices / 2;
|
||||
case Maxwell3D::Regs::PrimitiveTopology::LineLoop:
|
||||
return num_vertices >= 2 ? num_vertices : 0;
|
||||
case Maxwell3D::Regs::PrimitiveTopology::LineStrip:
|
||||
return SaturatingSub(num_vertices, 1);
|
||||
case Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency:
|
||||
return num_vertices / 4;
|
||||
case Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency:
|
||||
return SaturatingSub(num_vertices, 3);
|
||||
case Maxwell3D::Regs::PrimitiveTopology::Triangles:
|
||||
return num_vertices / 3;
|
||||
case Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency:
|
||||
return num_vertices / 6;
|
||||
case Maxwell3D::Regs::PrimitiveTopology::TriangleFan:
|
||||
case Maxwell3D::Regs::PrimitiveTopology::TriangleStrip:
|
||||
return SaturatingSub(num_vertices, 2);
|
||||
case Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency:
|
||||
return num_vertices >= 6 ? (num_vertices - 4) / 2 : 0;
|
||||
case Maxwell3D::Regs::PrimitiveTopology::Quads:
|
||||
return num_vertices / 6;
|
||||
case Maxwell3D::Regs::PrimitiveTopology::QuadStrip:
|
||||
return num_vertices / 6;
|
||||
case Maxwell3D::Regs::PrimitiveTopology::Patches:
|
||||
return patch_vertices != 0 ? num_vertices / patch_vertices : 0;
|
||||
case Maxwell3D::Regs::PrimitiveTopology::Polygon:
|
||||
return num_vertices != 0 ? 1 : 0;
|
||||
default:
|
||||
return num_vertices;
|
||||
}
|
||||
}
|
||||
|
||||
class PrimitivesSucceededStreamer : public VideoCommon::SimpleStreamer<PrimitivesQueryBase> {
|
||||
public:
|
||||
explicit PrimitivesSucceededStreamer(size_t id_, QueryCacheRuntime& runtime_,
|
||||
|
|
@ -1048,7 +1142,10 @@ public:
|
|||
const size_t subreport = static_cast<size_t>(*subreport_);
|
||||
auto dependant_address_opt = tfb_streamer.GetLastQueryStream(subreport);
|
||||
bool must_manage_dependance = false;
|
||||
new_query->topology = tfb_streamer.GetOutputTopology();
|
||||
runtime.View3DRegs([new_query, this](Maxwell3D& maxwell3d) {
|
||||
new_query->topology = tfb_streamer.GetOutputTopology();
|
||||
new_query->patch_vertices = std::max<u32>(maxwell3d.regs.patch_vertices, 1);
|
||||
});
|
||||
if (dependant_address_opt) {
|
||||
auto [dep_address, stride] = *dependant_address_opt;
|
||||
new_query->dependant_address = dep_address;
|
||||
|
|
@ -1131,32 +1228,8 @@ public:
|
|||
num_vertices = static_cast<u64>(result) / safe_stride;
|
||||
}
|
||||
}
|
||||
query->value = [&]() -> u64 {
|
||||
switch (query->topology) {
|
||||
case Maxwell3D::Regs::PrimitiveTopology::Points:
|
||||
return num_vertices;
|
||||
case Maxwell3D::Regs::PrimitiveTopology::Lines:
|
||||
return num_vertices / 2;
|
||||
case Maxwell3D::Regs::PrimitiveTopology::LineLoop:
|
||||
return (num_vertices / 2) + 1;
|
||||
case Maxwell3D::Regs::PrimitiveTopology::LineStrip:
|
||||
return num_vertices - 1;
|
||||
case Maxwell3D::Regs::PrimitiveTopology::Patches:
|
||||
case Maxwell3D::Regs::PrimitiveTopology::Triangles:
|
||||
case Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency:
|
||||
return num_vertices / 3;
|
||||
case Maxwell3D::Regs::PrimitiveTopology::TriangleFan:
|
||||
case Maxwell3D::Regs::PrimitiveTopology::TriangleStrip:
|
||||
case Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency:
|
||||
return num_vertices - 2;
|
||||
case Maxwell3D::Regs::PrimitiveTopology::Quads:
|
||||
return num_vertices / 4;
|
||||
case Maxwell3D::Regs::PrimitiveTopology::Polygon:
|
||||
return 1U;
|
||||
default:
|
||||
return num_vertices;
|
||||
}
|
||||
}();
|
||||
query->value =
|
||||
PrimitiveCountFromVertices(query->topology, num_vertices, query->patch_vertices);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1423,13 +1496,6 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku
|
|||
return false;
|
||||
}
|
||||
|
||||
auto driver_id = impl->device.GetDriverID();
|
||||
const bool is_gpu_high = Settings::IsGPULevelHigh();
|
||||
|
||||
if ((!is_gpu_high && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) || driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || driver_id == VK_DRIVER_ID_ARM_PROPRIETARY || driver_id == VK_DRIVER_ID_MESA_TURNIP) {
|
||||
return true;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < 2; i++) {
|
||||
is_null[i] = !is_in_ac[i] && check_value(objects[i]->address);
|
||||
}
|
||||
|
|
@ -1442,12 +1508,22 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku
|
|||
}
|
||||
}
|
||||
|
||||
if (!is_gpu_high) {
|
||||
return true;
|
||||
auto driver_id = impl->device.GetDriverID();
|
||||
const bool is_gpu_high = Settings::IsGPULevelHigh();
|
||||
const bool driver_blocks_pair_resolve =
|
||||
((!is_gpu_high && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) ||
|
||||
driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
|
||||
driver_id == VK_DRIVER_ID_ARM_PROPRIETARY ||
|
||||
driver_id == VK_DRIVER_ID_MESA_TURNIP);
|
||||
|
||||
if (driver_blocks_pair_resolve || !is_gpu_high) {
|
||||
EndHostConditionalRendering();
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!is_in_bc[0] && !is_in_bc[1]) {
|
||||
return true;
|
||||
EndHostConditionalRendering();
|
||||
return false;
|
||||
}
|
||||
HostConditionalRenderingCompareBCImpl(object_1.address, equal_check);
|
||||
return true;
|
||||
|
|
|
|||
|
|
@ -6,6 +6,8 @@
|
|||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cstring>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
|
||||
|
|
@ -24,6 +26,7 @@
|
|||
#include "video_core/engines/kepler_compute.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/host1x/gpu_device_memory_manager.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/renderer_vulkan/blit_image.h"
|
||||
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
|
||||
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
|
||||
|
|
@ -61,6 +64,58 @@ struct DrawParams {
|
|||
bool is_indexed;
|
||||
};
|
||||
|
||||
// True when the guest requested a line loop, a topology Vulkan does not
// provide natively; callers emulate it with a line strip plus a manually
// drawn closing segment.
[[nodiscard]] bool IsLineLoop(Maxwell::PrimitiveTopology topology) {
    const bool is_loop = topology == Maxwell::PrimitiveTopology::LineLoop;
    return is_loop;
}
|
||||
|
||||
// Returns the primitive-restart sentinel for an index format: the maximum
// representable value of that format's element type. Unknown formats assert
// and fall back to the 32-bit sentinel.
[[nodiscard]] u32 PrimitiveRestartIndex(Maxwell::IndexFormat format) {
    if (format == Maxwell::IndexFormat::UnsignedByte) {
        return std::numeric_limits<u8>::max();
    }
    if (format == Maxwell::IndexFormat::UnsignedShort) {
        return std::numeric_limits<u16>::max();
    }
    if (format == Maxwell::IndexFormat::UnsignedInt) {
        return std::numeric_limits<u32>::max();
    }
    ASSERT(false);
    return std::numeric_limits<u32>::max();
}
|
||||
|
||||
template <typename T>
|
||||
bool ReadGuestObject(Tegra::MemoryManager* gpu_memory, GPUVAddr address, T& value) {
|
||||
if (gpu_memory == nullptr) {
|
||||
return false;
|
||||
}
|
||||
gpu_memory->ReadBlockUnsafe(address, &value, sizeof(T));
|
||||
return true;
|
||||
}
|
||||
|
||||
// Reads one index element of the given format from guest memory and widens it
// to u32 in `value`. Returns false when the memory manager is missing or the
// format is unrecognized (which also asserts). On failure `value` is left
// untouched.
bool ReadGuestIndex(Tegra::MemoryManager* gpu_memory, GPUVAddr address, Maxwell::IndexFormat format,
                    u32& value) {
    // Read into a temporary of the format's element type, then widen.
    const auto read_widened = [&](auto raw) {
        if (!ReadGuestObject(gpu_memory, address, raw)) {
            return false;
        }
        value = static_cast<u32>(raw);
        return true;
    };
    switch (format) {
    case Maxwell::IndexFormat::UnsignedByte:
        return read_widened(u8{});
    case Maxwell::IndexFormat::UnsignedShort:
        return read_widened(u16{});
    case Maxwell::IndexFormat::UnsignedInt:
        return read_widened(u32{});
    }
    ASSERT(false);
    return false;
}
|
||||
|
||||
VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index, float scale) {
|
||||
const auto& src = regs.viewport_transform[index];
|
||||
const auto conv = [scale](float value) {
|
||||
|
|
@ -343,6 +398,21 @@ void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) {
|
|||
GPU::Logging::GPULogger::GetInstance().LogVulkanCall(
|
||||
is_indexed ? "vkCmdDrawIndexed" : "vkCmdDraw", params, VK_SUCCESS);
|
||||
}
|
||||
|
||||
if (IsLineLoop(draw_state.topology) && draw_params.num_vertices >= 2) {
|
||||
if (maxwell3d->regs.transform_feedback_enabled != 0) {
|
||||
query_cache.CounterEnable(VideoCommon::QueryType::StreamingByteCount, false);
|
||||
}
|
||||
scheduler.Record([](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.SetPrimitiveTopologyEXT(VK_PRIMITIVE_TOPOLOGY_LINE_LIST);
|
||||
});
|
||||
DrawLineLoopClosure(draw_state, draw_params.base_instance, draw_params.num_instances,
|
||||
static_cast<s32>(draw_params.base_vertex),
|
||||
draw_params.num_vertices, draw_params.is_indexed);
|
||||
scheduler.Record([](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.SetPrimitiveTopologyEXT(VK_PRIMITIVE_TOPOLOGY_LINE_STRIP);
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -350,6 +420,7 @@ void RasterizerVulkan::DrawIndirect() {
|
|||
const auto& params = maxwell3d->draw_manager->GetIndirectParams();
|
||||
buffer_cache.SetDrawIndirect(¶ms);
|
||||
PrepareDraw(params.is_indexed, [this, ¶ms] {
|
||||
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
|
||||
const auto indirect_buffer = buffer_cache.GetDrawIndirectBuffer();
|
||||
const auto& buffer = indirect_buffer.first;
|
||||
const auto& offset = indirect_buffer.second;
|
||||
|
|
@ -385,6 +456,9 @@ void RasterizerVulkan::DrawIndirect() {
|
|||
static_cast<u32>(params.stride));
|
||||
}
|
||||
});
|
||||
if (IsLineLoop(draw_state.topology)) {
|
||||
DrawIndirectLineLoopClosures(draw_state, params);
|
||||
}
|
||||
return;
|
||||
}
|
||||
scheduler.Record([buffer_obj = buffer->Handle(), offset, params](vk::CommandBuffer cmdbuf) {
|
||||
|
|
@ -407,10 +481,165 @@ void RasterizerVulkan::DrawIndirect() {
|
|||
params.is_indexed ? "vkCmdDrawIndexedIndirect" : "vkCmdDrawIndirect",
|
||||
log_params, VK_SUCCESS);
|
||||
}
|
||||
|
||||
if (IsLineLoop(draw_state.topology)) {
|
||||
DrawIndirectLineLoopClosures(draw_state, params);
|
||||
}
|
||||
});
|
||||
buffer_cache.SetDrawIndirect(nullptr);
|
||||
}
|
||||
|
||||
// Emits the closing segment of a line loop: one indexed draw of two vertices
// connecting the last vertex of the loop back to the first. The caller is
// responsible for having switched the pipeline topology to LINE_LIST before
// this and restoring it afterwards.
//
// Returns true when a closure draw was recorded; false when the draw does not
// need (or cannot safely get) a closure: wrong topology, empty draw, an
// out-of-range inline index buffer, a failed guest-memory read, or a closure
// endpoint that collides with the primitive-restart sentinel.
bool RasterizerVulkan::DrawLineLoopClosure(const MaxwellDrawState& draw_state, u32 base_instance,
                                           u32 num_instances, s32 base_vertex,
                                           u32 num_vertices, bool is_indexed) {
    if (!IsLineLoop(draw_state.topology) || num_instances == 0 || num_vertices < 2) {
        return false;
    }

    // closure_indices[0] = last vertex of the loop, [1] = first vertex; drawn
    // below as a two-index LINE_LIST segment.
    std::array<u32, 2> closure_indices{};
    if (!is_indexed) {
        // Non-indexed draw: the loop runs over consecutive vertices, so the
        // closing segment is simply (num_vertices - 1) -> 0. base_vertex is
        // applied by the vertexOffset of the indexed draw below.
        closure_indices = {num_vertices - 1, 0};
    } else if (!draw_state.inline_index_draw_indexes.empty()) {
        // Inline index buffer. NOTE(review): the byte offsets assume each
        // inline index is stored as a u32 — confirm against the draw manager.
        const size_t last_offset = (static_cast<size_t>(num_vertices) - 1) * sizeof(u32);
        if (draw_state.inline_index_draw_indexes.size() < last_offset + sizeof(u32)) {
            // Inline buffer is too short for the requested vertex count.
            return false;
        }
        std::memcpy(&closure_indices[0], draw_state.inline_index_draw_indexes.data() + last_offset,
                    sizeof(u32));
        std::memcpy(&closure_indices[1], draw_state.inline_index_draw_indexes.data(),
                    sizeof(u32));
    } else {
        // Regular guest index buffer: read the first and last indices of the
        // range [first, first + num_vertices) directly from GPU memory.
        const auto index_format = draw_state.index_buffer.format;
        const size_t index_size = draw_state.index_buffer.FormatSizeInBytes();
        const GPUVAddr first_address =
            draw_state.index_buffer.StartAddress() +
            static_cast<GPUVAddr>(draw_state.index_buffer.first) * index_size;
        const GPUVAddr last_address =
            first_address + static_cast<GPUVAddr>(num_vertices - 1) * index_size;
        if (!ReadGuestIndex(gpu_memory, last_address, index_format, closure_indices[0]) ||
            !ReadGuestIndex(gpu_memory, first_address, index_format, closure_indices[1])) {
            return false;
        }
        if (maxwell3d->regs.primitive_restart.enabled != 0) {
            // Either endpoint being the restart sentinel would not form a real
            // segment, so skip the closure entirely.
            const u32 restart_index = PrimitiveRestartIndex(index_format);
            if (closure_indices[0] == restart_index || closure_indices[1] == restart_index) {
                return false;
            }
        }
    }

    // Stage the two closure indices in an upload buffer and record: bind it as
    // a u32 index buffer, then draw the single closing segment. The bind
    // clobbers the previously bound index buffer for subsequent draws — the
    // regular draw path is expected to rebind its own state.
    const auto upload = staging_pool.Request(sizeof(closure_indices), MemoryUsage::Upload);
    std::memcpy(upload.mapped_span.data(), closure_indices.data(), sizeof(closure_indices));

    scheduler.Record([buffer = upload.buffer, offset = upload.offset](vk::CommandBuffer cmdbuf) {
        cmdbuf.BindIndexBuffer(buffer, offset, VK_INDEX_TYPE_UINT32);
    });
    scheduler.Record([base_instance, num_instances, base_vertex](vk::CommandBuffer cmdbuf) {
        cmdbuf.DrawIndexed(2, num_instances, 0, base_vertex, base_instance);
    });
    return true;
}
|
||||
|
||||
// Emulates line-loop closure for indirect draws: reads each indirect command
// back from guest memory on the CPU and records one extra two-vertex segment
// per command, connecting its last vertex to its first.
//
// Topology is switched to LINE_LIST only once the first closure is actually
// emitted, and restored to LINE_STRIP at the end. Byte-count (transform
// feedback) indirect draws are ignored. Commands that fail validation
// (too few vertices, zero instances, unreadable indices, or a restart-index
// collision) are skipped individually.
void RasterizerVulkan::DrawIndirectLineLoopClosures(
    const MaxwellDrawState& draw_state, const Tegra::Engines::DrawManager::IndirectParams& params) {
    if (!IsLineLoop(draw_state.topology) || params.is_byte_count) {
        return;
    }

    // Resolve the effective draw count; with include_count the GPU-side
    // counter is read back and clamped to max_draw_counts.
    u32 draw_count = static_cast<u32>(params.max_draw_counts);
    if (params.include_count) {
        gpu_memory->ReadBlockUnsafe(params.count_start_address, &draw_count, sizeof(draw_count));
        draw_count = std::min(draw_count, static_cast<u32>(params.max_draw_counts));
    }
    if (draw_count == 0) {
        return;
    }

    // Set to true once the LINE_LIST topology switch has been recorded, so it
    // is emitted at most once and the restore below knows whether to run.
    bool emitted_closure = false;
    if (params.is_indexed) {
        // A zero stride means tightly packed commands.
        const u32 command_stride =
            params.stride != 0 ? static_cast<u32>(params.stride) : sizeof(VkDrawIndexedIndirectCommand);
        for (u32 i = 0; i < draw_count; ++i) {
            // Read the guest's indirect command back to the CPU.
            VkDrawIndexedIndirectCommand command{};
            gpu_memory->ReadBlockUnsafe(params.indirect_start_address +
                                            static_cast<GPUVAddr>(i) * command_stride,
                                        &command, sizeof(command));
            if (command.indexCount < 2 || command.instanceCount == 0) {
                continue;
            }

            // Fetch the first and last index of this command's range from the
            // guest index buffer; [0] = last, [1] = first.
            std::array<u32, 2> closure_indices{};
            const auto index_format = draw_state.index_buffer.format;
            const size_t index_size = draw_state.index_buffer.FormatSizeInBytes();
            const GPUVAddr first_address = draw_state.index_buffer.StartAddress() +
                                           static_cast<GPUVAddr>(command.firstIndex) * index_size;
            const GPUVAddr last_address =
                first_address + static_cast<GPUVAddr>(command.indexCount - 1) * index_size;
            if (!ReadGuestIndex(gpu_memory, last_address, index_format, closure_indices[0]) ||
                !ReadGuestIndex(gpu_memory, first_address, index_format, closure_indices[1])) {
                continue;
            }
            if (maxwell3d->regs.primitive_restart.enabled != 0) {
                // A restart sentinel at either endpoint means there is no real
                // segment to close.
                const u32 restart_index = PrimitiveRestartIndex(index_format);
                if (closure_indices[0] == restart_index || closure_indices[1] == restart_index) {
                    continue;
                }
            }

            if (!emitted_closure) {
                if (maxwell3d->regs.transform_feedback_enabled != 0) {
                    // Keep the streamed-byte counter from counting the
                    // emulated closure geometry.
                    query_cache.CounterEnable(VideoCommon::QueryType::StreamingByteCount, false);
                }
                scheduler.Record([](vk::CommandBuffer cmdbuf) {
                    cmdbuf.SetPrimitiveTopologyEXT(VK_PRIMITIVE_TOPOLOGY_LINE_LIST);
                });
                emitted_closure = true;
            }

            // Stage the two closure indices, bind them as a u32 index buffer,
            // and draw one segment with the command's own instance/offset
            // parameters.
            const auto upload = staging_pool.Request(sizeof(closure_indices), MemoryUsage::Upload);
            std::memcpy(upload.mapped_span.data(), closure_indices.data(), sizeof(closure_indices));
            scheduler.Record(
                [buffer = upload.buffer, offset = upload.offset](vk::CommandBuffer cmdbuf) {
                    cmdbuf.BindIndexBuffer(buffer, offset, VK_INDEX_TYPE_UINT32);
                });
            scheduler.Record([command](vk::CommandBuffer cmdbuf) {
                cmdbuf.DrawIndexed(2, command.instanceCount, 0, command.vertexOffset,
                                   command.firstInstance);
            });
        }
    } else {
        // Non-indexed indirect draws; a zero stride means tightly packed.
        const u32 command_stride =
            params.stride != 0 ? static_cast<u32>(params.stride) : sizeof(VkDrawIndirectCommand);
        for (u32 i = 0; i < draw_count; ++i) {
            VkDrawIndirectCommand command{};
            gpu_memory->ReadBlockUnsafe(params.indirect_start_address +
                                            static_cast<GPUVAddr>(i) * command_stride,
                                        &command, sizeof(command));
            if (command.vertexCount < 2 || command.instanceCount == 0) {
                continue;
            }
            if (!emitted_closure) {
                if (maxwell3d->regs.transform_feedback_enabled != 0) {
                    // See the indexed branch: exclude closure geometry from
                    // the streamed-byte counter.
                    query_cache.CounterEnable(VideoCommon::QueryType::StreamingByteCount, false);
                }
                scheduler.Record([](vk::CommandBuffer cmdbuf) {
                    cmdbuf.SetPrimitiveTopologyEXT(VK_PRIMITIVE_TOPOLOGY_LINE_LIST);
                });
                emitted_closure = true;
            }
            // Reuse the direct-draw closure helper; is_indexed=false makes it
            // connect vertex (vertexCount - 1) back to vertex 0.
            DrawLineLoopClosure(draw_state, command.firstInstance, command.instanceCount,
                                static_cast<s32>(command.firstVertex), command.vertexCount,
                                false);
        }
    }

    if (emitted_closure) {
        // Restore the LINE_STRIP topology the regular line-loop path uses.
        scheduler.Record([](vk::CommandBuffer cmdbuf) {
            cmdbuf.SetPrimitiveTopologyEXT(VK_PRIMITIVE_TOPOLOGY_LINE_STRIP);
        });
    }
}
|
||||
|
||||
void RasterizerVulkan::DrawTexture() {
|
||||
|
||||
SCOPE_EXIT {
|
||||
|
|
|
|||
|
|
@ -155,6 +155,13 @@ private:
|
|||
template <typename Func>
|
||||
void PrepareDraw(bool is_indexed, Func&&);
|
||||
|
||||
bool DrawLineLoopClosure(const Tegra::Engines::DrawManager::State& draw_state,
|
||||
u32 base_instance,
|
||||
u32 num_instances, s32 base_vertex, u32 num_vertices,
|
||||
bool is_indexed);
|
||||
void DrawIndirectLineLoopClosures(const Tegra::Engines::DrawManager::State& draw_state,
|
||||
const Tegra::Engines::DrawManager::IndirectParams& params);
|
||||
|
||||
void FlushWork();
|
||||
|
||||
void UpdateDynamicStates();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue