Compare commits

...

4 commits

12 changed files with 226 additions and 10 deletions

View file

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: 2014 Citra Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@ -31,6 +34,7 @@ using WatchpointArray = std::array<Kernel::DebugWatchpoint, Core::Hardware::NUM_
// NOTE: these values match the HaltReason enum in Dynarmic
enum class HaltReason : u64 {
StepThread = 0x00000001,
CacheInvalidation = 0x00000002,
DataAbort = 0x00000004,
BreakLoop = 0x02000000,
SupervisorCall = 0x04000000,
@ -39,6 +43,14 @@ enum class HaltReason : u64 {
};
DECLARE_ENUM_FLAG_OPERATORS(HaltReason);
/// Kind of guest cache maintenance request handed from patched guest code to the host.
/// The numeric value is what gets written into the 32-bit `cache_operation` slot of the
/// guest context, so `None` must stay zero to match that field's value-initialised state.
enum class CacheOperationKind : u32 {
    /// No request is pending.
    None = 0,
    /// Invalidate a line of the data cache.
    DataCacheInvalidate = 1,
    /// Write a line of the data cache back (clean).
    DataCacheStore = 2,
    /// Write back and invalidate a line of the data cache.
    DataCacheFlush = 3,
    /// Invalidate a line of the instruction cache.
    InstructionCacheInvalidate = 4,
};
enum class Architecture {
AArch64,
AArch32,
@ -85,6 +97,9 @@ public:
virtual void GetSvcArguments(std::span<uint64_t, 8> args) const = 0;
virtual void SetSvcArguments(std::span<const uint64_t, 8> args) = 0;
virtual u32 GetSvcNumber() const = 0;
/// Gives the CPU back end a chance to service a pending guest cache maintenance request.
/// This default implementation does nothing and reports that no request was handled;
/// back ends that record such requests override it.
/// @param thread Thread that was running when execution halted (unused here).
/// @return Always false in this default implementation.
virtual bool HandleCacheOperation(Kernel::KThread* thread) {
return false;
}
void SetWatchpointArray(const WatchpointArray* watchpoints) {
m_watchpoints = watchpoints;

View file

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
@ -11,6 +11,7 @@
namespace Core {
constexpr Dynarmic::HaltReason StepThread = Dynarmic::HaltReason::Step;
constexpr Dynarmic::HaltReason CacheInvalidation = Dynarmic::HaltReason::CacheInvalidation;
constexpr Dynarmic::HaltReason DataAbort = Dynarmic::HaltReason::MemoryAbort;
constexpr Dynarmic::HaltReason BreakLoop = Dynarmic::HaltReason::UserDefined2;
constexpr Dynarmic::HaltReason SupervisorCall = Dynarmic::HaltReason::UserDefined3;
@ -19,6 +20,7 @@ constexpr Dynarmic::HaltReason PrefetchAbort = Dynarmic::HaltReason::UserDefined
constexpr HaltReason TranslateHaltReason(Dynarmic::HaltReason hr) {
static_assert(u64(HaltReason::StepThread) == u64(StepThread));
static_assert(u64(HaltReason::CacheInvalidation) == u64(CacheInvalidation));
static_assert(u64(HaltReason::DataAbort) == u64(DataAbort));
static_assert(u64(HaltReason::BreakLoop) == u64(BreakLoop));
static_assert(u64(HaltReason::SupervisorCall) == u64(SupervisorCall));

View file

@ -43,6 +43,7 @@ fpsimd_context* GetFloatingPointState(mcontext_t& host_ctx) {
using namespace Common::Literals;
constexpr u32 StackSize = 128_KiB;
constexpr u64 CacheLineSize = 64;
constexpr u64 SplitPageAccessWindow = 64;
constexpr size_t MaxPreciseAccessPages = 256;
constexpr u8 MaxPreciseAccessPageWeight = 4;
@ -199,8 +200,10 @@ bool ArmNce::HandleGuestAccessFault(GuestContext* guest_ctx, void* raw_info, voi
const Common::ProcessAddress addr = fault_addr & ~Memory::YUZU_PAGEMASK;
const u64 page_offset = fault_addr & Memory::YUZU_PAGEMASK;
auto& memory = parent->m_running_thread->GetOwnerProcess()->GetMemory();
const bool rasterizer_cached = memory.IsRasterizerCached(addr);
const bool prefer_precise_channel = ShouldUsePreciseAccessChannel(guest_ctx, fault_addr) ||
parent->IsPreciseAccessPage(fault_addr);
parent->IsPreciseAccessPage(fault_addr) ||
rasterizer_cached;
if (prefer_precise_channel) {
if (auto next_pc = MatchAndExecuteOneInstruction(memory, &host_ctx, fpctx); next_pc) {
@ -376,6 +379,41 @@ void ArmNce::SetSvcArguments(std::span<const uint64_t, 8> args) {
}
}
/// Services the cache maintenance request recorded in the guest context, if there is one.
///
/// The request kind and target address are read from `m_guest_ctx`. The address is aligned
/// down to a `CacheLineSize` boundary and exactly one line is operated on. Once the request
/// has been dispatched, the pending slot is reset to `None` and the address is zeroed.
///
/// @param thread Thread whose owner process supplies the memory interface to operate on.
/// @return false when no request was pending, true otherwise.
bool ArmNce::HandleCacheOperation(Kernel::KThread* thread) {
    const auto pending = static_cast<CacheOperationKind>(m_guest_ctx.cache_operation);
    if (pending == CacheOperationKind::None) {
        return false;
    }

    // Round the guest-supplied address down to the start of its cache line.
    const u64 line_base = m_guest_ctx.cache_operation_address & ~(CacheLineSize - 1);
    auto& process_memory = thread->GetOwnerProcess()->GetMemory();

    // The results of the memory operations are deliberately ignored (best effort).
    if (pending == CacheOperationKind::DataCacheInvalidate) {
        static_cast<void>(process_memory.InvalidateDataCache(line_base, CacheLineSize));
    } else if (pending == CacheOperationKind::DataCacheStore) {
        static_cast<void>(process_memory.StoreDataCache(line_base, CacheLineSize));
    } else if (pending == CacheOperationKind::DataCacheFlush) {
        static_cast<void>(process_memory.FlushDataCache(line_base, CacheLineSize));
    } else if (pending == CacheOperationKind::InstructionCacheInvalidate) {
        InvalidateCacheRange(line_base, CacheLineSize);
    }
    // Any other value performs no memory operation but is still consumed below.

    // Mark the request as consumed.
    m_guest_ctx.cache_operation_address = 0;
    m_guest_ctx.cache_operation = static_cast<u32>(CacheOperationKind::None);
    return true;
}
ArmNce::ArmNce(System& system, bool uses_wall_clock, std::size_t core_index)
: ArmInterface{uses_wall_clock}, m_system{system}, m_core_index{core_index} {
m_guest_ctx.system = &m_system;

View file

@ -41,6 +41,7 @@ public:
void GetSvcArguments(std::span<uint64_t, 8> args) const override;
void SetSvcArguments(std::span<const uint64_t, 8> args) override;
u32 GetSvcNumber() const override;
bool HandleCacheOperation(Kernel::KThread* thread) override;
void SignalInterrupt(Kernel::KThread* thread) override;
void ClearInstructionCache() override;

View file

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@ -38,6 +41,9 @@ struct GuestContext {
u32 svc{};
System* system{};
ArmNce* parent{};
u32 cache_operation{};
u32 cache_operation_reserved{};
u64 cache_operation_address{};
};
// Verify assembly offsets.

View file

@ -765,8 +765,8 @@ std::optional<u64> MatchAndExecuteOneInstruction(Core::Memory::Memory& memory, m
fpsimd_context* fpsimd_context) {
std::span<u64, 31> regs(reinterpret_cast<u64*>(context->regs), 31);
std::span<u128, 32> vregs(reinterpret_cast<u128*>(fpsimd_context->vregs), 32);
u64& sp = *reinterpret_cast<u64*>(&context->sp);
const u64& pc = *reinterpret_cast<u64*>(&context->pc);
u64 sp = context->sp;
const u64 pc = context->pc;
InterpreterVisitor visitor(memory, regs, vregs, sp, pc);
u32 instruction = memory.Read32(pc);
@ -774,6 +774,7 @@ std::optional<u64> MatchAndExecuteOneInstruction(Core::Memory::Memory& memory, m
auto decoder = Dynarmic::A64::Decode<VisitorBase>(instruction);
was_executed = decoder.get().call(visitor, instruction);
context->sp = sp;
return was_executed ? std::optional<u64>(pc + 4) : std::nullopt;
}

View file

@ -26,6 +26,26 @@ using NativeExecutionParameters = Kernel::KThread::NativeExecutionParameters;
constexpr size_t MaxRelativeBranch = 128_MiB;
constexpr u32 ModuleCodeIndex = 0x24 / sizeof(u32);
namespace {

/// Classifies an AArch64 instruction word as one of the cache maintenance operations that
/// the patcher intercepts.
///
/// The low five bits are masked off before matching, so any register operand is accepted;
/// the caller extracts that register itself.
///
/// @param inst Raw 32-bit instruction word.
/// @return The matching operation kind, or std::nullopt when the word is not one of the
///         recognised encodings.
[[nodiscard]] std::optional<CacheOperationKind> DecodeCacheOperation(u32 inst) {
    const u32 opcode = inst & ~u32{0x1F};

    if (opcode == 0xD5087620) {
        return CacheOperationKind::DataCacheInvalidate;
    }
    // Two distinct encodings are both treated as a data cache store.
    if (opcode == 0xD50B7A20 || opcode == 0xD50B7B20) {
        return CacheOperationKind::DataCacheStore;
    }
    if (opcode == 0xD50B7E20) {
        return CacheOperationKind::DataCacheFlush;
    }
    if (opcode == 0xD50B7520) {
        return CacheOperationKind::InstructionCacheInvalidate;
    }
    return std::nullopt;
}

} // namespace
Patcher::Patcher() : c(m_patch_instructions), c_pre(m_patch_instructions_pre) {
// The first word of the patch section is always a branch to the first instruction of the
// module.
@ -160,6 +180,20 @@ bool Patcher::PatchText(std::span<const u8> program_image, const Kernel::CodeSet
continue;
}
if (auto cache_op = DecodeCacheOperation(inst); cache_op.has_value()) {
bool pre_buffer = false;
auto ret = AddRelocations(pre_buffer);
const auto src_reg = oaknut::XReg{static_cast<int>(inst & 0x1F)};
if (pre_buffer) {
WriteCacheOperationTrampoline(ret, *cache_op, src_reg, c_pre, m_save_context_pre,
m_load_context_pre);
} else {
WriteCacheOperationTrampoline(ret, *cache_op, src_reg, c, m_save_context,
m_load_context);
}
continue;
}
if (auto exclusive = Exclusive{inst}; exclusive.Verify()) {
curr_patch->m_exclusives.push_back(i);
}
@ -542,6 +576,96 @@ void Patcher::WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id, oaknut
this->WriteModulePc(module_dest);
}
/// Emits the trampoline that replaces a guest cache maintenance instruction.
///
/// The generated code saves the guest state, records the operation kind, its target address
/// and the resume PC in the GuestContext, sets HaltReason::CacheInvalidation, and returns to
/// the host. A second entry point, registered in the trampoline list, restores the guest
/// state and branches back into the module.
///
/// @param module_dest Label describing where in the module execution resumes.
/// @param op_kind     Operation to record in GuestContext::cache_operation.
/// @param src_reg     Guest register holding the address operand of the instruction.
/// @param cg          Code generator to emit into (either `c` or `c_pre`).
/// @param save_ctx    Label of the save-context routine matching `cg`.
/// @param load_ctx    Label of the load-context routine matching `cg`.
void Patcher::WriteCacheOperationTrampoline(ModuleDestLabel module_dest,
                                            CacheOperationKind op_kind, oaknut::XReg src_reg,
                                            oaknut::VectorCodeGenerator& cg,
                                            oaknut::Label& save_ctx,
                                            oaknut::Label& load_ctx) {
    const bool is_pre = (&cg == &c_pre);

    this->LockContext(cg);

    // BL overwrites X30, so the guest value is spilled around the call to the save routine.
    cg.STR(X30, SP, PRE_INDEXED, -16);
    cg.BL(save_ctx);
    cg.LDR(X30, SP, POST_INDEXED, 16);

    oaknut::Label pc_after_cache_op;

    // Record the guest address operand FIRST, while src_reg still holds the guest value.
    // X1 and X2 are used as scratch below, so if the operand lives in X1 the context pointer
    // is temporarily formed in X2 instead; storing before X2 is reused covers the X2 case.
    const oaknut::XReg ctx_reg = src_reg.index() == 1 ? X2 : X1;
    cg.MRS(ctx_reg, oaknut::SystemReg::TPIDR_EL0);
    cg.LDR(ctx_reg, ctx_reg, offsetof(NativeExecutionParameters, native_context));
    cg.STR(src_reg, ctx_reg, offsetof(GuestContext, cache_operation_address));
    if (ctx_reg.index() != 1) {
        // The remainder of the sequence expects the context pointer in X1.
        cg.MOV(X1, ctx_reg);
    }

    // Store the resume PC (emitted as a literal at pc_after_cache_op) into the context.
    cg.LDR(X2, pc_after_cache_op);
    cg.STR(X2, X1, offsetof(GuestContext, pc));

    // Record which cache operation was requested.
    cg.MOV(X2, static_cast<u32>(op_kind));
    cg.STR(W2, X1, offsetof(GuestContext, cache_operation));

    // Atomically fetch and clear esr_el1, then add the cache-invalidation halt reason.
    // NOTE(review): X0 is presumably the halt-reason value returned to the host caller.
    static_assert(std::is_same_v<std::underlying_type_t<HaltReason>, u64>);
    oaknut::Label retry;
    cg.ADD(X2, X1, offsetof(GuestContext, esr_el1));
    cg.l(retry);
    cg.LDAXR(X0, X2);
    cg.STLXR(W3, XZR, X2);
    cg.CBNZ(W3, retry);
    cg.ORR(X0, X0, static_cast<u64>(HaltReason::CacheInvalidation));

    // Switch back to the host stack pointer and TLS register.
    cg.ADD(X1, X1, offsetof(GuestContext, host_ctx));
    static_assert(offsetof(HostContext, host_sp) + 8 == offsetof(HostContext, host_tpidr_el0));
    cg.LDP(X2, X3, X1, offsetof(HostContext, host_sp));
    cg.MOV(SP, X2);
    cg.MSR(oaknut::SystemReg::TPIDR_EL0, X3);

    // Reload the host's saved X19-X30 and Q8-Q15, then return to the host via X30.
    static constexpr size_t HOST_REGS_OFF = offsetof(HostContext, host_saved_regs);
    static constexpr size_t HOST_VREGS_OFF = offsetof(HostContext, host_saved_vregs);
    cg.LDP(X19, X20, X1, HOST_REGS_OFF);
    cg.LDP(X21, X22, X1, HOST_REGS_OFF + 2 * sizeof(u64));
    cg.LDP(X23, X24, X1, HOST_REGS_OFF + 4 * sizeof(u64));
    cg.LDP(X25, X26, X1, HOST_REGS_OFF + 6 * sizeof(u64));
    cg.LDP(X27, X28, X1, HOST_REGS_OFF + 8 * sizeof(u64));
    cg.LDP(X29, X30, X1, HOST_REGS_OFF + 10 * sizeof(u64));
    cg.LDP(Q8, Q9, X1, HOST_VREGS_OFF);
    cg.LDP(Q10, Q11, X1, HOST_VREGS_OFF + 2 * sizeof(u128));
    cg.LDP(Q12, Q13, X1, HOST_VREGS_OFF + 4 * sizeof(u128));
    cg.LDP(Q14, Q15, X1, HOST_VREGS_OFF + 6 * sizeof(u128));
    cg.RET();

    // Register the offset of the re-entry code that follows, paired with its module
    // destination.
    if (is_pre) {
        curr_patch->m_trampolines_pre.push_back({cg.offset(), module_dest});
    } else {
        curr_patch->m_trampolines.push_back({cg.offset(), module_dest});
    }

    // Re-entry: keep the caller's X30 in the last host_saved_regs slot before it is clobbered.
    cg.MRS(X2, oaknut::SystemReg::TPIDR_EL0);
    cg.LDR(X2, X2, offsetof(NativeExecutionParameters, native_context));
    cg.ADD(X0, X2, offsetof(GuestContext, host_ctx));
    cg.STR(X30, X0, offsetof(HostContext, host_saved_regs) + 11 * sizeof(u64));

    // Restore the guest state through the load routine (X30 spilled around the BL).
    cg.STR(X30, SP, PRE_INDEXED, -16);
    cg.BL(load_ctx);
    cg.LDR(X30, SP, POST_INDEXED, 16);

    // Reload guest X30 from the saved register file, borrowing X1 and restoring it afterwards.
    cg.STR(X1, SP, PRE_INDEXED, -16);
    cg.MRS(X1, oaknut::SystemReg::TPIDR_EL0);
    cg.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context));
    cg.LDR(X30, X1, offsetof(GuestContext, cpu_registers) + sizeof(u64) * 30);
    cg.LDR(X1, SP, POST_INDEXED, 16);

    this->UnlockContext(cg);

    // Jump back into the module.
    if (is_pre) {
        this->BranchToModulePre(module_dest);
    } else {
        this->BranchToModule(module_dest);
    }

    // Literal read by the LDR near the top: the module PC to resume at.
    cg.l(pc_after_cache_op);
    if (is_pre) {
        this->WriteModulePcPre(module_dest);
    } else {
        this->WriteModulePc(module_dest);
    }
}
void Patcher::WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg,
oaknut::SystemReg src_reg, oaknut::VectorCodeGenerator& cg) {
// Retrieve emulated TLS register from GuestContext.

View file

@ -78,6 +78,11 @@ private:
void LockContext(oaknut::VectorCodeGenerator& code);
void UnlockContext(oaknut::VectorCodeGenerator& code);
void WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id, oaknut::VectorCodeGenerator& code, oaknut::Label& save_ctx, oaknut::Label& load_ctx);
void WriteCacheOperationTrampoline(ModuleDestLabel module_dest, CacheOperationKind op_kind,
oaknut::XReg src_reg,
oaknut::VectorCodeGenerator& code,
oaknut::Label& save_ctx,
oaknut::Label& load_ctx);
void WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg, oaknut::SystemReg src_reg, oaknut::VectorCodeGenerator& code);
void WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg, oaknut::VectorCodeGenerator& code);
void WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg, oaknut::VectorCodeGenerator& code);
@ -88,6 +93,11 @@ private:
void LockContext() { LockContext(c); }
void UnlockContext() { UnlockContext(c); }
void WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id) { WriteSvcTrampoline(module_dest, svc_id, c, m_save_context, m_load_context); }
// Convenience overload: emits the cache operation trampoline into the main code generator
// `c`, using the main save/load context labels.
void WriteCacheOperationTrampoline(ModuleDestLabel module_dest, CacheOperationKind op_kind,
oaknut::XReg src_reg) {
WriteCacheOperationTrampoline(module_dest, op_kind, src_reg, c, m_save_context,
m_load_context);
}
void WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg, oaknut::SystemReg src_reg) { WriteMrsHandler(module_dest, dest_reg, src_reg, c); }
void WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg) { WriteMsrHandler(module_dest, src_reg, c); }
void WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg) { WriteCntpctHandler(module_dest, dest_reg, c); }

View file

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
@ -97,6 +97,7 @@ void PhysicalCore::RunThread(Kernel::KThread* thread) {
}
// Determine why we stopped.
const bool cache_invalidation = True(hr & Core::HaltReason::CacheInvalidation);
const bool supervisor_call = True(hr & Core::HaltReason::SupervisorCall);
const bool prefetch_abort = True(hr & Core::HaltReason::PrefetchAbort);
const bool breakpoint = True(hr & Core::HaltReason::InstructionBreakpoint);
@ -151,6 +152,11 @@ void PhysicalCore::RunThread(Kernel::KThread* thread) {
return;
}
if (cache_invalidation) {
interface->HandleCacheOperation(thread);
continue;
}
// Handle external interrupt sources.
if (interrupt || m_is_single_core) {
return;

View file

@ -639,6 +639,15 @@ struct Memory::Impl {
GetInteger(vaddr), []() {}, []() {});
}
/// @brief Reports whether the page containing an address is marked as rasterizer-cached.
/// @param vaddr The virtual address to query; only its low 48 bits are considered.
/// @returns true when the address lies inside the current page table's address space and
///          its page entry has type RasterizerCachedMemory, false otherwise.
[[nodiscard]] bool IsRasterizerCached(const Common::ProcessAddress vaddr) const {
    constexpr u64 address_mask = 0xffffffffffffULL;
    const u64 masked_addr = GetInteger(vaddr) & address_mask;

    // The page entry is only looked up once the address is known to be in range.
    const bool in_range = AddressSpaceContains(*current_page_table, masked_addr, 1);
    return in_range && current_page_table->entries[masked_addr >> YUZU_PAGEBITS].ptr.Type() ==
                           Common::PageType::RasterizerCachedMemory;
}
/// @brief Reads a particular data type out of memory at the given virtual address.
/// @param vaddr The virtual address to read the data type from.
/// @tparam T The data type to read out of memory.
@ -1036,6 +1045,10 @@ void Memory::RasterizerMarkRegionCached(Common::ProcessAddress vaddr, u64 size,
impl->RasterizerMarkRegionCached(GetInteger(vaddr), size, cached);
}
// Public entry point; forwards the query unchanged to the implementation object.
bool Memory::IsRasterizerCached(Common::ProcessAddress vaddr) const {
return impl->IsRasterizerCached(vaddr);
}
// Forwards to the implementation object, passing the address in its integer form.
void Memory::MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug) {
impl->MarkRegionDebug(GetInteger(vaddr), size, debug);
}

View file

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: 2014 Citra Emulator Project
@ -493,6 +493,8 @@ public:
void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers);
[[nodiscard]] bool IsRasterizerCached(Common::ProcessAddress vaddr) const;
bool InvalidateNCE(Common::ProcessAddress vaddr, size_t size);
bool InvalidateSeparateHeap(void* fault_address);

View file

@ -60,6 +60,8 @@ public:
/// Returns the bank to its initial state so it can be reused.
/// Asserts that nothing references the bank any more, resets the base-class state, resets
/// all BANK_SIZE entries of the query pool through the logical device, and clears the
/// host-side results and the next-slot index.
void Reset() override {
    ASSERT(references == 0);
    VideoCommon::BankBase::Reset();

    // The pool is reset directly through the logical device, not via a command buffer.
    device.GetLogical().ResetQueryPool(*query_pool, 0, BANK_SIZE);

    next_bank = 0;
    host_results.fill(0ULL);
}
@ -440,10 +442,6 @@ private:
}
current_bank = &bank_pool.GetBank(current_bank_id);
current_query_pool = current_bank->GetInnerPool();
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([query_pool = current_query_pool](vk::CommandBuffer cmdbuf) {
cmdbuf.ResetQueryPool(query_pool, 0, SamplesQueryBank::BANK_SIZE);
});
}
size_t ReserveBankSlot() {