[dynarmic] allow better dtrace diagnostics for code - do not clobber %rbp and save frame pointer (#2653)

Saving the %rbp pointer allows us to backref previous stackframes easily

Signed-off-by: lizzie <lizzie@eden-emu.dev>

Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2653
Reviewed-by: CamilleLaVey <camillelavey99@gmail.com>
Reviewed-by: Caio Oliveira <caiooliveirafarias0@gmail.com>
Co-authored-by: lizzie <lizzie@eden-emu.dev>
Co-committed-by: lizzie <lizzie@eden-emu.dev>
This commit is contained in:
lizzie 2025-12-17 05:41:11 +01:00 committed by crueter
parent e4dccd5a5c
commit 50f8d4130d
No known key found for this signature in database
GPG key ID: 425ACD2D4830EBC6
7 changed files with 55 additions and 92 deletions

View file

@ -217,13 +217,13 @@ void A32EmitX64::ClearFastDispatchTable() {
}
void A32EmitX64::GenTerminalHandlers() {
// PC ends up in ebp, location_descriptor ends up in rbx
// PC ends up in edi, location_descriptor ends up in rbx
const auto calculate_location_descriptor = [this] {
// This calculation has to match up with IREmitter::PushRSB
code.mov(ebx, dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)]);
code.shl(rbx, 32);
code.mov(ecx, MJitStateReg(A32::Reg::PC));
code.mov(ebp, ecx);
code.mov(edi, ecx);
code.or_(rbx, rcx);
};
@ -238,7 +238,7 @@ void A32EmitX64::GenTerminalHandlers() {
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, rsb_ptr)], eax);
code.cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(A32JitState, rsb_location_descriptors) + rax * sizeof(u64)]);
if (conf.HasOptimization(OptimizationFlag::FastDispatch)) {
code.jne(rsb_cache_miss);
code.jne(rsb_cache_miss, code.T_NEAR);
} else {
code.jne(code.GetReturnFromRunCodeAddress());
}
@ -251,20 +251,21 @@ void A32EmitX64::GenTerminalHandlers() {
terminal_handler_fast_dispatch_hint = code.getCurr<const void*>();
calculate_location_descriptor();
code.L(rsb_cache_miss);
code.mov(r12, reinterpret_cast<u64>(fast_dispatch_table.data()));
code.mov(rbp, rbx);
code.mov(r8, reinterpret_cast<u64>(fast_dispatch_table.data()));
//code.mov(r12d, MJitStateReg(A32::Reg::PC));
code.mov(r12, rbx);
if (code.HasHostFeature(HostFeature::SSE42)) {
code.crc32(rbp, r12);
code.crc32(r12, r8);
}
code.and_(ebp, fast_dispatch_table_mask);
code.lea(rbp, ptr[r12 + rbp]);
code.cmp(rbx, qword[rbp + offsetof(FastDispatchEntry, location_descriptor)]);
code.jne(fast_dispatch_cache_miss);
code.jmp(ptr[rbp + offsetof(FastDispatchEntry, code_ptr)]);
code.and_(r12d, fast_dispatch_table_mask);
code.lea(r12, ptr[r8 + r12]);
code.cmp(rbx, qword[r12 + offsetof(FastDispatchEntry, location_descriptor)]);
code.jne(fast_dispatch_cache_miss, code.T_NEAR);
code.jmp(ptr[r12 + offsetof(FastDispatchEntry, code_ptr)]);
code.L(fast_dispatch_cache_miss);
code.mov(qword[rbp + offsetof(FastDispatchEntry, location_descriptor)], rbx);
code.mov(qword[r12 + offsetof(FastDispatchEntry, location_descriptor)], rbx);
code.LookupBlock();
code.mov(ptr[rbp + offsetof(FastDispatchEntry, code_ptr)], rax);
code.mov(ptr[r12 + offsetof(FastDispatchEntry, code_ptr)], rax);
code.jmp(rax);
PerfMapRegister(terminal_handler_fast_dispatch_hint, code.getCurr(), "a32_terminal_handler_fast_dispatch_hint");

View file

@ -188,13 +188,14 @@ void A64EmitX64::ClearFastDispatchTable() {
}
void A64EmitX64::GenTerminalHandlers() {
// PC ends up in rbp, location_descriptor ends up in rbx
// PC ends up in rcx, location_descriptor ends up in rbx
static_assert(std::find(ABI_ALL_CALLEE_SAVE.begin(), ABI_ALL_CALLEE_SAVE.end(), HostLoc::R12) != ABI_ALL_CALLEE_SAVE.end());
const auto calculate_location_descriptor = [this] {
// This calculation has to match up with A64::LocationDescriptor::UniqueHash
// TODO: Optimization is available here based on known state of fpcr.
code.mov(rbp, qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)]);
code.mov(rdi, qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)]);
code.mov(rcx, A64::LocationDescriptor::pc_mask);
code.and_(rcx, rbp);
code.and_(rcx, rdi);
code.mov(ebx, dword[code.ABI_JIT_PTR + offsetof(A64JitState, fpcr)]);
code.and_(ebx, A64::LocationDescriptor::fpcr_mask);
code.shl(rbx, A64::LocationDescriptor::fpcr_shift);
@ -226,20 +227,21 @@ void A64EmitX64::GenTerminalHandlers() {
terminal_handler_fast_dispatch_hint = code.getCurr<const void*>();
calculate_location_descriptor();
code.L(rsb_cache_miss);
code.mov(r12, reinterpret_cast<u64>(fast_dispatch_table.data()));
code.mov(rbp, rbx);
code.mov(r8, reinterpret_cast<u64>(fast_dispatch_table.data()));
//code.mov(r12, qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)]);
code.mov(r12, rbx);
if (code.HasHostFeature(HostFeature::SSE42)) {
code.crc32(rbp, r12);
code.crc32(r12, r8);
}
code.and_(ebp, fast_dispatch_table_mask);
code.lea(rbp, ptr[r12 + rbp]);
code.cmp(rbx, qword[rbp + offsetof(FastDispatchEntry, location_descriptor)]);
code.jne(fast_dispatch_cache_miss);
code.jmp(ptr[rbp + offsetof(FastDispatchEntry, code_ptr)]);
code.and_(r12d, fast_dispatch_table_mask);
code.lea(r12, ptr[r8 + r12]);
code.cmp(rbx, qword[r12 + offsetof(FastDispatchEntry, location_descriptor)]);
code.jne(fast_dispatch_cache_miss, code.T_NEAR);
code.jmp(ptr[r12 + offsetof(FastDispatchEntry, code_ptr)]);
code.L(fast_dispatch_cache_miss);
code.mov(qword[rbp + offsetof(FastDispatchEntry, location_descriptor)], rbx);
code.mov(qword[r12 + offsetof(FastDispatchEntry, location_descriptor)], rbx);
code.LookupBlock();
code.mov(ptr[rbp + offsetof(FastDispatchEntry, code_ptr)], rax);
code.mov(ptr[r12 + offsetof(FastDispatchEntry, code_ptr)], rax);
code.jmp(rax);
PerfMapRegister(terminal_handler_fast_dispatch_hint, code.getCurr(), "a64_terminal_handler_fast_dispatch_hint");

View file

@ -28,7 +28,8 @@ static_assert(ABI_SHADOW_SPACE <= 32);
static FrameInfo CalculateFrameInfo(const size_t num_gprs, const size_t num_xmms, size_t frame_size) {
// We are initially 8 byte aligned because the return value is pushed onto an aligned stack after a call.
const size_t rsp_alignment = (num_gprs % 2 == 0) ? 8 : 0;
// (It's an extra GPR save due to %rbp)
const size_t rsp_alignment = ((num_gprs + 1) % 2 == 0) ? 8 : 0;
const size_t total_xmm_size = num_xmms * XMM_SIZE;
if (frame_size & 0xF) {
frame_size += 0x10 - (frame_size & 0xF);
@ -48,6 +49,8 @@ void ABI_PushRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size,
const size_t num_xmms = std::count_if(regs.begin(), regs.end(), HostLocIsXMM);
const FrameInfo frame_info = CalculateFrameInfo(num_gprs, num_xmms, frame_size);
code.push(rbp);
code.mov(rbp, rsp);
for (auto const gpr : regs)
if (HostLocIsGPR(gpr))
code.push(HostLocToReg64(gpr));
@ -93,6 +96,7 @@ void ABI_PopRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size,
if (HostLocIsGPR(gpr))
code.pop(HostLocToReg64(gpr));
}
code.pop(rbp);
}
void ABI_PushCalleeSaveRegistersAndAdjustStack(BlockOfCode& code, const std::size_t frame_size) {

View file

@ -370,7 +370,7 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
cmp(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], 0);
jne(return_to_caller_mxcsr_already_exited, T_NEAR);
lock(); or_(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], static_cast<u32>(HaltReason::Step));
lock(); or_(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], u32(HaltReason::Step));
SwitchMxcsrOnEntry();
jmp(ABI_PARAM2);

View file

@ -37,6 +37,9 @@
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/opt_passes.h"
#include "./A32/testenv.h"
#include "./A64/testenv.h"
using namespace Dynarmic;
std::string_view GetNameOfA32Instruction(u32 instruction) {
@ -65,7 +68,10 @@ void PrintA32Instruction(u32 instruction) {
fmt::print("should_continue: {}\n\n", should_continue);
fmt::print("IR:\n");
fmt::print("{}\n", IR::DumpBlock(ir_block));
Optimization::Optimize(ir_block, A32::UserConfig{}, {});
ArmTestEnv jit_env{};
Dynarmic::A32::UserConfig jit_user_config{};
jit_user_config.callbacks = &jit_env;
Optimization::Optimize(ir_block, jit_user_config, {});
fmt::print("Optimized IR:\n");
fmt::print("{}\n", IR::DumpBlock(ir_block));
}
@ -80,7 +86,10 @@ void PrintA64Instruction(u32 instruction) {
fmt::print("should_continue: {}\n\n", should_continue);
fmt::print("IR:\n");
fmt::print("{}\n", IR::DumpBlock(ir_block));
Optimization::Optimize(ir_block, A64::UserConfig{}, {});
A64TestEnv jit_env{};
Dynarmic::A64::UserConfig jit_user_config{};
jit_user_config.callbacks = &jit_env;
Optimization::Optimize(ir_block, jit_user_config, {});
fmt::print("Optimized IR:\n");
fmt::print("{}\n", IR::DumpBlock(ir_block));
}
@ -98,7 +107,10 @@ void PrintThumbInstruction(u32 instruction) {
fmt::print("should_continue: {}\n\n", should_continue);
fmt::print("IR:\n");
fmt::print("{}\n", IR::DumpBlock(ir_block));
Optimization::Optimize(ir_block, A32::UserConfig{}, {});
ThumbTestEnv jit_env{};
Dynarmic::A32::UserConfig jit_user_config{};
jit_user_config.callbacks = &jit_env;
Optimization::Optimize(ir_block, jit_user_config, {});
fmt::print("Optimized IR:\n");
fmt::print("{}\n", IR::DumpBlock(ir_block));
}
@ -219,7 +231,7 @@ void ExecuteA32Instruction(u32 instruction) {
*(iter->second) = *value;
fmt::print("> {} = 0x{:08x}\n", reg_name, *value);
}
} else if (reg_name == "mem" || reg_name == "memory") {
} else if (reg_name.starts_with("m")) {
fmt::print("address: ");
if (const auto address = get_value()) {
fmt::print("value: ");
@ -228,7 +240,7 @@ void ExecuteA32Instruction(u32 instruction) {
fmt::print("> mem[0x{:08x}] = 0x{:08x}\n", *address, *value);
}
}
} else if (reg_name == "end") {
} else if (reg_name == "exit" || reg_name == "end" || reg_name.starts_with("q")) {
break;
}
}
@ -244,6 +256,7 @@ void ExecuteA32Instruction(u32 instruction) {
env.MemoryWrite32(initial_pc + 4, 0xEAFFFFFE); // B +0
cpu.Run();
fmt::print("{}", fmt::join(cpu.Disassemble(), "\n"));
fmt::print("Registers modified:\n");
for (size_t i = 0; i < regs.size(); ++i) {