mirror of
https://git.eden-emu.dev/eden-emu/eden
synced 2026-04-10 05:28:56 +02:00
[video_core/engines] Macro HLE inline (#3653)
Should slightly boost performance on Android; desktop is mainly unaffected (for now). Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3653 Reviewed-by: CamilleLaVey <camillelavey99@gmail.com> Reviewed-by: DraVee <chimera@dravee.dev> Co-authored-by: lizzie <lizzie@eden-emu.dev> Co-committed-by: lizzie <lizzie@eden-emu.dev>
This commit is contained in:
parent
23566a1f7d
commit
c70b857c4f
4 changed files with 586 additions and 706 deletions
|
|
@ -1,4 +1,4 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
|
|
@ -26,8 +26,15 @@ namespace Tegra::Engines {
|
|||
constexpr u32 MacroRegistersStart = 0xE00;
|
||||
|
||||
Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_)
|
||||
: draw_manager{std::make_unique<DrawManager>(this)}, system{system_},
|
||||
memory_manager{memory_manager_}, macro_engine{GetMacroEngine(*this)}, upload_state{memory_manager, regs.upload} {
|
||||
: draw_manager{std::make_unique<DrawManager>(this)}, system{system_}
|
||||
, memory_manager{memory_manager_}
|
||||
#ifdef ARCHITECTURE_x86_64
|
||||
, macro_engine(bool(Settings::values.disable_macro_jit))
|
||||
#else
|
||||
, macro_engine(true)
|
||||
#endif
|
||||
, upload_state{memory_manager, regs.upload}
|
||||
{
|
||||
dirty.flags.flip();
|
||||
InitializeRegisterDefaults();
|
||||
execution_mask.reset();
|
||||
|
|
@ -328,9 +335,9 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
|
|||
shadow_state.shadow_ram_control = static_cast<Regs::ShadowRamControl>(nonshadow_argument);
|
||||
return;
|
||||
case MAXWELL3D_REG_INDEX(load_mme.instruction_ptr):
|
||||
return macro_engine->ClearCode(regs.load_mme.instruction_ptr);
|
||||
return macro_engine.ClearCode(regs.load_mme.instruction_ptr);
|
||||
case MAXWELL3D_REG_INDEX(load_mme.instruction):
|
||||
return macro_engine->AddCode(regs.load_mme.instruction_ptr, argument);
|
||||
return macro_engine.AddCode(regs.load_mme.instruction_ptr, argument);
|
||||
case MAXWELL3D_REG_INDEX(load_mme.start_address):
|
||||
return ProcessMacroBind(argument);
|
||||
case MAXWELL3D_REG_INDEX(falcon[4]):
|
||||
|
|
@ -398,7 +405,7 @@ void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters)
|
|||
((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size());
|
||||
|
||||
// Execute the current macro.
|
||||
macro_engine->Execute(macro_positions[entry], parameters);
|
||||
macro_engine.Execute(*this, macro_positions[entry], parameters);
|
||||
|
||||
draw_manager->DrawDeferred();
|
||||
}
|
||||
|
|
@ -464,7 +471,7 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
|
|||
}
|
||||
|
||||
void Maxwell3D::ProcessMacroUpload(u32 data) {
|
||||
macro_engine->AddCode(regs.load_mme.instruction_ptr++, data);
|
||||
macro_engine.AddCode(regs.load_mme.instruction_ptr++, data);
|
||||
}
|
||||
|
||||
void Maxwell3D::ProcessMacroBind(u32 data) {
|
||||
|
|
|
|||
|
|
@ -3203,7 +3203,7 @@ private:
|
|||
std::vector<u32> macro_params;
|
||||
|
||||
/// Interpreter for the macro codes uploaded to the GPU.
|
||||
std::optional<MacroEngine> macro_engine;
|
||||
MacroEngine macro_engine;
|
||||
|
||||
Upload::State upload_state;
|
||||
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -7,8 +7,10 @@
|
|||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <ankerl/unordered_dense.h>
|
||||
#include <span>
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
#include <ankerl/unordered_dense.h>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
|
|
@ -98,62 +100,142 @@ union MethodAddress {
|
|||
|
||||
} // namespace Macro
|
||||
|
||||
class CachedMacro {
|
||||
public:
|
||||
CachedMacro(Engines::Maxwell3D& maxwell3d_)
|
||||
: maxwell3d{maxwell3d_}
|
||||
{}
|
||||
virtual ~CachedMacro() = default;
|
||||
struct HLEMacro {
|
||||
};
|
||||
/// @note: these macros have two versions, a normal and extended version, with the extended version
|
||||
/// also assigning the base vertex/instance.
|
||||
struct HLE_DrawArraysIndirect final {
|
||||
HLE_DrawArraysIndirect(bool extended_) noexcept : extended{extended_} {}
|
||||
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
|
||||
void Fallback(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters);
|
||||
bool extended;
|
||||
};
|
||||
/// @note: these macros have two versions, a normal and extended version, with the extended version
|
||||
/// also assigning the base vertex/instance.
|
||||
struct HLE_DrawIndexedIndirect final {
|
||||
explicit HLE_DrawIndexedIndirect(bool extended_) noexcept : extended{extended_} {}
|
||||
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
|
||||
void Fallback(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters);
|
||||
bool extended;
|
||||
};
|
||||
struct HLE_MultiLayerClear final {
|
||||
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
|
||||
};
|
||||
struct HLE_MultiDrawIndexedIndirectCount final {
|
||||
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
|
||||
void Fallback(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters);
|
||||
};
|
||||
struct HLE_DrawIndirectByteCount final {
|
||||
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
|
||||
void Fallback(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters);
|
||||
};
|
||||
struct HLE_C713C83D8F63CCF3 final {
|
||||
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
|
||||
};
|
||||
struct HLE_D7333D26E0A93EDE final {
|
||||
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
|
||||
};
|
||||
struct HLE_BindShader final {
|
||||
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
|
||||
};
|
||||
struct HLE_SetRasterBoundingBox final {
|
||||
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
|
||||
};
|
||||
struct HLE_ClearConstBuffer final {
|
||||
HLE_ClearConstBuffer(size_t base_size_) noexcept : base_size{base_size_} {}
|
||||
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
|
||||
size_t base_size;
|
||||
};
|
||||
struct HLE_ClearMemory final {
|
||||
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
|
||||
std::vector<u32> zero_memory;
|
||||
};
|
||||
struct HLE_TransformFeedbackSetup final {
|
||||
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
|
||||
};
|
||||
struct MacroInterpreterImpl final {
|
||||
MacroInterpreterImpl() {}
|
||||
MacroInterpreterImpl(std::span<const u32> code_) : code{code_} {}
|
||||
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> params, u32 method);
|
||||
void Reset();
|
||||
bool Step(Engines::Maxwell3D& maxwell3d, bool is_delay_slot);
|
||||
u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b);
|
||||
void ProcessResult(Engines::Maxwell3D& maxwell3d, Macro::ResultOperation operation, u32 reg, u32 result);
|
||||
bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const;
|
||||
Macro::Opcode GetOpcode() const;
|
||||
u32 GetRegister(u32 register_id) const;
|
||||
void SetRegister(u32 register_id, u32 value);
|
||||
/// Sets the method address to use for the next Send instruction.
|
||||
[[nodiscard]] inline void SetMethodAddress(u32 address) noexcept {
|
||||
method_address.raw = address;
|
||||
}
|
||||
void Send(Engines::Maxwell3D& maxwell3d, u32 value);
|
||||
u32 Read(Engines::Maxwell3D& maxwell3d, u32 method) const;
|
||||
u32 FetchParameter();
|
||||
/// General purpose macro registers.
|
||||
std::array<u32, Macro::NUM_MACRO_REGISTERS> registers = {};
|
||||
/// Input parameters of the current macro.
|
||||
std::vector<u32> parameters;
|
||||
std::span<const u32> code;
|
||||
/// Program counter to execute at after the delay slot is executed.
|
||||
std::optional<u32> delayed_pc;
|
||||
/// Method address to use for the next Send instruction.
|
||||
Macro::MethodAddress method_address = {};
|
||||
/// Current program counter
|
||||
u32 pc{};
|
||||
/// Index of the next parameter that will be fetched by the 'parm' instruction.
|
||||
u32 next_parameter_index = 0;
|
||||
bool carry_flag = false;
|
||||
};
|
||||
struct DynamicCachedMacro {
|
||||
virtual ~DynamicCachedMacro() = default;
|
||||
/// Executes the macro code with the specified input parameters.
|
||||
/// @param parameters The parameters of the macro
|
||||
/// @param method The method to execute
|
||||
virtual void Execute(const std::vector<u32>& parameters, u32 method) = 0;
|
||||
Engines::Maxwell3D& maxwell3d;
|
||||
virtual void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, u32 method) = 0;
|
||||
};
|
||||
|
||||
class HLEMacro {
|
||||
public:
|
||||
explicit HLEMacro(Engines::Maxwell3D& maxwell3d_);
|
||||
~HLEMacro();
|
||||
// Allocates and returns a cached macro if the hash matches a known function.
|
||||
// Returns nullptr otherwise.
|
||||
[[nodiscard]] std::unique_ptr<CachedMacro> GetHLEProgram(u64 hash) const;
|
||||
private:
|
||||
Engines::Maxwell3D& maxwell3d;
|
||||
};
|
||||
|
||||
class MacroEngine {
|
||||
public:
|
||||
explicit MacroEngine(Engines::Maxwell3D& maxwell3d, bool is_interpreted);
|
||||
~MacroEngine();
|
||||
/// Closed set of macro program representations, held by value in a std::variant.
/// std::monostate is the first alternative, so a default-constructed AnyCachedMacro holds it.
/// NOTE(review): presumably monostate stands for "no program compiled yet" - confirm against
/// MacroEngine::Compile / MacroEngine::Execute.
/// Every alternative except the last is a concrete type stored inline; the last one is an
/// owning pointer to a DynamicCachedMacro.
using AnyCachedMacro = std::variant<
|
||||
std::monostate,
|
||||
HLEMacro,
|
||||
HLE_DrawArraysIndirect,
|
||||
HLE_DrawIndexedIndirect,
|
||||
HLE_MultiDrawIndexedIndirectCount,
|
||||
HLE_MultiLayerClear,
|
||||
HLE_C713C83D8F63CCF3,
|
||||
HLE_D7333D26E0A93EDE,
|
||||
HLE_BindShader,
|
||||
HLE_SetRasterBoundingBox,
|
||||
HLE_ClearConstBuffer,
|
||||
HLE_ClearMemory,
|
||||
HLE_TransformFeedbackSetup,
|
||||
HLE_DrawIndirectByteCount,
|
||||
MacroInterpreterImpl,
|
||||
// Used for JIT x86 macro
|
||||
std::unique_ptr<DynamicCachedMacro>
|
||||
>;
|
||||
|
||||
struct MacroEngine {
|
||||
MacroEngine(bool is_interpreted_) noexcept : is_interpreted{is_interpreted_} {}
|
||||
// Store the uploaded macro code to compile them when they're called.
|
||||
void AddCode(u32 method, u32 data);
|
||||
|
||||
/// Appends one word of macro code to the vector stored under key `method` in
/// uploaded_macro_code; the map's operator[] creates that vector on first use.
/// @param method Key under which the code is stored.
/// @param data   Code word to append.
/// NOTE(review): both the map insertion and push_back may allocate; because this function is
/// noexcept, an allocation failure here terminates the process instead of propagating.
inline void AddCode(u32 method, u32 data) noexcept {
|
||||
uploaded_macro_code[method].push_back(data);
|
||||
}
|
||||
// Clear the code associated with a method.
|
||||
void ClearCode(u32 method);
|
||||
|
||||
/// Removes the entry for `method` from both macro_cache and uploaded_macro_code.
/// @param method Key whose cached program and uploaded code are erased.
inline void ClearCode(u32 method) noexcept {
|
||||
// Drop the cached program first, then the uploaded code it was built from.
macro_cache.erase(method);
|
||||
uploaded_macro_code.erase(method);
|
||||
}
|
||||
// Compiles the macro if it's not in the cache, and executes the compiled macro
|
||||
void Execute(u32 method, const std::vector<u32>& parameters);
|
||||
|
||||
protected:
|
||||
std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code);
|
||||
|
||||
private:
|
||||
void Execute(Engines::Maxwell3D& maxwell3d, u32 method, std::span<const u32> parameters);
|
||||
AnyCachedMacro Compile(Engines::Maxwell3D& maxwell3d, std::span<const u32> code);
|
||||
struct CacheInfo {
|
||||
std::unique_ptr<CachedMacro> lle_program{};
|
||||
std::unique_ptr<CachedMacro> hle_program{};
|
||||
AnyCachedMacro program;
|
||||
u64 hash{};
|
||||
bool has_hle_program{};
|
||||
};
|
||||
|
||||
ankerl::unordered_dense::map<u32, CacheInfo> macro_cache;
|
||||
ankerl::unordered_dense::map<u32, std::vector<u32>> uploaded_macro_code;
|
||||
std::optional<HLEMacro> hle_macros;
|
||||
Engines::Maxwell3D& maxwell3d;
|
||||
bool is_interpreted;
|
||||
};
|
||||
|
||||
std::optional<MacroEngine> GetMacroEngine(Engines::Maxwell3D& maxwell3d);
|
||||
|
||||
} // namespace Tegra
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue