[video_core/engines] Macro HLE inline (#3653)

Should slightly boost perf on android, Desktop is mainly unaffected (for now)

Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3653
Reviewed-by: CamilleLaVey <camillelavey99@gmail.com>
Reviewed-by: DraVee <chimera@dravee.dev>
Co-authored-by: lizzie <lizzie@eden-emu.dev>
Co-committed-by: lizzie <lizzie@eden-emu.dev>
This commit is contained in:
lizzie 2026-03-06 15:04:38 +01:00 committed by crueter
parent 23566a1f7d
commit c70b857c4f
No known key found for this signature in database
GPG key ID: 425ACD2D4830EBC6
4 changed files with 586 additions and 706 deletions

View file

@ -7,8 +7,10 @@
#pragma once
#include <memory>
#include <ankerl/unordered_dense.h>
#include <span>
#include <variant>
#include <vector>
#include <ankerl/unordered_dense.h>
#include "common/bit_field.h"
#include "common/common_types.h"
@ -98,62 +100,142 @@ union MethodAddress {
} // namespace Macro
class CachedMacro {
public:
CachedMacro(Engines::Maxwell3D& maxwell3d_)
: maxwell3d{maxwell3d_}
{}
virtual ~CachedMacro() = default;
struct HLEMacro {
};
/// @note: these macros have two versions, a normal and extended version, with the extended version
/// also assigning the base vertex/instance.
struct HLE_DrawArraysIndirect final {
HLE_DrawArraysIndirect(bool extended_) noexcept : extended{extended_} {}
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
void Fallback(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters);
bool extended;
};
/// @note: these macros have two versions, a normal and extended version, with the extended version
/// also assigning the base vertex/instance.
struct HLE_DrawIndexedIndirect final {
explicit HLE_DrawIndexedIndirect(bool extended_) noexcept : extended{extended_} {}
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
void Fallback(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters);
bool extended;
};
struct HLE_MultiLayerClear final {
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
};
struct HLE_MultiDrawIndexedIndirectCount final {
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
void Fallback(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters);
};
struct HLE_DrawIndirectByteCount final {
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
void Fallback(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters);
};
struct HLE_C713C83D8F63CCF3 final {
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
};
struct HLE_D7333D26E0A93EDE final {
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
};
struct HLE_BindShader final {
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
};
struct HLE_SetRasterBoundingBox final {
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
};
struct HLE_ClearConstBuffer final {
HLE_ClearConstBuffer(size_t base_size_) noexcept : base_size{base_size_} {}
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
size_t base_size;
};
struct HLE_ClearMemory final {
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
std::vector<u32> zero_memory;
};
struct HLE_TransformFeedbackSetup final {
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
};
struct MacroInterpreterImpl final {
MacroInterpreterImpl() {}
MacroInterpreterImpl(std::span<const u32> code_) : code{code_} {}
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> params, u32 method);
void Reset();
bool Step(Engines::Maxwell3D& maxwell3d, bool is_delay_slot);
u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b);
void ProcessResult(Engines::Maxwell3D& maxwell3d, Macro::ResultOperation operation, u32 reg, u32 result);
bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const;
Macro::Opcode GetOpcode() const;
u32 GetRegister(u32 register_id) const;
void SetRegister(u32 register_id, u32 value);
/// Sets the method address to use for the next Send instruction.
[[nodiscard]] inline void SetMethodAddress(u32 address) noexcept {
method_address.raw = address;
}
void Send(Engines::Maxwell3D& maxwell3d, u32 value);
u32 Read(Engines::Maxwell3D& maxwell3d, u32 method) const;
u32 FetchParameter();
/// General purpose macro registers.
std::array<u32, Macro::NUM_MACRO_REGISTERS> registers = {};
/// Input parameters of the current macro.
std::vector<u32> parameters;
std::span<const u32> code;
/// Program counter to execute at after the delay slot is executed.
std::optional<u32> delayed_pc;
/// Method address to use for the next Send instruction.
Macro::MethodAddress method_address = {};
/// Current program counter
u32 pc{};
/// Index of the next parameter that will be fetched by the 'parm' instruction.
u32 next_parameter_index = 0;
bool carry_flag = false;
};
struct DynamicCachedMacro {
virtual ~DynamicCachedMacro() = default;
/// Executes the macro code with the specified input parameters.
/// @param parameters The parameters of the macro
/// @param method The method to execute
virtual void Execute(const std::vector<u32>& parameters, u32 method) = 0;
Engines::Maxwell3D& maxwell3d;
virtual void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, u32 method) = 0;
};
class HLEMacro {
public:
explicit HLEMacro(Engines::Maxwell3D& maxwell3d_);
~HLEMacro();
// Allocates and returns a cached macro if the hash matches a known function.
// Returns nullptr otherwise.
[[nodiscard]] std::unique_ptr<CachedMacro> GetHLEProgram(u64 hash) const;
private:
Engines::Maxwell3D& maxwell3d;
};
class MacroEngine {
public:
explicit MacroEngine(Engines::Maxwell3D& maxwell3d, bool is_interpreted);
~MacroEngine();
using AnyCachedMacro = std::variant<
std::monostate,
HLEMacro,
HLE_DrawArraysIndirect,
HLE_DrawIndexedIndirect,
HLE_MultiDrawIndexedIndirectCount,
HLE_MultiLayerClear,
HLE_C713C83D8F63CCF3,
HLE_D7333D26E0A93EDE,
HLE_BindShader,
HLE_SetRasterBoundingBox,
HLE_ClearConstBuffer,
HLE_ClearMemory,
HLE_TransformFeedbackSetup,
HLE_DrawIndirectByteCount,
MacroInterpreterImpl,
// Used for JIT x86 macro
std::unique_ptr<DynamicCachedMacro>
>;
struct MacroEngine {
MacroEngine(bool is_interpreted_) noexcept : is_interpreted{is_interpreted_} {}
// Store the uploaded macro code to compile them when they're called.
void AddCode(u32 method, u32 data);
inline void AddCode(u32 method, u32 data) noexcept {
uploaded_macro_code[method].push_back(data);
}
// Clear the code associated with a method.
void ClearCode(u32 method);
inline void ClearCode(u32 method) noexcept {
macro_cache.erase(method);
uploaded_macro_code.erase(method);
}
// Compiles the macro if its not in the cache, and executes the compiled macro
void Execute(u32 method, const std::vector<u32>& parameters);
protected:
std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code);
private:
void Execute(Engines::Maxwell3D& maxwell3d, u32 method, std::span<const u32> parameters);
AnyCachedMacro Compile(Engines::Maxwell3D& maxwell3d, std::span<const u32> code);
struct CacheInfo {
std::unique_ptr<CachedMacro> lle_program{};
std::unique_ptr<CachedMacro> hle_program{};
AnyCachedMacro program;
u64 hash{};
bool has_hle_program{};
};
ankerl::unordered_dense::map<u32, CacheInfo> macro_cache;
ankerl::unordered_dense::map<u32, std::vector<u32>> uploaded_macro_code;
std::optional<HLEMacro> hle_macros;
Engines::Maxwell3D& maxwell3d;
bool is_interpreted;
};
std::optional<MacroEngine> GetMacroEngine(Engines::Maxwell3D& maxwell3d);
} // namespace Tegra