mirror of
https://git.eden-emu.dev/eden-emu/eden
synced 2026-04-18 06:18:58 +02:00
[common, core] remove unneeded memory indirection overhead at startup (#3306)
For core stuff: just remove unique ptrs that don't need any pointer stability at all (after all, it's an allocation within an allocation). For fibers: the main reasoning behind this is that virtualBuffer<> is extremely expensive and it also clutters my fstat view; also, mmap is a syscall, and syscalls are bad for performance; also, std::vector<> is better suited for handling this kind of "fixed size thing that is big but not THAT big" (512 KiB isn't going to kill your memory usage for each fiber...). For core.cpp stuff: - inlines stuff into std::optional<> as opposed to std::unique_ptr<> (because we are making the Impl from a unique_ptr, so allocating within an allocation is unnecessary) - reorganizes the structures a bit so padding doesn't hurt us (it's not perfect, but it saves a measly 44 bytes) - removes unused/dead code - uses std::vector<> instead of std::deque<>. No perf impact expected, maybe some initialisation boost, but very minimal impact nonetheless; LTO gets rid of most calls anyways - the heavy issue is with shared_ptr and the cache coherency from the atomics... but I clumped them together because they do not really suffer from cache coherency - hopefully not a mistake. This balloons the size of Impl to about 1.67 MB - which is fine because we throw it in the stack anyways. REST OF INTERFACES: most of them ballooned in size as well, but the overhead is OK since it's an allocation within an allocation, so no stack is used (when it comes to storing these, I mean). Signed-off-by: lizzie <lizzie@eden-emu.dev> Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3306 Reviewed-by: CamilleLaVey <camillelavey99@gmail.com> Reviewed-by: MaranBr <maranbr@eden-emu.dev> Co-authored-by: lizzie <lizzie@eden-emu.dev> Co-committed-by: lizzie <lizzie@eden-emu.dev>
This commit is contained in:
parent
5768600c8b
commit
83a28dc251
40 changed files with 2602 additions and 2963 deletions
|
|
@ -87,12 +87,8 @@ add_library(video_core STATIC
|
|||
host1x/syncpoint_manager.h
|
||||
host1x/vic.cpp
|
||||
host1x/vic.h
|
||||
macro/macro.cpp
|
||||
macro/macro.h
|
||||
macro/macro_hle.cpp
|
||||
macro/macro_hle.h
|
||||
macro/macro_interpreter.cpp
|
||||
macro/macro_interpreter.h
|
||||
macro.cpp
|
||||
macro.h
|
||||
fence_manager.h
|
||||
gpu.cpp
|
||||
gpu.h
|
||||
|
|
@ -375,10 +371,6 @@ else()
|
|||
endif()
|
||||
|
||||
if (ARCHITECTURE_x86_64)
|
||||
target_sources(video_core PRIVATE
|
||||
macro/macro_jit_x64.cpp
|
||||
macro/macro_jit_x64.h
|
||||
)
|
||||
target_link_libraries(video_core PUBLIC xbyak::xbyak)
|
||||
endif()
|
||||
|
||||
|
|
|
|||
|
|
@ -107,35 +107,27 @@ bool DmaPusher::Step() {
|
|||
}
|
||||
|
||||
void DmaPusher::ProcessCommands(std::span<const CommandHeader> commands) {
|
||||
for (std::size_t index = 0; index < commands.size();) {
|
||||
const CommandHeader& command_header = commands[index];
|
||||
|
||||
if (dma_state.method_count) {
|
||||
// Data word of methods command
|
||||
dma_state.dma_word_offset = static_cast<u32>(index * sizeof(u32));
|
||||
if (dma_state.non_incrementing) {
|
||||
const u32 max_write = static_cast<u32>(
|
||||
std::min<std::size_t>(index + dma_state.method_count, commands.size()) - index);
|
||||
CallMultiMethod(&command_header.argument, max_write);
|
||||
dma_state.method_count -= max_write;
|
||||
dma_state.is_last_call = true;
|
||||
index += max_write;
|
||||
continue;
|
||||
} else {
|
||||
dma_state.is_last_call = dma_state.method_count <= 1;
|
||||
CallMethod(command_header.argument);
|
||||
}
|
||||
|
||||
if (!dma_state.non_incrementing) {
|
||||
dma_state.method++;
|
||||
}
|
||||
|
||||
if (dma_increment_once) {
|
||||
dma_state.non_incrementing = true;
|
||||
}
|
||||
|
||||
for (size_t index = 0; index < commands.size();) {
|
||||
// Data word of methods command
|
||||
if (dma_state.method_count && dma_state.non_incrementing) {
|
||||
auto const& command_header = commands[index]; //must ref (MUltiMethod re)
|
||||
dma_state.dma_word_offset = u32(index * sizeof(u32));
|
||||
const u32 max_write = u32(std::min<std::size_t>(index + dma_state.method_count, commands.size()) - index);
|
||||
CallMultiMethod(&command_header.argument, max_write);
|
||||
dma_state.method_count -= max_write;
|
||||
dma_state.is_last_call = true;
|
||||
index += max_write;
|
||||
} else if (dma_state.method_count) {
|
||||
auto const command_header = commands[index]; //can copy
|
||||
dma_state.dma_word_offset = u32(index * sizeof(u32));
|
||||
dma_state.is_last_call = dma_state.method_count <= 1;
|
||||
CallMethod(command_header.argument);
|
||||
dma_state.method += !dma_state.non_incrementing ? 1 : 0;
|
||||
dma_state.non_incrementing |= dma_increment_once;
|
||||
dma_state.method_count--;
|
||||
index++;
|
||||
} else {
|
||||
auto const command_header = commands[index]; //can copy
|
||||
// No command active - this is the first word of a new one
|
||||
switch (command_header.mode) {
|
||||
case SubmissionMode::Increasing:
|
||||
|
|
@ -151,8 +143,7 @@ void DmaPusher::ProcessCommands(std::span<const CommandHeader> commands) {
|
|||
case SubmissionMode::Inline:
|
||||
dma_state.method = command_header.method;
|
||||
dma_state.subchannel = command_header.subchannel;
|
||||
dma_state.dma_word_offset = static_cast<u64>(
|
||||
-static_cast<s64>(dma_state.dma_get)); // negate to set address as 0
|
||||
dma_state.dma_word_offset = u64(-s64(dma_state.dma_get)); // negate to set address as 0
|
||||
CallMethod(command_header.arg_count);
|
||||
dma_state.non_incrementing = true;
|
||||
dma_increment_once = false;
|
||||
|
|
@ -165,8 +156,8 @@ void DmaPusher::ProcessCommands(std::span<const CommandHeader> commands) {
|
|||
default:
|
||||
break;
|
||||
}
|
||||
index++;
|
||||
}
|
||||
index++;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -186,26 +177,24 @@ void DmaPusher::CallMethod(u32 argument) const {
|
|||
});
|
||||
} else {
|
||||
auto subchannel = subchannels[dma_state.subchannel];
|
||||
if (!subchannel->execution_mask[dma_state.method]) [[likely]] {
|
||||
if (!subchannel->execution_mask[dma_state.method]) {
|
||||
subchannel->method_sink.emplace_back(dma_state.method, argument);
|
||||
return;
|
||||
} else {
|
||||
subchannel->ConsumeSink();
|
||||
subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset;
|
||||
subchannel->CallMethod(dma_state.method, argument, dma_state.is_last_call);
|
||||
}
|
||||
subchannel->ConsumeSink();
|
||||
subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset;
|
||||
subchannel->CallMethod(dma_state.method, argument, dma_state.is_last_call);
|
||||
}
|
||||
}
|
||||
|
||||
void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const {
|
||||
if (dma_state.method < non_puller_methods) {
|
||||
puller.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods,
|
||||
dma_state.method_count);
|
||||
puller.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods, dma_state.method_count);
|
||||
} else {
|
||||
auto subchannel = subchannels[dma_state.subchannel];
|
||||
subchannel->ConsumeSink();
|
||||
subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset;
|
||||
subchannel->CallMultiMethod(dma_state.method, base_start, num_methods,
|
||||
dma_state.method_count);
|
||||
subchannel->CallMultiMethod(dma_state.method, base_start, num_methods, dma_state.method_count);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -27,9 +27,7 @@ constexpr u32 MacroRegistersStart = 0xE00;
|
|||
|
||||
Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_)
|
||||
: draw_manager{std::make_unique<DrawManager>(this)}, system{system_},
|
||||
memory_manager{memory_manager_}, macro_engine{GetMacroEngine(*this)}, upload_state{
|
||||
memory_manager,
|
||||
regs.upload} {
|
||||
memory_manager{memory_manager_}, macro_engine{GetMacroEngine(*this)}, upload_state{memory_manager, regs.upload} {
|
||||
dirty.flags.flip();
|
||||
InitializeRegisterDefaults();
|
||||
execution_mask.reset();
|
||||
|
|
@ -329,8 +327,7 @@ void Maxwell3D::ProcessDirtyRegisters(u32 method, u32 argument) {
|
|||
}
|
||||
}
|
||||
|
||||
void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument,
|
||||
bool is_last_call) {
|
||||
void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument, bool is_last_call) {
|
||||
switch (method) {
|
||||
case MAXWELL3D_REG_INDEX(wait_for_idle):
|
||||
return rasterizer->WaitForIdle();
|
||||
|
|
@ -427,9 +424,7 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
|
|||
return;
|
||||
}
|
||||
|
||||
ASSERT_MSG(method < Regs::NUM_REGS,
|
||||
"Invalid Maxwell3D register, increase the size of the Regs structure");
|
||||
|
||||
ASSERT(method < Regs::NUM_REGS && "Invalid Maxwell3D register, increase the size of the Regs structure");
|
||||
const u32 argument = ProcessShadowRam(method, method_argument);
|
||||
ProcessDirtyRegisters(method, argument);
|
||||
ProcessMethodCall(method, argument, method_argument, is_last_call);
|
||||
|
|
@ -670,7 +665,7 @@ Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
|
|||
}
|
||||
|
||||
u32 Maxwell3D::GetRegisterValue(u32 method) const {
|
||||
ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register");
|
||||
ASSERT(method < Regs::NUM_REGS && "Invalid Maxwell3D register");
|
||||
return regs.reg_array[method];
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@
|
|||
#include "video_core/engines/engine_interface.h"
|
||||
#include "video_core/engines/engine_upload.h"
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/macro/macro.h"
|
||||
#include "video_core/macro.h"
|
||||
#include "video_core/textures/texture.h"
|
||||
|
||||
namespace Core {
|
||||
|
|
@ -3203,7 +3203,7 @@ private:
|
|||
std::vector<u32> macro_params;
|
||||
|
||||
/// Interpreter for the macro codes uploaded to the GPU.
|
||||
std::unique_ptr<MacroEngine> macro_engine;
|
||||
std::optional<MacroEngine> macro_engine;
|
||||
|
||||
Upload::State upload_state;
|
||||
|
||||
|
|
|
|||
1667
src/video_core/macro.cpp
Normal file
1667
src/video_core/macro.cpp
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -1,3 +1,6 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
|
|
@ -95,24 +98,34 @@ union MethodAddress {
|
|||
|
||||
} // namespace Macro
|
||||
|
||||
class HLEMacro;
|
||||
|
||||
class CachedMacro {
|
||||
public:
|
||||
CachedMacro(Engines::Maxwell3D& maxwell3d_)
|
||||
: maxwell3d{maxwell3d_}
|
||||
{}
|
||||
virtual ~CachedMacro() = default;
|
||||
/**
|
||||
* Executes the macro code with the specified input parameters.
|
||||
*
|
||||
* @param parameters The parameters of the macro
|
||||
* @param method The method to execute
|
||||
*/
|
||||
/// Executes the macro code with the specified input parameters.
|
||||
/// @param parameters The parameters of the macro
|
||||
/// @param method The method to execute
|
||||
virtual void Execute(const std::vector<u32>& parameters, u32 method) = 0;
|
||||
Engines::Maxwell3D& maxwell3d;
|
||||
};
|
||||
|
||||
class HLEMacro {
|
||||
public:
|
||||
explicit HLEMacro(Engines::Maxwell3D& maxwell3d_);
|
||||
~HLEMacro();
|
||||
// Allocates and returns a cached macro if the hash matches a known function.
|
||||
// Returns nullptr otherwise.
|
||||
[[nodiscard]] std::unique_ptr<CachedMacro> GetHLEProgram(u64 hash) const;
|
||||
private:
|
||||
Engines::Maxwell3D& maxwell3d;
|
||||
};
|
||||
|
||||
class MacroEngine {
|
||||
public:
|
||||
explicit MacroEngine(Engines::Maxwell3D& maxwell3d);
|
||||
virtual ~MacroEngine();
|
||||
explicit MacroEngine(Engines::Maxwell3D& maxwell3d, bool is_interpreted);
|
||||
~MacroEngine();
|
||||
|
||||
// Store the uploaded macro code to compile them when they're called.
|
||||
void AddCode(u32 method, u32 data);
|
||||
|
|
@ -124,7 +137,7 @@ public:
|
|||
void Execute(u32 method, const std::vector<u32>& parameters);
|
||||
|
||||
protected:
|
||||
virtual std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) = 0;
|
||||
std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code);
|
||||
|
||||
private:
|
||||
struct CacheInfo {
|
||||
|
|
@ -136,10 +149,11 @@ private:
|
|||
|
||||
std::unordered_map<u32, CacheInfo> macro_cache;
|
||||
std::unordered_map<u32, std::vector<u32>> uploaded_macro_code;
|
||||
std::unique_ptr<HLEMacro> hle_macros;
|
||||
std::optional<HLEMacro> hle_macros;
|
||||
Engines::Maxwell3D& maxwell3d;
|
||||
bool is_interpreted;
|
||||
};
|
||||
|
||||
std::unique_ptr<MacroEngine> GetMacroEngine(Engines::Maxwell3D& maxwell3d);
|
||||
std::optional<MacroEngine> GetMacroEngine(Engines::Maxwell3D& maxwell3d);
|
||||
|
||||
} // namespace Tegra
|
||||
|
|
@ -1,140 +0,0 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <optional>
|
||||
#include <span>
|
||||
|
||||
#include "common/container_hash.h"
|
||||
|
||||
#include <fstream>
|
||||
#include "common/assert.h"
|
||||
#include "common/fs/fs.h"
|
||||
#include "common/fs/path_util.h"
|
||||
#include "common/settings.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/macro/macro.h"
|
||||
#include "video_core/macro/macro_hle.h"
|
||||
#include "video_core/macro/macro_interpreter.h"
|
||||
|
||||
#ifdef ARCHITECTURE_x86_64
|
||||
#include "video_core/macro/macro_jit_x64.h"
|
||||
#endif
|
||||
|
||||
namespace Tegra {
|
||||
|
||||
/// Writes the raw words of a macro into the "macros" folder of the dump directory.
///
/// The file is named "<hash>.macro". When @p decompiled is set the target name is
/// "decompiled_<hash>.macro" instead: if a plain dump with the same hash already exists it is
/// renamed to the decompiled name and nothing is written, otherwise the code is written
/// directly under the decompiled name.
///
/// @param hash       Hash of the macro code, used to build the file name
/// @param code       Macro words to write out
/// @param decompiled Selects the "decompiled_" naming scheme described above
static void Dump(u64 hash, std::span<const u32> code, bool decompiled = false) {
    const auto dump_root = Common::FS::GetEdenPath(Common::FS::EdenPath::DumpDir);
    const auto macros_path = dump_root / "macros";

    // Both directories are needed; the second is only attempted when the first succeeded.
    const bool dirs_ready = Common::FS::CreateDir(dump_root) && Common::FS::CreateDir(macros_path);
    if (!dirs_ready) {
        LOG_ERROR(Common_Filesystem, "Failed to create macro dump directories");
        return;
    }

    auto target = macros_path / fmt::format("{:016x}.macro", hash);
    if (decompiled) {
        auto decompiled_target = macros_path / fmt::format("decompiled_{:016x}.macro", hash);
        if (Common::FS::Exists(target)) {
            // A plain dump is already on disk: just move it, the rename result is ignored.
            (void)Common::FS::RenameFile(target, decompiled_target);
            return;
        }
        target = decompiled_target;
    }

    std::fstream out(target, std::ios::out | std::ios::binary);
    if (!out) {
        LOG_ERROR(Common_Filesystem, "Unable to open or create file at {}",
                  Common::FS::PathToUTF8String(target));
        return;
    }
    out.write(reinterpret_cast<const char*>(code.data()), code.size_bytes());
}
|
||||
|
||||
MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d_)
|
||||
: hle_macros{std::make_unique<Tegra::HLEMacro>(maxwell3d_)}, maxwell3d{maxwell3d_} {}
|
||||
|
||||
MacroEngine::~MacroEngine() = default;
|
||||
|
||||
void MacroEngine::AddCode(u32 method, u32 data) {
|
||||
uploaded_macro_code[method].push_back(data);
|
||||
}
|
||||
|
||||
void MacroEngine::ClearCode(u32 method) {
|
||||
macro_cache.erase(method);
|
||||
uploaded_macro_code.erase(method);
|
||||
}
|
||||
|
||||
/// Runs the macro registered for `method`, compiling and caching it on first use.
///
/// Lookup order:
///  1. A cached program for `method` is executed directly (HLE if one was selected,
///     otherwise the compiled program after refreshing the Maxwell3D parameters).
///  2. Otherwise the uploaded code for `method` is compiled.
///  3. If no code was uploaded for `method` itself, the uploaded programs are searched for one
///     that contains `method` as an offset; the tail starting at that offset is copied into a
///     new uploaded entry for `method` and compiled.
///
/// @param method     Method the macro was bound to
/// @param parameters Parameters forwarded to the macro program
void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
    // Already compiled: run the cached program.
    if (const auto cached = macro_cache.find(method); cached != macro_cache.end()) {
        const auto& cache_info = cached->second;
        if (cache_info.has_hle_program) {
            cache_info.hle_program->Execute(parameters, method);
        } else {
            maxwell3d.RefreshParameters();
            cache_info.lle_program->Execute(parameters, method);
        }
        return;
    }

    // Macro not compiled, check if it's uploaded and if so, compile it
    std::optional<u32> mid_method;
    const auto macro_code = uploaded_macro_code.find(method);
    if (macro_code == uploaded_macro_code.end()) {
        for (const auto& [method_base, code] : uploaded_macro_code) {
            if (method >= method_base && (method - method_base) < code.size()) {
                mid_method = method_base;
                break;
            }
        }
        if (!mid_method.has_value()) {
            ASSERT_MSG(false, "Macro 0x{0:x} was not uploaded", method);
            return;
        }
    }
    auto& cache_info = macro_cache[method];

    // Points at the exact words that were compiled. The dump below must use this rather than
    // `macro_code`, which is the end iterator whenever the mid-method path was taken.
    const std::vector<u32>* compiled_code = nullptr;
    if (!mid_method.has_value()) {
        compiled_code = &macro_code->second;
        cache_info.lle_program = Compile(*compiled_code);
        cache_info.hash = Common::HashValue(*compiled_code);
    } else {
        const auto& macro_cached = uploaded_macro_code[mid_method.value()];
        const auto rebased_method = method - mid_method.value();
        // Inserting may rehash the map; references to existing elements remain valid.
        auto& code = uploaded_macro_code[method];
        code.resize(macro_cached.size() - rebased_method);
        std::memcpy(code.data(), macro_cached.data() + rebased_method,
                    code.size() * sizeof(u32));
        compiled_code = &code;
        cache_info.hash = Common::HashValue(code);
        cache_info.lle_program = Compile(code);
    }

    auto hle_program = hle_macros->GetHLEProgram(cache_info.hash);
    if (!hle_program || Settings::values.disable_macro_hle) {
        maxwell3d.RefreshParameters();
        cache_info.lle_program->Execute(parameters, method);
    } else {
        cache_info.has_hle_program = true;
        cache_info.hle_program = std::move(hle_program);
        cache_info.hle_program->Execute(parameters, method);
    }

    if (Settings::values.dump_macros) {
        Dump(cache_info.hash, *compiled_code, cache_info.has_hle_program);
    }
}
|
||||
|
||||
/// Creates the macro engine backend for `maxwell3d`.
///
/// The x86-64 JIT is returned only when it is compiled in (ARCHITECTURE_x86_64) and
/// `disable_macro_jit` is not set; every other configuration gets the interpreter.
std::unique_ptr<MacroEngine> GetMacroEngine(Engines::Maxwell3D& maxwell3d) {
#ifdef ARCHITECTURE_x86_64
    if (!Settings::values.disable_macro_jit) {
        return std::make_unique<MacroJITx64>(maxwell3d);
    }
#endif
    return std::make_unique<MacroInterpreter>(maxwell3d);
}
|
||||
|
||||
} // namespace Tegra
|
||||
|
|
@ -1,606 +0,0 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#include <array>
|
||||
#include <vector>
|
||||
#include "common/assert.h"
|
||||
#include "common/scope_exit.h"
|
||||
#include "video_core/dirty_flags.h"
|
||||
#include "video_core/engines/draw_manager.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/macro/macro.h"
|
||||
#include "video_core/macro/macro_hle.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
|
||||
namespace Tegra {
|
||||
|
||||
using Maxwell3D = Engines::Maxwell3D;
|
||||
|
||||
namespace {
|
||||
|
||||
/// Returns true for the topologies the HLE macros accept on their indirect draw path.
/// Quads, quad strips, polygons and any unlisted value yield false.
bool IsTopologySafe(Maxwell3D::Regs::PrimitiveTopology topology) {
    using Topology = Maxwell3D::Regs::PrimitiveTopology;
    switch (topology) {
    case Topology::Points:
    case Topology::Lines:
    case Topology::LineLoop:
    case Topology::LineStrip:
    case Topology::Triangles:
    case Topology::TriangleStrip:
    case Topology::TriangleFan:
    case Topology::LinesAdjacency:
    case Topology::LineStripAdjacency:
    case Topology::TrianglesAdjacency:
    case Topology::TriangleStripAdjacency:
    case Topology::Patches:
        return true;
    default:
        // Includes Quads, QuadStrip and Polygon.
        return false;
    }
}
|
||||
|
||||
class HLEMacroImpl : public CachedMacro {
|
||||
public:
|
||||
explicit HLEMacroImpl(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {}
|
||||
|
||||
protected:
|
||||
Maxwell3D& maxwell3d;
|
||||
};
|
||||
|
||||
/*
|
||||
* @note: these macros have two versions, a normal and extended version, with the extended version
|
||||
* also assigning the base vertex/instance.
|
||||
*/
|
||||
template <bool extended>
|
||||
class HLE_DrawArraysIndirect final : public HLEMacroImpl {
|
||||
public:
|
||||
explicit HLE_DrawArraysIndirect(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
|
||||
|
||||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
||||
auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]);
|
||||
if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) {
|
||||
Fallback(parameters);
|
||||
return;
|
||||
}
|
||||
|
||||
auto& params = maxwell3d.draw_manager->GetIndirectParams();
|
||||
params.is_byte_count = false;
|
||||
params.is_indexed = false;
|
||||
params.include_count = false;
|
||||
params.count_start_address = 0;
|
||||
params.indirect_start_address = maxwell3d.GetMacroAddress(1);
|
||||
params.buffer_size = 4 * sizeof(u32);
|
||||
params.max_draw_counts = 1;
|
||||
params.stride = 0;
|
||||
|
||||
if constexpr (extended) {
|
||||
maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro;
|
||||
maxwell3d.SetHLEReplacementAttributeType(
|
||||
0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseInstance);
|
||||
}
|
||||
|
||||
maxwell3d.draw_manager->DrawArrayIndirect(topology);
|
||||
|
||||
if constexpr (extended) {
|
||||
maxwell3d.engine_state = Maxwell3D::EngineHint::None;
|
||||
maxwell3d.replace_table.clear();
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
void Fallback(const std::vector<u32>& parameters) {
|
||||
SCOPE_EXIT {
|
||||
if (extended) {
|
||||
maxwell3d.engine_state = Maxwell3D::EngineHint::None;
|
||||
maxwell3d.replace_table.clear();
|
||||
}
|
||||
};
|
||||
maxwell3d.RefreshParameters();
|
||||
const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
|
||||
|
||||
auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]);
|
||||
const u32 vertex_first = parameters[3];
|
||||
const u32 vertex_count = parameters[1];
|
||||
|
||||
if (!IsTopologySafe(topology) &&
|
||||
static_cast<size_t>(maxwell3d.GetMaxCurrentVertices()) <
|
||||
static_cast<size_t>(vertex_first) + static_cast<size_t>(vertex_count)) {
|
||||
ASSERT_MSG(false, "Faulty draw!");
|
||||
return;
|
||||
}
|
||||
|
||||
const u32 base_instance = parameters[4];
|
||||
if constexpr (extended) {
|
||||
maxwell3d.regs.global_base_instance_index = base_instance;
|
||||
maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro;
|
||||
maxwell3d.SetHLEReplacementAttributeType(
|
||||
0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseInstance);
|
||||
}
|
||||
|
||||
maxwell3d.draw_manager->DrawArray(topology, vertex_first, vertex_count, base_instance,
|
||||
instance_count);
|
||||
|
||||
if constexpr (extended) {
|
||||
maxwell3d.regs.global_base_instance_index = 0;
|
||||
maxwell3d.engine_state = Maxwell3D::EngineHint::None;
|
||||
maxwell3d.replace_table.clear();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/*
|
||||
* @note: these macros have two versions, a normal and extended version, with the extended version
|
||||
* also assigning the base vertex/instance.
|
||||
*/
|
||||
template <bool extended>
|
||||
class HLE_DrawIndexedIndirect final : public HLEMacroImpl {
|
||||
public:
|
||||
explicit HLE_DrawIndexedIndirect(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
|
||||
|
||||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
||||
auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]);
|
||||
if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) {
|
||||
Fallback(parameters);
|
||||
return;
|
||||
}
|
||||
|
||||
const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize());
|
||||
const u32 element_base = parameters[4];
|
||||
const u32 base_instance = parameters[5];
|
||||
maxwell3d.regs.vertex_id_base = element_base;
|
||||
maxwell3d.regs.global_base_vertex_index = element_base;
|
||||
maxwell3d.regs.global_base_instance_index = base_instance;
|
||||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
||||
if constexpr (extended) {
|
||||
maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro;
|
||||
maxwell3d.SetHLEReplacementAttributeType(
|
||||
0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex);
|
||||
maxwell3d.SetHLEReplacementAttributeType(
|
||||
0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance);
|
||||
}
|
||||
auto& params = maxwell3d.draw_manager->GetIndirectParams();
|
||||
params.is_byte_count = false;
|
||||
params.is_indexed = true;
|
||||
params.include_count = false;
|
||||
params.count_start_address = 0;
|
||||
params.indirect_start_address = maxwell3d.GetMacroAddress(1);
|
||||
params.buffer_size = 5 * sizeof(u32);
|
||||
params.max_draw_counts = 1;
|
||||
params.stride = 0;
|
||||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
||||
maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, estimate);
|
||||
maxwell3d.regs.vertex_id_base = 0x0;
|
||||
maxwell3d.regs.global_base_vertex_index = 0x0;
|
||||
maxwell3d.regs.global_base_instance_index = 0x0;
|
||||
if constexpr (extended) {
|
||||
maxwell3d.engine_state = Maxwell3D::EngineHint::None;
|
||||
maxwell3d.replace_table.clear();
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
void Fallback(const std::vector<u32>& parameters) {
|
||||
maxwell3d.RefreshParameters();
|
||||
const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
|
||||
const u32 element_base = parameters[4];
|
||||
const u32 base_instance = parameters[5];
|
||||
maxwell3d.regs.vertex_id_base = element_base;
|
||||
maxwell3d.regs.global_base_vertex_index = element_base;
|
||||
maxwell3d.regs.global_base_instance_index = base_instance;
|
||||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
||||
if constexpr (extended) {
|
||||
maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro;
|
||||
maxwell3d.SetHLEReplacementAttributeType(
|
||||
0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex);
|
||||
maxwell3d.SetHLEReplacementAttributeType(
|
||||
0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance);
|
||||
}
|
||||
|
||||
maxwell3d.draw_manager->DrawIndex(
|
||||
static_cast<Tegra::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]), parameters[3],
|
||||
parameters[1], element_base, base_instance, instance_count);
|
||||
|
||||
maxwell3d.regs.vertex_id_base = 0x0;
|
||||
maxwell3d.regs.global_base_vertex_index = 0x0;
|
||||
maxwell3d.regs.global_base_instance_index = 0x0;
|
||||
if constexpr (extended) {
|
||||
maxwell3d.engine_state = Maxwell3D::EngineHint::None;
|
||||
maxwell3d.replace_table.clear();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class HLE_MultiLayerClear final : public HLEMacroImpl {
|
||||
public:
|
||||
explicit HLE_MultiLayerClear(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
|
||||
|
||||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
||||
maxwell3d.RefreshParameters();
|
||||
ASSERT(parameters.size() == 1);
|
||||
|
||||
const Maxwell3D::Regs::ClearSurface clear_params{parameters[0]};
|
||||
const u32 rt_index = clear_params.RT;
|
||||
const u32 num_layers = maxwell3d.regs.rt[rt_index].depth;
|
||||
ASSERT(clear_params.layer == 0);
|
||||
|
||||
maxwell3d.regs.clear_surface.raw = clear_params.raw;
|
||||
maxwell3d.draw_manager->Clear(num_layers);
|
||||
}
|
||||
};
|
||||
|
||||
class HLE_MultiDrawIndexedIndirectCount final : public HLEMacroImpl {
|
||||
public:
|
||||
explicit HLE_MultiDrawIndexedIndirectCount(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
|
||||
|
||||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
||||
const auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[2]);
|
||||
if (!IsTopologySafe(topology)) {
|
||||
Fallback(parameters);
|
||||
return;
|
||||
}
|
||||
|
||||
const u32 start_indirect = parameters[0];
|
||||
const u32 end_indirect = parameters[1];
|
||||
if (start_indirect >= end_indirect) {
|
||||
// Nothing to do.
|
||||
return;
|
||||
}
|
||||
|
||||
const u32 padding = parameters[3]; // padding is in words
|
||||
|
||||
// size of each indirect segment
|
||||
const u32 indirect_words = 5 + padding;
|
||||
const u32 stride = indirect_words * sizeof(u32);
|
||||
const std::size_t draw_count = end_indirect - start_indirect;
|
||||
const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize());
|
||||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
||||
auto& params = maxwell3d.draw_manager->GetIndirectParams();
|
||||
params.is_byte_count = false;
|
||||
params.is_indexed = true;
|
||||
params.include_count = true;
|
||||
params.count_start_address = maxwell3d.GetMacroAddress(4);
|
||||
params.indirect_start_address = maxwell3d.GetMacroAddress(5);
|
||||
params.buffer_size = stride * draw_count;
|
||||
params.max_draw_counts = draw_count;
|
||||
params.stride = stride;
|
||||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
||||
maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro;
|
||||
maxwell3d.SetHLEReplacementAttributeType(
|
||||
0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex);
|
||||
maxwell3d.SetHLEReplacementAttributeType(
|
||||
0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance);
|
||||
maxwell3d.SetHLEReplacementAttributeType(0, 0x648,
|
||||
Maxwell3D::HLEReplacementAttributeType::DrawID);
|
||||
maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, estimate);
|
||||
maxwell3d.engine_state = Maxwell3D::EngineHint::None;
|
||||
maxwell3d.replace_table.clear();
|
||||
}
|
||||
|
||||
private:
|
||||
void Fallback(const std::vector<u32>& parameters) {
|
||||
SCOPE_EXIT {
|
||||
// Clean everything.
|
||||
maxwell3d.regs.vertex_id_base = 0x0;
|
||||
maxwell3d.engine_state = Maxwell3D::EngineHint::None;
|
||||
maxwell3d.replace_table.clear();
|
||||
};
|
||||
maxwell3d.RefreshParameters();
|
||||
const u32 start_indirect = parameters[0];
|
||||
const u32 end_indirect = parameters[1];
|
||||
if (start_indirect >= end_indirect) {
|
||||
// Nothing to do.
|
||||
return;
|
||||
}
|
||||
const auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[2]);
|
||||
const u32 padding = parameters[3];
|
||||
const std::size_t max_draws = parameters[4];
|
||||
|
||||
const u32 indirect_words = 5 + padding;
|
||||
const std::size_t first_draw = start_indirect;
|
||||
const std::size_t effective_draws = end_indirect - start_indirect;
|
||||
const std::size_t last_draw = start_indirect + (std::min)(effective_draws, max_draws);
|
||||
|
||||
for (std::size_t index = first_draw; index < last_draw; index++) {
|
||||
const std::size_t base = index * indirect_words + 5;
|
||||
const u32 base_vertex = parameters[base + 3];
|
||||
const u32 base_instance = parameters[base + 4];
|
||||
maxwell3d.regs.vertex_id_base = base_vertex;
|
||||
maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro;
|
||||
maxwell3d.SetHLEReplacementAttributeType(
|
||||
0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex);
|
||||
maxwell3d.SetHLEReplacementAttributeType(
|
||||
0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance);
|
||||
maxwell3d.CallMethod(0x8e3, 0x648, true);
|
||||
maxwell3d.CallMethod(0x8e4, static_cast<u32>(index), true);
|
||||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
||||
maxwell3d.draw_manager->DrawIndex(topology, parameters[base + 2], parameters[base],
|
||||
base_vertex, base_instance, parameters[base + 1]);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class HLE_DrawIndirectByteCount final : public HLEMacroImpl {
|
||||
public:
|
||||
explicit HLE_DrawIndirectByteCount(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
|
||||
|
||||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
||||
const bool force = maxwell3d.Rasterizer().HasDrawTransformFeedback();
|
||||
|
||||
auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0] & 0xFFFFU);
|
||||
if (!force && (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology))) {
|
||||
Fallback(parameters);
|
||||
return;
|
||||
}
|
||||
auto& params = maxwell3d.draw_manager->GetIndirectParams();
|
||||
params.is_byte_count = true;
|
||||
params.is_indexed = false;
|
||||
params.include_count = false;
|
||||
params.count_start_address = 0;
|
||||
params.indirect_start_address = maxwell3d.GetMacroAddress(2);
|
||||
params.buffer_size = 4;
|
||||
params.max_draw_counts = 1;
|
||||
params.stride = parameters[1];
|
||||
maxwell3d.regs.draw.begin = parameters[0];
|
||||
maxwell3d.regs.draw_auto_stride = parameters[1];
|
||||
maxwell3d.regs.draw_auto_byte_count = parameters[2];
|
||||
|
||||
maxwell3d.draw_manager->DrawArrayIndirect(topology);
|
||||
}
|
||||
|
||||
private:
|
||||
void Fallback(const std::vector<u32>& parameters) {
|
||||
maxwell3d.RefreshParameters();
|
||||
|
||||
maxwell3d.regs.draw.begin = parameters[0];
|
||||
maxwell3d.regs.draw_auto_stride = parameters[1];
|
||||
maxwell3d.regs.draw_auto_byte_count = parameters[2];
|
||||
|
||||
maxwell3d.draw_manager->DrawArray(
|
||||
maxwell3d.regs.draw.topology, 0,
|
||||
maxwell3d.regs.draw_auto_byte_count / maxwell3d.regs.draw_auto_stride, 0, 1);
|
||||
}
|
||||
};
|
||||
|
||||
class HLE_C713C83D8F63CCF3 final : public HLEMacroImpl {
|
||||
public:
|
||||
explicit HLE_C713C83D8F63CCF3(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
|
||||
|
||||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
||||
maxwell3d.RefreshParameters();
|
||||
const u32 offset = (parameters[0] & 0x3FFFFFFF) << 2;
|
||||
const u32 address = maxwell3d.regs.shadow_scratch[24];
|
||||
auto& const_buffer = maxwell3d.regs.const_buffer;
|
||||
const_buffer.size = 0x7000;
|
||||
const_buffer.address_high = (address >> 24) & 0xFF;
|
||||
const_buffer.address_low = address << 8;
|
||||
const_buffer.offset = offset;
|
||||
}
|
||||
};
|
||||
|
||||
class HLE_D7333D26E0A93EDE final : public HLEMacroImpl {
|
||||
public:
|
||||
explicit HLE_D7333D26E0A93EDE(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
|
||||
|
||||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
||||
maxwell3d.RefreshParameters();
|
||||
const size_t index = parameters[0];
|
||||
const u32 address = maxwell3d.regs.shadow_scratch[42 + index];
|
||||
const u32 size = maxwell3d.regs.shadow_scratch[47 + index];
|
||||
auto& const_buffer = maxwell3d.regs.const_buffer;
|
||||
const_buffer.size = size;
|
||||
const_buffer.address_high = (address >> 24) & 0xFF;
|
||||
const_buffer.address_low = address << 8;
|
||||
}
|
||||
};
|
||||
|
||||
class HLE_BindShader final : public HLEMacroImpl {
|
||||
public:
|
||||
explicit HLE_BindShader(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
|
||||
|
||||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
||||
maxwell3d.RefreshParameters();
|
||||
auto& regs = maxwell3d.regs;
|
||||
const u32 index = parameters[0];
|
||||
if ((parameters[1] - regs.shadow_scratch[28 + index]) == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
regs.pipelines[index & 0xF].offset = parameters[2];
|
||||
maxwell3d.dirty.flags[VideoCommon::Dirty::Shaders] = true;
|
||||
regs.shadow_scratch[28 + index] = parameters[1];
|
||||
regs.shadow_scratch[34 + index] = parameters[2];
|
||||
|
||||
const u32 address = parameters[4];
|
||||
auto& const_buffer = regs.const_buffer;
|
||||
const_buffer.size = 0x10000;
|
||||
const_buffer.address_high = (address >> 24) & 0xFF;
|
||||
const_buffer.address_low = address << 8;
|
||||
|
||||
const size_t bind_group_id = parameters[3] & 0x7F;
|
||||
auto& bind_group = regs.bind_groups[bind_group_id];
|
||||
bind_group.raw_config = 0x11;
|
||||
maxwell3d.ProcessCBBind(bind_group_id);
|
||||
}
|
||||
};
|
||||
|
||||
class HLE_SetRasterBoundingBox final : public HLEMacroImpl {
|
||||
public:
|
||||
explicit HLE_SetRasterBoundingBox(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
|
||||
|
||||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
||||
maxwell3d.RefreshParameters();
|
||||
const u32 raster_mode = parameters[0];
|
||||
auto& regs = maxwell3d.regs;
|
||||
const u32 raster_enabled = maxwell3d.regs.conservative_raster_enable;
|
||||
const u32 scratch_data = maxwell3d.regs.shadow_scratch[52];
|
||||
regs.raster_bounding_box.raw = raster_mode & 0xFFFFF00F;
|
||||
regs.raster_bounding_box.pad.Assign(scratch_data & raster_enabled);
|
||||
}
|
||||
};
|
||||
|
||||
template <size_t base_size>
|
||||
class HLE_ClearConstBuffer final : public HLEMacroImpl {
|
||||
public:
|
||||
explicit HLE_ClearConstBuffer(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
|
||||
|
||||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
||||
maxwell3d.RefreshParameters();
|
||||
static constexpr std::array<u32, base_size> zeroes{};
|
||||
auto& regs = maxwell3d.regs;
|
||||
regs.const_buffer.size = static_cast<u32>(base_size);
|
||||
regs.const_buffer.address_high = parameters[0];
|
||||
regs.const_buffer.address_low = parameters[1];
|
||||
regs.const_buffer.offset = 0;
|
||||
maxwell3d.ProcessCBMultiData(zeroes.data(), parameters[2] * 4);
|
||||
}
|
||||
};
|
||||
|
||||
class HLE_ClearMemory final : public HLEMacroImpl {
|
||||
public:
|
||||
explicit HLE_ClearMemory(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
|
||||
|
||||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
||||
maxwell3d.RefreshParameters();
|
||||
|
||||
const u32 needed_memory = parameters[2] / sizeof(u32);
|
||||
if (needed_memory > zero_memory.size()) {
|
||||
zero_memory.resize(needed_memory, 0);
|
||||
}
|
||||
auto& regs = maxwell3d.regs;
|
||||
regs.upload.line_length_in = parameters[2];
|
||||
regs.upload.line_count = 1;
|
||||
regs.upload.dest.address_high = parameters[0];
|
||||
regs.upload.dest.address_low = parameters[1];
|
||||
maxwell3d.CallMethod(static_cast<size_t>(MAXWELL3D_REG_INDEX(launch_dma)), 0x1011, true);
|
||||
maxwell3d.CallMultiMethod(static_cast<size_t>(MAXWELL3D_REG_INDEX(inline_data)),
|
||||
zero_memory.data(), needed_memory, needed_memory);
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<u32> zero_memory;
|
||||
};
|
||||
|
||||
class HLE_TransformFeedbackSetup final : public HLEMacroImpl {
|
||||
public:
|
||||
explicit HLE_TransformFeedbackSetup(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
|
||||
|
||||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
||||
maxwell3d.RefreshParameters();
|
||||
|
||||
auto& regs = maxwell3d.regs;
|
||||
regs.transform_feedback_enabled = 1;
|
||||
regs.transform_feedback.buffers[0].start_offset = 0;
|
||||
regs.transform_feedback.buffers[1].start_offset = 0;
|
||||
regs.transform_feedback.buffers[2].start_offset = 0;
|
||||
regs.transform_feedback.buffers[3].start_offset = 0;
|
||||
|
||||
regs.upload.line_length_in = 4;
|
||||
regs.upload.line_count = 1;
|
||||
regs.upload.dest.address_high = parameters[0];
|
||||
regs.upload.dest.address_low = parameters[1];
|
||||
maxwell3d.CallMethod(static_cast<size_t>(MAXWELL3D_REG_INDEX(launch_dma)), 0x1011, true);
|
||||
maxwell3d.CallMethod(static_cast<size_t>(MAXWELL3D_REG_INDEX(inline_data)),
|
||||
regs.transform_feedback.controls[0].stride, true);
|
||||
|
||||
maxwell3d.Rasterizer().RegisterTransformFeedback(regs.upload.dest.Address());
|
||||
}
|
||||
};
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
/// Fills the hash -> factory table with every macro that has an HLE replacement.
HLEMacro::HLEMacro(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {
    using Builder = std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>;

    // Wraps a factory in the table's std::function type and files it under `hash`.
    // emplace leaves an already-present entry untouched.
    const auto add_builder = [this](u64 hash, const auto& factory) {
        builders.emplace(hash, Builder(factory));
    };

    add_builder(0x0D61FC9FAAC9FCADULL, [](Maxwell3D& engine) -> std::unique_ptr<CachedMacro> {
        return std::make_unique<HLE_DrawArraysIndirect<false>>(engine);
    });
    add_builder(0x8A4D173EB99A8603ULL, [](Maxwell3D& engine) -> std::unique_ptr<CachedMacro> {
        return std::make_unique<HLE_DrawArraysIndirect<true>>(engine);
    });
    add_builder(0x771BB18C62444DA0ULL, [](Maxwell3D& engine) -> std::unique_ptr<CachedMacro> {
        return std::make_unique<HLE_DrawIndexedIndirect<false>>(engine);
    });
    add_builder(0x0217920100488FF7ULL, [](Maxwell3D& engine) -> std::unique_ptr<CachedMacro> {
        return std::make_unique<HLE_DrawIndexedIndirect<true>>(engine);
    });
    add_builder(0x3F5E74B9C9A50164ULL, [](Maxwell3D& engine) -> std::unique_ptr<CachedMacro> {
        return std::make_unique<HLE_MultiDrawIndexedIndirectCount>(engine);
    });
    add_builder(0xEAD26C3E2109B06BULL, [](Maxwell3D& engine) -> std::unique_ptr<CachedMacro> {
        return std::make_unique<HLE_MultiLayerClear>(engine);
    });
    add_builder(0xC713C83D8F63CCF3ULL, [](Maxwell3D& engine) -> std::unique_ptr<CachedMacro> {
        return std::make_unique<HLE_C713C83D8F63CCF3>(engine);
    });
    add_builder(0xD7333D26E0A93EDEULL, [](Maxwell3D& engine) -> std::unique_ptr<CachedMacro> {
        return std::make_unique<HLE_D7333D26E0A93EDE>(engine);
    });
    add_builder(0xEB29B2A09AA06D38ULL, [](Maxwell3D& engine) -> std::unique_ptr<CachedMacro> {
        return std::make_unique<HLE_BindShader>(engine);
    });
    add_builder(0xDB1341DBEB4C8AF7ULL, [](Maxwell3D& engine) -> std::unique_ptr<CachedMacro> {
        return std::make_unique<HLE_SetRasterBoundingBox>(engine);
    });
    add_builder(0x6C97861D891EDF7EULL, [](Maxwell3D& engine) -> std::unique_ptr<CachedMacro> {
        return std::make_unique<HLE_ClearConstBuffer<0x5F00>>(engine);
    });
    add_builder(0xD246FDDF3A6173D7ULL, [](Maxwell3D& engine) -> std::unique_ptr<CachedMacro> {
        return std::make_unique<HLE_ClearConstBuffer<0x7000>>(engine);
    });
    add_builder(0xEE4D0004BEC8ECF4ULL, [](Maxwell3D& engine) -> std::unique_ptr<CachedMacro> {
        return std::make_unique<HLE_ClearMemory>(engine);
    });
    add_builder(0xFC0CF27F5FFAA661ULL, [](Maxwell3D& engine) -> std::unique_ptr<CachedMacro> {
        return std::make_unique<HLE_TransformFeedbackSetup>(engine);
    });
    add_builder(0xB5F74EDB717278ECULL, [](Maxwell3D& engine) -> std::unique_ptr<CachedMacro> {
        return std::make_unique<HLE_DrawIndirectByteCount>(engine);
    });
}
|
||||
|
||||
/// Defaulted destructor, defined out of line in this translation unit.
HLEMacro::~HLEMacro() = default;
|
||||
|
||||
/// Looks up `hash` in the builder table and, on a hit, invokes the stored factory to create
/// a fresh macro bound to this object's Maxwell3D. Returns nullptr for unknown hashes.
std::unique_ptr<CachedMacro> HLEMacro::GetHLEProgram(u64 hash) const {
    if (const auto entry = builders.find(hash); entry != builders.end()) {
        return entry->second(maxwell3d);
    }
    return nullptr;
}
|
||||
|
||||
} // namespace Tegra
|
||||
|
|
@ -1,33 +0,0 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Tegra {
|
||||
|
||||
namespace Engines {
|
||||
class Maxwell3D;
|
||||
}
|
||||
|
||||
class HLEMacro {
|
||||
public:
|
||||
explicit HLEMacro(Engines::Maxwell3D& maxwell3d_);
|
||||
~HLEMacro();
|
||||
|
||||
// Allocates and returns a cached macro if the hash matches a known function.
|
||||
// Returns nullptr otherwise.
|
||||
[[nodiscard]] std::unique_ptr<CachedMacro> GetHLEProgram(u64 hash) const;
|
||||
|
||||
private:
|
||||
Engines::Maxwell3D& maxwell3d;
|
||||
std::unordered_map<u64, std::function<std::unique_ptr<CachedMacro>(Engines::Maxwell3D&)>>
|
||||
builders;
|
||||
};
|
||||
|
||||
} // namespace Tegra
|
||||
|
|
@ -1,362 +0,0 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <array>
|
||||
#include <optional>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/macro/macro_interpreter.h"
|
||||
|
||||
namespace Tegra {
|
||||
namespace {
|
||||
class MacroInterpreterImpl final : public CachedMacro {
|
||||
public:
|
||||
explicit MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_)
|
||||
: maxwell3d{maxwell3d_}, code{code_} {}
|
||||
|
||||
void Execute(const std::vector<u32>& params, u32 method) override;
|
||||
|
||||
private:
|
||||
/// Resets the execution engine state, zeroing registers, etc.
|
||||
void Reset();
|
||||
|
||||
/**
|
||||
* Executes a single macro instruction located at the current program counter. Returns whether
|
||||
* the interpreter should keep running.
|
||||
*
|
||||
* @param is_delay_slot Whether the current step is being executed due to a delay slot in a
|
||||
* previous instruction.
|
||||
*/
|
||||
bool Step(bool is_delay_slot);
|
||||
|
||||
/// Calculates the result of an ALU operation. src_a OP src_b;
|
||||
u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b);
|
||||
|
||||
/// Performs the result operation on the input result and stores it in the specified register
|
||||
/// (if necessary).
|
||||
void ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result);
|
||||
|
||||
/// Evaluates the branch condition and returns whether the branch should be taken or not.
|
||||
bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const;
|
||||
|
||||
/// Reads an opcode at the current program counter location.
|
||||
Macro::Opcode GetOpcode() const;
|
||||
|
||||
/// Returns the specified register's value. Register 0 is hardcoded to always return 0.
|
||||
u32 GetRegister(u32 register_id) const;
|
||||
|
||||
/// Sets the register to the input value.
|
||||
void SetRegister(u32 register_id, u32 value);
|
||||
|
||||
/// Sets the method address to use for the next Send instruction.
|
||||
void SetMethodAddress(u32 address);
|
||||
|
||||
/// Calls a GPU Engine method with the input parameter.
|
||||
void Send(u32 value);
|
||||
|
||||
/// Reads a GPU register located at the method address.
|
||||
u32 Read(u32 method) const;
|
||||
|
||||
/// Returns the next parameter in the parameter queue.
|
||||
u32 FetchParameter();
|
||||
|
||||
Engines::Maxwell3D& maxwell3d;
|
||||
|
||||
/// Current program counter
|
||||
u32 pc{};
|
||||
/// Program counter to execute at after the delay slot is executed.
|
||||
std::optional<u32> delayed_pc;
|
||||
|
||||
/// General purpose macro registers.
|
||||
std::array<u32, Macro::NUM_MACRO_REGISTERS> registers = {};
|
||||
|
||||
/// Method address to use for the next Send instruction.
|
||||
Macro::MethodAddress method_address = {};
|
||||
|
||||
/// Input parameters of the current macro.
|
||||
std::unique_ptr<u32[]> parameters;
|
||||
std::size_t num_parameters = 0;
|
||||
std::size_t parameters_capacity = 0;
|
||||
/// Index of the next parameter that will be fetched by the 'parm' instruction.
|
||||
u32 next_parameter_index = 0;
|
||||
|
||||
bool carry_flag = false;
|
||||
const std::vector<u32>& code;
|
||||
};
|
||||
|
||||
void MacroInterpreterImpl::Execute(const std::vector<u32>& params, u32 method) {
|
||||
Reset();
|
||||
|
||||
registers[1] = params[0];
|
||||
num_parameters = params.size();
|
||||
|
||||
if (num_parameters > parameters_capacity) {
|
||||
parameters_capacity = num_parameters;
|
||||
parameters = std::make_unique<u32[]>(num_parameters);
|
||||
}
|
||||
std::memcpy(parameters.get(), params.data(), num_parameters * sizeof(u32));
|
||||
|
||||
// Execute the code until we hit an exit condition.
|
||||
bool keep_executing = true;
|
||||
while (keep_executing) {
|
||||
keep_executing = Step(false);
|
||||
}
|
||||
|
||||
// Assert the the macro used all the input parameters
|
||||
ASSERT(next_parameter_index == num_parameters);
|
||||
}
|
||||
|
||||
/// Returns every piece of execution state to its start-of-run value.
void MacroInterpreterImpl::Reset() {
    registers.fill(0);
    carry_flag = false;

    pc = 0;
    delayed_pc.reset();
    method_address.raw = 0;

    num_parameters = 0;
    // $r1 is preloaded with the first parameter, so fetching resumes at the second one.
    next_parameter_index = 1;
}
|
||||
|
||||
bool MacroInterpreterImpl::Step(bool is_delay_slot) {
|
||||
u32 base_address = pc;
|
||||
|
||||
Macro::Opcode opcode = GetOpcode();
|
||||
pc += 4;
|
||||
|
||||
// Update the program counter if we were delayed
|
||||
if (delayed_pc) {
|
||||
ASSERT(is_delay_slot);
|
||||
pc = *delayed_pc;
|
||||
delayed_pc = {};
|
||||
}
|
||||
|
||||
switch (opcode.operation) {
|
||||
case Macro::Operation::ALU: {
|
||||
u32 result = GetALUResult(opcode.alu_operation, GetRegister(opcode.src_a),
|
||||
GetRegister(opcode.src_b));
|
||||
ProcessResult(opcode.result_operation, opcode.dst, result);
|
||||
break;
|
||||
}
|
||||
case Macro::Operation::AddImmediate: {
|
||||
ProcessResult(opcode.result_operation, opcode.dst,
|
||||
GetRegister(opcode.src_a) + opcode.immediate);
|
||||
break;
|
||||
}
|
||||
case Macro::Operation::ExtractInsert: {
|
||||
u32 dst = GetRegister(opcode.src_a);
|
||||
u32 src = GetRegister(opcode.src_b);
|
||||
|
||||
src = (src >> opcode.bf_src_bit) & opcode.GetBitfieldMask();
|
||||
dst &= ~(opcode.GetBitfieldMask() << opcode.bf_dst_bit);
|
||||
dst |= src << opcode.bf_dst_bit;
|
||||
ProcessResult(opcode.result_operation, opcode.dst, dst);
|
||||
break;
|
||||
}
|
||||
case Macro::Operation::ExtractShiftLeftImmediate: {
|
||||
u32 dst = GetRegister(opcode.src_a);
|
||||
u32 src = GetRegister(opcode.src_b);
|
||||
|
||||
u32 result = ((src >> dst) & opcode.GetBitfieldMask()) << opcode.bf_dst_bit;
|
||||
|
||||
ProcessResult(opcode.result_operation, opcode.dst, result);
|
||||
break;
|
||||
}
|
||||
case Macro::Operation::ExtractShiftLeftRegister: {
|
||||
u32 dst = GetRegister(opcode.src_a);
|
||||
u32 src = GetRegister(opcode.src_b);
|
||||
|
||||
u32 result = ((src >> opcode.bf_src_bit) & opcode.GetBitfieldMask()) << dst;
|
||||
|
||||
ProcessResult(opcode.result_operation, opcode.dst, result);
|
||||
break;
|
||||
}
|
||||
case Macro::Operation::Read: {
|
||||
u32 result = Read(GetRegister(opcode.src_a) + opcode.immediate);
|
||||
ProcessResult(opcode.result_operation, opcode.dst, result);
|
||||
break;
|
||||
}
|
||||
case Macro::Operation::Branch: {
|
||||
ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid");
|
||||
u32 value = GetRegister(opcode.src_a);
|
||||
bool taken = EvaluateBranchCondition(opcode.branch_condition, value);
|
||||
if (taken) {
|
||||
// Ignore the delay slot if the branch has the annul bit.
|
||||
if (opcode.branch_annul) {
|
||||
pc = base_address + opcode.GetBranchTarget();
|
||||
return true;
|
||||
}
|
||||
|
||||
delayed_pc = base_address + opcode.GetBranchTarget();
|
||||
// Execute one more instruction due to the delay slot.
|
||||
return Step(true);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented macro operation {}", opcode.operation.Value());
|
||||
break;
|
||||
}
|
||||
|
||||
// An instruction with the Exit flag will not actually
|
||||
// cause an exit if it's executed inside a delay slot.
|
||||
if (opcode.is_exit && !is_delay_slot) {
|
||||
// Exit has a delay slot, execute the next instruction
|
||||
Step(true);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Evaluates `src_a OP src_b`. The four arithmetic operations are computed in 64 bits and
/// update the carry flag from the upper half of the wide result; the logic operations
/// leave the flag alone.
u32 MacroInterpreterImpl::GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b) {
    const u64 wide_a = static_cast<u64>(src_a);

    switch (operation) {
    case Macro::ALUOperation::Add: {
        const u64 sum = wide_a + src_b;
        carry_flag = (sum >> 32) != 0;
        return static_cast<u32>(sum);
    }
    case Macro::ALUOperation::AddWithCarry: {
        const u64 carry_in = carry_flag ? 1ULL : 0ULL;
        const u64 sum = wide_a + src_b + carry_in;
        carry_flag = (sum >> 32) != 0;
        return static_cast<u32>(sum);
    }
    case Macro::ALUOperation::Subtract: {
        // The flag is set when the subtraction did not wrap below zero.
        const u64 difference = wide_a - src_b;
        carry_flag = (difference >> 32) == 0;
        return static_cast<u32>(difference);
    }
    case Macro::ALUOperation::SubtractWithBorrow: {
        // A clear flag on entry subtracts one extra.
        const u64 borrow_in = carry_flag ? 0ULL : 1ULL;
        const u64 difference = wide_a - src_b - borrow_in;
        carry_flag = (difference >> 32) == 0;
        return static_cast<u32>(difference);
    }
    case Macro::ALUOperation::Xor:
        return src_a ^ src_b;
    case Macro::ALUOperation::Or:
        return src_a | src_b;
    case Macro::ALUOperation::And:
        return src_a & src_b;
    case Macro::ALUOperation::AndNot:
        return src_a & ~src_b;
    case Macro::ALUOperation::Nand:
        return ~(src_a & src_b);

    default:
        UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", operation);
        return 0;
    }
}
|
||||
|
||||
/// Applies a result operation in two phases: first the destination register is written
/// (either with `result` or with a freshly fetched parameter), then the method-address
/// and/or send follow-up runs.
void MacroInterpreterImpl::ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result) {
    // Phase 1: register write.
    switch (operation) {
    case Macro::ResultOperation::IgnoreAndFetch:
    case Macro::ResultOperation::FetchAndSend:
    case Macro::ResultOperation::FetchAndSetMethod:
        // The "fetch" variants discard the result for the register write.
        SetRegister(reg, FetchParameter());
        break;
    case Macro::ResultOperation::Move:
    case Macro::ResultOperation::MoveAndSetMethod:
    case Macro::ResultOperation::MoveAndSend:
    case Macro::ResultOperation::MoveAndSetMethodFetchAndSend:
    case Macro::ResultOperation::MoveAndSetMethodSend:
        SetRegister(reg, result);
        break;
    default:
        UNIMPLEMENTED_MSG("Unimplemented result operation {}", operation);
        return;
    }

    // Phase 2: follow-up action.
    switch (operation) {
    case Macro::ResultOperation::FetchAndSend:
    case Macro::ResultOperation::MoveAndSend:
        Send(result);
        break;
    case Macro::ResultOperation::MoveAndSetMethod:
    case Macro::ResultOperation::FetchAndSetMethod:
        SetMethodAddress(result);
        break;
    case Macro::ResultOperation::MoveAndSetMethodFetchAndSend:
        // The result selects the method; the value sent is the next parameter.
        SetMethodAddress(result);
        Send(FetchParameter());
        break;
    case Macro::ResultOperation::MoveAndSetMethodSend:
        // The result selects the method; the value sent is bits 12:17 of the result.
        SetMethodAddress(result);
        Send((result >> 12) & 0b111111);
        break;
    default:
        // IgnoreAndFetch and Move have no follow-up.
        break;
    }
}
|
||||
|
||||
/// Returns whether a branch with condition `cond` is taken for the tested `value`.
bool MacroInterpreterImpl::EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const {
    if (cond == Macro::BranchCondition::Zero) {
        return value == 0;
    }
    if (cond == Macro::BranchCondition::NotZero) {
        return value != 0;
    }
    // Any other condition value is not handled.
    UNREACHABLE();
}
|
||||
|
||||
/// Fetches the instruction word at the current program counter. The counter is a byte
/// offset, so it has to be word-aligned and inside the code buffer.
Macro::Opcode MacroInterpreterImpl::GetOpcode() const {
    ASSERT((pc % sizeof(u32)) == 0);
    ASSERT(pc < code.size() * sizeof(u32));

    const std::size_t word_index = pc / sizeof(u32);
    return {code[word_index]};
}
|
||||
|
||||
/// Returns the value held in a register. The access is bounds-checked (`at`), so an
/// out-of-range id throws instead of reading past the register file.
u32 MacroInterpreterImpl::GetRegister(u32 register_id) const {
    const u32 stored = registers.at(register_id);
    return stored;
}
|
||||
|
||||
/// Stores `value` into a register. Writes to register 0 are dropped because it is the
/// hardwired zero register.
void MacroInterpreterImpl::SetRegister(u32 register_id, u32 value) {
    if (register_id != 0) {
        registers.at(register_id) = value;
    }
}
|
||||
|
||||
/// Replaces the whole method-address word (address and increment fields) with `address`.
void MacroInterpreterImpl::SetMethodAddress(u32 address) {
    method_address.raw = address;
}
|
||||
|
||||
/// Calls the engine method at the current method address with `value`, then advances the
/// address by the configured increment.
void MacroInterpreterImpl::Send(u32 value) {
    maxwell3d.CallMethod(method_address.address, value, true);

    const auto advanced = method_address.address.Value() + method_address.increment.Value();
    method_address.address.Assign(advanced);
}
|
||||
|
||||
/// Returns the engine's register value for the given method address.
u32 MacroInterpreterImpl::Read(u32 method) const {
    const u32 register_value = maxwell3d.GetRegisterValue(method);
    return register_value;
}
|
||||
|
||||
/// Returns the next unread parameter and advances the read position.
u32 MacroInterpreterImpl::FetchParameter() {
    ASSERT(next_parameter_index < num_parameters);

    const u32 fetched = parameters[next_parameter_index];
    ++next_parameter_index;
    return fetched;
}
|
||||
} // Anonymous namespace
|
||||
|
||||
/// Forwards the engine to the MacroEngine base and keeps a reference for Compile().
MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d_)
    : MacroEngine{maxwell3d_},
      maxwell3d{maxwell3d_} {
}
|
||||
|
||||
std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) {
|
||||
return std::make_unique<MacroInterpreterImpl>(maxwell3d, code);
|
||||
}
|
||||
|
||||
} // namespace Tegra
|
||||
|
|
@ -1,27 +0,0 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/macro/macro.h"
|
||||
|
||||
namespace Tegra {
|
||||
namespace Engines {
|
||||
class Maxwell3D;
|
||||
}
|
||||
|
||||
class MacroInterpreter final : public MacroEngine {
|
||||
public:
|
||||
explicit MacroInterpreter(Engines::Maxwell3D& maxwell3d_);
|
||||
|
||||
protected:
|
||||
std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) override;
|
||||
|
||||
private:
|
||||
Engines::Maxwell3D& maxwell3d;
|
||||
};
|
||||
|
||||
} // namespace Tegra
|
||||
|
|
@ -1,678 +0,0 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <array>
|
||||
#include <bitset>
|
||||
#include <optional>
|
||||
|
||||
#include <xbyak/xbyak.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/bit_field.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/x64/xbyak_abi.h"
|
||||
#include "common/x64/xbyak_util.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/macro/macro_interpreter.h"
|
||||
#include "video_core/macro/macro_jit_x64.h"
|
||||
|
||||
namespace Tegra {
|
||||
namespace {
|
||||
constexpr Xbyak::Reg64 STATE = Xbyak::util::rbx;
|
||||
constexpr Xbyak::Reg32 RESULT = Xbyak::util::r10d;
|
||||
constexpr Xbyak::Reg64 MAX_PARAMETER = Xbyak::util::r11;
|
||||
constexpr Xbyak::Reg64 PARAMETERS = Xbyak::util::r12;
|
||||
constexpr Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d;
|
||||
constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15;
|
||||
|
||||
constexpr std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
|
||||
STATE,
|
||||
RESULT,
|
||||
MAX_PARAMETER,
|
||||
PARAMETERS,
|
||||
METHOD_ADDRESS,
|
||||
BRANCH_HOLDER,
|
||||
});
|
||||
|
||||
// Arbitrarily chosen based on current booting games.
|
||||
constexpr size_t MAX_CODE_SIZE = 0x10000;
|
||||
|
||||
std::bitset<32> PersistentCallerSavedRegs() {
|
||||
return PERSISTENT_REGISTERS & Common::X64::ABI_ALL_CALLER_SAVED;
|
||||
}
|
||||
|
||||
/// @brief W^X constraints must be enforced here, as we don't yet have a global "NO_EXECUTE"
/// support flag. The speed loss is minimal, and may in fact be negligible; for peace of mind
/// the restriction is limited to the OSes known to have W^X issues.
#if !defined(__OpenBSD__) && !defined(__NetBSD__) && !defined(__DragonFly__)
static const auto default_cg_mode = nullptr; // Allow RWE
#else
static const auto default_cg_mode = Xbyak::DontSetProtectRWE;
#endif
|
||||
|
||||
/// x86-64 JIT backend for one macro: the constructor compiles `code_` into host code, and
/// Execute() runs that code with a fresh JITState.
class MacroJITx64Impl final : public Xbyak::CodeGenerator, public CachedMacro {
public:
    /// @param maxwell3d_ Engine made available to the generated code through JITState.
    /// @param code_ Macro code words. Stored by reference, so the vector must outlive this
    ///              object.
    explicit MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_)
        : Xbyak::CodeGenerator(MAX_CODE_SIZE, default_cg_mode)
        , code{code_}, maxwell3d{maxwell3d_} {
        Compile();
    }

    void Execute(const std::vector<u32>& parameters, u32 method) override;

    // One emitter per macro operation.
    void Compile_ALU(Macro::Opcode opcode);
    void Compile_AddImmediate(Macro::Opcode opcode);
    void Compile_ExtractInsert(Macro::Opcode opcode);
    void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode);
    void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode);
    void Compile_Read(Macro::Opcode opcode);
    void Compile_Branch(Macro::Opcode opcode);

private:
    void Compile();
    bool Compile_NextInstruction();
    void Optimizer_ScanFlags();

    Xbyak::Reg32 Compile_FetchParameter();
    Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst);

    void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg);
    void Compile_Send(Xbyak::Reg32 value);

    Macro::Opcode GetOpCode() const;

    /// Per-invocation state handed to the generated code. The engine pointer has to be the
    /// first member (checked by the static_assert below).
    struct JITState {
        Engines::Maxwell3D* maxwell3d{};
        std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{};
        u32 carry_flag{};
    };
    static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0");

    /// Entry point of the generated code: (state, first parameter, one past last parameter).
    using ProgramType = void (*)(JITState*, const u32*, const u32*);

    /// Flags consulted while emitting code.
    struct OptimizerState {
        bool can_skip_carry = false;
        bool has_delayed_pc = false;
        bool zero_reg_skip = false;
        bool skip_dummy_addimmediate = false;
        bool optimize_for_method_move = false;
        bool enable_asserts = false;
    };
    OptimizerState optimizer{};

    std::optional<Macro::Opcode> next_opcode{};
    /// Compiled entry point; Execute() refuses to run while this is null.
    ProgramType program = nullptr;

    std::array<Xbyak::Label, MAX_CODE_SIZE> labels;
    std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip;
    Xbyak::Label end_of_code{};

    bool is_delay_slot = false;
    u32 pc = 0;

    /// Macro code words (not owned).
    const std::vector<u32>& code;
    Engines::Maxwell3D& maxwell3d;
};
|
||||
|
||||
void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) {
|
||||
ASSERT_OR_EXECUTE(program != nullptr, { return; });
|
||||
JITState state{};
|
||||
state.maxwell3d = &maxwell3d;
|
||||
state.registers = {};
|
||||
program(&state, parameters.data(), parameters.data() + parameters.size());
|
||||
}
|
||||
|
||||
/// Emits code for an ALU opcode: RESULT = src_a <op> src_b, followed by the opcode's result
/// operation.
///
/// Both operands are always materialised (register 0 is emitted as `xor dst, dst` by
/// Compile_GetRegister) and the operation is always emitted. The previous zero-register
/// shortcut skipped the load *and* the operation whenever an operand named register 0, which
/// left RESULT (or the eax scratch operand) holding whatever the preceding instruction wrote:
/// e.g. `0 + rB`, `rA & 0`, `rA & ~0` and `~(rA & 0)` all produced stale values, and `Add`
/// stored a carry flag that no `add` had produced. optimizer.zero_reg_skip is therefore
/// intentionally not consulted here.
void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) {
    const Xbyak::Reg32 src_a = Compile_GetRegister(opcode.src_a, RESULT);
    const Xbyak::Reg32 src_b = Compile_GetRegister(opcode.src_b, eax);

    switch (opcode.alu_operation) {
    case Macro::ALUOperation::Add:
        add(src_a, src_b);
        // The carry only needs to be recorded when some instruction will consume it.
        if (!optimizer.can_skip_carry) {
            setc(byte[STATE + offsetof(JITState, carry_flag)]);
        }
        break;
    case Macro::ALUOperation::AddWithCarry:
        // Reload CF from bit 0 of the saved carry flag before the add-with-carry.
        bt(dword[STATE + offsetof(JITState, carry_flag)], 0);
        adc(src_a, src_b);
        setc(byte[STATE + offsetof(JITState, carry_flag)]);
        break;
    case Macro::ALUOperation::Subtract:
        sub(src_a, src_b);
        if (!optimizer.can_skip_carry) {
            setc(byte[STATE + offsetof(JITState, carry_flag)]);
        }
        break;
    case Macro::ALUOperation::SubtractWithBorrow:
        bt(dword[STATE + offsetof(JITState, carry_flag)], 0);
        sbb(src_a, src_b);
        setc(byte[STATE + offsetof(JITState, carry_flag)]);
        break;
    case Macro::ALUOperation::Xor:
        xor_(src_a, src_b);
        break;
    case Macro::ALUOperation::Or:
        or_(src_a, src_b);
        break;
    case Macro::ALUOperation::And:
        and_(src_a, src_b);
        break;
    case Macro::ALUOperation::AndNot:
        not_(src_b);
        and_(src_a, src_b);
        break;
    case Macro::ALUOperation::Nand:
        and_(src_a, src_b);
        not_(src_a);
        break;
    default:
        UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", opcode.alu_operation.Value());
        break;
    }
    Compile_ProcessResult(opcode.result_operation, opcode.dst);
}
|
||||
|
||||
/// Emits code for AddImmediate: RESULT = src_a + immediate, followed by the opcode's result
/// operation.
///
/// Fix: the immediate dispatch used to test `> 2`, `== 1` and `< 0`, so an immediate of
/// exactly 2 emitted no arithmetic at all and the register value was passed through unchanged.
/// Every positive immediate other than 1 now goes through `add`.
void MacroJITx64Impl::Compile_AddImmediate(Macro::Opcode opcode) {
    if (optimizer.skip_dummy_addimmediate) {
        // Games tend to use this as an exit instruction placeholder. It's to encode an instruction
        // without doing anything. In our case we can just not emit anything.
        if (opcode.result_operation == Macro::ResultOperation::Move && opcode.dst == 0) {
            return;
        }
    }
    // Check for redundant moves: if the next opcode also does MoveAndSetMethod into the same
    // destination, nothing is emitted for this one.
    // NOTE(review): this does not check whether the next opcode reads `dst` as a source —
    // confirm that dropping the first move is always safe.
    if (optimizer.optimize_for_method_move &&
        opcode.result_operation == Macro::ResultOperation::MoveAndSetMethod) {
        if (next_opcode.has_value()) {
            const auto next = *next_opcode;
            if (next.result_operation == Macro::ResultOperation::MoveAndSetMethod &&
                opcode.dst == next.dst) {
                return;
            }
        }
    }
    if (optimizer.zero_reg_skip && opcode.src_a == 0) {
        // Source is register 0, so the result is just the immediate.
        if (opcode.immediate == 0) {
            xor_(RESULT, RESULT);
        } else {
            mov(RESULT, opcode.immediate);
        }
    } else {
        auto result = Compile_GetRegister(opcode.src_a, RESULT);
        if (opcode.immediate == 1) {
            inc(result);
        } else if (opcode.immediate > 0) {
            add(result, opcode.immediate);
        } else if (opcode.immediate < 0) {
            sub(result, opcode.immediate * -1);
        }
        // An immediate of 0 leaves the loaded register value as the result.
    }
    Compile_ProcessResult(opcode.result_operation, opcode.dst);
}
|
||||
|
||||
/// Emits code that takes the bitfield starting at bf_src_bit of src_b and inserts it at
/// bf_dst_bit of src_a; the merged value ends up in RESULT and is passed to the result stage.
void MacroJITx64Impl::Compile_ExtractInsert(Macro::Opcode opcode) {
    auto target = Compile_GetRegister(opcode.src_a, RESULT);
    auto field = Compile_GetRegister(opcode.src_b, eax);

    // Clear the destination bits of the target before merging.
    const auto field_mask = opcode.GetBitfieldMask();
    const u32 keep_mask = ~(field_mask << opcode.bf_dst_bit);
    and_(target, keep_mask);

    // Isolate the source field and move it to the destination position.
    shr(field, opcode.bf_src_bit);
    and_(field, field_mask);
    shl(field, opcode.bf_dst_bit);

    or_(target, field);

    Compile_ProcessResult(opcode.result_operation, opcode.dst);
}
|
||||
|
||||
/// Emits RESULT = ((src_b >> src_a) & bitfield mask) << bf_dst_bit, then the result stage.
/// src_a is loaded into ecx so its low byte (cl) can serve as the variable shift count.
void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) {
    const auto shift_amount = Compile_GetRegister(opcode.src_a, ecx);
    const auto value = Compile_GetRegister(opcode.src_b, RESULT);

    shr(value, shift_amount.cvt8());
    and_(value, opcode.GetBitfieldMask());
    shl(value, opcode.bf_dst_bit);

    Compile_ProcessResult(opcode.result_operation, opcode.dst);
}
|
||||
|
||||
/// Emits RESULT = ((src_b >> bf_src_bit) & bitfield mask) << src_a, then the result stage.
/// src_a is loaded into ecx so its low byte (cl) can serve as the variable shift count.
void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) {
    const auto shift_amount = Compile_GetRegister(opcode.src_a, ecx);
    const auto value = Compile_GetRegister(opcode.src_b, RESULT);

    shr(value, opcode.bf_src_bit);
    and_(value, opcode.GetBitfieldMask());
    shl(value, shift_amount.cvt8());

    Compile_ProcessResult(opcode.result_operation, opcode.dst);
}
|
||||
|
||||
/// Emits code for Read: computes the register index `src_a + immediate` in RESULT, loads that
/// entry of the engine's register array into RESULT, and runs the opcode's result operation.
///
/// Fix: the immediate dispatch used to test `> 2`, `== 1` and `< 0`, so an immediate of
/// exactly 2 emitted no arithmetic and the wrong register index was read. Every positive
/// immediate other than 1 now goes through `add`.
void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) {
    if (optimizer.zero_reg_skip && opcode.src_a == 0) {
        // Source is register 0, so the index is just the immediate.
        if (opcode.immediate == 0) {
            xor_(RESULT, RESULT);
        } else {
            mov(RESULT, opcode.immediate);
        }
    } else {
        auto result = Compile_GetRegister(opcode.src_a, RESULT);
        if (opcode.immediate == 1) {
            inc(result);
        } else if (opcode.immediate > 0) {
            add(result, opcode.immediate);
        } else if (opcode.immediate < 0) {
            sub(result, opcode.immediate * -1);
        }
        // An immediate of 0 leaves the loaded register value as the index.
    }

    // Equivalent to Engines::Maxwell3D::GetRegisterValue:
    if (optimizer.enable_asserts) {
        // Trap with int3 when the index is not below NUM_REGS.
        Xbyak::Label pass_range_check;
        cmp(RESULT, static_cast<u32>(Engines::Maxwell3D::Regs::NUM_REGS));
        jb(pass_range_check);
        int3();
        L(pass_range_check);
    }
    // rax = Maxwell3D pointer (first field of JITState); RESULT = regs.reg_array[RESULT].
    mov(rax, qword[STATE]);
    mov(RESULT,
        dword[rax + offsetof(Engines::Maxwell3D, regs) +
              offsetof(Engines::Maxwell3D::Regs, reg_array) + RESULT.cvt64() * sizeof(u32)]);

    Compile_ProcessResult(opcode.result_operation, opcode.dst);
}
|
||||
|
||||
/// Host-side helper called from generated code (see Compile_Send): forwards `value` to the
/// engine's CallMethod for the address part of `method_address`, always passing `true` as the
/// final argument.
void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) {
    const auto target_method = method_address.address;
    maxwell3d->CallMethod(target_method, value, true);
}
|
||||
|
||||
/// Emits a call to Send(maxwell3d, METHOD_ADDRESS, value) and afterwards advances the address
/// part of METHOD_ADDRESS by its increment field.
void MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) {
    // Call out to the host, preserving the caller-saved registers the JIT keeps live.
    Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
    mov(Common::X64::ABI_PARAM1, qword[STATE]); // Maxwell3D* stored at offset 0 of JITState
    mov(Common::X64::ABI_PARAM2.cvt32(), METHOD_ADDRESS);
    mov(Common::X64::ABI_PARAM3.cvt32(), value);
    Common::X64::CallFarFunction(*this, &Send);
    Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);

    Xbyak::Label increment_is_zero{};
    // Bits 12..17 hold the increment; when they are all clear the method address stays as is.
    test(METHOD_ADDRESS, 0x3f000);
    je(increment_is_zero);

    // ecx = increment, METHOD_ADDRESS = address (low 12 bits)
    mov(ecx, METHOD_ADDRESS);
    and_(METHOD_ADDRESS, 0xfff);
    shr(ecx, 12);
    and_(ecx, 0x3f);
    // eax = address + increment, then put the increment back into bits 12..17
    lea(eax, ptr[rcx + METHOD_ADDRESS.cvt64()]);
    sal(ecx, 12);
    or_(eax, ecx);

    mov(METHOD_ADDRESS, eax);

    L(increment_is_zero);
}
|
||||
|
||||
/// Emits a conditional branch on src_a being zero / non-zero.
///
/// Without delayed branches in the program the jump goes straight to the target label. With
/// them, an annulled branch jumps immediately (after clearing BRANCH_HOLDER), while a
/// non-annulled branch stores the address of a small trampoline in BRANCH_HOLDER and continues
/// at delay_skip[pc]; the trampoline clears BRANCH_HOLDER and jumps to the target.
void MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) {
    ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid");

    // GetBranchTarget() is divided by sizeof(s32) to get an offset in instructions from pc.
    const s32 target_offset = static_cast<s32>(opcode.GetBranchTarget() / sizeof(s32));
    const s32 jump_address = static_cast<s32>(pc) + target_offset;

    Xbyak::Label not_taken;
    auto condition_value = Compile_GetRegister(opcode.src_a, eax);
    cmp(condition_value, 0); // test(value, value);

    if (!optimizer.has_delayed_pc) {
        // Simple case: jump directly when the condition holds, otherwise fall through.
        switch (opcode.branch_condition) {
        case Macro::BranchCondition::Zero:
            je(labels[jump_address], T_NEAR);
            break;
        case Macro::BranchCondition::NotZero:
            jne(labels[jump_address], T_NEAR);
            break;
        }
    } else {
        // Skip the whole branch sequence when the condition does NOT hold.
        switch (opcode.branch_condition) {
        case Macro::BranchCondition::Zero:
            jne(not_taken, T_NEAR);
            break;
        case Macro::BranchCondition::NotZero:
            je(not_taken, T_NEAR);
            break;
        }

        if (opcode.branch_annul) {
            xor_(BRANCH_HOLDER, BRANCH_HOLDER);
            jmp(labels[jump_address], T_NEAR);
        } else {
            Xbyak::Label trampoline{};
            Xbyak::Label arm_delayed_branch{};
            jmp(arm_delayed_branch, T_NEAR);

            // Reached later through BRANCH_HOLDER.
            L(trampoline);
            xor_(BRANCH_HOLDER, BRANCH_HOLDER);
            jmp(labels[jump_address], T_NEAR);

            L(arm_delayed_branch);
            mov(BRANCH_HOLDER, trampoline);
            jmp(delay_skip[pc], T_NEAR);
        }
    }

    L(not_taken);
}
|
||||
|
||||
void MacroJITx64Impl::Optimizer_ScanFlags() {
|
||||
optimizer.can_skip_carry = true;
|
||||
optimizer.has_delayed_pc = false;
|
||||
for (auto raw_op : code) {
|
||||
Macro::Opcode op{};
|
||||
op.raw = raw_op;
|
||||
|
||||
if (op.operation == Macro::Operation::ALU) {
|
||||
// Scan for any ALU operations which actually use the carry flag, if they don't exist in
|
||||
// our current code we can skip emitting the carry flag handling operations
|
||||
if (op.alu_operation == Macro::ALUOperation::AddWithCarry ||
|
||||
op.alu_operation == Macro::ALUOperation::SubtractWithBorrow) {
|
||||
optimizer.can_skip_carry = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (op.operation == Macro::Operation::Branch) {
|
||||
if (!op.branch_annul) {
|
||||
optimizer.has_delayed_pc = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void MacroJITx64Impl::Compile() {
|
||||
labels.fill(Xbyak::Label());
|
||||
|
||||
Common::X64::ABI_PushRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8);
|
||||
// JIT state
|
||||
mov(STATE, Common::X64::ABI_PARAM1);
|
||||
mov(PARAMETERS, Common::X64::ABI_PARAM2);
|
||||
mov(MAX_PARAMETER, Common::X64::ABI_PARAM3);
|
||||
xor_(RESULT, RESULT);
|
||||
xor_(METHOD_ADDRESS, METHOD_ADDRESS);
|
||||
xor_(BRANCH_HOLDER, BRANCH_HOLDER);
|
||||
|
||||
mov(dword[STATE + offsetof(JITState, registers) + 4], Compile_FetchParameter());
|
||||
|
||||
// Track get register for zero registers and mark it as no-op
|
||||
optimizer.zero_reg_skip = true;
|
||||
|
||||
// AddImmediate tends to be used as a NOP instruction, if we detect this we can
|
||||
// completely skip the entire code path and no emit anything
|
||||
optimizer.skip_dummy_addimmediate = true;
|
||||
|
||||
// SMO tends to emit a lot of unnecessary method moves, we can mitigate this by only emitting
|
||||
// one if our register isn't "dirty"
|
||||
optimizer.optimize_for_method_move = true;
|
||||
|
||||
// Enable run-time assertions in JITted code
|
||||
optimizer.enable_asserts = false;
|
||||
|
||||
// Check to see if we can skip emitting certain instructions
|
||||
Optimizer_ScanFlags();
|
||||
|
||||
const u32 op_count = static_cast<u32>(code.size());
|
||||
for (u32 i = 0; i < op_count; i++) {
|
||||
if (i < op_count - 1) {
|
||||
pc = i + 1;
|
||||
next_opcode = GetOpCode();
|
||||
} else {
|
||||
next_opcode = {};
|
||||
}
|
||||
pc = i;
|
||||
Compile_NextInstruction();
|
||||
}
|
||||
|
||||
L(end_of_code);
|
||||
|
||||
Common::X64::ABI_PopRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8);
|
||||
ret();
|
||||
ready();
|
||||
program = getCode<ProgramType>();
|
||||
}
|
||||
|
||||
bool MacroJITx64Impl::Compile_NextInstruction() {
|
||||
const auto opcode = GetOpCode();
|
||||
if (labels[pc].getAddress()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
L(labels[pc]);
|
||||
|
||||
switch (opcode.operation) {
|
||||
case Macro::Operation::ALU:
|
||||
Compile_ALU(opcode);
|
||||
break;
|
||||
case Macro::Operation::AddImmediate:
|
||||
Compile_AddImmediate(opcode);
|
||||
break;
|
||||
case Macro::Operation::ExtractInsert:
|
||||
Compile_ExtractInsert(opcode);
|
||||
break;
|
||||
case Macro::Operation::ExtractShiftLeftImmediate:
|
||||
Compile_ExtractShiftLeftImmediate(opcode);
|
||||
break;
|
||||
case Macro::Operation::ExtractShiftLeftRegister:
|
||||
Compile_ExtractShiftLeftRegister(opcode);
|
||||
break;
|
||||
case Macro::Operation::Read:
|
||||
Compile_Read(opcode);
|
||||
break;
|
||||
case Macro::Operation::Branch:
|
||||
Compile_Branch(opcode);
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented opcode {}", opcode.operation.Value());
|
||||
break;
|
||||
}
|
||||
|
||||
if (optimizer.has_delayed_pc) {
|
||||
if (opcode.is_exit) {
|
||||
mov(rax, end_of_code);
|
||||
test(BRANCH_HOLDER, BRANCH_HOLDER);
|
||||
cmove(BRANCH_HOLDER, rax);
|
||||
// Jump to next instruction to skip delay slot check
|
||||
je(labels[pc + 1], T_NEAR);
|
||||
} else {
|
||||
// TODO(ogniK): Optimize delay slot branching
|
||||
Xbyak::Label no_delay_slot{};
|
||||
test(BRANCH_HOLDER, BRANCH_HOLDER);
|
||||
je(no_delay_slot, T_NEAR);
|
||||
mov(rax, BRANCH_HOLDER);
|
||||
xor_(BRANCH_HOLDER, BRANCH_HOLDER);
|
||||
jmp(rax);
|
||||
L(no_delay_slot);
|
||||
}
|
||||
L(delay_skip[pc]);
|
||||
if (opcode.is_exit) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
test(BRANCH_HOLDER, BRANCH_HOLDER);
|
||||
jne(end_of_code, T_NEAR);
|
||||
if (opcode.is_exit) {
|
||||
inc(BRANCH_HOLDER);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Called from generated code when a parameter fetch lies at or beyond `max_parameter`.
/// Logs the offending address together with the address of the last valid parameter.
static void WarnInvalidParameter(uintptr_t parameter, uintptr_t max_parameter) {
    const auto last_valid_parameter = max_parameter - sizeof(u32);
    LOG_CRITICAL(HW_GPU,
                 "Macro JIT: invalid parameter access 0x{:x} (0x{:x} is the last parameter)",
                 parameter, last_valid_parameter);
}
|
||||
|
||||
/// Emits a bounds-checked fetch of the next macro parameter.
///
/// Generated behaviour: when PARAMETERS is below MAX_PARAMETER the dword it points at is
/// loaded into eax; otherwise WarnInvalidParameter is called and eax is set to 0. In both
/// cases PARAMETERS then advances by one u32.
///
/// Fix: the out-of-range path used to fall through into the load after logging, so the
/// generated code read past the end of the parameter buffer (or from whatever data() returns
/// for an empty vector). It now yields 0 instead of dereferencing.
///
/// @return eax, the register holding the fetched value.
Xbyak::Reg32 MacroJITx64Impl::Compile_FetchParameter() {
    Xbyak::Label parameter_ok{};
    Xbyak::Label advance{};

    cmp(PARAMETERS, MAX_PARAMETER);
    jb(parameter_ok, T_NEAR);

    // Out of range: report it, then produce a defined value without touching memory.
    Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
    mov(Common::X64::ABI_PARAM1, PARAMETERS);
    mov(Common::X64::ABI_PARAM2, MAX_PARAMETER);
    Common::X64::CallFarFunction(*this, &WarnInvalidParameter);
    Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
    xor_(eax, eax);
    jmp(advance, T_NEAR);

    L(parameter_ok);
    mov(eax, dword[PARAMETERS]);

    L(advance);
    add(PARAMETERS, sizeof(u32));
    return eax;
}
|
||||
|
||||
/// Emits a load of macro register `index` into `dst` and returns `dst`.
/// Register 0 is always zero, so it is emitted as `xor dst, dst` instead of a memory read.
Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) {
    if (index != 0) {
        const auto slot_offset = offsetof(JITState, registers) + index * sizeof(u32);
        mov(dst, dword[STATE + slot_offset]);
        return dst;
    }

    xor_(dst, dst);
    return dst;
}
|
||||
|
||||
/// Emits the post-processing selected by `operation` for the value currently in RESULT:
/// some combination of storing into macro register `reg`, updating METHOD_ADDRESS, fetching
/// the next parameter and sending a value to the engine.
void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) {
    // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero
    // register, so stores to index 0 emit nothing.
    const auto store_register = [this](u32 reg_index, const Xbyak::Reg32& source) {
        if (reg_index != 0) {
            mov(dword[STATE + offsetof(JITState, registers) + reg_index * sizeof(u32)], source);
        }
    };
    const auto set_method_address = [this](const Xbyak::Reg32& source) {
        mov(METHOD_ADDRESS, source);
    };

    switch (operation) {
    case Macro::ResultOperation::IgnoreAndFetch: {
        // RESULT is ignored; the register receives the next parameter instead.
        const Xbyak::Reg32 fetched = Compile_FetchParameter();
        store_register(reg, fetched);
        break;
    }
    case Macro::ResultOperation::Move:
        store_register(reg, RESULT);
        break;
    case Macro::ResultOperation::MoveAndSetMethod:
        store_register(reg, RESULT);
        set_method_address(RESULT);
        break;
    case Macro::ResultOperation::FetchAndSend: {
        // Fetch parameter and send result.
        const Xbyak::Reg32 fetched = Compile_FetchParameter();
        store_register(reg, fetched);
        Compile_Send(RESULT);
        break;
    }
    case Macro::ResultOperation::MoveAndSend:
        // Move and send result.
        store_register(reg, RESULT);
        Compile_Send(RESULT);
        break;
    case Macro::ResultOperation::FetchAndSetMethod: {
        // Fetch parameter and use result as Method Address.
        const Xbyak::Reg32 fetched = Compile_FetchParameter();
        store_register(reg, fetched);
        set_method_address(RESULT);
        break;
    }
    case Macro::ResultOperation::MoveAndSetMethodFetchAndSend: {
        // Move result and use as Method Address, then fetch and send parameter.
        store_register(reg, RESULT);
        set_method_address(RESULT);
        const Xbyak::Reg32 fetched = Compile_FetchParameter();
        Compile_Send(fetched);
        break;
    }
    case Macro::ResultOperation::MoveAndSetMethodSend:
        // Move result and use as Method Address, then send bits 12:17 of result.
        store_register(reg, RESULT);
        set_method_address(RESULT);
        shr(RESULT, 12);
        and_(RESULT, 0b111111);
        Compile_Send(RESULT);
        break;
    default:
        UNIMPLEMENTED_MSG("Unimplemented macro operation {}", operation);
        break;
    }
}
|
||||
|
||||
/// Returns the opcode built from the macro word at index `pc`; asserts that `pc` is in range.
Macro::Opcode MacroJITx64Impl::GetOpCode() const {
    const auto instruction_count = code.size();
    ASSERT(pc < instruction_count);
    return Macro::Opcode{code[pc]};
}
|
||||
} // Anonymous namespace
|
||||
|
||||
/// Hands the engine to the MacroEngine base and keeps a reference for compiling macros later.
MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d_)
    : MacroEngine{maxwell3d_},
      maxwell3d{maxwell3d_} {
}
|
||||
|
||||
std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) {
|
||||
return std::make_unique<MacroJITx64Impl>(maxwell3d, code);
|
||||
}
|
||||
} // namespace Tegra
|
||||
|
|
@ -1,26 +0,0 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/macro/macro.h"
|
||||
|
||||
namespace Tegra {
|
||||
|
||||
namespace Engines {
|
||||
class Maxwell3D;
|
||||
}
|
||||
|
||||
class MacroJITx64 final : public MacroEngine {
|
||||
public:
|
||||
explicit MacroJITx64(Engines::Maxwell3D& maxwell3d_);
|
||||
|
||||
protected:
|
||||
std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) override;
|
||||
|
||||
private:
|
||||
Engines::Maxwell3D& maxwell3d;
|
||||
};
|
||||
|
||||
} // namespace Tegra
|
||||
|
|
@ -1214,19 +1214,16 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageV
|
|||
ImageView::~ImageView() = default;
|
||||
|
||||
GLuint ImageView::StorageView(Shader::TextureType texture_type, Shader::ImageFormat image_format) {
|
||||
if (image_format == Shader::ImageFormat::Typeless) {
|
||||
if (image_format == Shader::ImageFormat::Typeless)
|
||||
return Handle(texture_type);
|
||||
}
|
||||
const bool is_signed{image_format == Shader::ImageFormat::R8_SINT ||
|
||||
image_format == Shader::ImageFormat::R16_SINT};
|
||||
if (!storage_views) {
|
||||
storage_views = std::make_unique<StorageViews>();
|
||||
}
|
||||
const bool is_signed = image_format == Shader::ImageFormat::R8_SINT
|
||||
|| image_format == Shader::ImageFormat::R16_SINT;
|
||||
if (!storage_views)
|
||||
storage_views.emplace();
|
||||
auto& type_views{is_signed ? storage_views->signeds : storage_views->unsigneds};
|
||||
GLuint& view{type_views[static_cast<size_t>(texture_type)]};
|
||||
if (view == 0) {
|
||||
GLuint& view{type_views[size_t(texture_type)]};
|
||||
if (view == 0)
|
||||
view = MakeView(texture_type, ShaderFormat(image_format));
|
||||
}
|
||||
return view;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -302,7 +302,7 @@ private:
|
|||
|
||||
std::array<GLuint, Shader::NUM_TEXTURE_TYPES> views{};
|
||||
std::vector<OGLTextureView> stored_views;
|
||||
std::unique_ptr<StorageViews> storage_views;
|
||||
std::optional<StorageViews> storage_views;
|
||||
GLenum internal_format = GL_NONE;
|
||||
GLuint default_handle = 0;
|
||||
u32 buffer_size = 0;
|
||||
|
|
|
|||
|
|
@ -376,7 +376,6 @@ void RasterizerVulkan::DrawTexture() {
|
|||
}
|
||||
|
||||
void RasterizerVulkan::Clear(u32 layer_count) {
|
||||
|
||||
FlushWork();
|
||||
gpu_memory->FlushCaching();
|
||||
|
||||
|
|
@ -396,9 +395,7 @@ void RasterizerVulkan::Clear(u32 layer_count) {
|
|||
scheduler.RequestRenderpass(framebuffer);
|
||||
|
||||
query_cache.NotifySegment(true);
|
||||
query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64,
|
||||
maxwell3d->regs.zpass_pixel_count_enable);
|
||||
|
||||
query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, maxwell3d->regs.zpass_pixel_count_enable);
|
||||
u32 up_scale = 1;
|
||||
u32 down_shift = 0;
|
||||
if (texture_cache.IsRescaling()) {
|
||||
|
|
@ -443,14 +440,14 @@ void RasterizerVulkan::Clear(u32 layer_count) {
|
|||
offset = 0;
|
||||
return;
|
||||
}
|
||||
if (offset >= static_cast<s32>(limit)) {
|
||||
offset = static_cast<s32>(limit);
|
||||
if (offset >= s32(limit)) {
|
||||
offset = s32(limit);
|
||||
extent = 0;
|
||||
return;
|
||||
}
|
||||
const u64 end_coord = static_cast<u64>(offset) + extent;
|
||||
const u64 end_coord = u64(offset) + extent;
|
||||
if (end_coord > limit) {
|
||||
extent = limit - static_cast<u32>(offset);
|
||||
extent = limit - u32(offset);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -464,30 +461,22 @@ void RasterizerVulkan::Clear(u32 layer_count) {
|
|||
|
||||
const u32 color_attachment = regs.clear_surface.RT;
|
||||
if (use_color && framebuffer->HasAspectColorBit(color_attachment)) {
|
||||
const auto format =
|
||||
VideoCore::Surface::PixelFormatFromRenderTargetFormat(regs.rt[color_attachment].format);
|
||||
const auto format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(regs.rt[color_attachment].format);
|
||||
bool is_integer = IsPixelFormatInteger(format);
|
||||
bool is_signed = IsPixelFormatSignedInteger(format);
|
||||
size_t int_size = PixelComponentSizeBitsInteger(format);
|
||||
VkClearValue clear_value{};
|
||||
if (!is_integer) {
|
||||
std::memcpy(clear_value.color.float32, regs.clear_color.data(),
|
||||
regs.clear_color.size() * sizeof(f32));
|
||||
std::memcpy(clear_value.color.float32, regs.clear_color.data(), regs.clear_color.size() * sizeof(f32));
|
||||
} else if (!is_signed) {
|
||||
for (size_t i = 0; i < 4; i++) {
|
||||
clear_value.color.uint32[i] = static_cast<u32>(
|
||||
static_cast<f32>(static_cast<u64>(int_size) << 1U) * regs.clear_color[i]);
|
||||
}
|
||||
for (size_t i = 0; i < 4; i++)
|
||||
clear_value.color.uint32[i] = u32(f32(u64(int_size) << 1U) * regs.clear_color[i]);
|
||||
} else {
|
||||
for (size_t i = 0; i < 4; i++) {
|
||||
clear_value.color.int32[i] =
|
||||
static_cast<s32>(static_cast<f32>(static_cast<s64>(int_size - 1) << 1) *
|
||||
(regs.clear_color[i] - 0.5f));
|
||||
}
|
||||
for (size_t i = 0; i < 4; i++)
|
||||
clear_value.color.int32[i] = s32(f32(s64(int_size - 1) << 1) * (regs.clear_color[i] - 0.5f));
|
||||
}
|
||||
|
||||
if (regs.clear_surface.R && regs.clear_surface.G && regs.clear_surface.B &&
|
||||
regs.clear_surface.A) {
|
||||
if (regs.clear_surface.R && regs.clear_surface.G && regs.clear_surface.B && regs.clear_surface.A) {
|
||||
scheduler.Record([color_attachment, clear_value, clear_rect](vk::CommandBuffer cmdbuf) {
|
||||
const VkClearAttachment attachment{
|
||||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
|
|
@ -497,14 +486,11 @@ void RasterizerVulkan::Clear(u32 layer_count) {
|
|||
cmdbuf.ClearAttachments(attachment, clear_rect);
|
||||
});
|
||||
} else {
|
||||
u8 color_mask = static_cast<u8>(regs.clear_surface.R | regs.clear_surface.G << 1 |
|
||||
regs.clear_surface.B << 2 | regs.clear_surface.A << 3);
|
||||
u8 color_mask = u8(regs.clear_surface.R | regs.clear_surface.G << 1 | regs.clear_surface.B << 2 | regs.clear_surface.A << 3);
|
||||
Region2D dst_region = {
|
||||
Offset2D{.x = clear_rect.rect.offset.x, .y = clear_rect.rect.offset.y},
|
||||
Offset2D{.x = clear_rect.rect.offset.x +
|
||||
static_cast<s32>(clear_rect.rect.extent.width),
|
||||
.y = clear_rect.rect.offset.y +
|
||||
static_cast<s32>(clear_rect.rect.extent.height)}};
|
||||
Offset2D{.x = clear_rect.rect.offset.x + s32(clear_rect.rect.extent.width),
|
||||
.y = clear_rect.rect.offset.y + s32(clear_rect.rect.extent.height)}};
|
||||
blit_image.ClearColor(framebuffer, color_mask, regs.clear_color, dst_region);
|
||||
}
|
||||
}
|
||||
|
|
@ -527,11 +513,10 @@ void RasterizerVulkan::Clear(u32 layer_count) {
|
|||
regs.stencil_front_mask != 0) {
|
||||
Region2D dst_region = {
|
||||
Offset2D{.x = clear_rect.rect.offset.x, .y = clear_rect.rect.offset.y},
|
||||
Offset2D{.x = clear_rect.rect.offset.x + static_cast<s32>(clear_rect.rect.extent.width),
|
||||
.y = clear_rect.rect.offset.y +
|
||||
static_cast<s32>(clear_rect.rect.extent.height)}};
|
||||
Offset2D{.x = clear_rect.rect.offset.x + s32(clear_rect.rect.extent.width),
|
||||
.y = clear_rect.rect.offset.y + s32(clear_rect.rect.extent.height)}};
|
||||
blit_image.ClearDepthStencil(framebuffer, use_depth, regs.clear_depth,
|
||||
static_cast<u8>(regs.stencil_front_mask), regs.clear_stencil,
|
||||
u8(regs.stencil_front_mask), regs.clear_stencil,
|
||||
regs.stencil_front_func_mask, dst_region);
|
||||
} else {
|
||||
scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil,
|
||||
|
|
|
|||
|
|
@ -860,8 +860,7 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, Scheduler& sched
|
|||
compute_pass_descriptor_queue, memory_allocator);
|
||||
}
|
||||
if (device.IsStorageImageMultisampleSupported()) {
|
||||
msaa_copy_pass = std::make_unique<MSAACopyPass>(
|
||||
device, scheduler, descriptor_pool, staging_buffer_pool, compute_pass_descriptor_queue);
|
||||
msaa_copy_pass.emplace(device, scheduler, descriptor_pool, staging_buffer_pool, compute_pass_descriptor_queue);
|
||||
}
|
||||
if (!device.IsKhrImageFormatListSupported()) {
|
||||
return;
|
||||
|
|
@ -1675,10 +1674,10 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset,
|
|||
// CHANGE: Gate the MSAA path more strictly and only use it for color, when the pass and device
|
||||
// support are available. Avoid running the MSAA path when prerequisites aren't met,
|
||||
// preventing validation and runtime issues.
|
||||
const bool wants_msaa_upload = info.num_samples > 1 &&
|
||||
(aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) != 0 &&
|
||||
runtime->CanUploadMSAA() && runtime->msaa_copy_pass != nullptr &&
|
||||
runtime->device.IsStorageImageMultisampleSupported();
|
||||
const bool wants_msaa_upload = info.num_samples > 1
|
||||
&& (aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) != 0
|
||||
&& runtime->CanUploadMSAA() && runtime->msaa_copy_pass.has_value()
|
||||
&& runtime->device.IsStorageImageMultisampleSupported();
|
||||
|
||||
if (wants_msaa_upload) {
|
||||
// Create a temporary non-MSAA image to upload the data first
|
||||
|
|
@ -2047,8 +2046,7 @@ bool Image::BlitScaleHelper(bool scale_up) {
|
|||
const u32 scaled_width = resolution.ScaleUp(info.size.width);
|
||||
const u32 scaled_height = is_2d ? resolution.ScaleUp(info.size.height) : info.size.height;
|
||||
std::unique_ptr<ImageView>& blit_view = scale_up ? scale_view : normal_view;
|
||||
std::unique_ptr<Framebuffer>& blit_framebuffer =
|
||||
scale_up ? scale_framebuffer : normal_framebuffer;
|
||||
std::optional<Framebuffer>& blit_framebuffer = scale_up ? scale_framebuffer : normal_framebuffer;
|
||||
if (!blit_view) {
|
||||
const auto view_info = ImageViewInfo(ImageViewType::e2D, info.format);
|
||||
blit_view = std::make_unique<ImageView>(*runtime, view_info, NULL_IMAGE_ID, *this);
|
||||
|
|
@ -2060,11 +2058,11 @@ bool Image::BlitScaleHelper(bool scale_up) {
|
|||
const u32 dst_height = scale_up ? scaled_height : info.size.height;
|
||||
const Region2D src_region{
|
||||
.start = {0, 0},
|
||||
.end = {static_cast<s32>(src_width), static_cast<s32>(src_height)},
|
||||
.end = {s32(src_width), s32(src_height)},
|
||||
};
|
||||
const Region2D dst_region{
|
||||
.start = {0, 0},
|
||||
.end = {static_cast<s32>(dst_width), static_cast<s32>(dst_height)},
|
||||
.end = {s32(dst_width), s32(dst_height)},
|
||||
};
|
||||
const VkExtent2D extent{
|
||||
.width = (std::max)(scaled_width, info.size.width),
|
||||
|
|
@ -2073,21 +2071,15 @@ bool Image::BlitScaleHelper(bool scale_up) {
|
|||
|
||||
auto* view_ptr = blit_view.get();
|
||||
if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
|
||||
if (!blit_framebuffer) {
|
||||
blit_framebuffer =
|
||||
std::make_unique<Framebuffer>(*runtime, view_ptr, nullptr, extent, scale_up);
|
||||
}
|
||||
|
||||
runtime->blit_image_helper.BlitColor(blit_framebuffer.get(), *blit_view, dst_region,
|
||||
src_region, operation, BLIT_OPERATION);
|
||||
if (!blit_framebuffer)
|
||||
blit_framebuffer.emplace(*runtime, view_ptr, nullptr, extent, scale_up);
|
||||
runtime->blit_image_helper.BlitColor(&*blit_framebuffer, *blit_view,
|
||||
dst_region, src_region, operation, BLIT_OPERATION);
|
||||
} else if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
|
||||
if (!blit_framebuffer) {
|
||||
blit_framebuffer =
|
||||
std::make_unique<Framebuffer>(*runtime, nullptr, view_ptr, extent, scale_up);
|
||||
}
|
||||
runtime->blit_image_helper.BlitDepthStencil(blit_framebuffer.get(), *blit_view,
|
||||
dst_region, src_region, operation,
|
||||
BLIT_OPERATION);
|
||||
if (!blit_framebuffer)
|
||||
blit_framebuffer.emplace(*runtime, nullptr, view_ptr, extent, scale_up);
|
||||
runtime->blit_image_helper.BlitDepthStencil(&*blit_framebuffer, *blit_view,
|
||||
dst_region, src_region, operation, BLIT_OPERATION);
|
||||
} else {
|
||||
// TODO: Use helper blits where applicable
|
||||
flags &= ~ImageFlagBits::Rescaled;
|
||||
|
|
@ -2200,9 +2192,9 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
|
|||
}
|
||||
}
|
||||
|
||||
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
|
||||
ImageId image_id_, Image& image, const SlotVector<Image>& slot_imgs)
|
||||
: ImageView{runtime, info, image_id_, image} {
|
||||
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, ImageId image_id_, Image& image, const SlotVector<Image>& slot_imgs)
|
||||
: ImageView{runtime, info, image_id_, image}
|
||||
{
|
||||
slot_images = &slot_imgs;
|
||||
}
|
||||
|
||||
|
|
@ -2267,33 +2259,25 @@ VkImageView ImageView::ColorView() {
|
|||
|
||||
VkImageView ImageView::StorageView(Shader::TextureType texture_type,
|
||||
Shader::ImageFormat image_format) {
|
||||
if (!image_handle) {
|
||||
return VK_NULL_HANDLE;
|
||||
}
|
||||
if (image_format == Shader::ImageFormat::Typeless) {
|
||||
return Handle(texture_type);
|
||||
}
|
||||
const bool is_signed{image_format == Shader::ImageFormat::R8_SINT ||
|
||||
image_format == Shader::ImageFormat::R16_SINT};
|
||||
if (!storage_views) {
|
||||
storage_views = std::make_unique<StorageViews>();
|
||||
}
|
||||
auto& views{is_signed ? storage_views->signeds : storage_views->unsigneds};
|
||||
auto& view{views[static_cast<size_t>(texture_type)]};
|
||||
if (view) {
|
||||
if (image_handle) {
|
||||
if (image_format == Shader::ImageFormat::Typeless) {
|
||||
return Handle(texture_type);
|
||||
}
|
||||
const bool is_signed = image_format == Shader::ImageFormat::R8_SINT
|
||||
|| image_format == Shader::ImageFormat::R16_SINT;
|
||||
if (!storage_views)
|
||||
storage_views.emplace();
|
||||
auto& views{is_signed ? storage_views->signeds : storage_views->unsigneds};
|
||||
auto& view{views[size_t(texture_type)]};
|
||||
if (!view)
|
||||
view = MakeView(Format(image_format), VK_IMAGE_ASPECT_COLOR_BIT);
|
||||
return *view;
|
||||
}
|
||||
view = MakeView(Format(image_format), VK_IMAGE_ASPECT_COLOR_BIT);
|
||||
return *view;
|
||||
return VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
bool ImageView::IsRescaled() const noexcept {
|
||||
if (!slot_images) {
|
||||
return false;
|
||||
}
|
||||
const auto& slots = *slot_images;
|
||||
const auto& src_image = slots[image_id];
|
||||
return src_image.IsRescaled();
|
||||
return (*slot_images)[image_id].IsRescaled();
|
||||
}
|
||||
|
||||
vk::ImageView ImageView::MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask) {
|
||||
|
|
|
|||
|
|
@ -133,7 +133,7 @@ public:
|
|||
vk::Buffer swizzle_table_buffer;
|
||||
VkDeviceSize swizzle_table_size = 0;
|
||||
|
||||
std::unique_ptr<MSAACopyPass> msaa_copy_pass;
|
||||
std::optional<MSAACopyPass> msaa_copy_pass;
|
||||
const Settings::ResolutionScalingInfo& resolution;
|
||||
std::array<std::vector<VkFormat>, VideoCore::Surface::MaxPixelFormat> view_formats;
|
||||
|
||||
|
|
@ -141,6 +141,89 @@ public:
|
|||
std::array<vk::Buffer, indexing_slots> buffers{};
|
||||
};
|
||||
|
||||
class Framebuffer {
|
||||
public:
|
||||
explicit Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
|
||||
ImageView* depth_buffer, const VideoCommon::RenderTargets& key);
|
||||
|
||||
explicit Framebuffer(TextureCacheRuntime& runtime, ImageView* color_buffer,
|
||||
ImageView* depth_buffer, VkExtent2D extent, bool is_rescaled);
|
||||
|
||||
~Framebuffer();
|
||||
|
||||
Framebuffer(const Framebuffer&) = delete;
|
||||
Framebuffer& operator=(const Framebuffer&) = delete;
|
||||
|
||||
Framebuffer(Framebuffer&&) = default;
|
||||
Framebuffer& operator=(Framebuffer&&) = default;
|
||||
|
||||
void CreateFramebuffer(TextureCacheRuntime& runtime,
|
||||
std::span<ImageView*, NUM_RT> color_buffers, ImageView* depth_buffer,
|
||||
bool is_rescaled = false);
|
||||
|
||||
[[nodiscard]] VkFramebuffer Handle() const noexcept {
|
||||
return *framebuffer;
|
||||
}
|
||||
|
||||
[[nodiscard]] VkRenderPass RenderPass() const noexcept {
|
||||
return renderpass;
|
||||
}
|
||||
|
||||
[[nodiscard]] VkExtent2D RenderArea() const noexcept {
|
||||
return render_area;
|
||||
}
|
||||
|
||||
[[nodiscard]] VkSampleCountFlagBits Samples() const noexcept {
|
||||
return samples;
|
||||
}
|
||||
|
||||
[[nodiscard]] u32 NumColorBuffers() const noexcept {
|
||||
return num_color_buffers;
|
||||
}
|
||||
|
||||
[[nodiscard]] u32 NumImages() const noexcept {
|
||||
return num_images;
|
||||
}
|
||||
|
||||
[[nodiscard]] const std::array<VkImage, 9>& Images() const noexcept {
|
||||
return images;
|
||||
}
|
||||
|
||||
[[nodiscard]] const std::array<VkImageSubresourceRange, 9>& ImageRanges() const noexcept {
|
||||
return image_ranges;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool HasAspectColorBit(size_t index) const noexcept {
|
||||
return (image_ranges.at(rt_map[index]).aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) != 0;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool HasAspectDepthBit() const noexcept {
|
||||
return has_depth;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool HasAspectStencilBit() const noexcept {
|
||||
return has_stencil;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool IsRescaled() const noexcept {
|
||||
return is_rescaled;
|
||||
}
|
||||
|
||||
private:
|
||||
vk::Framebuffer framebuffer;
|
||||
VkRenderPass renderpass{};
|
||||
VkExtent2D render_area{};
|
||||
VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
|
||||
u32 num_color_buffers = 0;
|
||||
u32 num_images = 0;
|
||||
std::array<VkImage, 9> images{};
|
||||
std::array<VkImageSubresourceRange, 9> image_ranges{};
|
||||
std::array<size_t, NUM_RT> rt_map{};
|
||||
bool has_depth{};
|
||||
bool has_stencil{};
|
||||
bool is_rescaled{};
|
||||
};
|
||||
|
||||
class Image : public VideoCommon::ImageBase {
|
||||
public:
|
||||
explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
|
||||
|
|
@ -226,10 +309,9 @@ private:
|
|||
VkImageAspectFlags aspect_mask = 0;
|
||||
bool initialized = false;
|
||||
|
||||
std::unique_ptr<Framebuffer> scale_framebuffer;
|
||||
std::optional<Framebuffer> scale_framebuffer;
|
||||
std::optional<Framebuffer> normal_framebuffer;
|
||||
std::unique_ptr<ImageView> scale_view;
|
||||
|
||||
std::unique_ptr<Framebuffer> normal_framebuffer;
|
||||
std::unique_ptr<ImageView> normal_view;
|
||||
};
|
||||
|
||||
|
|
@ -297,7 +379,7 @@ private:
|
|||
const SlotVector<Image>* slot_images = nullptr;
|
||||
|
||||
std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> image_views;
|
||||
std::unique_ptr<StorageViews> storage_views;
|
||||
std::optional<StorageViews> storage_views;
|
||||
vk::ImageView depth_view;
|
||||
vk::ImageView stencil_view;
|
||||
vk::ImageView color_view;
|
||||
|
|
@ -331,89 +413,6 @@ private:
|
|||
vk::Sampler sampler_default_anisotropy;
|
||||
};
|
||||
|
||||
class Framebuffer {
|
||||
public:
|
||||
explicit Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
|
||||
ImageView* depth_buffer, const VideoCommon::RenderTargets& key);
|
||||
|
||||
explicit Framebuffer(TextureCacheRuntime& runtime, ImageView* color_buffer,
|
||||
ImageView* depth_buffer, VkExtent2D extent, bool is_rescaled);
|
||||
|
||||
~Framebuffer();
|
||||
|
||||
Framebuffer(const Framebuffer&) = delete;
|
||||
Framebuffer& operator=(const Framebuffer&) = delete;
|
||||
|
||||
Framebuffer(Framebuffer&&) = default;
|
||||
Framebuffer& operator=(Framebuffer&&) = default;
|
||||
|
||||
void CreateFramebuffer(TextureCacheRuntime& runtime,
|
||||
std::span<ImageView*, NUM_RT> color_buffers, ImageView* depth_buffer,
|
||||
bool is_rescaled = false);
|
||||
|
||||
[[nodiscard]] VkFramebuffer Handle() const noexcept {
|
||||
return *framebuffer;
|
||||
}
|
||||
|
||||
[[nodiscard]] VkRenderPass RenderPass() const noexcept {
|
||||
return renderpass;
|
||||
}
|
||||
|
||||
[[nodiscard]] VkExtent2D RenderArea() const noexcept {
|
||||
return render_area;
|
||||
}
|
||||
|
||||
[[nodiscard]] VkSampleCountFlagBits Samples() const noexcept {
|
||||
return samples;
|
||||
}
|
||||
|
||||
[[nodiscard]] u32 NumColorBuffers() const noexcept {
|
||||
return num_color_buffers;
|
||||
}
|
||||
|
||||
[[nodiscard]] u32 NumImages() const noexcept {
|
||||
return num_images;
|
||||
}
|
||||
|
||||
[[nodiscard]] const std::array<VkImage, 9>& Images() const noexcept {
|
||||
return images;
|
||||
}
|
||||
|
||||
[[nodiscard]] const std::array<VkImageSubresourceRange, 9>& ImageRanges() const noexcept {
|
||||
return image_ranges;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool HasAspectColorBit(size_t index) const noexcept {
|
||||
return (image_ranges.at(rt_map[index]).aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) != 0;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool HasAspectDepthBit() const noexcept {
|
||||
return has_depth;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool HasAspectStencilBit() const noexcept {
|
||||
return has_stencil;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool IsRescaled() const noexcept {
|
||||
return is_rescaled;
|
||||
}
|
||||
|
||||
private:
|
||||
vk::Framebuffer framebuffer;
|
||||
VkRenderPass renderpass{};
|
||||
VkExtent2D render_area{};
|
||||
VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
|
||||
u32 num_color_buffers = 0;
|
||||
u32 num_images = 0;
|
||||
std::array<VkImage, 9> images{};
|
||||
std::array<VkImageSubresourceRange, 9> image_ranges{};
|
||||
std::array<size_t, NUM_RT> rt_map{};
|
||||
bool has_depth{};
|
||||
bool has_stencil{};
|
||||
bool is_rescaled{};
|
||||
};
|
||||
|
||||
struct TextureCacheParams {
|
||||
static constexpr bool ENABLE_VALIDATION = true;
|
||||
static constexpr bool FRAMEBUFFER_BLITS = false;
|
||||
|
|
|
|||
|
|
@ -596,10 +596,10 @@ FramebufferId TextureCache<P>::GetFramebufferId(const RenderTargets& key) {
|
|||
return framebuffer_id;
|
||||
}
|
||||
std::array<ImageView*, NUM_RT> color_buffers;
|
||||
std::ranges::transform(key.color_buffer_ids, color_buffers.begin(),
|
||||
[this](ImageViewId id) { return id ? &slot_image_views[id] : nullptr; });
|
||||
ImageView* const depth_buffer =
|
||||
key.depth_buffer_id ? &slot_image_views[key.depth_buffer_id] : nullptr;
|
||||
std::ranges::transform(key.color_buffer_ids, color_buffers.begin(), [this](ImageViewId id) {
|
||||
return id ? &slot_image_views[id] : nullptr;
|
||||
});
|
||||
ImageView* const depth_buffer = key.depth_buffer_id ? &slot_image_views[key.depth_buffer_id] : nullptr;
|
||||
framebuffer_id = slot_framebuffers.insert(runtime, color_buffers, depth_buffer, key);
|
||||
return framebuffer_id;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue