mirror of
https://git.eden-emu.dev/eden-emu/eden
synced 2026-04-10 03:18:55 +02:00
[maxwell] Refactor execution mask initialization to use fill() instead of reset()
This commit is contained in:
parent
3d0eb4b5d7
commit
ad2225b5d0
7 changed files with 94 additions and 16 deletions
|
|
@ -122,7 +122,35 @@ void DmaPusher::ProcessCommands(std::span<const CommandHeader> commands) {
|
||||||
dma_state.is_last_call = true;
|
dma_state.is_last_call = true;
|
||||||
index += max_write;
|
index += max_write;
|
||||||
} else if (dma_state.method_count) {
|
} else if (dma_state.method_count) {
|
||||||
auto const command_header = commands[index]; //can copy
|
if (!dma_state.non_incrementing && !dma_increment_once &&
|
||||||
|
dma_state.method >= non_puller_methods) {
|
||||||
|
auto subchannel = subchannels[dma_state.subchannel];
|
||||||
|
const u32 available = u32(std::min<size_t>(
|
||||||
|
index + dma_state.method_count, commands.size()) - index);
|
||||||
|
u32 batch = 0;
|
||||||
|
u32 method = dma_state.method;
|
||||||
|
while (batch < available) {
|
||||||
|
const bool needs_exec =
|
||||||
|
(method < Engines::EngineInterface::EXECUTION_MASK_TABLE_SIZE)
|
||||||
|
? subchannel->execution_mask[method]
|
||||||
|
: subchannel->execution_mask_default;
|
||||||
|
if (needs_exec) break;
|
||||||
|
batch++;
|
||||||
|
method++;
|
||||||
|
}
|
||||||
|
if (batch > 0) {
|
||||||
|
auto& sink = subchannel->method_sink;
|
||||||
|
sink.reserve(sink.size() + batch);
|
||||||
|
for (u32 j = 0; j < batch; j++) {
|
||||||
|
sink.emplace_back(dma_state.method + j, commands[index + j].argument);
|
||||||
|
}
|
||||||
|
dma_state.method += batch;
|
||||||
|
dma_state.method_count -= batch;
|
||||||
|
index += batch;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
auto const command_header = commands[index];
|
||||||
dma_state.dma_word_offset = u32(index * sizeof(u32));
|
dma_state.dma_word_offset = u32(index * sizeof(u32));
|
||||||
dma_state.is_last_call = dma_state.method_count <= 1;
|
dma_state.is_last_call = dma_state.method_count <= 1;
|
||||||
CallMethod(command_header.argument);
|
CallMethod(command_header.argument);
|
||||||
|
|
@ -181,7 +209,11 @@ void DmaPusher::CallMethod(u32 argument) const {
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
auto subchannel = subchannels[dma_state.subchannel];
|
auto subchannel = subchannels[dma_state.subchannel];
|
||||||
if (!subchannel->execution_mask[dma_state.method]) {
|
const bool needs_execution =
|
||||||
|
(dma_state.method < Engines::EngineInterface::EXECUTION_MASK_TABLE_SIZE)
|
||||||
|
? subchannel->execution_mask[dma_state.method]
|
||||||
|
: subchannel->execution_mask_default;
|
||||||
|
if (!needs_execution) {
|
||||||
subchannel->method_sink.emplace_back(dma_state.method, argument);
|
subchannel->method_sink.emplace_back(dma_state.method, argument);
|
||||||
} else {
|
} else {
|
||||||
subchannel->ConsumeSink();
|
subchannel->ConsumeSink();
|
||||||
|
|
|
||||||
|
|
@ -6,9 +6,9 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <bitset>
|
#include <array>
|
||||||
#include <limits>
|
|
||||||
#include <vector>
|
#include <boost/container/small_vector.hpp>
|
||||||
|
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
|
|
||||||
|
|
@ -41,8 +41,11 @@ public:
|
||||||
ConsumeSinkImpl();
|
ConsumeSinkImpl();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::bitset<(std::numeric_limits<u16>::max)()> execution_mask{};
|
static constexpr size_t EXECUTION_MASK_TABLE_SIZE = 0xE00;
|
||||||
std::vector<std::pair<u32, u32>> method_sink{};
|
|
||||||
|
std::array<u8, EXECUTION_MASK_TABLE_SIZE> execution_mask{};
|
||||||
|
bool execution_mask_default{};
|
||||||
|
boost::container::small_vector<std::pair<u32, u32>, 64> method_sink{};
|
||||||
bool current_dirty{};
|
bool current_dirty{};
|
||||||
GPUVAddr current_dma_segment;
|
GPUVAddr current_dma_segment;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -26,7 +26,7 @@ Fermi2D::Fermi2D(MemoryManager& memory_manager_) : memory_manager{memory_manager
|
||||||
regs.src.depth = 1;
|
regs.src.depth = 1;
|
||||||
regs.dst.depth = 1;
|
regs.dst.depth = 1;
|
||||||
|
|
||||||
execution_mask.reset();
|
execution_mask.fill(0);
|
||||||
execution_mask[FERMI2D_REG_INDEX(pixels_from_memory.src_y0) + 1] = true;
|
execution_mask[FERMI2D_REG_INDEX(pixels_from_memory.src_y0) + 1] = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -18,7 +18,7 @@ namespace Tegra::Engines {
|
||||||
|
|
||||||
KeplerCompute::KeplerCompute(Core::System& system_, MemoryManager& memory_manager_)
|
KeplerCompute::KeplerCompute(Core::System& system_, MemoryManager& memory_manager_)
|
||||||
: system{system_}, memory_manager{memory_manager_}, upload_state{memory_manager, regs.upload} {
|
: system{system_}, memory_manager{memory_manager_}, upload_state{memory_manager, regs.upload} {
|
||||||
execution_mask.reset();
|
execution_mask.fill(0);
|
||||||
execution_mask[KEPLER_COMPUTE_REG_INDEX(exec_upload)] = true;
|
execution_mask[KEPLER_COMPUTE_REG_INDEX(exec_upload)] = true;
|
||||||
execution_mask[KEPLER_COMPUTE_REG_INDEX(data_upload)] = true;
|
execution_mask[KEPLER_COMPUTE_REG_INDEX(data_upload)] = true;
|
||||||
execution_mask[KEPLER_COMPUTE_REG_INDEX(launch)] = true;
|
execution_mask[KEPLER_COMPUTE_REG_INDEX(launch)] = true;
|
||||||
|
|
|
||||||
|
|
@ -22,7 +22,7 @@ KeplerMemory::~KeplerMemory() = default;
|
||||||
void KeplerMemory::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
|
void KeplerMemory::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
|
||||||
upload_state.BindRasterizer(rasterizer_);
|
upload_state.BindRasterizer(rasterizer_);
|
||||||
|
|
||||||
execution_mask.reset();
|
execution_mask.fill(0);
|
||||||
execution_mask[KEPLERMEMORY_REG_INDEX(exec)] = true;
|
execution_mask[KEPLERMEMORY_REG_INDEX(exec)] = true;
|
||||||
execution_mask[KEPLERMEMORY_REG_INDEX(data)] = true;
|
execution_mask[KEPLERMEMORY_REG_INDEX(data)] = true;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -4,8 +4,14 @@
|
||||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
|
|
||||||
|
#if defined(_MSC_VER) && !defined(__clang__)
|
||||||
|
#include <intrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/bit_util.h"
|
#include "common/bit_util.h"
|
||||||
#include "common/scope_exit.h"
|
#include "common/scope_exit.h"
|
||||||
|
|
@ -22,6 +28,16 @@
|
||||||
|
|
||||||
namespace Tegra::Engines {
|
namespace Tegra::Engines {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
inline void PrefetchLine(const void* addr) {
|
||||||
|
#if defined(_MSC_VER) && !defined(__clang__)
|
||||||
|
_mm_prefetch(static_cast<const char*>(addr), _MM_HINT_T0);
|
||||||
|
#else
|
||||||
|
__builtin_prefetch(addr, 0, 1);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
} // namespace
|
||||||
|
|
||||||
/// First register id that is actually a Macro call.
|
/// First register id that is actually a Macro call.
|
||||||
constexpr u32 MacroRegistersStart = 0xE00;
|
constexpr u32 MacroRegistersStart = 0xE00;
|
||||||
|
|
||||||
|
|
@ -37,9 +53,10 @@ Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_)
|
||||||
{
|
{
|
||||||
dirty.flags.flip();
|
dirty.flags.flip();
|
||||||
InitializeRegisterDefaults();
|
InitializeRegisterDefaults();
|
||||||
execution_mask.reset();
|
execution_mask.fill(0);
|
||||||
for (size_t i = 0; i < execution_mask.size(); i++)
|
for (size_t i = 0; i < EXECUTION_MASK_TABLE_SIZE; i++)
|
||||||
execution_mask[i] = IsMethodExecutable(u32(i));
|
execution_mask[i] = IsMethodExecutable(u32(i));
|
||||||
|
execution_mask_default = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
Maxwell3D::~Maxwell3D() = default;
|
Maxwell3D::~Maxwell3D() = default;
|
||||||
|
|
@ -298,18 +315,44 @@ u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void Maxwell3D::ConsumeSinkImpl() {
|
void Maxwell3D::ConsumeSinkImpl() {
|
||||||
|
std::stable_sort(method_sink.begin(), method_sink.end(),
|
||||||
|
[](const auto& a, const auto& b) { return a.first < b.first; });
|
||||||
|
|
||||||
|
const auto sink_size = method_sink.size();
|
||||||
const auto control = shadow_state.shadow_ram_control;
|
const auto control = shadow_state.shadow_ram_control;
|
||||||
if (control == Regs::ShadowRamControl::Track || control == Regs::ShadowRamControl::TrackWithFilter) {
|
if (control == Regs::ShadowRamControl::Track || control == Regs::ShadowRamControl::TrackWithFilter) {
|
||||||
for (auto [method, value] : method_sink) {
|
for (size_t i = 0; i < sink_size; ++i) {
|
||||||
|
const auto [method, value] = method_sink[i];
|
||||||
|
if (i + 1 < sink_size) {
|
||||||
|
const u32 next = method_sink[i + 1].first;
|
||||||
|
PrefetchLine(&regs.reg_array[next]);
|
||||||
|
PrefetchLine(&shadow_state.reg_array[next]);
|
||||||
|
PrefetchLine(&dirty.tables[0][next]);
|
||||||
|
}
|
||||||
shadow_state.reg_array[method] = value;
|
shadow_state.reg_array[method] = value;
|
||||||
ProcessDirtyRegisters(method, value);
|
ProcessDirtyRegisters(method, value);
|
||||||
}
|
}
|
||||||
} else if (control == Regs::ShadowRamControl::Replay) {
|
} else if (control == Regs::ShadowRamControl::Replay) {
|
||||||
for (auto [method, value] : method_sink)
|
for (size_t i = 0; i < sink_size; ++i) {
|
||||||
|
const auto [method, value] = method_sink[i];
|
||||||
|
if (i + 1 < sink_size) {
|
||||||
|
const u32 next = method_sink[i + 1].first;
|
||||||
|
PrefetchLine(&regs.reg_array[next]);
|
||||||
|
PrefetchLine(&shadow_state.reg_array[next]);
|
||||||
|
PrefetchLine(&dirty.tables[0][next]);
|
||||||
|
}
|
||||||
ProcessDirtyRegisters(method, shadow_state.reg_array[method]);
|
ProcessDirtyRegisters(method, shadow_state.reg_array[method]);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
for (auto [method, value] : method_sink)
|
for (size_t i = 0; i < sink_size; ++i) {
|
||||||
|
const auto [method, value] = method_sink[i];
|
||||||
|
if (i + 1 < sink_size) {
|
||||||
|
const u32 next = method_sink[i + 1].first;
|
||||||
|
PrefetchLine(&regs.reg_array[next]);
|
||||||
|
PrefetchLine(&dirty.tables[0][next]);
|
||||||
|
}
|
||||||
ProcessDirtyRegisters(method, value);
|
ProcessDirtyRegisters(method, value);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
method_sink.clear();
|
method_sink.clear();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -23,7 +23,7 @@ using namespace Texture;
|
||||||
|
|
||||||
MaxwellDMA::MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_)
|
MaxwellDMA::MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_)
|
||||||
: system{system_}, memory_manager{memory_manager_} {
|
: system{system_}, memory_manager{memory_manager_} {
|
||||||
execution_mask.reset();
|
execution_mask.fill(0);
|
||||||
execution_mask[offsetof(Regs, launch_dma) / sizeof(u32)] = true;
|
execution_mask[offsetof(Regs, launch_dma) / sizeof(u32)] = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue