[video_core/engines/maxwell3d] memory inline DrawState to reduce indirection on hot paths (#3758)
Some checks are pending
tx-src / sources (push) Waiting to run
Check Strings / check-strings (push) Waiting to run

usual indirection removal
helps very slightly to codegen

the idea is basically to reduce the amount of pointer deference overall in the code, and use idiomatic std::variant<>-isms to not rely on vtables/unique_ptr overhead
this should allow the compiler to emit better code
of course it's a tiny optimisation and only CPU side, but allows us to reduce indirection which is almost always a good thing

"but youre passing more parameters to the function!!!" its literally memoized into a register my friend

Signed-off-by: lizzie <lizzie@eden-emu.dev>
Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3758
Reviewed-by: crueter <crueter@eden-emu.dev>
Reviewed-by: MaranBr <maranbr@eden-emu.dev>
This commit is contained in:
lizzie 2026-04-28 17:44:51 +02:00 committed by crueter
parent ed225f8a8b
commit 77decca678
No known key found for this signature in database
GPG key ID: 425ACD2D4830EBC6
15 changed files with 231 additions and 351 deletions

View file

@ -1,23 +1,24 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/settings.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/draw_manager.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_interface.h"
namespace Tegra::Engines {
DrawManager::DrawManager(Maxwell3D* maxwell3d_) : maxwell3d(maxwell3d_) {}
void DrawManager::ProcessMethodCall(u32 method, u32 argument) {
const auto& regs{maxwell3d->regs};
void Maxwell3D::DrawManager::ProcessMethodCall(Maxwell3D& maxwell3d, u32 method, u32 argument) {
switch (method) {
case MAXWELL3D_REG_INDEX(clear_surface):
return Clear(1);
return Clear(maxwell3d, 1);
case MAXWELL3D_REG_INDEX(draw.begin):
return DrawBegin();
return DrawBegin(maxwell3d);
case MAXWELL3D_REG_INDEX(draw.end):
return DrawEnd();
return DrawEnd(maxwell3d);
case MAXWELL3D_REG_INDEX(vertex_buffer.first):
case MAXWELL3D_REG_INDEX(vertex_buffer.count):
case MAXWELL3D_REG_INDEX(index_buffer.first):
@ -33,33 +34,29 @@ void DrawManager::ProcessMethodCall(u32 method, u32 argument) {
case MAXWELL3D_REG_INDEX(index_buffer32_first):
case MAXWELL3D_REG_INDEX(index_buffer16_first):
case MAXWELL3D_REG_INDEX(index_buffer8_first):
return DrawIndexSmall(argument);
return DrawIndexSmall(maxwell3d, argument);
case MAXWELL3D_REG_INDEX(draw_inline_index):
SetInlineIndexBuffer(argument);
SetInlineIndexBuffer(maxwell3d, argument);
break;
case MAXWELL3D_REG_INDEX(inline_index_2x16.even):
SetInlineIndexBuffer(regs.inline_index_2x16.even);
SetInlineIndexBuffer(regs.inline_index_2x16.odd);
SetInlineIndexBuffer(maxwell3d, maxwell3d.regs.inline_index_2x16.even);
SetInlineIndexBuffer(maxwell3d, maxwell3d.regs.inline_index_2x16.odd);
break;
case MAXWELL3D_REG_INDEX(inline_index_4x8.index0):
SetInlineIndexBuffer(regs.inline_index_4x8.index0);
SetInlineIndexBuffer(regs.inline_index_4x8.index1);
SetInlineIndexBuffer(regs.inline_index_4x8.index2);
SetInlineIndexBuffer(regs.inline_index_4x8.index3);
SetInlineIndexBuffer(maxwell3d, maxwell3d.regs.inline_index_4x8.index0);
SetInlineIndexBuffer(maxwell3d, maxwell3d.regs.inline_index_4x8.index1);
SetInlineIndexBuffer(maxwell3d, maxwell3d.regs.inline_index_4x8.index2);
SetInlineIndexBuffer(maxwell3d, maxwell3d.regs.inline_index_4x8.index3);
break;
case MAXWELL3D_REG_INDEX(vertex_array_instance_first):
DrawArrayInstanced(regs.vertex_array_instance_first.topology.Value(),
regs.vertex_array_instance_first.start.Value(),
regs.vertex_array_instance_first.count.Value(), false);
DrawArrayInstanced(maxwell3d, maxwell3d.regs.vertex_array_instance_first.topology.Value(), maxwell3d.regs.vertex_array_instance_first.start.Value(), maxwell3d.regs.vertex_array_instance_first.count.Value(), false);
break;
case MAXWELL3D_REG_INDEX(vertex_array_instance_subsequent): {
DrawArrayInstanced(regs.vertex_array_instance_subsequent.topology.Value(),
regs.vertex_array_instance_subsequent.start.Value(),
regs.vertex_array_instance_subsequent.count.Value(), true);
DrawArrayInstanced(maxwell3d, maxwell3d.regs.vertex_array_instance_subsequent.topology.Value(), maxwell3d.regs.vertex_array_instance_subsequent.start.Value(), maxwell3d.regs.vertex_array_instance_subsequent.count.Value(), true);
break;
}
case MAXWELL3D_REG_INDEX(draw_texture.src_y0): {
DrawTexture();
DrawTexture(maxwell3d);
break;
}
default:
@ -67,101 +64,87 @@ void DrawManager::ProcessMethodCall(u32 method, u32 argument) {
}
}
void DrawManager::Clear(u32 layer_count) {
if (maxwell3d->ShouldExecute()) {
maxwell3d->rasterizer->Clear(layer_count);
void Maxwell3D::DrawManager::Clear(Maxwell3D& maxwell3d, u32 layer_count) {
if (maxwell3d.ShouldExecute()) {
maxwell3d.rasterizer->Clear(layer_count);
}
}
void DrawManager::DrawDeferred() {
void Maxwell3D::DrawManager::DrawDeferred(Maxwell3D& maxwell3d) {
if (draw_state.draw_mode != DrawMode::Instance || draw_state.instance_count == 0) {
return;
}
DrawEnd(draw_state.instance_count + 1, true);
DrawEnd(maxwell3d, draw_state.instance_count + 1, true);
draw_state.instance_count = 0;
}
void DrawManager::DrawArray(PrimitiveTopology topology, u32 vertex_first, u32 vertex_count,
u32 base_instance, u32 num_instances) {
void Maxwell3D::DrawManager::DrawArray(Maxwell3D& maxwell3d, Maxwell3D::Regs::PrimitiveTopology topology, u32 vertex_first, u32 vertex_count, u32 base_instance, u32 num_instances) {
draw_state.topology = topology;
draw_state.vertex_buffer.first = vertex_first;
draw_state.vertex_buffer.count = vertex_count;
draw_state.base_instance = base_instance;
ProcessDraw(false, num_instances);
ProcessDraw(maxwell3d, false, num_instances);
}
void DrawManager::DrawArrayInstanced(PrimitiveTopology topology, u32 vertex_first, u32 vertex_count,
bool subsequent) {
void Maxwell3D::DrawManager::DrawArrayInstanced(Maxwell3D& maxwell3d, Maxwell3D::Regs::PrimitiveTopology topology, u32 vertex_first, u32 vertex_count, bool subsequent) {
draw_state.topology = topology;
draw_state.vertex_buffer.first = vertex_first;
draw_state.vertex_buffer.count = vertex_count;
if (!subsequent) {
draw_state.instance_count = 1;
}
draw_state.base_instance = draw_state.instance_count - 1;
draw_state.draw_mode = DrawMode::Instance;
draw_state.instance_count++;
ProcessDraw(false, 1);
ProcessDraw(maxwell3d, false, 1);
}
void DrawManager::DrawIndex(PrimitiveTopology topology, u32 index_first, u32 index_count,
u32 base_index, u32 base_instance, u32 num_instances) {
const auto& regs{maxwell3d->regs};
void Maxwell3D::DrawManager::DrawIndex(Maxwell3D& maxwell3d, Maxwell3D::Regs::PrimitiveTopology topology, u32 index_first, u32 index_count, u32 base_index, u32 base_instance, u32 num_instances) {
draw_state.topology = topology;
draw_state.index_buffer = regs.index_buffer;
draw_state.index_buffer = maxwell3d.regs.index_buffer;
draw_state.index_buffer.first = index_first;
draw_state.index_buffer.count = index_count;
draw_state.base_index = base_index;
draw_state.base_instance = base_instance;
ProcessDraw(true, num_instances);
ProcessDraw(maxwell3d, true, num_instances);
}
void DrawManager::DrawArrayIndirect(PrimitiveTopology topology) {
void Maxwell3D::DrawManager::DrawArrayIndirect(Maxwell3D& maxwell3d, Maxwell3D::Regs::PrimitiveTopology topology) {
draw_state.topology = topology;
ProcessDrawIndirect();
ProcessDrawIndirect(maxwell3d);
}
void DrawManager::DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first,
u32 index_count) {
const auto& regs{maxwell3d->regs};
void Maxwell3D::DrawManager::DrawIndexedIndirect(Maxwell3D& maxwell3d, Maxwell3D::Regs::PrimitiveTopology topology, u32 index_first, u32 index_count) {
draw_state.topology = topology;
draw_state.index_buffer = regs.index_buffer;
draw_state.index_buffer = maxwell3d.regs.index_buffer;
draw_state.index_buffer.first = index_first;
draw_state.index_buffer.count = index_count;
ProcessDrawIndirect();
ProcessDrawIndirect(maxwell3d);
}
void DrawManager::SetInlineIndexBuffer(u32 index) {
draw_state.inline_index_draw_indexes.push_back(static_cast<u8>(index & 0x000000ff));
draw_state.inline_index_draw_indexes.push_back(static_cast<u8>((index & 0x0000ff00) >> 8));
draw_state.inline_index_draw_indexes.push_back(static_cast<u8>((index & 0x00ff0000) >> 16));
draw_state.inline_index_draw_indexes.push_back(static_cast<u8>((index & 0xff000000) >> 24));
void Maxwell3D::DrawManager::SetInlineIndexBuffer(Maxwell3D& maxwell3d, u32 index) {
draw_state.inline_index_draw_indexes.push_back(u8(index & 0x000000ff));
draw_state.inline_index_draw_indexes.push_back(u8((index & 0x0000ff00) >> 8));
draw_state.inline_index_draw_indexes.push_back(u8((index & 0x00ff0000) >> 16));
draw_state.inline_index_draw_indexes.push_back(u8((index & 0xff000000) >> 24));
draw_state.draw_mode = DrawMode::InlineIndex;
}
void DrawManager::DrawBegin() {
const auto& regs{maxwell3d->regs};
auto reset_instance_count = regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::First;
auto increment_instance_count =
regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Subsequent;
void Maxwell3D::DrawManager::DrawBegin(Maxwell3D& maxwell3d) {
auto reset_instance_count = maxwell3d.regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::First;
auto increment_instance_count = maxwell3d.regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Subsequent;
if (reset_instance_count) {
DrawDeferred();
DrawDeferred(maxwell3d);
draw_state.instance_count = 0;
draw_state.draw_mode = DrawMode::General;
} else if (increment_instance_count) {
draw_state.instance_count++;
draw_state.draw_mode = DrawMode::Instance;
}
draw_state.topology = regs.draw.topology;
draw_state.topology = maxwell3d.regs.draw.topology;
}
void DrawManager::DrawEnd(u32 instance_count, bool force_draw) {
const auto& regs{maxwell3d->regs};
void Maxwell3D::DrawManager::DrawEnd(Maxwell3D& maxwell3d, u32 instance_count, bool force_draw) {
switch (draw_state.draw_mode) {
case DrawMode::Instance:
if (!force_draw) {
@ -169,119 +152,100 @@ void DrawManager::DrawEnd(u32 instance_count, bool force_draw) {
}
[[fallthrough]];
case DrawMode::General:
draw_state.base_instance = regs.global_base_instance_index;
draw_state.base_index = regs.global_base_vertex_index;
draw_state.base_instance = maxwell3d.regs.global_base_instance_index;
draw_state.base_index = maxwell3d.regs.global_base_vertex_index;
if (draw_state.draw_indexed) {
draw_state.index_buffer = regs.index_buffer;
ProcessDraw(true, instance_count);
draw_state.index_buffer = maxwell3d.regs.index_buffer;
ProcessDraw(maxwell3d, true, instance_count);
} else {
draw_state.vertex_buffer = regs.vertex_buffer;
ProcessDraw(false, instance_count);
draw_state.vertex_buffer = maxwell3d.regs.vertex_buffer;
ProcessDraw(maxwell3d, false, instance_count);
}
draw_state.draw_indexed = false;
break;
case DrawMode::InlineIndex:
draw_state.base_instance = regs.global_base_instance_index;
draw_state.base_index = regs.global_base_vertex_index;
draw_state.index_buffer = regs.index_buffer;
draw_state.index_buffer.count =
static_cast<u32>(draw_state.inline_index_draw_indexes.size() / 4);
draw_state.base_instance = maxwell3d.regs.global_base_instance_index;
draw_state.base_index = maxwell3d.regs.global_base_vertex_index;
draw_state.index_buffer = maxwell3d.regs.index_buffer;
draw_state.index_buffer.count = u32(draw_state.inline_index_draw_indexes.size() / 4);
draw_state.index_buffer.format = Maxwell3D::Regs::IndexFormat::UnsignedInt;
maxwell3d->dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
ProcessDraw(true, instance_count);
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
ProcessDraw(maxwell3d, true, instance_count);
draw_state.inline_index_draw_indexes.clear();
break;
}
}
void DrawManager::DrawIndexSmall(u32 argument) {
const auto& regs{maxwell3d->regs};
IndexBufferSmall index_small_params{argument};
draw_state.base_instance = regs.global_base_instance_index;
draw_state.base_index = regs.global_base_vertex_index;
draw_state.index_buffer = regs.index_buffer;
void Maxwell3D::DrawManager::DrawIndexSmall(Maxwell3D& maxwell3d, u32 argument) {
Maxwell3D::Regs::IndexBufferSmall index_small_params{argument};
draw_state.base_instance = maxwell3d.regs.global_base_instance_index;
draw_state.base_index = maxwell3d.regs.global_base_vertex_index;
draw_state.index_buffer = maxwell3d.regs.index_buffer;
draw_state.index_buffer.first = index_small_params.first;
draw_state.index_buffer.count = index_small_params.count;
draw_state.topology = index_small_params.topology;
maxwell3d->dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
ProcessDraw(true, 1);
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
ProcessDraw(maxwell3d, true, 1);
}
void DrawManager::DrawTexture() {
const auto& regs{maxwell3d->regs};
draw_texture_state.dst_x0 = static_cast<float>(regs.draw_texture.dst_x0) / 4096.f;
draw_texture_state.dst_y0 = static_cast<float>(regs.draw_texture.dst_y0) / 4096.f;
const auto dst_width = static_cast<float>(regs.draw_texture.dst_width) / 4096.f;
const auto dst_height = static_cast<float>(regs.draw_texture.dst_height) / 4096.f;
const bool lower_left{regs.window_origin.mode !=
Maxwell3D::Regs::WindowOrigin::Mode::UpperLeft};
void Maxwell3D::DrawManager::DrawTexture(Maxwell3D& maxwell3d) {
draw_texture_state.dst_x0 = f32(maxwell3d.regs.draw_texture.dst_x0) / 4096.f;
draw_texture_state.dst_y0 = f32(maxwell3d.regs.draw_texture.dst_y0) / 4096.f;
const auto dst_width = f32(maxwell3d.regs.draw_texture.dst_width) / 4096.f;
const auto dst_height = f32(maxwell3d.regs.draw_texture.dst_height) / 4096.f;
const bool lower_left{maxwell3d.regs.window_origin.mode != Maxwell3D::Regs::WindowOrigin::Mode::UpperLeft};
if (lower_left) {
draw_texture_state.dst_y0 =
static_cast<f32>(regs.surface_clip.height) - draw_texture_state.dst_y0;
draw_texture_state.dst_y0 = f32(maxwell3d.regs.surface_clip.height) - draw_texture_state.dst_y0;
}
draw_texture_state.dst_x1 = draw_texture_state.dst_x0 + dst_width;
draw_texture_state.dst_y1 = draw_texture_state.dst_y0 + dst_height;
draw_texture_state.src_x0 = static_cast<float>(regs.draw_texture.src_x0) / 4096.f;
draw_texture_state.src_y0 = static_cast<float>(regs.draw_texture.src_y0) / 4096.f;
draw_texture_state.src_x1 =
(static_cast<float>(regs.draw_texture.dx_du) / 4294967296.f) * dst_width +
draw_texture_state.src_x0;
draw_texture_state.src_y1 =
(static_cast<float>(regs.draw_texture.dy_dv) / 4294967296.f) * dst_height +
draw_texture_state.src_y0;
draw_texture_state.src_sampler = regs.draw_texture.src_sampler;
draw_texture_state.src_texture = regs.draw_texture.src_texture;
maxwell3d->rasterizer->DrawTexture();
draw_texture_state.src_x0 = f32(maxwell3d.regs.draw_texture.src_x0) / 4096.f;
draw_texture_state.src_y0 = f32(maxwell3d.regs.draw_texture.src_y0) / 4096.f;
draw_texture_state.src_x1 = (f32(maxwell3d.regs.draw_texture.dx_du) / 4294967296.f) * dst_width + draw_texture_state.src_x0;
draw_texture_state.src_y1 = (f32(maxwell3d.regs.draw_texture.dy_dv) / 4294967296.f) * dst_height + draw_texture_state.src_y0;
draw_texture_state.src_sampler = maxwell3d.regs.draw_texture.src_sampler;
draw_texture_state.src_texture = maxwell3d.regs.draw_texture.src_texture;
maxwell3d.rasterizer->DrawTexture();
}
void DrawManager::UpdateTopology() {
const auto& regs{maxwell3d->regs};
switch (regs.primitive_topology_control) {
case PrimitiveTopologyControl::UseInBeginMethods:
void Maxwell3D::DrawManager::UpdateTopology(Maxwell3D& maxwell3d) {
switch (maxwell3d.regs.primitive_topology_control) {
case Maxwell3D::Regs::PrimitiveTopologyControl::UseInBeginMethods:
break;
case PrimitiveTopologyControl::UseSeparateState:
switch (regs.topology_override) {
case PrimitiveTopologyOverride::None:
case Maxwell3D::Regs::PrimitiveTopologyControl::UseSeparateState:
switch (maxwell3d.regs.topology_override) {
case Maxwell3D::Regs::PrimitiveTopologyOverride::None:
break;
case PrimitiveTopologyOverride::Points:
draw_state.topology = PrimitiveTopology::Points;
case Maxwell3D::Regs::PrimitiveTopologyOverride::Points:
draw_state.topology = Maxwell3D::Regs::PrimitiveTopology::Points;
break;
case PrimitiveTopologyOverride::Lines:
draw_state.topology = PrimitiveTopology::Lines;
case Maxwell3D::Regs::PrimitiveTopologyOverride::Lines:
draw_state.topology = Maxwell3D::Regs::PrimitiveTopology::Lines;
break;
case PrimitiveTopologyOverride::LineStrip:
draw_state.topology = PrimitiveTopology::LineStrip;
case Maxwell3D::Regs::PrimitiveTopologyOverride::LineStrip:
draw_state.topology = Maxwell3D::Regs::PrimitiveTopology::LineStrip;
break;
default:
draw_state.topology = static_cast<PrimitiveTopology>(regs.topology_override);
draw_state.topology = Maxwell3D::Regs::PrimitiveTopology(maxwell3d.regs.topology_override);
break;
}
break;
}
}
void DrawManager::ProcessDraw(bool draw_indexed, u32 instance_count) {
LOG_TRACE(HW_GPU, "called, topology={}, count={}", draw_state.topology,
draw_indexed ? draw_state.index_buffer.count : draw_state.vertex_buffer.count);
UpdateTopology();
if (maxwell3d->ShouldExecute()) {
maxwell3d->rasterizer->Draw(draw_indexed, instance_count);
void Maxwell3D::DrawManager::ProcessDraw(Maxwell3D& maxwell3d, bool draw_indexed, u32 instance_count) {
LOG_TRACE(HW_GPU, "called, topology={}, count={}", draw_state.topology, draw_indexed ? draw_state.index_buffer.count : draw_state.vertex_buffer.count);
UpdateTopology(maxwell3d);
if (maxwell3d.ShouldExecute()) {
maxwell3d.rasterizer->Draw(draw_indexed, instance_count);
}
}
void DrawManager::ProcessDrawIndirect() {
LOG_TRACE(
HW_GPU,
"called, topology={}, is_indexed={}, includes_count={}, buffer_size={}, max_draw_count={}",
draw_state.topology, indirect_state.is_indexed, indirect_state.include_count,
indirect_state.buffer_size, indirect_state.max_draw_counts);
UpdateTopology();
if (maxwell3d->ShouldExecute()) {
maxwell3d->rasterizer->DrawIndirect();
void Maxwell3D::DrawManager::ProcessDrawIndirect(Maxwell3D& maxwell3d) {
LOG_TRACE(HW_GPU, "called, topology={}, is_indexed={}, includes_count={}, buffer_size={}, max_draw_count={}", draw_state.topology, indirect_state.is_indexed, indirect_state.include_count, indirect_state.buffer_size, indirect_state.max_draw_counts);
UpdateTopology(maxwell3d);
if (maxwell3d.ShouldExecute()) {
maxwell3d.rasterizer->DrawIndirect();
}
}
} // namespace Tegra::Engines

View file

@ -1,117 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
namespace VideoCore {
class RasterizerInterface;
}
namespace Tegra::Engines {
using PrimitiveTopologyControl = Maxwell3D::Regs::PrimitiveTopologyControl;
using PrimitiveTopology = Maxwell3D::Regs::PrimitiveTopology;
using PrimitiveTopologyOverride = Maxwell3D::Regs::PrimitiveTopologyOverride;
using IndexBuffer = Maxwell3D::Regs::IndexBuffer;
using VertexBuffer = Maxwell3D::Regs::VertexBuffer;
using IndexBufferSmall = Maxwell3D::Regs::IndexBufferSmall;
class DrawManager {
public:
enum class DrawMode : u32 { General = 0, Instance, InlineIndex };
struct State {
PrimitiveTopology topology{};
DrawMode draw_mode{};
bool draw_indexed{};
u32 base_index{};
VertexBuffer vertex_buffer;
IndexBuffer index_buffer;
u32 base_instance{};
u32 instance_count{};
std::vector<u8> inline_index_draw_indexes;
};
struct DrawTextureState {
f32 dst_x0;
f32 dst_y0;
f32 dst_x1;
f32 dst_y1;
f32 src_x0;
f32 src_y0;
f32 src_x1;
f32 src_y1;
u32 src_sampler;
u32 src_texture;
};
struct IndirectParams {
bool is_byte_count;
bool is_indexed;
bool include_count;
GPUVAddr count_start_address;
GPUVAddr indirect_start_address;
size_t buffer_size;
size_t max_draw_counts;
size_t stride;
};
explicit DrawManager(Maxwell3D* maxwell_3d);
void ProcessMethodCall(u32 method, u32 argument);
void Clear(u32 layer_count);
void DrawDeferred();
void DrawArray(PrimitiveTopology topology, u32 vertex_first, u32 vertex_count,
u32 base_instance, u32 num_instances);
void DrawArrayInstanced(PrimitiveTopology topology, u32 vertex_first, u32 vertex_count,
bool subsequent);
void DrawIndex(PrimitiveTopology topology, u32 index_first, u32 index_count, u32 base_index,
u32 base_instance, u32 num_instances);
void DrawArrayIndirect(PrimitiveTopology topology);
void DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first, u32 index_count);
const State& GetDrawState() const {
return draw_state;
}
const DrawTextureState& GetDrawTextureState() const {
return draw_texture_state;
}
IndirectParams& GetIndirectParams() {
return indirect_state;
}
const IndirectParams& GetIndirectParams() const {
return indirect_state;
}
private:
void SetInlineIndexBuffer(u32 index);
void DrawBegin();
void DrawEnd(u32 instance_count = 1, bool force_draw = false);
void DrawIndexSmall(u32 argument);
void DrawTexture();
void UpdateTopology();
void ProcessDraw(bool draw_indexed, u32 instance_count);
void ProcessDrawIndirect();
Maxwell3D* maxwell3d{};
State draw_state{};
DrawTextureState draw_texture_state{};
IndirectParams indirect_state{};
};
} // namespace Tegra::Engines

View file

@ -13,7 +13,7 @@
#include "core/core.h"
#include "core/core_timing.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/draw_manager.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
@ -26,7 +26,8 @@ namespace Tegra::Engines {
constexpr u32 MacroRegistersStart = 0xE00;
Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_)
: draw_manager{std::make_unique<DrawManager>(this)}, system{system_}
: draw_manager()
, system{system_}
, memory_manager{memory_manager_}
#ifdef ARCHITECTURE_x86_64
, macro_engine(bool(Settings::values.disable_macro_jit))
@ -373,8 +374,7 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
case MAXWELL3D_REG_INDEX(sync_info):
return ProcessSyncPoint();
case MAXWELL3D_REG_INDEX(launch_dma):
return upload_state.ProcessExec(regs.launch_dma.memory_layout.Value() ==
Regs::LaunchDMA::Layout::Pitch);
return upload_state.ProcessExec(regs.launch_dma.memory_layout.Value() == Regs::LaunchDMA::Layout::Pitch);
case MAXWELL3D_REG_INDEX(inline_data):
upload_state.ProcessData(argument, is_last_call);
return;
@ -386,7 +386,7 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
case MAXWELL3D_REG_INDEX(tiled_cache_barrier):
return rasterizer->TiledCacheBarrier();
default:
draw_manager->ProcessMethodCall(method, argument);
draw_manager.ProcessMethodCall(*this, method, argument);
break;
}
}
@ -401,8 +401,7 @@ void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters)
// Execute the current macro.
macro_engine.Execute(*this, macro_positions[entry], parameters);
draw_manager->DrawDeferred();
draw_manager.DrawDeferred(*this);
}
void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {

View file

@ -25,6 +25,7 @@
#include "video_core/gpu.h"
#include "video_core/macro.h"
#include "video_core/textures/texture.h"
#include "video_core/engines/maxwell_3d.h"
namespace Core {
class System;
@ -40,8 +41,6 @@ class RasterizerInterface;
namespace Tegra::Engines {
class DrawManager;
/**
* This Engine is known as GF100_3D. Documentation can be found in:
* https://github.com/NVIDIA/open-gpu-doc/blob/master/classes/3d/clb197.h
@ -543,7 +542,7 @@ public:
}
GPUVAddr StorageLimitAddress() const {
return (GPUVAddr{storage_limit_address_high} << 32) |
GPUVAddr{storage_limit_address_low};
GPUVAddr{storage_limit_address_low};
}
};
@ -819,7 +818,7 @@ public:
u32 Map(std::size_t index) const {
const std::array<u32, NumRenderTargets> maps{target0, target1, target2, target3,
target4, target5, target6, target7};
target4, target5, target6, target7};
ASSERT(index < maps.size());
return maps[index];
}
@ -1831,7 +1830,7 @@ public:
bool AnyEnabled() const {
return output0_enable || output1_enable || output2_enable || output3_enable ||
output4_enable || output5_enable || output6_enable || output7_enable;
output4_enable || output5_enable || output6_enable || output7_enable;
}
};
@ -1870,7 +1869,7 @@ public:
bool AnyEnabled() const {
return plane0 || plane1 || plane2 || plane3 || plane4 || plane5 || plane6 ||
plane7;
plane7;
}
};
@ -3023,8 +3022,7 @@ public:
u32 bindless_texture_const_buffer_slot; ///< 0x2608
u32 trap_handler; ///< 0x260C
INSERT_PADDING_BYTES_NOINIT(0x1F0);
std::array<std::array<StreamOutLayout, 32>, NumTransformFeedbackBuffers>
stream_out_layout; ///< 0x2800
std::array<std::array<StreamOutLayout, 32>, NumTransformFeedbackBuffers> stream_out_layout; ///< 0x2800
INSERT_PADDING_BYTES_NOINIT(0x93C);
ShaderPerformance shader_performance; ///< 0x333C
INSERT_PADDING_BYTES_NOINIT(0x18);
@ -3035,6 +3033,62 @@ public:
};
// clang-format on
struct DrawManager {
enum class DrawMode : u32 { General = 0, Instance, InlineIndex };
struct State {
Maxwell3D::Regs::PrimitiveTopology topology{};
DrawMode draw_mode{};
bool draw_indexed{};
u32 base_index{};
Maxwell3D::Regs::VertexBuffer vertex_buffer;
Maxwell3D::Regs::IndexBuffer index_buffer;
u32 base_instance{};
u32 instance_count{};
std::vector<u8> inline_index_draw_indexes;
};
struct DrawTextureState {
f32 dst_x0;
f32 dst_y0;
f32 dst_x1;
f32 dst_y1;
f32 src_x0;
f32 src_y0;
f32 src_x1;
f32 src_y1;
u32 src_sampler;
u32 src_texture;
};
struct IndirectParams {
bool is_byte_count;
bool is_indexed;
bool include_count;
GPUVAddr count_start_address;
GPUVAddr indirect_start_address;
size_t buffer_size;
size_t max_draw_counts;
size_t stride;
};
void ProcessMethodCall(Maxwell3D& maxwell3d, u32 method, u32 argument);
void Clear(Maxwell3D& maxwell3d, u32 layer_count);
void DrawDeferred(Maxwell3D& maxwell3d);
void DrawArray(Maxwell3D& maxwell3d, Maxwell3D::Regs::PrimitiveTopology topology, u32 vertex_first, u32 vertex_count, u32 base_instance, u32 num_instances);
void DrawArrayInstanced(Maxwell3D& maxwell3d, Maxwell3D::Regs::PrimitiveTopology topology, u32 vertex_first, u32 vertex_count, bool subsequent);
void DrawIndex(Maxwell3D& maxwell3d, Maxwell3D::Regs::PrimitiveTopology topology, u32 index_first, u32 index_count, u32 base_index, u32 base_instance, u32 num_instances);
void DrawArrayIndirect(Maxwell3D& maxwell3d, Maxwell3D::Regs::PrimitiveTopology topology);
void DrawIndexedIndirect(Maxwell3D& maxwell3d, Maxwell3D::Regs::PrimitiveTopology topology, u32 index_first, u32 index_count);
void SetInlineIndexBuffer(Maxwell3D& maxwell3d, u32 index);
void DrawBegin(Maxwell3D& maxwell3d);
void DrawEnd(Maxwell3D& maxwell3d, u32 instance_count = 1, bool force_draw = false);
void DrawIndexSmall(Maxwell3D& maxwell3d, u32 argument);
void DrawTexture(Maxwell3D& maxwell3d);
void UpdateTopology(Maxwell3D& maxwell3d);
void ProcessDraw(Maxwell3D& maxwell3d, bool draw_indexed, u32 instance_count);
void ProcessDrawIndirect(Maxwell3D& maxwell3d);
State draw_state{};
DrawTextureState draw_texture_state{};
IndirectParams indirect_state{};
};
Regs regs{};
/// Store temporary hw register values, used by some calls to restore state after a operation
@ -3102,8 +3156,7 @@ public:
Tables tables{};
} dirty;
std::unique_ptr<DrawManager> draw_manager;
friend class DrawManager;
DrawManager draw_manager;
GPUVAddr GetMacroAddress(size_t index) const {
return macro_addresses[index];