[vulkan] Adjustment VK_EXT_transform_feedback

This commit is contained in:
CamilleLaVey 2026-04-11 01:46:00 -04:00
parent b7ee3bef80
commit 8555a82888
15 changed files with 144 additions and 38 deletions

View file

@ -178,6 +178,9 @@ void DefineGenericOutput(EmitContext& ctx, size_t index, std::optional<u32> invo
ctx.Decorate(id, spv::Decoration::XfbBuffer, xfb_varying->buffer);
ctx.Decorate(id, spv::Decoration::XfbStride, xfb_varying->stride);
ctx.Decorate(id, spv::Decoration::Offset, xfb_varying->offset);
if (ctx.stage == Stage::Geometry && xfb_varying->stream != 0) {
ctx.Decorate(id, spv::Decoration::Stream, xfb_varying->stream);
}
}
if (num_components < 4 || element > 0) {
const std::string_view subswizzle{swizzle.substr(element, num_components)};

View file

@ -76,6 +76,7 @@ enum class TessSpacing {
struct TransformFeedbackVarying {
u32 buffer{};
u32 stream{};
u32 stride{};
u32 offset{};
u32 components{};

View file

@ -1067,26 +1067,29 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() {
HostBindings<typename P::Buffer> host_bindings;
for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
const Binding& binding = channel_state->transform_feedback_buffers[index];
if (maxwell3d->regs.transform_feedback.controls[index].varying_count == 0 &&
maxwell3d->regs.transform_feedback.controls[index].stride == 0) {
break;
const auto& control = maxwell3d->regs.transform_feedback.controls[index];
const bool has_layout = control.varying_count != 0 || control.stride != 0;
Buffer* host_buffer = &slot_buffers[NULL_BUFFER_ID];
u32 offset = 0;
u32 size = 0;
if (has_layout && binding.buffer_id != NULL_BUFFER_ID && binding.size != 0) {
Buffer& buffer = slot_buffers[binding.buffer_id];
TouchBuffer(buffer, binding.buffer_id);
size = binding.size;
SynchronizeBuffer(buffer, binding.device_addr, size);
MarkWrittenBuffer(binding.buffer_id, binding.device_addr, size);
offset = buffer.Offset(binding.device_addr);
buffer.MarkUsage(offset, size);
host_buffer = &buffer;
}
Buffer& buffer = slot_buffers[binding.buffer_id];
TouchBuffer(buffer, binding.buffer_id);
const u32 size = binding.size;
SynchronizeBuffer(buffer, binding.device_addr, size);
MarkWrittenBuffer(binding.buffer_id, binding.device_addr, size);
const u32 offset = buffer.Offset(binding.device_addr);
buffer.MarkUsage(offset, size);
host_bindings.buffers.push_back(&buffer);
host_bindings.buffers.push_back(host_buffer);
host_bindings.offsets.push_back(offset);
host_bindings.sizes.push_back(size);
}
if (host_bindings.buffers.size() > 0) {
runtime.BindTransformFeedbackBuffers(host_bindings);
}
runtime.BindTransformFeedbackBuffers(host_bindings);
}
template <class P>

View file

@ -285,11 +285,11 @@ void HLE_MultiDrawIndexedIndirectCount::Fallback(Engines::Maxwell3D& maxwell3d,
}
void HLE_DrawIndirectByteCount::Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) {
const bool force = maxwell3d.Rasterizer().HasDrawTransformFeedback();
auto topology = Maxwell3D::Regs::PrimitiveTopology(parameters[0] & 0xFFFFU);
if (!force && (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology))) {
if (!force) {
Fallback(maxwell3d, parameters);
return;
}
auto topology = Maxwell3D::Regs::PrimitiveTopology(parameters[0] & 0xFFFFU);
auto& params = maxwell3d.draw_manager->GetIndirectParams();
params.is_byte_count = true;
params.is_indexed = false;

View file

@ -75,7 +75,7 @@ public:
}
u64 GetDependentMask() const {
return dependence_mask;
return dependent_mask;
}
u64 GetAmendValue() const {

View file

@ -637,12 +637,10 @@ void BufferCacheRuntime::BindTransformFeedbackBuffers(VideoCommon::HostBindings<
for (u32 i = 0; i < bindings.buffers.size(); ++i) {
auto handle = bindings.buffers[i]->Handle();
if (handle == VK_NULL_HANDLE) {
ReserveNullBuffer();
handle = *null_buffer;
bindings.offsets[i] = 0;
bindings.sizes[i] = VK_WHOLE_SIZE;
if (!device.HasNullDescriptor()) {
ReserveNullBuffer();
handle = *null_buffer;
}
bindings.sizes[i] = 0;
}
buffer_handles[i] = handle;
}

View file

@ -467,6 +467,10 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
bind_stage_info(4);
}
if (regs.transform_feedback_enabled != 0) {
scheduler.RequestOutsideRenderPassOperationContext();
}
buffer_cache.UpdateGraphicsBuffers(is_indexed);
buffer_cache.BindHostGeometryBuffers(is_indexed);

View file

@ -665,13 +665,17 @@ public:
offsets.fill(0);
last_queries.fill(0);
last_queries_stride.fill(1);
VkBufferUsageFlags counter_buffer_usage =
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
if (device.IsExtTransformFeedbackSupported()) {
counter_buffer_usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_COUNTER_BUFFER_BIT_EXT;
}
const VkBufferCreateInfo buffer_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = TFBQueryBank::QUERY_SIZE * NUM_STREAMS,
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_COUNTER_BUFFER_BIT_EXT,
.usage = counter_buffer_usage,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
@ -691,6 +695,9 @@ public:
~TFBCounterStreamer() = default;
/// Starts the counter: when the device reports transform feedback support, calls
/// FlushBeginTFB() and marks the counter as started. Without that support the call
/// is a no-op and has_started is left untouched.
void StartCounter() override {
    const bool tfb_available = device.IsExtTransformFeedbackSupported();
    if (tfb_available) {
        FlushBeginTFB();
        has_started = true;
    }
}
@ -705,7 +712,9 @@ public:
void CloseCounter() override {
if (has_flushed_end_pending) {
FlushEndTFB();
if (scheduler.IsRenderPassActive()) {
FlushEndTFB();
}
}
runtime.View3DRegs([this](Maxwell3D& maxwell3d) {
if (maxwell3d.regs.transform_feedback_enabled == 0) {
@ -755,6 +764,10 @@ public:
if (has_timestamp) {
new_query->flags |= VideoCommon::QueryFlagBits::HasTimestamp;
}
if (!device.IsExtTransformFeedbackSupported()) {
new_query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced;
return index;
}
if (!subreport_) {
new_query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced;
return index;
@ -765,6 +778,8 @@ public:
new_query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced;
return index;
}
scheduler.RequestOutsideRenderPassOperationContext();
CloseCounter();
auto [bank_slot, data_slot] = ProduceCounterBuffer(subreport);
new_query->start_bank_id = static_cast<u32>(bank_slot);
@ -788,6 +803,10 @@ public:
return out_topology;
}
/// Returns the patch vertex count currently stored in patch_vertices.
/// NOTE(review): patch_vertices appears to be refreshed from
/// maxwell3d.regs.patch_vertices with a lower bound of 1 elsewhere in this class —
/// confirm before relying on a non-zero result.
u32 GetPatchVertices() const {
return patch_vertices;
}
/// Returns true while pending_flush_queries still holds at least one entry.
bool HasUnsyncedQueries() const override {
return !pending_flush_queries.empty();
}
@ -854,6 +873,9 @@ public:
private:
void FlushBeginTFB() {
if (!device.IsExtTransformFeedbackSupported()) [[unlikely]] {
return;
}
if (has_flushed_end_pending) [[unlikely]] {
return;
}
@ -867,12 +889,24 @@ private:
});
return;
}
static constexpr VkMemoryBarrier COUNTER_RESUME_BARRIER{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT,
.dstAccessMask = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT,
};
scheduler.Record([this, total = static_cast<u32>(buffers_count)](vk::CommandBuffer cmdbuf) {
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT,
VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT, 0,
COUNTER_RESUME_BARRIER);
cmdbuf.BeginTransformFeedbackEXT(0, total, counter_buffers.data(), offsets.data());
});
}
void FlushEndTFB() {
if (!device.IsExtTransformFeedbackSupported()) [[unlikely]] {
return;
}
if (!has_flushed_end_pending) [[unlikely]] {
UNREACHABLE();
return;
@ -902,6 +936,21 @@ private:
runtime.View3DRegs([this](Maxwell3D& maxwell3d) {
buffers_count = 0;
out_topology = maxwell3d.draw_manager->GetDrawState().topology;
patch_vertices = std::max(maxwell3d.regs.patch_vertices, 1U);
if (out_topology == Maxwell3D::Regs::PrimitiveTopology::Patches) {
switch (maxwell3d.regs.tessellation.params.output_primitives.Value()) {
case Maxwell3D::Regs::Tessellation::OutputPrimitives::Points:
out_topology = Maxwell3D::Regs::PrimitiveTopology::Points;
break;
case Maxwell3D::Regs::Tessellation::OutputPrimitives::Lines:
out_topology = Maxwell3D::Regs::PrimitiveTopology::LineStrip;
break;
case Maxwell3D::Regs::Tessellation::OutputPrimitives::Triangles_CW:
case Maxwell3D::Regs::Tessellation::OutputPrimitives::Triangles_CCW:
out_topology = Maxwell3D::Regs::PrimitiveTopology::TriangleStrip;
break;
}
}
for (size_t i = 0; i < Maxwell3D::Regs::NumTransformFeedbackBuffers; i++) {
const auto& tf = maxwell3d.regs.transform_feedback;
if (tf.buffers[i].enable == 0) {
@ -994,6 +1043,7 @@ private:
std::array<DAddr, NUM_STREAMS> last_queries;
std::array<size_t, NUM_STREAMS> last_queries_stride;
Maxwell3D::Regs::PrimitiveTopology out_topology;
u32 patch_vertices{1};
u64 streams_mask;
};
@ -1014,6 +1064,7 @@ public:
u64 stride{};
DAddr dependant_address{};
Maxwell3D::Regs::PrimitiveTopology topology{Maxwell3D::Regs::PrimitiveTopology::Points};
u32 patch_vertices{1};
size_t dependant_index{};
bool dependant_manage{};
};
@ -1030,6 +1081,10 @@ public:
~PrimitivesSucceededStreamer() = default;
/// Forwards the reset request to the underlying tfb_streamer.
void ResetCounter() override {
tfb_streamer.ResetCounter();
}
size_t WriteCounter(DAddr address, bool has_timestamp, u32 value,
std::optional<u32> subreport_) override {
auto index = BuildQuery();
@ -1047,6 +1102,7 @@ public:
auto dependant_address_opt = tfb_streamer.GetLastQueryStream(subreport);
bool must_manage_dependance = false;
new_query->topology = tfb_streamer.GetOutputTopology();
new_query->patch_vertices = tfb_streamer.GetPatchVertices();
if (dependant_address_opt) {
auto [dep_address, stride] = *dependant_address_opt;
new_query->dependant_address = dep_address;
@ -1067,6 +1123,7 @@ public:
}
new_query->stride = 1;
runtime.View3DRegs([new_query, subreport](Maxwell3D& maxwell3d) {
new_query->patch_vertices = std::max(maxwell3d.regs.patch_vertices, 1U);
for (size_t i = 0; i < Maxwell3D::Regs::NumTransformFeedbackBuffers; i++) {
const auto& tf = maxwell3d.regs.transform_feedback;
if (tf.buffers[i].enable == 0) {
@ -1130,27 +1187,39 @@ public:
}
}
query->value = [&]() -> u64 {
const auto saturating_subtract = [](u64 value, u64 amount) {
return value > amount ? value - amount : 0;
};
switch (query->topology) {
case Maxwell3D::Regs::PrimitiveTopology::Points:
return num_vertices;
case Maxwell3D::Regs::PrimitiveTopology::Lines:
return num_vertices / 2;
case Maxwell3D::Regs::PrimitiveTopology::LineLoop:
return (num_vertices / 2) + 1;
return num_vertices > 1 ? num_vertices : 0;
case Maxwell3D::Regs::PrimitiveTopology::LineStrip:
return num_vertices - 1;
case Maxwell3D::Regs::PrimitiveTopology::Patches:
return saturating_subtract(num_vertices, 1);
case Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency:
return num_vertices / 4;
case Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency:
return saturating_subtract(num_vertices, 3);
case Maxwell3D::Regs::PrimitiveTopology::Triangles:
case Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency:
return num_vertices / 3;
case Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency:
return num_vertices / 6;
case Maxwell3D::Regs::PrimitiveTopology::TriangleFan:
case Maxwell3D::Regs::PrimitiveTopology::TriangleStrip:
return saturating_subtract(num_vertices, 2);
case Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency:
return num_vertices - 2;
return num_vertices > 4 ? (num_vertices - 4) / 2 : 0;
case Maxwell3D::Regs::PrimitiveTopology::Quads:
return num_vertices / 4;
case Maxwell3D::Regs::PrimitiveTopology::QuadStrip:
return num_vertices > 2 ? (num_vertices - 2) / 2 : 0;
case Maxwell3D::Regs::PrimitiveTopology::Polygon:
return 1U;
return num_vertices >= 3 ? 1U : 0U;
case Maxwell3D::Regs::PrimitiveTopology::Patches:
return num_vertices / std::max<u64>(query->patch_vertices, 1U);
default:
return num_vertices;
}

View file

@ -577,11 +577,17 @@ void RasterizerVulkan::DispatchCompute() {
}
void RasterizerVulkan::ResetCounter(VideoCommon::QueryType type) {
if (type != VideoCommon::QueryType::ZPassPixelCount64) {
switch (type) {
case VideoCommon::QueryType::ZPassPixelCount64:
case VideoCommon::QueryType::StreamingByteCount:
case VideoCommon::QueryType::StreamingPrimitivesSucceeded:
case VideoCommon::QueryType::VtgPrimitivesOut:
query_cache.CounterReset(type);
return;
default:
LOG_DEBUG(Render_Vulkan, "Unimplemented counter reset={}", type);
return;
}
query_cache.CounterReset(type);
}
void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type,
@ -835,6 +841,10 @@ bool RasterizerVulkan::AccelerateConditionalRendering() {
return query_cache.AccelerateHostConditionalRendering();
}
/// Reports whether transform feedback draws are available by forwarding to
/// device.IsTransformFeedbackDrawSupported().
bool RasterizerVulkan::HasDrawTransformFeedback() {
return device.IsTransformFeedbackDrawSupported();
}
bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) {

View file

@ -122,6 +122,7 @@ public:
void FlushCommands() override;
void TickFrame() override;
bool AccelerateConditionalRendering() override;
bool HasDrawTransformFeedback() override;
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) override;

View file

@ -324,6 +324,8 @@ void Scheduler::EndRenderPass()
return;
}
query_cache->CounterClose(VideoCommon::QueryType::StreamingByteCount);
// Log render pass end
if (Settings::values.gpu_logging_enabled.GetValue() &&
Settings::values.gpu_log_vulkan_calls.GetValue()) {

View file

@ -63,6 +63,11 @@ public:
/// of a renderpass.
void RequestOutsideRenderPassOperationContext();
/// Returns true when a render pass is currently active in the scheduler state,
/// i.e. when state.renderpass is not VK_NULL_HANDLE.
bool IsRenderPassActive() const {
return state.renderpass != VK_NULL_HANDLE;
}
/// Update the pipeline to the current execution context.
bool UpdateGraphicsPipeline(GraphicsPipeline* pipeline);

View file

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
@ -88,13 +88,13 @@ std::pair<std::array<Shader::TransformFeedbackVarying, 256>, u32> MakeTransformF
return 0;
};
UNIMPLEMENTED_IF_MSG(layout.stream != 0, "Stream is not zero: {}", layout.stream);
Shader::TransformFeedbackVarying varying{
.buffer = static_cast<u32>(buffer),
.stride = layout.stride,
.offset = offset * 4,
.components = 1,
};
varying.stream = layout.stream;
const u32 base_offset = offset;
const auto attribute{get_attribute(offset)};
if (std::ranges::find(VECTORS, Common::AlignDown(attribute, 4)) != VECTORS.end()) {

View file

@ -1292,8 +1292,7 @@ void Device::RemoveUnsuitableExtensions() {
// VK_EXT_transform_feedback
extensions.transform_feedback =
features.transform_feedback.transformFeedback &&
properties.transform_feedback.maxTransformFeedbackBuffers > 0 &&
properties.transform_feedback.transformFeedbackQueries;
properties.transform_feedback.maxTransformFeedbackBuffers > 0;
RemoveExtensionFeatureIfUnsuitable(extensions.transform_feedback, features.transform_feedback,
VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME);

View file

@ -538,6 +538,17 @@ public:
return extensions.transform_feedback;
}
/// Returns true if transform feedback draw commands are supported: the
/// transform feedback extension must be enabled and the device properties must
/// report transformFeedbackDraw.
bool IsTransformFeedbackDrawSupported() const {
    if (!extensions.transform_feedback) {
        return false;
    }
    return properties.transform_feedback.transformFeedbackDraw != 0;
}
/// Returns true if transform feedback query types are supported: the transform
/// feedback extension must be enabled and the device properties must report
/// transformFeedbackQueries.
bool IsTransformFeedbackQueriesSupported() const {
    if (!extensions.transform_feedback) {
        return false;
    }
    return properties.transform_feedback.transformFeedbackQueries != 0;
}
/// Returns true if the device reports the geometryStreams feature of VK_EXT_transform_feedback.
bool AreTransformFeedbackGeometryStreamsSupported() const {
return features.transform_feedback.geometryStreams;