From 8555a82888fa55bba99389956bfcd1983eeae3f1 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Sat, 11 Apr 2026 01:46:00 -0400 Subject: [PATCH] [vulkan] Adjustment VK_EXT_transform_feedback --- .../backend/spirv/spirv_emit_context.cpp | 3 + src/shader_recompiler/runtime_info.h | 1 + src/video_core/buffer_cache/buffer_cache.h | 33 +++---- src/video_core/macro.cpp | 4 +- src/video_core/query_cache/query_stream.h | 2 +- .../renderer_vulkan/vk_buffer_cache.cpp | 8 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 4 + .../renderer_vulkan/vk_query_cache.cpp | 87 +++++++++++++++++-- .../renderer_vulkan/vk_rasterizer.cpp | 14 ++- .../renderer_vulkan/vk_rasterizer.h | 1 + .../renderer_vulkan/vk_scheduler.cpp | 2 + src/video_core/renderer_vulkan/vk_scheduler.h | 5 ++ src/video_core/transform_feedback.cpp | 4 +- .../vulkan_common/vulkan_device.cpp | 3 +- src/video_core/vulkan_common/vulkan_device.h | 11 +++ 15 files changed, 144 insertions(+), 38 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index b9a24496c9..169e83d9fd 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -178,6 +178,9 @@ void DefineGenericOutput(EmitContext& ctx, size_t index, std::optional invo ctx.Decorate(id, spv::Decoration::XfbBuffer, xfb_varying->buffer); ctx.Decorate(id, spv::Decoration::XfbStride, xfb_varying->stride); ctx.Decorate(id, spv::Decoration::Offset, xfb_varying->offset); + if (ctx.stage == Stage::Geometry && xfb_varying->stream != 0) { + ctx.Decorate(id, spv::Decoration::Stream, xfb_varying->stream); + } } if (num_components < 4 || element > 0) { const std::string_view subswizzle{swizzle.substr(element, num_components)}; diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index be10a9bb08..e6e1284762 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -76,6 +76,7 @@ enum class TessSpacing { struct TransformFeedbackVarying { u32 buffer{}; + u32 stream{}; u32 stride{}; u32 offset{}; u32 components{}; diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 014b4a318e..c857e90e02 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -1067,26 +1067,29 @@ void BufferCache

::BindHostTransformFeedbackBuffers() { HostBindings host_bindings; for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) { const Binding& binding = channel_state->transform_feedback_buffers[index]; - if (maxwell3d->regs.transform_feedback.controls[index].varying_count == 0 && - maxwell3d->regs.transform_feedback.controls[index].stride == 0) { - break; + const auto& control = maxwell3d->regs.transform_feedback.controls[index]; + const bool has_layout = control.varying_count != 0 || control.stride != 0; + + Buffer* host_buffer = &slot_buffers[NULL_BUFFER_ID]; + u32 offset = 0; + u32 size = 0; + + if (has_layout && binding.buffer_id != NULL_BUFFER_ID && binding.size != 0) { + Buffer& buffer = slot_buffers[binding.buffer_id]; + TouchBuffer(buffer, binding.buffer_id); + size = binding.size; + SynchronizeBuffer(buffer, binding.device_addr, size); + MarkWrittenBuffer(binding.buffer_id, binding.device_addr, size); + offset = buffer.Offset(binding.device_addr); + buffer.MarkUsage(offset, size); + host_buffer = &buffer; } - Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer, binding.buffer_id); - const u32 size = binding.size; - SynchronizeBuffer(buffer, binding.device_addr, size); - MarkWrittenBuffer(binding.buffer_id, binding.device_addr, size); - - const u32 offset = buffer.Offset(binding.device_addr); - buffer.MarkUsage(offset, size); - host_bindings.buffers.push_back(&buffer); + host_bindings.buffers.push_back(host_buffer); host_bindings.offsets.push_back(offset); host_bindings.sizes.push_back(size); } - if (host_bindings.buffers.size() > 0) { - runtime.BindTransformFeedbackBuffers(host_bindings); - } + runtime.BindTransformFeedbackBuffers(host_bindings); } template diff --git a/src/video_core/macro.cpp b/src/video_core/macro.cpp index 66cea5afbd..2cda78c459 100644 --- a/src/video_core/macro.cpp +++ b/src/video_core/macro.cpp @@ -285,11 +285,11 @@ void HLE_MultiDrawIndexedIndirectCount::Fallback(Engines::Maxwell3D& maxwell3d, } void HLE_DrawIndirectByteCount::Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, [[maybe_unused]] u32 method) { const bool force = maxwell3d.Rasterizer().HasDrawTransformFeedback(); - auto topology = Maxwell3D::Regs::PrimitiveTopology(parameters[0] & 0xFFFFU); - if (!force && (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology))) { + if (!force) { Fallback(maxwell3d, parameters); return; } + auto topology = Maxwell3D::Regs::PrimitiveTopology(parameters[0] & 0xFFFFU); auto& params = maxwell3d.draw_manager->GetIndirectParams(); params.is_byte_count = true; params.is_indexed = false; diff --git a/src/video_core/query_cache/query_stream.h b/src/video_core/query_cache/query_stream.h index 1d11b12752..130c079eac 100644 --- a/src/video_core/query_cache/query_stream.h +++ b/src/video_core/query_cache/query_stream.h @@ -75,7 +75,7 @@ public: } u64 GetDependentMask() const { - return dependence_mask; + return dependent_mask; } u64 GetAmendValue() const { diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 74f06427dd..a359502046 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -637,12 +637,10 @@ void BufferCacheRuntime::BindTransformFeedbackBuffers(VideoCommon::HostBindings< for (u32 i = 0; i < bindings.buffers.size(); ++i) { auto handle = bindings.buffers[i]->Handle(); if (handle == VK_NULL_HANDLE) { + ReserveNullBuffer(); + handle = *null_buffer; bindings.offsets[i] = 0; - bindings.sizes[i] = VK_WHOLE_SIZE; - if (!device.HasNullDescriptor()) { - ReserveNullBuffer(); - handle = *null_buffer; - } + bindings.sizes[i] = 0; } buffer_handles[i] = handle; } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 8f706a02c8..43fbefe425 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -467,6 +467,10 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) { bind_stage_info(4); } + if (regs.transform_feedback_enabled != 0) { + scheduler.RequestOutsideRenderPassOperationContext(); + } + buffer_cache.UpdateGraphicsBuffers(is_indexed); buffer_cache.BindHostGeometryBuffers(is_indexed); diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index fa031bf4d0..281983168e 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -665,13 +665,17 @@ public: offsets.fill(0); last_queries.fill(0); last_queries_stride.fill(1); + VkBufferUsageFlags counter_buffer_usage = + VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + if (device.IsExtTransformFeedbackSupported()) { + counter_buffer_usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_COUNTER_BUFFER_BIT_EXT; + } const VkBufferCreateInfo buffer_ci = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, .size = TFBQueryBank::QUERY_SIZE * NUM_STREAMS, - .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | - VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_COUNTER_BUFFER_BIT_EXT, + .usage = counter_buffer_usage, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, @@ -691,6 +695,9 @@ public: ~TFBCounterStreamer() = default; void StartCounter() override { + if (!device.IsExtTransformFeedbackSupported()) { + return; + } FlushBeginTFB(); has_started = true; } @@ -705,7 +712,9 @@ public: void CloseCounter() override { if (has_flushed_end_pending) { - FlushEndTFB(); + if (scheduler.IsRenderPassActive()) { + FlushEndTFB(); + } } runtime.View3DRegs([this](Maxwell3D& maxwell3d) { if (maxwell3d.regs.transform_feedback_enabled == 0) { @@ -755,6 +764,10 @@ public: if (has_timestamp) { new_query->flags |= VideoCommon::QueryFlagBits::HasTimestamp; } + if (!device.IsExtTransformFeedbackSupported()) { + new_query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced; + return index; + } if (!subreport_) { new_query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced; return index; @@ -765,6 +778,8 @@ public: new_query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced; return index; } + + scheduler.RequestOutsideRenderPassOperationContext(); CloseCounter(); auto [bank_slot, data_slot] = ProduceCounterBuffer(subreport); new_query->start_bank_id = static_cast(bank_slot); @@ -788,6 +803,10 @@ public: return out_topology; } + u32 GetPatchVertices() const { + return patch_vertices; + } + bool HasUnsyncedQueries() const override { return !pending_flush_queries.empty(); } @@ -854,6 +873,9 @@ public: private: void FlushBeginTFB() { + if (!device.IsExtTransformFeedbackSupported()) [[unlikely]] { + return; + } if (has_flushed_end_pending) [[unlikely]] { return; } @@ -867,12 +889,24 @@ private: }); return; } + static constexpr VkMemoryBarrier COUNTER_RESUME_BARRIER{ + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT, + .dstAccessMask = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT, + }; scheduler.Record([this, total = static_cast(buffers_count)](vk::CommandBuffer cmdbuf) { + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT, + VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT, 0, + COUNTER_RESUME_BARRIER); cmdbuf.BeginTransformFeedbackEXT(0, total, counter_buffers.data(), offsets.data()); }); } void FlushEndTFB() { + if (!device.IsExtTransformFeedbackSupported()) [[unlikely]] { + return; + } if (!has_flushed_end_pending) [[unlikely]] { UNREACHABLE(); return; @@ -902,6 +936,21 @@ private: runtime.View3DRegs([this](Maxwell3D& maxwell3d) { buffers_count = 0; out_topology = maxwell3d.draw_manager->GetDrawState().topology; + patch_vertices = std::max(maxwell3d.regs.patch_vertices, 1U); + if (out_topology == Maxwell3D::Regs::PrimitiveTopology::Patches) { + switch (maxwell3d.regs.tessellation.params.output_primitives.Value()) { + case Maxwell3D::Regs::Tessellation::OutputPrimitives::Points: + out_topology = Maxwell3D::Regs::PrimitiveTopology::Points; + break; + case Maxwell3D::Regs::Tessellation::OutputPrimitives::Lines: + out_topology = Maxwell3D::Regs::PrimitiveTopology::LineStrip; + break; + case Maxwell3D::Regs::Tessellation::OutputPrimitives::Triangles_CW: + case Maxwell3D::Regs::Tessellation::OutputPrimitives::Triangles_CCW: + out_topology = Maxwell3D::Regs::PrimitiveTopology::TriangleStrip; + break; + } + } for (size_t i = 0; i < Maxwell3D::Regs::NumTransformFeedbackBuffers; i++) { const auto& tf = maxwell3d.regs.transform_feedback; if (tf.buffers[i].enable == 0) { @@ -994,6 +1043,7 @@ private: std::array last_queries; std::array last_queries_stride; Maxwell3D::Regs::PrimitiveTopology out_topology; + u32 patch_vertices{1}; u64 streams_mask; }; @@ -1014,6 +1064,7 @@ public: u64 stride{}; DAddr dependant_address{}; Maxwell3D::Regs::PrimitiveTopology topology{Maxwell3D::Regs::PrimitiveTopology::Points}; + u32 patch_vertices{1}; size_t dependant_index{}; bool dependant_manage{}; }; @@ -1030,6 +1081,10 @@ public: ~PrimitivesSucceededStreamer() = default; + void ResetCounter() override { + tfb_streamer.ResetCounter(); + } + size_t WriteCounter(DAddr address, bool has_timestamp, u32 value, std::optional subreport_) override { auto index = BuildQuery(); @@ -1047,6 +1102,7 @@ public: auto dependant_address_opt = tfb_streamer.GetLastQueryStream(subreport); bool must_manage_dependance = false; new_query->topology = tfb_streamer.GetOutputTopology(); + new_query->patch_vertices = tfb_streamer.GetPatchVertices(); if (dependant_address_opt) { auto [dep_address, stride] = *dependant_address_opt; new_query->dependant_address = dep_address; @@ -1067,6 +1123,7 @@ public: } new_query->stride = 1; runtime.View3DRegs([new_query, subreport](Maxwell3D& maxwell3d) { + new_query->patch_vertices = std::max(maxwell3d.regs.patch_vertices, 1U); for (size_t i = 0; i < Maxwell3D::Regs::NumTransformFeedbackBuffers; i++) { const auto& tf = maxwell3d.regs.transform_feedback; if (tf.buffers[i].enable == 0) { @@ -1130,27 +1187,39 @@ public: } } query->value = [&]() -> u64 { + const auto saturating_subtract = [](u64 value, u64 amount) { + return value > amount ? value - amount : 0; + }; switch (query->topology) { case Maxwell3D::Regs::PrimitiveTopology::Points: return num_vertices; case Maxwell3D::Regs::PrimitiveTopology::Lines: return num_vertices / 2; case Maxwell3D::Regs::PrimitiveTopology::LineLoop: - return (num_vertices / 2) + 1; + return num_vertices > 1 ? num_vertices : 0; case Maxwell3D::Regs::PrimitiveTopology::LineStrip: - return num_vertices - 1; - case Maxwell3D::Regs::PrimitiveTopology::Patches: + return saturating_subtract(num_vertices, 1); + case Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency: + return num_vertices / 4; + case Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency: + return saturating_subtract(num_vertices, 3); case Maxwell3D::Regs::PrimitiveTopology::Triangles: - case Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency: return num_vertices / 3; + case Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency: + return num_vertices / 6; case Maxwell3D::Regs::PrimitiveTopology::TriangleFan: case Maxwell3D::Regs::PrimitiveTopology::TriangleStrip: + return saturating_subtract(num_vertices, 2); case Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: - return num_vertices - 2; + return num_vertices > 4 ? (num_vertices - 4) / 2 : 0; case Maxwell3D::Regs::PrimitiveTopology::Quads: return num_vertices / 4; + case Maxwell3D::Regs::PrimitiveTopology::QuadStrip: + return num_vertices > 2 ? (num_vertices - 2) / 2 : 0; case Maxwell3D::Regs::PrimitiveTopology::Polygon: - return 1U; + return num_vertices >= 3 ? 1U : 0U; + case Maxwell3D::Regs::PrimitiveTopology::Patches: + return num_vertices / std::max(query->patch_vertices, 1U); default: return num_vertices; } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 38dc23ff43..743af16634 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -577,11 +577,17 @@ void RasterizerVulkan::DispatchCompute() { } void RasterizerVulkan::ResetCounter(VideoCommon::QueryType type) { - if (type != VideoCommon::QueryType::ZPassPixelCount64) { + switch (type) { + case VideoCommon::QueryType::ZPassPixelCount64: + case VideoCommon::QueryType::StreamingByteCount: + case VideoCommon::QueryType::StreamingPrimitivesSucceeded: + case VideoCommon::QueryType::VtgPrimitivesOut: + query_cache.CounterReset(type); + return; + default: LOG_DEBUG(Render_Vulkan, "Unimplemented counter reset={}", type); return; } - query_cache.CounterReset(type); } void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, @@ -835,6 +841,10 @@ bool RasterizerVulkan::AccelerateConditionalRendering() { return query_cache.AccelerateHostConditionalRendering(); } +bool RasterizerVulkan::HasDrawTransformFeedback() { + return device.IsTransformFeedbackDrawSupported(); +} + bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Config& copy_config) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index b689c6b660..06cbdd2679 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -122,6 +122,7 @@ public: void FlushCommands() override; void TickFrame() override; bool AccelerateConditionalRendering() override; + bool HasDrawTransformFeedback() override; bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Config& copy_config) override; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 226619d8d6..fdaf9baacc 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -324,6 +324,8 @@ void Scheduler::EndRenderPass() return; } + query_cache->CounterClose(VideoCommon::QueryType::StreamingByteCount); + // Log render pass end if (Settings::values.gpu_logging_enabled.GetValue() && Settings::values.gpu_log_vulkan_calls.GetValue()) { diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 00a912f2cd..0709c3a370 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -63,6 +63,11 @@ public: /// of a renderpass. void RequestOutsideRenderPassOperationContext(); + /// Returns true when a render pass is currently active in the scheduler state. + bool IsRenderPassActive() const { + return state.renderpass != VK_NULL_HANDLE; + } + /// Update the pipeline to the current execution context. bool UpdateGraphicsPipeline(GraphicsPipeline* pipeline); diff --git a/src/video_core/transform_feedback.cpp b/src/video_core/transform_feedback.cpp index a8f9da9853..53d29c08e2 100644 --- a/src/video_core/transform_feedback.cpp +++ b/src/video_core/transform_feedback.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project @@ -88,13 +88,13 @@ std::pair, u32> MakeTransformF return 0; }; - UNIMPLEMENTED_IF_MSG(layout.stream != 0, "Stream is not zero: {}", layout.stream); Shader::TransformFeedbackVarying varying{ .buffer = static_cast(buffer), .stride = layout.stride, .offset = offset * 4, .components = 1, }; + varying.stream = layout.stream; const u32 base_offset = offset; const auto attribute{get_attribute(offset)}; if (std::ranges::find(VECTORS, Common::AlignDown(attribute, 4)) != VECTORS.end()) { diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 8e5ca6ac6c..f48fe39e4e 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -1292,8 +1292,7 @@ void Device::RemoveUnsuitableExtensions() { // VK_EXT_transform_feedback extensions.transform_feedback = features.transform_feedback.transformFeedback && - properties.transform_feedback.maxTransformFeedbackBuffers > 0 && - properties.transform_feedback.transformFeedbackQueries; + properties.transform_feedback.maxTransformFeedbackBuffers > 0; RemoveExtensionFeatureIfUnsuitable(extensions.transform_feedback, features.transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME); diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 8914ef0eb3..cf341726a7 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -538,6 +538,17 @@ public: return extensions.transform_feedback; } + /// Returns true if transform feedback draw commands are supported. + bool IsTransformFeedbackDrawSupported() const { + return extensions.transform_feedback && properties.transform_feedback.transformFeedbackDraw; + } + + /// Returns true if transform feedback query types are supported. + bool IsTransformFeedbackQueriesSupported() const { + return extensions.transform_feedback && + properties.transform_feedback.transformFeedbackQueries; + } + /// Returns true if the device supports VK_EXT_transform_feedback properly. bool AreTransformFeedbackGeometryStreamsSupported() const { return features.transform_feedback.geometryStreams;