diff --git a/src/video_core/host_shaders/resolve_conditional_render.comp b/src/video_core/host_shaders/resolve_conditional_render.comp index 307e77d1ad..3bc92f94fa 100644 --- a/src/video_core/host_shaders/resolve_conditional_render.comp +++ b/src/video_core/host_shaders/resolve_conditional_render.comp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later @@ -5,16 +8,22 @@ layout(local_size_x = 1) in; -layout(std430, binding = 0) buffer Query { - uvec2 initial; - uvec2 unknown; - uvec2 current; +layout(std430, binding = 0) readonly buffer Query { + uint data[]; }; -layout(std430, binding = 1) buffer Result { +layout(std430, binding = 1) writeonly buffer Result { uint result; }; +layout(push_constant) uniform PushConstants { + uint compare_to_zero; +}; + void main() { - result = all(equal(initial, current)) ? 1 : 0; + if (compare_to_zero != 0u) { + result = (data[0] != 0u && data[1] != 0u) ? 1u : 0u; + } else { + result = (data[0] == data[4] && data[1] == data[5]) ? 
1u : 0u; + } } diff --git a/src/video_core/query_cache/query_cache.h b/src/video_core/query_cache/query_cache.h index 4ed42487aa..6bed91a53e 100644 --- a/src/video_core/query_cache/query_cache.h +++ b/src/video_core/query_cache/query_cache.h @@ -412,6 +412,7 @@ bool QueryCacheBase::AccelerateHostConditionalRendering() { .found_query = nullptr, }; } + it_current = it_current_2; } auto* query = impl->ObtainQuery(it_current->second); qc_dirty |= True(query->flags & QueryFlagBits::IsHostManaged) && diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index d45a57f7bb..79f006305c 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -228,6 +228,10 @@ struct QueriesPrefixScanPushConstants { u32 accumulation_limit; u32 buffer_offset; }; + +struct ConditionalRenderingResolvePushConstants { + u32 compare_to_zero; +}; } // Anonymous namespace ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool, @@ -413,7 +417,8 @@ ConditionalRenderingResolvePass::ConditionalRenderingResolvePass( const Device& device_, Scheduler& scheduler_, DescriptorPool& descriptor_pool_, ComputePassDescriptorQueue& compute_pass_descriptor_queue_) : ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS, - INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, nullptr, + INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, + COMPUTE_PUSH_CONSTANT_RANGE<sizeof(ConditionalRenderingResolvePushConstants)>, RESOLVE_CONDITIONAL_RENDER_COMP_SPV), scheduler{scheduler_}, compute_pass_descriptor_queue{compute_pass_descriptor_queue_} {} @@ -430,7 +435,7 @@ void ConditionalRenderingResolvePass::Resolve(VkBuffer dst_buffer, VkBuffer src_ const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()}; scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) { + scheduler.Record([this,
descriptor_data, compare_to_zero](vk::CommandBuffer cmdbuf) { static constexpr VkMemoryBarrier read_barrier{ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, .pNext = nullptr, @@ -443,6 +448,9 @@ void ConditionalRenderingResolvePass::Resolve(VkBuffer dst_buffer, VkBuffer src_ .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, .dstAccessMask = VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT, }; + const ConditionalRenderingResolvePushConstants uniforms{ + .compare_to_zero = compare_to_zero ? 1U : 0U, + }; const VkDescriptorSet set = descriptor_allocator.Commit(); device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); @@ -450,6 +458,7 @@ void ConditionalRenderingResolvePass::Resolve(VkBuffer dst_buffer, VkBuffer src_ VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, read_barrier); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); + cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms); cmdbuf.Dispatch(1, 1, 1); cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, write_barrier); diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 8518d89eee..29e5385c65 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -1202,16 +1202,24 @@ struct QueryCacheRuntimeImpl { hcr_setup.pNext = nullptr; hcr_setup.flags = 0; - conditional_resolve_pass = std::make_unique<ConditionalRenderingResolvePass>( - device, scheduler, descriptor_pool, compute_pass_descriptor_queue); + const bool has_conditional_rendering = device.IsExtConditionalRendering(); + if (has_conditional_rendering) { + conditional_resolve_pass = std::make_unique<ConditionalRenderingResolvePass>( + device, scheduler, descriptor_pool, compute_pass_descriptor_queue); + } + + VkBufferUsageFlags hcr_buffer_usage = + VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + if
(has_conditional_rendering) { + hcr_buffer_usage |= VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT; + } const VkBufferCreateInfo buffer_ci = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, .size = sizeof(u32), - .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | - VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT, + .usage = hcr_buffer_usage, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, @@ -1338,15 +1346,18 @@ void QueryCacheRuntime::HostConditionalRenderingCompareValueImpl(VideoCommon::Lo } } -void QueryCacheRuntime::HostConditionalRenderingCompareBCImpl(DAddr address, bool is_equal) { +void QueryCacheRuntime::HostConditionalRenderingCompareBCImpl(DAddr address, bool is_equal, + bool compare_to_zero) { VkBuffer to_resolve; u32 to_resolve_offset; + const u32 resolve_size = compare_to_zero ? 8 : 24; { std::scoped_lock lk(impl->buffer_cache.mutex); - static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::NoSynchronize; + const auto sync_info = compare_to_zero ? VideoCommon::ObtainBufferSynchronize::FullSynchronize + : VideoCommon::ObtainBufferSynchronize::NoSynchronize; const auto post_op = VideoCommon::ObtainBufferOperation::DoNothing; const auto [buffer, offset] = - impl->buffer_cache.ObtainCPUBuffer(address, 24, sync_info, post_op); + impl->buffer_cache.ObtainCPUBuffer(address, resolve_size, sync_info, post_op); to_resolve = buffer->Handle(); to_resolve_offset = static_cast<u32>(offset); } @@ -1355,7 +1366,7 @@ void QueryCacheRuntime::HostConditionalRenderingCompareBCImpl(DAddr address, boo PauseHostConditionalRendering(); } impl->conditional_resolve_pass->Resolve(*impl->hcr_resolve_buffer, to_resolve, - to_resolve_offset, false); + to_resolve_offset, compare_to_zero); impl->hcr_setup.buffer = *impl->hcr_resolve_buffer; impl->hcr_setup.offset = 0; impl->hcr_setup.flags = is_equal ?
0 : VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT; @@ -1371,7 +1382,7 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValue(VideoCommon::Lookup if (!impl->device.IsExtConditionalRendering()) { return false; } - HostConditionalRenderingCompareValueImpl(object_1, false); + HostConditionalRenderingCompareBCImpl(object_1.address, true, true); return true; } @@ -1421,6 +1432,7 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku const bool is_gpu_high = Settings::IsGPULevelHigh(); if ((!is_gpu_high && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) || driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || driver_id == VK_DRIVER_ID_ARM_PROPRIETARY || driver_id == VK_DRIVER_ID_MESA_TURNIP) { + EndHostConditionalRendering(); return true; } @@ -1437,10 +1449,12 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku } if (!is_gpu_high) { + EndHostConditionalRendering(); return true; } if (!is_in_bc[0] && !is_in_bc[1]) { + EndHostConditionalRendering(); return true; } HostConditionalRenderingCompareBCImpl(object_1.address, equal_check); diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h index e2aa4d991e..bbb5234e11 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.h +++ b/src/video_core/renderer_vulkan/vk_query_cache.h @@ -63,7 +63,8 @@ public: private: void HostConditionalRenderingCompareValueImpl(VideoCommon::LookupData object, bool is_equal); - void HostConditionalRenderingCompareBCImpl(DAddr address, bool is_equal); + void HostConditionalRenderingCompareBCImpl(DAddr address, bool is_equal, + bool compare_to_zero = false); friend struct QueryCacheRuntimeImpl; std::unique_ptr<QueryCacheRuntimeImpl> impl; }; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f1d208e711..38dc23ff43 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@
-225,6 +225,7 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) { UpdateDynamicStates(); + query_cache.NotifySegment(true); HandleTransformFeedback(); query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, maxwell3d->regs.zpass_pixel_count_enable); @@ -336,6 +337,7 @@ void RasterizerVulkan::DrawTexture() { UpdateDynamicStates(); + query_cache.NotifySegment(true); query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, maxwell3d->regs.zpass_pixel_count_enable); const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState();