From 9a07bd0570ceba262b4a918f4927e5674b6f7269 Mon Sep 17 00:00:00 2001 From: lizzie Date: Thu, 5 Mar 2026 07:32:18 +0100 Subject: [PATCH 01/68] [vk] unify VkSurfaceKHR with Android and the rest of platforms; remove technically incorrect nullptr() ctor for handles (#2971) Removes some odd #ifdef-ing that just can use a shrimple opaque type. Also removes nullptr() ctor'ing for vulkan handles and such; it's not incorrect per se like how `void *p = 0;` isn't incorrect, just that, y'know, any static analyzer will go "woah". Also there isn't any guarantee that handles `sizeof(Handle) == sizeof(void*)` so may as well :) Signed-off-by: lizzie lizzie@eden-emu.dev Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2971 Reviewed-by: CamilleLaVey Reviewed-by: MaranBr Co-authored-by: lizzie Co-committed-by: lizzie --- src/video_core/renderer_vulkan/blit_image.cpp | 8 +-- .../renderer_vulkan/renderer_vulkan.cpp | 8 +-- .../renderer_vulkan/vk_compute_pass.cpp | 8 +-- .../renderer_vulkan/vk_compute_pipeline.cpp | 36 ++++++------- .../renderer_vulkan/vk_graphics_pipeline.cpp | 44 ++++++++------- .../renderer_vulkan/vk_present_manager.cpp | 20 ++----- .../renderer_vulkan/vk_present_manager.h | 16 ++---- .../renderer_vulkan/vk_query_cache.cpp | 2 +- .../renderer_vulkan/vk_query_cache.h | 5 +- .../renderer_vulkan/vk_scheduler.cpp | 2 +- src/video_core/renderer_vulkan/vk_scheduler.h | 8 +-- .../renderer_vulkan/vk_swapchain.cpp | 54 +++---------------- src/video_core/renderer_vulkan/vk_swapchain.h | 22 ++------ src/video_core/vulkan_common/vulkan.h | 5 +- .../vulkan_common/vulkan_device.cpp | 2 +- .../vulkan_common/vulkan_surface.cpp | 2 +- src/video_core/vulkan_common/vulkan_wrapper.h | 44 +++++++-------- 17 files changed, 104 insertions(+), 182 deletions(-) diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 07611ef98c..789f4da2ed 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ 
b/src/video_core/renderer_vulkan/blit_image.cpp @@ -1032,7 +1032,7 @@ void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRend VkShaderModule frag_shader = *convert_float_to_depth_frag; const std::array stages = MakeStages(*full_screen_vert, frag_shader); const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci = GetPipelineInputAssemblyStateCreateInfo(device); - pipeline = device.GetLogical().CreateGraphicsPipeline({ + pipeline = device.GetLogical().CreateGraphicsPipeline(VkGraphicsPipelineCreateInfo{ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -1062,7 +1062,7 @@ void BlitImageHelper::ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRend VkShaderModule frag_shader = *convert_depth_to_float_frag; const std::array stages = MakeStages(*full_screen_vert, frag_shader); const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci = GetPipelineInputAssemblyStateCreateInfo(device); - pipeline = device.GetLogical().CreateGraphicsPipeline({ + pipeline = device.GetLogical().CreateGraphicsPipeline(VkGraphicsPipelineCreateInfo{ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -1093,7 +1093,7 @@ void BlitImageHelper::ConvertPipelineEx(vk::Pipeline& pipeline, VkRenderPass ren } const std::array stages = MakeStages(*full_screen_vert, *module); const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci = GetPipelineInputAssemblyStateCreateInfo(device); - pipeline = device.GetLogical().CreateGraphicsPipeline({ + pipeline = device.GetLogical().CreateGraphicsPipeline(VkGraphicsPipelineCreateInfo{ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -1135,7 +1135,7 @@ void BlitImageHelper::ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass rende is_target_depth ? 
*convert_float_to_depth_frag : *convert_depth_to_float_frag; const std::array stages = MakeStages(*full_screen_vert, frag_shader); const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci = GetPipelineInputAssemblyStateCreateInfo(device); - pipeline = device.GetLogical().CreateGraphicsPipeline({ + pipeline = device.GetLogical().CreateGraphicsPipeline(VkGraphicsPipelineCreateInfo{ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, .pNext = nullptr, .flags = 0, diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index d1e607e75f..cb1b1a5362 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -137,14 +137,8 @@ try memory_allocator, scheduler, swapchain, -#ifdef ANDROID - surface) - , -#else *surface) - , -#endif - blit_swapchain(device_memory, + , blit_swapchain(device_memory, device, memory_allocator, present_manager, diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 22e646afe9..d45a57f7bb 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -285,7 +285,7 @@ ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool, .requiredSubgroupSize = optional_subgroup_size ? 
*optional_subgroup_size : 32U, }; bool use_setup_size = device.IsExtSubgroupSizeControlSupported() && optional_subgroup_size; - pipeline = device.GetLogical().CreateComputePipeline({ + pipeline = device.GetLogical().CreateComputePipeline(VkComputePipelineCreateInfo{ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -299,7 +299,7 @@ ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool, .pSpecializationInfo = nullptr, }, .layout = *layout, - .basePipelineHandle = nullptr, + .basePipelineHandle = {}, .basePipelineIndex = 0, }); } @@ -944,7 +944,7 @@ MSAACopyPass::MSAACopyPass(const Device& device_, Scheduler& scheduler_, .codeSize = static_cast(code.size_bytes()), .pCode = code.data(), }); - pipelines[i] = device.GetLogical().CreateComputePipeline({ + pipelines[i] = device.GetLogical().CreateComputePipeline(VkComputePipelineCreateInfo{ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -958,7 +958,7 @@ MSAACopyPass::MSAACopyPass(const Device& device_, Scheduler& scheduler_, .pSpecializationInfo = nullptr, }, .layout = *layout, - .basePipelineHandle = nullptr, + .basePipelineHandle = {}, .basePipelineIndex = 0, }); }; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 6a6fe2b830..1a62324c95 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -67,26 +67,24 @@ ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipel if (device.IsKhrPipelineExecutablePropertiesEnabled() && Settings::values.renderer_debug.GetValue()) { flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR; } - pipeline = device.GetLogical().CreateComputePipeline( - { - .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = flags, - .stage{ - .sType = 
VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = - device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = *spv_module, - .pName = "main", - .pSpecializationInfo = nullptr, - }, - .layout = *pipeline_layout, - .basePipelineHandle = 0, - .basePipelineIndex = 0, + pipeline = device.GetLogical().CreateComputePipeline(VkComputePipelineCreateInfo{ + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = flags, + .stage{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = + device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *spv_module, + .pName = "main", + .pSpecializationInfo = nullptr, }, - *pipeline_cache); + .layout = *pipeline_layout, + .basePipelineHandle = 0, + .basePipelineIndex = 0, + }, *pipeline_cache); // Log compute pipeline creation if (Settings::values.gpu_logging_enabled.GetValue()) { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 5b11a34232..e989bf6b31 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -946,29 +946,27 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR; } - pipeline = device.GetLogical().CreateGraphicsPipeline( - { - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = flags, - .stageCount = static_cast(shader_stages.size()), - .pStages = shader_stages.data(), - .pVertexInputState = &vertex_input_ci, - .pInputAssemblyState = &input_assembly_ci, - .pTessellationState = &tessellation_ci, - .pViewportState = &viewport_ci, - .pRasterizationState = &rasterization_ci, - .pMultisampleState = &multisample_ci, - .pDepthStencilState = 
&depth_stencil_ci, - .pColorBlendState = &color_blend_ci, - .pDynamicState = &dynamic_state_ci, - .layout = *pipeline_layout, - .renderPass = render_pass, - .subpass = 0, - .basePipelineHandle = nullptr, - .basePipelineIndex = 0, - }, - *pipeline_cache); + pipeline = device.GetLogical().CreateGraphicsPipeline({ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = flags, + .stageCount = static_cast(shader_stages.size()), + .pStages = shader_stages.data(), + .pVertexInputState = &vertex_input_ci, + .pInputAssemblyState = &input_assembly_ci, + .pTessellationState = &tessellation_ci, + .pViewportState = &viewport_ci, + .pRasterizationState = &rasterization_ci, + .pMultisampleState = &multisample_ci, + .pDepthStencilState = &depth_stencil_ci, + .pColorBlendState = &color_blend_ci, + .pDynamicState = &dynamic_state_ci, + .layout = *pipeline_layout, + .renderPass = render_pass, + .subpass = 0, + .basePipelineHandle = nullptr, + .basePipelineIndex = 0, + }, *pipeline_cache); // Log graphics pipeline creation if (Settings::values.gpu_logging_enabled.GetValue()) { diff --git a/src/video_core/renderer_vulkan/vk_present_manager.cpp b/src/video_core/renderer_vulkan/vk_present_manager.cpp index 3b5c2e3c01..aa019a4160 100644 --- a/src/video_core/renderer_vulkan/vk_present_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_present_manager.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project @@ -101,22 +101,14 @@ PresentManager::PresentManager(const vk::Instance& instance_, MemoryAllocator& memory_allocator_, Scheduler& scheduler_, Swapchain& swapchain_, -#ifdef ANDROID - vk::SurfaceKHR& surface_) -#else - VkSurfaceKHR_T* surface_handle_) -#endif + VkSurfaceKHR_T* surface_) : instance{instance_} , render_window{render_window_} , 
device{device_} , memory_allocator{memory_allocator_} , scheduler{scheduler_} , swapchain{swapchain_} -#ifdef ANDROID , surface{surface_} -#else - , surface_handle{surface_handle_} -#endif , blit_supported{CanBlitToSwapchain(device.GetPhysical(), swapchain.GetImageViewFormat())} , use_present_thread{Settings::values.async_presentation.GetValue()} { @@ -299,11 +291,7 @@ void PresentManager::PresentThread(std::stop_token token) { } void PresentManager::RecreateSwapchain(Frame* frame) { -#ifndef ANDROID - swapchain.Create(surface_handle, frame->width, frame->height); // Pass raw pointer -#else - swapchain.Create(*surface, frame->width, frame->height); // Pass raw pointer -#endif + swapchain.Create(surface, frame->width, frame->height); // Pass raw pointer SetImageCount(); } @@ -322,7 +310,7 @@ void PresentManager::CopyToSwapchain(Frame* frame) { // Recreate surface and swapchain if needed. if (requires_recreation) { #ifdef ANDROID - surface = CreateSurface(instance, render_window.GetWindowInfo()); + surface = *CreateSurface(instance, render_window.GetWindowInfo()).address(); #endif RecreateSwapchain(frame); } diff --git a/src/video_core/renderer_vulkan/vk_present_manager.h b/src/video_core/renderer_vulkan/vk_present_manager.h index aacc9b025a..3d5cc32102 100644 --- a/src/video_core/renderer_vulkan/vk_present_manager.h +++ b/src/video_core/renderer_vulkan/vk_present_manager.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project @@ -15,8 +15,6 @@ #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" -struct VkSurfaceKHR_T; - namespace Core::Frontend { class EmuWindow; } // namespace Core::Frontend @@ -46,11 +44,7 @@ public: MemoryAllocator& memory_allocator, Scheduler& scheduler, Swapchain& swapchain, -#ifdef 
ANDROID - vk::SurfaceKHR& surface); -#else - VkSurfaceKHR_T* surface_handle); -#endif + VkSurfaceKHR_T* surface); ~PresentManager(); /// Returns the last used presentation frame @@ -84,11 +78,7 @@ private: MemoryAllocator& memory_allocator; Scheduler& scheduler; Swapchain& swapchain; -#ifdef ANDROID - vk::SurfaceKHR& surface; -#else - VkSurfaceKHR_T* surface_handle; -#endif + VkSurfaceKHR_T* surface; vk::CommandPool cmdpool; std::vector frames; boost::container::deque present_queue; diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 415259c72c..7cdb3acadd 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -1280,7 +1280,7 @@ void QueryCacheRuntime::EndHostConditionalRendering() { PauseHostConditionalRendering(); impl->hcr_is_set = false; impl->is_hcr_running = false; - impl->hcr_buffer = nullptr; + impl->hcr_buffer = VkBuffer{}; impl->hcr_offset = 0; } diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h index b8dae9bc2d..e2aa4d991e 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.h +++ b/src/video_core/renderer_vulkan/vk_query_cache.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later @@ -35,7 +38,7 @@ public: ~QueryCacheRuntime(); template - void SyncValues(std::span values, VkBuffer base_src_buffer = nullptr); + void SyncValues(std::span values, VkBuffer base_src_buffer = VkBuffer{}); void Barriers(bool is_prebarrier); diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 0a032cdae0..947de6a80e 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ 
-377,7 +377,7 @@ void Scheduler::EndRenderPass() VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 0, nullptr, nullptr, vk::Span(barriers.data(), num_images)); }); - state.renderpass = nullptr; + state.renderpass = VkRenderPass{}; num_renderpass_images = 0; } diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 667f136ee6..00a912f2cd 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -44,10 +44,10 @@ public: ~Scheduler(); /// Sends the current execution context to the GPU. - u64 Flush(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr); + u64 Flush(VkSemaphore signal_semaphore = {}, VkSemaphore wait_semaphore = {}); /// Sends the current execution context to the GPU and waits for it to complete. - void Finish(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr); + void Finish(VkSemaphore signal_semaphore = {}, VkSemaphore wait_semaphore = {}); /// Waits for the worker thread to finish executing everything. After this function returns it's /// safe to touch worker resources. 
@@ -237,8 +237,8 @@ private: }; struct State { - VkRenderPass renderpass = nullptr; - VkFramebuffer framebuffer = nullptr; + VkRenderPass renderpass{}; + VkFramebuffer framebuffer{}; VkExtent2D render_area = {0, 0}; GraphicsPipeline* graphics_pipeline = nullptr; bool is_rescaling = false; diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index b89e981444..cd8f948d8b 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -109,38 +109,22 @@ VkCompositeAlphaFlagBitsKHR ChooseAlphaFlags(const VkSurfaceCapabilitiesKHR& cap } // Anonymous namespace Swapchain::Swapchain( -#ifdef ANDROID - VkSurfaceKHR surface_, -#else - VkSurfaceKHR_T* surface_handle_, -#endif + VkSurfaceKHR_T* surface_, const Device& device_, Scheduler& scheduler_, u32 width_, u32 height_) -#ifdef ANDROID : surface(surface_) -#else - : surface_handle{surface_handle_} -#endif , device{device_} , scheduler{scheduler_} { -#ifdef ANDROID Create(surface, width_, height_); -#else - Create(surface_handle, width_, height_); -#endif } Swapchain::~Swapchain() = default; void Swapchain::Create( -#ifdef ANDROID - VkSurfaceKHR surface_, -#else - VkSurfaceKHR_T* surface_handle_, -#endif + VkSurfaceKHR_T* surface_, u32 width_, u32 height_) { @@ -148,18 +132,10 @@ void Swapchain::Create( is_suboptimal = false; width = width_; height = height_; -#ifdef ANDROID surface = surface_; -#else - surface_handle = surface_handle_; -#endif const auto physical_device = device.GetPhysical(); -#ifdef ANDROID - const auto capabilities{physical_device.GetSurfaceCapabilitiesKHR(surface)}; -#else - const auto capabilities{physical_device.GetSurfaceCapabilitiesKHR(surface_handle)}; -#endif + const auto capabilities{physical_device.GetSurfaceCapabilitiesKHR(VkSurfaceKHR(surface))}; if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) { return; } @@ -254,14 +230,8 @@ void 
Swapchain::Present(VkSemaphore render_semaphore) { void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities) { const auto physical_device{device.GetPhysical()}; - -#ifdef ANDROID - const auto formats{physical_device.GetSurfaceFormatsKHR(surface)}; - const auto present_modes = physical_device.GetSurfacePresentModesKHR(surface); -#else - const auto formats{physical_device.GetSurfaceFormatsKHR(surface_handle)}; - const auto present_modes = physical_device.GetSurfacePresentModesKHR(surface_handle); -#endif + const auto formats{physical_device.GetSurfaceFormatsKHR(VkSurfaceKHR(surface))}; + const auto present_modes = physical_device.GetSurfacePresentModesKHR(VkSurfaceKHR(surface)); has_mailbox = std::find(present_modes.begin(), present_modes.end(), VK_PRESENT_MODE_MAILBOX_KHR) != present_modes.end(); @@ -290,11 +260,7 @@ void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities) { .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR, .pNext = nullptr, .flags = 0, -#ifdef ANDROID - .surface = surface, -#else - .surface = surface_handle, -#endif + .surface = VkSurfaceKHR(surface), .minImageCount = requested_image_count, .imageFormat = surface_format.format, .imageColorSpace = surface_format.colorSpace, @@ -313,7 +279,7 @@ void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities) { .compositeAlpha = alpha_flags, .presentMode = present_mode, .clipped = VK_FALSE, - .oldSwapchain = nullptr, + .oldSwapchain = VkSwapchainKHR{}, }; const u32 graphics_family{device.GetGraphicsFamily()}; const u32 present_family{device.GetPresentFamily()}; @@ -345,11 +311,7 @@ void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities) { swapchain_ci.flags |= VK_SWAPCHAIN_CREATE_MUTABLE_FORMAT_BIT_KHR; } // Request the size again to reduce the possibility of a TOCTOU race condition. 
-#ifdef ANDROID - const auto updated_capabilities = physical_device.GetSurfaceCapabilitiesKHR(surface); -#else - const auto updated_capabilities = physical_device.GetSurfaceCapabilitiesKHR(surface_handle); -#endif + const auto updated_capabilities = physical_device.GetSurfaceCapabilitiesKHR(VkSurfaceKHR(surface)); swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height); // Don't add code within this and the swapchain creation. swapchain = device.GetLogical().CreateSwapchainKHR(swapchain_ci); diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index 7e99bf8fa7..d926cc118a 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project @@ -11,8 +11,6 @@ #include "common/common_types.h" #include "video_core/vulkan_common/vulkan_wrapper.h" -struct VkSurfaceKHR_T; - namespace Layout { struct FramebufferLayout; } @@ -25,11 +23,7 @@ class Scheduler; class Swapchain { public: explicit Swapchain( -#ifdef ANDROID - VkSurfaceKHR surface, -#else - VkSurfaceKHR_T* surface_handle, -#endif + VkSurfaceKHR_T* surface, const Device& device, Scheduler& scheduler, u32 width, @@ -38,11 +32,7 @@ public: /// Creates (or recreates) the swapchain with a given size. 
void Create( -#ifdef ANDROID - VkSurfaceKHR surface, -#else - VkSurfaceKHR_T* surface_handle, -#endif + VkSurfaceKHR_T* surface, u32 width, u32 height); @@ -128,11 +118,7 @@ private: bool NeedsPresentModeUpdate() const; -#ifdef ANDROID - VkSurfaceKHR surface; -#else - VkSurfaceKHR_T* surface_handle; -#endif + VkSurfaceKHR_T* surface; const Device& device; Scheduler& scheduler; diff --git a/src/video_core/vulkan_common/vulkan.h b/src/video_core/vulkan_common/vulkan.h index 8d2e8e2a37..2cc0f0d7f0 100644 --- a/src/video_core/vulkan_common/vulkan.h +++ b/src/video_core/vulkan_common/vulkan.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project @@ -40,3 +40,6 @@ #undef False #undef None #undef True + +// "Catch-all" handle for both Android and.. the rest of platforms +struct VkSurfaceKHR_T; diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index a2ff3ee6ed..b51c57d380 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -419,7 +419,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR : instance{instance_}, dld{dld_}, physical{physical_}, format_properties(GetFormatProperties(physical)) { // Get suitability and device properties. 
- const bool is_suitable = GetSuitability(surface != nullptr); + const bool is_suitable = GetSuitability(surface != VkSurfaceKHR{}); const VkDriverId driver_id = properties.driver.driverID; diff --git a/src/video_core/vulkan_common/vulkan_surface.cpp b/src/video_core/vulkan_common/vulkan_surface.cpp index dc65d3960a..761b7759c8 100644 --- a/src/video_core/vulkan_common/vulkan_surface.cpp +++ b/src/video_core/vulkan_common/vulkan_surface.cpp @@ -15,7 +15,7 @@ vk::SurfaceKHR CreateSurface( const vk::Instance& instance, [[maybe_unused]] const Core::Frontend::EmuWindow::WindowSystemInfo& window_info) { [[maybe_unused]] const vk::InstanceDispatch& dld = instance.Dispatch(); - VkSurfaceKHR unsafe_surface = nullptr; + VkSurfaceKHR unsafe_surface = VkSurfaceKHR{}; #ifdef _WIN32 if (window_info.type == Core::Frontend::WindowSystemType::Windows) { diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index 5c04132f7b..872fbd858e 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -404,13 +404,13 @@ public: /// Construct a handle transferring the ownership from another handle. Handle(Handle&& rhs) noexcept - : handle{std::exchange(rhs.handle, nullptr)}, owner{rhs.owner}, dld{rhs.dld} {} + : handle{std::exchange(rhs.handle, Type{})}, owner{rhs.owner}, dld{rhs.dld} {} /// Assign the current handle transferring the ownership from another handle. /// Destroys any previously held object. Handle& operator=(Handle&& rhs) noexcept { Release(); - handle = std::exchange(rhs.handle, nullptr); + handle = std::exchange(rhs.handle, Type{}); owner = rhs.owner; dld = rhs.dld; return *this; @@ -424,7 +424,7 @@ public: /// Destroys any held object. void reset() noexcept { Release(); - handle = nullptr; + handle = Type{}; } /// Returns the address of the held object. @@ -440,7 +440,7 @@ public: /// Returns true when there's a held object. 
explicit operator bool() const noexcept { - return handle != nullptr; + return handle != Type{}; } #ifndef ANDROID @@ -455,7 +455,7 @@ public: #endif protected: - Type handle = nullptr; + Type handle{}; OwnerType owner = nullptr; const Dispatch* dld = nullptr; @@ -463,7 +463,7 @@ private: /// Destroys the held object if it exists. void Release() noexcept { if (handle) { - Destroy(owner, handle, *dld); + Destroy(OwnerType(owner), Type(handle), *dld); } } }; @@ -506,7 +506,7 @@ public: /// Destroys any held object. void reset() noexcept { Release(); - handle = nullptr; + handle = {}; } /// Returns the address of the held object. @@ -522,7 +522,7 @@ public: /// Returns true when there's a held object. explicit operator bool() const noexcept { - return handle != nullptr; + return handle != Type{}; } #ifndef ANDROID @@ -537,7 +537,7 @@ public: #endif protected: - Type handle = nullptr; + Type handle{}; const Dispatch* dld = nullptr; private: @@ -607,7 +607,7 @@ private: std::unique_ptr allocations; std::size_t num = 0; VkDevice device = nullptr; - PoolType pool = nullptr; + PoolType pool{}; const DeviceDispatch* dld = nullptr; }; @@ -669,12 +669,12 @@ public: Image& operator=(const Image&) = delete; Image(Image&& rhs) noexcept - : handle{std::exchange(rhs.handle, nullptr)}, usage{rhs.usage}, owner{rhs.owner}, + : handle{std::exchange(rhs.handle, VkImage{})}, usage{rhs.usage}, owner{rhs.owner}, allocator{rhs.allocator}, allocation{rhs.allocation}, dld{rhs.dld} {} Image& operator=(Image&& rhs) noexcept { Release(); - handle = std::exchange(rhs.handle, nullptr); + handle = std::exchange(rhs.handle, VkImage{}); usage = rhs.usage; owner = rhs.owner; allocator = rhs.allocator; @@ -693,11 +693,11 @@ public: void reset() noexcept { Release(); - handle = nullptr; + handle = VkImage{}; } explicit operator bool() const noexcept { - return handle != nullptr; + return handle != VkImage{}; } void SetObjectNameEXT(const char* name) const; @@ -709,7 +709,7 @@ public: private: void 
Release() const noexcept; - VkImage handle = nullptr; + VkImage handle{}; VkImageUsageFlags usage{}; VkDevice owner = nullptr; VmaAllocator allocator = nullptr; @@ -730,13 +730,13 @@ public: Buffer& operator=(const Buffer&) = delete; Buffer(Buffer&& rhs) noexcept - : handle{std::exchange(rhs.handle, nullptr)}, owner{rhs.owner}, allocator{rhs.allocator}, + : handle{std::exchange(rhs.handle, VkBuffer{})}, owner{rhs.owner}, allocator{rhs.allocator}, allocation{rhs.allocation}, mapped{rhs.mapped}, is_coherent{rhs.is_coherent}, dld{rhs.dld} {} Buffer& operator=(Buffer&& rhs) noexcept { Release(); - handle = std::exchange(rhs.handle, nullptr); + handle = std::exchange(rhs.handle, VkBuffer{}); owner = rhs.owner; allocator = rhs.allocator; allocation = rhs.allocation; @@ -756,11 +756,11 @@ public: void reset() noexcept { Release(); - handle = nullptr; + handle = VkBuffer{}; } explicit operator bool() const noexcept { - return handle != nullptr; + return handle != VkBuffer{}; } /// Returns the host mapped memory, an empty span otherwise. 
@@ -786,7 +786,7 @@ public: private: void Release() const noexcept; - VkBuffer handle = nullptr; + VkBuffer handle{}; VkDevice owner = nullptr; VmaAllocator allocator = nullptr; VmaAllocation allocation = nullptr; @@ -1020,10 +1020,10 @@ public: [[nodiscard]] PipelineLayout CreatePipelineLayout(const VkPipelineLayoutCreateInfo& ci) const; [[nodiscard]] Pipeline CreateGraphicsPipeline(const VkGraphicsPipelineCreateInfo& ci, - VkPipelineCache cache = nullptr) const; + VkPipelineCache cache = {}) const; [[nodiscard]] Pipeline CreateComputePipeline(const VkComputePipelineCreateInfo& ci, - VkPipelineCache cache = nullptr) const; + VkPipelineCache cache = {}) const; [[nodiscard]] Sampler CreateSampler(const VkSamplerCreateInfo& ci) const; From 529b0694995c84c51515022c52a66c67ceacdc65 Mon Sep 17 00:00:00 2001 From: xbzk Date: Thu, 5 Mar 2026 13:58:46 +0100 Subject: [PATCH 02/68] [android,ui] fixed top disalignment between buttons of each column in settings fragment (#3675) this silly little thing tickles obsessive compulsive disturbed fellas a lot hu3 was shipped along PR 3660, which was rediscussed for other reason, hence this tiny lonely PR. 
Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3675 Reviewed-by: DraVee Reviewed-by: MaranBr Co-authored-by: xbzk Co-committed-by: xbzk --- .../features/fetcher/SpacingItemDecoration.kt | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/fetcher/SpacingItemDecoration.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/fetcher/SpacingItemDecoration.kt index f3d000a739..b3ffcc2a35 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/fetcher/SpacingItemDecoration.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/fetcher/SpacingItemDecoration.kt @@ -1,10 +1,11 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later package org.yuzu.yuzu_emu.features.fetcher import android.graphics.Rect import android.view.View +import androidx.recyclerview.widget.GridLayoutManager import androidx.recyclerview.widget.RecyclerView class SpacingItemDecoration(private val spacing: Int) : RecyclerView.ItemDecoration() { @@ -15,8 +16,20 @@ class SpacingItemDecoration(private val spacing: Int) : RecyclerView.ItemDecorat state: RecyclerView.State ) { outRect.bottom = spacing - if (parent.getChildAdapterPosition(view) == 0) { + + val position = parent.getChildAdapterPosition(view) + if (position == RecyclerView.NO_POSITION) return + + if (position == 0) { outRect.top = spacing + return + } + + // If the item is in the first row, but NOT in first column add top spacing as well + val layoutManager = parent.layoutManager + if (layoutManager is GridLayoutManager && layoutManager.spanSizeLookup.getSpanGroupIndex(position, layoutManager.spanCount) == 0) { + outRect.top = spacing + return } } } From 23566a1f7dc639946e6d3935f4951d4d2bce8461 Mon Sep 17 00:00:00 2001 From: MaranBr Date: Fri, 6 Mar 2026 15:02:59 +0100 Subject: [PATCH 03/68] 
[prepo] Add support for missing PlayReport commands (#3674) This fixes: `[ 433.095195] Debug core\hle\service\service.cpp:operator ():69: Assertion Failed! Unknown / unimplemented function '10107': port='prepo:u' cmd_buf={[0]=0x110006, [1]=0x80000014, [2]=0x1, [3]=0x0, [4]=0x0, [5]=0x191080, [6]=0x5A7350F8, [7]=0x112, [8]=0x5A735158}` Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3674 Reviewed-by: CamilleLaVey Reviewed-by: DraVee Reviewed-by: Maufeat Co-authored-by: MaranBr Co-committed-by: MaranBr --- src/core/hle/service/prepo/prepo.cpp | 8 +++++--- src/core/reporter.h | 4 ++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/core/hle/service/prepo/prepo.cpp b/src/core/hle/service/prepo/prepo.cpp index 4fc59d0e10..bfc5539903 100644 --- a/src/core/hle/service/prepo/prepo.cpp +++ b/src/core/hle/service/prepo/prepo.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project @@ -28,8 +28,10 @@ public: {10101, &PlayReport::SaveReportWithUser, "SaveReportWithUserOld"}, {10102, &PlayReport::SaveReport, "SaveReportOld2"}, {10103, &PlayReport::SaveReportWithUser, "SaveReportWithUserOld2"}, - {10104, &PlayReport::SaveReport, "SaveReport"}, - {10105, &PlayReport::SaveReportWithUser, "SaveReportWithUser"}, + {10104, &PlayReport::SaveReport, "SaveReportOld3"}, + {10105, &PlayReport::SaveReportWithUser, "SaveReportWithUserOld3"}, + {10106, &PlayReport::SaveReport, "SaveReport"}, + {10107, &PlayReport::SaveReportWithUser, "SaveReportWithUser"}, {10200, &PlayReport::RequestImmediateTransmission, "RequestImmediateTransmission"}, {10300, &PlayReport::GetTransmissionStatus, "GetTransmissionStatus"}, {10400, &PlayReport::GetSystemSessionId, "GetSystemSessionId"}, diff --git a/src/core/reporter.h b/src/core/reporter.h index db1ca3ba0c..1eee8da31f 100644 --- 
a/src/core/reporter.h +++ b/src/core/reporter.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -53,6 +56,7 @@ public: enum class PlayReportType { Old, Old2, + Old3, New, System, }; From c70b857c4f8324621c3d3f7dfe659856c8000878 Mon Sep 17 00:00:00 2001 From: lizzie Date: Fri, 6 Mar 2026 15:04:38 +0100 Subject: [PATCH 04/68] [video_core/engines] Macro HLE inline (#3653) Should slightly boost perf on android, Desktop is mainly unaffected (for now) Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3653 Reviewed-by: CamilleLaVey Reviewed-by: DraVee Co-authored-by: lizzie Co-committed-by: lizzie --- src/video_core/engines/maxwell_3d.cpp | 21 +- src/video_core/engines/maxwell_3d.h | 2 +- src/video_core/macro.cpp | 1103 ++++++++++--------------- src/video_core/macro.h | 166 +++- 4 files changed, 586 insertions(+), 706 deletions(-) diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 7dbb8f6617..e48f294a5a 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project @@ -26,8 +26,15 @@ namespace Tegra::Engines { constexpr u32 MacroRegistersStart = 0xE00; Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_) - : draw_manager{std::make_unique(this)}, system{system_}, - memory_manager{memory_manager_}, macro_engine{GetMacroEngine(*this)}, upload_state{memory_manager, regs.upload} { + : draw_manager{std::make_unique(this)}, system{system_} + , memory_manager{memory_manager_} +#ifdef ARCHITECTURE_x86_64 + , 
macro_engine(bool(Settings::values.disable_macro_jit)) +#else + , macro_engine(true) +#endif + , upload_state{memory_manager, regs.upload} +{ dirty.flags.flip(); InitializeRegisterDefaults(); execution_mask.reset(); @@ -328,9 +335,9 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume shadow_state.shadow_ram_control = static_cast(nonshadow_argument); return; case MAXWELL3D_REG_INDEX(load_mme.instruction_ptr): - return macro_engine->ClearCode(regs.load_mme.instruction_ptr); + return macro_engine.ClearCode(regs.load_mme.instruction_ptr); case MAXWELL3D_REG_INDEX(load_mme.instruction): - return macro_engine->AddCode(regs.load_mme.instruction_ptr, argument); + return macro_engine.AddCode(regs.load_mme.instruction_ptr, argument); case MAXWELL3D_REG_INDEX(load_mme.start_address): return ProcessMacroBind(argument); case MAXWELL3D_REG_INDEX(falcon[4]): @@ -398,7 +405,7 @@ void Maxwell3D::CallMacroMethod(u32 method, const std::vector& parameters) ((method - MacroRegistersStart) >> 1) % static_cast(macro_positions.size()); // Execute the current macro. - macro_engine->Execute(macro_positions[entry], parameters); + macro_engine.Execute(*this, macro_positions[entry], parameters); draw_manager->DrawDeferred(); } @@ -464,7 +471,7 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, } void Maxwell3D::ProcessMacroUpload(u32 data) { - macro_engine->AddCode(regs.load_mme.instruction_ptr++, data); + macro_engine.AddCode(regs.load_mme.instruction_ptr++, data); } void Maxwell3D::ProcessMacroBind(u32 data) { diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 5312c04b6f..52546e4279 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -3203,7 +3203,7 @@ private: std::vector macro_params; /// Interpreter for the macro codes uploaded to the GPU. 
- std::optional macro_engine; + MacroEngine macro_engine; Upload::State upload_state; diff --git a/src/video_core/macro.cpp b/src/video_core/macro.cpp index 3fe69be4dd..0d1fe0a52b 100644 --- a/src/video_core/macro.cpp +++ b/src/video_core/macro.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project @@ -10,6 +10,7 @@ #include #include +#include #ifdef ARCHITECTURE_x86_64 // xbyak hates human beings #ifdef __GNUC__ @@ -73,601 +74,411 @@ bool IsTopologySafe(Maxwell3D::Regs::PrimitiveTopology topology) { } } -class HLEMacroImpl : public CachedMacro { -public: - explicit HLEMacroImpl(Maxwell3D& maxwell3d_) - : CachedMacro(maxwell3d_) - {} -}; +} // Anonymous namespace -/// @note: these macros have two versions, a normal and extended version, with the extended version -/// also assigning the base vertex/instance. 
-template -class HLE_DrawArraysIndirect final : public HLEMacroImpl { -public: - explicit HLE_DrawArraysIndirect(Maxwell3D& maxwell3d_) - : HLEMacroImpl(maxwell3d_) - {} - - void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { - auto topology = static_cast(parameters[0]); - if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) { - Fallback(parameters); - return; - } - - auto& params = maxwell3d.draw_manager->GetIndirectParams(); - params.is_byte_count = false; - params.is_indexed = false; - params.include_count = false; - params.count_start_address = 0; - params.indirect_start_address = maxwell3d.GetMacroAddress(1); - params.buffer_size = 4 * sizeof(u32); - params.max_draw_counts = 1; - params.stride = 0; - - if (extended) { - maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; - maxwell3d.SetHLEReplacementAttributeType(0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseInstance); - } - - maxwell3d.draw_manager->DrawArrayIndirect(topology); - - if (extended) { - maxwell3d.engine_state = Maxwell3D::EngineHint::None; - maxwell3d.replace_table.clear(); - } +void HLE_DrawArraysIndirect::Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, [[maybe_unused]] u32 method) { + auto topology = static_cast(parameters[0]); + if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) { + Fallback(maxwell3d, parameters); + return; } -private: - void Fallback(const std::vector& parameters) { - SCOPE_EXIT { - if (extended) { - maxwell3d.engine_state = Maxwell3D::EngineHint::None; - maxwell3d.replace_table.clear(); - } - }; - maxwell3d.RefreshParameters(); - const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); + auto& params = maxwell3d.draw_manager->GetIndirectParams(); + params.is_byte_count = false; + params.is_indexed = false; + params.include_count = false; + params.count_start_address = 0; + params.indirect_start_address = maxwell3d.GetMacroAddress(1); + params.buffer_size = 4 * 
sizeof(u32); + params.max_draw_counts = 1; + params.stride = 0; - auto topology = static_cast(parameters[0]); - const u32 vertex_first = parameters[3]; - const u32 vertex_count = parameters[1]; - - if (!IsTopologySafe(topology) && size_t(maxwell3d.GetMaxCurrentVertices()) < size_t(vertex_first) + size_t(vertex_count)) { - ASSERT(false && "Faulty draw!"); - return; - } - - const u32 base_instance = parameters[4]; - if (extended) { - maxwell3d.regs.global_base_instance_index = base_instance; - maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; - maxwell3d.SetHLEReplacementAttributeType( - 0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseInstance); - } - - maxwell3d.draw_manager->DrawArray(topology, vertex_first, vertex_count, base_instance, - instance_count); - - if (extended) { - maxwell3d.regs.global_base_instance_index = 0; - maxwell3d.engine_state = Maxwell3D::EngineHint::None; - maxwell3d.replace_table.clear(); - } - } -}; - -/* - * @note: these macros have two versions, a normal and extended version, with the extended version - * also assigning the base vertex/instance. 
- */ -template -class HLE_DrawIndexedIndirect final : public HLEMacroImpl { -public: - explicit HLE_DrawIndexedIndirect(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - - void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { - auto topology = static_cast(parameters[0]); - if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) { - Fallback(parameters); - return; - } - - const u32 estimate = static_cast(maxwell3d.EstimateIndexBufferSize()); - const u32 element_base = parameters[4]; - const u32 base_instance = parameters[5]; - maxwell3d.regs.vertex_id_base = element_base; - maxwell3d.regs.global_base_vertex_index = element_base; - maxwell3d.regs.global_base_instance_index = base_instance; - maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - if (extended) { - maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; - maxwell3d.SetHLEReplacementAttributeType(0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex); - maxwell3d.SetHLEReplacementAttributeType(0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance); - } - auto& params = maxwell3d.draw_manager->GetIndirectParams(); - params.is_byte_count = false; - params.is_indexed = true; - params.include_count = false; - params.count_start_address = 0; - params.indirect_start_address = maxwell3d.GetMacroAddress(1); - params.buffer_size = 5 * sizeof(u32); - params.max_draw_counts = 1; - params.stride = 0; - maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, estimate); - maxwell3d.regs.vertex_id_base = 0x0; - maxwell3d.regs.global_base_vertex_index = 0x0; - maxwell3d.regs.global_base_instance_index = 0x0; - if (extended) { - maxwell3d.engine_state = Maxwell3D::EngineHint::None; - maxwell3d.replace_table.clear(); - } - } - -private: - void Fallback(const std::vector& parameters) { - maxwell3d.RefreshParameters(); - const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & 
parameters[2]); - const u32 element_base = parameters[4]; - const u32 base_instance = parameters[5]; - maxwell3d.regs.vertex_id_base = element_base; - maxwell3d.regs.global_base_vertex_index = element_base; - maxwell3d.regs.global_base_instance_index = base_instance; - maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - if (extended) { - maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; - maxwell3d.SetHLEReplacementAttributeType(0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex); - maxwell3d.SetHLEReplacementAttributeType(0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance); - } - - maxwell3d.draw_manager->DrawIndex(Tegra::Maxwell3D::Regs::PrimitiveTopology(parameters[0]), parameters[3], parameters[1], element_base, base_instance, instance_count); - - maxwell3d.regs.vertex_id_base = 0x0; - maxwell3d.regs.global_base_vertex_index = 0x0; - maxwell3d.regs.global_base_instance_index = 0x0; - if (extended) { - maxwell3d.engine_state = Maxwell3D::EngineHint::None; - maxwell3d.replace_table.clear(); - } - } -}; - -class HLE_MultiLayerClear final : public HLEMacroImpl { -public: - explicit HLE_MultiLayerClear(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - - void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { - maxwell3d.RefreshParameters(); - ASSERT(parameters.size() == 1); - - const Maxwell3D::Regs::ClearSurface clear_params{parameters[0]}; - const u32 rt_index = clear_params.RT; - const u32 num_layers = maxwell3d.regs.rt[rt_index].depth; - ASSERT(clear_params.layer == 0); - - maxwell3d.regs.clear_surface.raw = clear_params.raw; - maxwell3d.draw_manager->Clear(num_layers); - } -}; - -class HLE_MultiDrawIndexedIndirectCount final : public HLEMacroImpl { -public: - explicit HLE_MultiDrawIndexedIndirectCount(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - - void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { - const auto topology = 
Maxwell3D::Regs::PrimitiveTopology(parameters[2]); - if (!IsTopologySafe(topology)) { - Fallback(parameters); - return; - } - - const u32 start_indirect = parameters[0]; - const u32 end_indirect = parameters[1]; - if (start_indirect >= end_indirect) { - // Nothing to do. - return; - } - - const u32 padding = parameters[3]; // padding is in words - - // size of each indirect segment - const u32 indirect_words = 5 + padding; - const u32 stride = indirect_words * sizeof(u32); - const std::size_t draw_count = end_indirect - start_indirect; - const u32 estimate = static_cast(maxwell3d.EstimateIndexBufferSize()); - maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - auto& params = maxwell3d.draw_manager->GetIndirectParams(); - params.is_byte_count = false; - params.is_indexed = true; - params.include_count = true; - params.count_start_address = maxwell3d.GetMacroAddress(4); - params.indirect_start_address = maxwell3d.GetMacroAddress(5); - params.buffer_size = stride * draw_count; - params.max_draw_counts = draw_count; - params.stride = stride; - maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + if (extended) { maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; - maxwell3d.SetHLEReplacementAttributeType( - 0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex); - maxwell3d.SetHLEReplacementAttributeType( - 0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance); - maxwell3d.SetHLEReplacementAttributeType(0, 0x648, - Maxwell3D::HLEReplacementAttributeType::DrawID); - maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, estimate); + maxwell3d.SetHLEReplacementAttributeType(0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseInstance); + } + + maxwell3d.draw_manager->DrawArrayIndirect(topology); + + if (extended) { maxwell3d.engine_state = Maxwell3D::EngineHint::None; maxwell3d.replace_table.clear(); } - -private: - void Fallback(const std::vector& parameters) { - SCOPE_EXIT { - // Clean everything. 
- maxwell3d.regs.vertex_id_base = 0x0; +} +void HLE_DrawArraysIndirect::Fallback(Engines::Maxwell3D& maxwell3d, std::span parameters) { + SCOPE_EXIT { + if (extended) { maxwell3d.engine_state = Maxwell3D::EngineHint::None; maxwell3d.replace_table.clear(); - }; - maxwell3d.RefreshParameters(); - const u32 start_indirect = parameters[0]; - const u32 end_indirect = parameters[1]; - if (start_indirect >= end_indirect) { - // Nothing to do. - return; - } - const auto topology = static_cast(parameters[2]); - const u32 padding = parameters[3]; - const std::size_t max_draws = parameters[4]; - - const u32 indirect_words = 5 + padding; - const std::size_t first_draw = start_indirect; - const std::size_t effective_draws = end_indirect - start_indirect; - const std::size_t last_draw = start_indirect + (std::min)(effective_draws, max_draws); - - for (std::size_t index = first_draw; index < last_draw; index++) { - const std::size_t base = index * indirect_words + 5; - const u32 base_vertex = parameters[base + 3]; - const u32 base_instance = parameters[base + 4]; - maxwell3d.regs.vertex_id_base = base_vertex; - maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; - maxwell3d.SetHLEReplacementAttributeType( - 0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex); - maxwell3d.SetHLEReplacementAttributeType( - 0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance); - maxwell3d.CallMethod(0x8e3, 0x648, true); - maxwell3d.CallMethod(0x8e4, static_cast(index), true); - maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - maxwell3d.draw_manager->DrawIndex(topology, parameters[base + 2], parameters[base], - base_vertex, base_instance, parameters[base + 1]); } + }; + maxwell3d.RefreshParameters(); + const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); + auto topology = Maxwell3D::Regs::PrimitiveTopology(parameters[0]); + const u32 vertex_first = parameters[3]; + const u32 vertex_count = parameters[1]; + if 
(!IsTopologySafe(topology) && size_t(maxwell3d.GetMaxCurrentVertices()) < size_t(vertex_first) + size_t(vertex_count)) { + ASSERT(false && "Faulty draw!"); + return; } -}; - -class HLE_DrawIndirectByteCount final : public HLEMacroImpl { -public: - explicit HLE_DrawIndirectByteCount(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - - void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { - const bool force = maxwell3d.Rasterizer().HasDrawTransformFeedback(); - - auto topology = static_cast(parameters[0] & 0xFFFFU); - if (!force && (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology))) { - Fallback(parameters); - return; - } - auto& params = maxwell3d.draw_manager->GetIndirectParams(); - params.is_byte_count = true; - params.is_indexed = false; - params.include_count = false; - params.count_start_address = 0; - params.indirect_start_address = maxwell3d.GetMacroAddress(2); - params.buffer_size = 4; - params.max_draw_counts = 1; - params.stride = parameters[1]; - maxwell3d.regs.draw.begin = parameters[0]; - maxwell3d.regs.draw_auto_stride = parameters[1]; - maxwell3d.regs.draw_auto_byte_count = parameters[2]; - - maxwell3d.draw_manager->DrawArrayIndirect(topology); + const u32 base_instance = parameters[4]; + if (extended) { + maxwell3d.regs.global_base_instance_index = base_instance; + maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; + maxwell3d.SetHLEReplacementAttributeType(0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseInstance); } - -private: - void Fallback(const std::vector& parameters) { - maxwell3d.RefreshParameters(); - - maxwell3d.regs.draw.begin = parameters[0]; - maxwell3d.regs.draw_auto_stride = parameters[1]; - maxwell3d.regs.draw_auto_byte_count = parameters[2]; - - maxwell3d.draw_manager->DrawArray( - maxwell3d.regs.draw.topology, 0, - maxwell3d.regs.draw_auto_byte_count / maxwell3d.regs.draw_auto_stride, 0, 1); - } -}; - -class HLE_C713C83D8F63CCF3 final : public HLEMacroImpl { -public: - 
explicit HLE_C713C83D8F63CCF3(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - - void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { - maxwell3d.RefreshParameters(); - const u32 offset = (parameters[0] & 0x3FFFFFFF) << 2; - const u32 address = maxwell3d.regs.shadow_scratch[24]; - auto& const_buffer = maxwell3d.regs.const_buffer; - const_buffer.size = 0x7000; - const_buffer.address_high = (address >> 24) & 0xFF; - const_buffer.address_low = address << 8; - const_buffer.offset = offset; - } -}; - -class HLE_D7333D26E0A93EDE final : public HLEMacroImpl { -public: - explicit HLE_D7333D26E0A93EDE(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - - void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { - maxwell3d.RefreshParameters(); - const size_t index = parameters[0]; - const u32 address = maxwell3d.regs.shadow_scratch[42 + index]; - const u32 size = maxwell3d.regs.shadow_scratch[47 + index]; - auto& const_buffer = maxwell3d.regs.const_buffer; - const_buffer.size = size; - const_buffer.address_high = (address >> 24) & 0xFF; - const_buffer.address_low = address << 8; - } -}; - -class HLE_BindShader final : public HLEMacroImpl { -public: - explicit HLE_BindShader(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - - void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { - maxwell3d.RefreshParameters(); - auto& regs = maxwell3d.regs; - const u32 index = parameters[0]; - if ((parameters[1] - regs.shadow_scratch[28 + index]) == 0) { - return; - } - - regs.pipelines[index & 0xF].offset = parameters[2]; - maxwell3d.dirty.flags[VideoCommon::Dirty::Shaders] = true; - regs.shadow_scratch[28 + index] = parameters[1]; - regs.shadow_scratch[34 + index] = parameters[2]; - - const u32 address = parameters[4]; - auto& const_buffer = regs.const_buffer; - const_buffer.size = 0x10000; - const_buffer.address_high = (address >> 24) & 0xFF; - const_buffer.address_low = address << 8; - - 
const size_t bind_group_id = parameters[3] & 0x7F; - auto& bind_group = regs.bind_groups[bind_group_id]; - bind_group.raw_config = 0x11; - maxwell3d.ProcessCBBind(bind_group_id); - } -}; - -class HLE_SetRasterBoundingBox final : public HLEMacroImpl { -public: - explicit HLE_SetRasterBoundingBox(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - - void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { - maxwell3d.RefreshParameters(); - const u32 raster_mode = parameters[0]; - auto& regs = maxwell3d.regs; - const u32 raster_enabled = maxwell3d.regs.conservative_raster_enable; - const u32 scratch_data = maxwell3d.regs.shadow_scratch[52]; - regs.raster_bounding_box.raw = raster_mode & 0xFFFFF00F; - regs.raster_bounding_box.pad.Assign(scratch_data & raster_enabled); - } -}; - -template -class HLE_ClearConstBuffer final : public HLEMacroImpl { -public: - explicit HLE_ClearConstBuffer(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - - void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { - maxwell3d.RefreshParameters(); - static constexpr std::array zeroes{}; - auto& regs = maxwell3d.regs; - regs.const_buffer.size = u32(base_size); - regs.const_buffer.address_high = parameters[0]; - regs.const_buffer.address_low = parameters[1]; - regs.const_buffer.offset = 0; - maxwell3d.ProcessCBMultiData(zeroes.data(), parameters[2] * 4); - } -}; - -class HLE_ClearMemory final : public HLEMacroImpl { -public: - explicit HLE_ClearMemory(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - - void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { - maxwell3d.RefreshParameters(); - - const u32 needed_memory = parameters[2] / sizeof(u32); - if (needed_memory > zero_memory.size()) { - zero_memory.resize(needed_memory, 0); - } - auto& regs = maxwell3d.regs; - regs.upload.line_length_in = parameters[2]; - regs.upload.line_count = 1; - regs.upload.dest.address_high = parameters[0]; - 
regs.upload.dest.address_low = parameters[1]; - maxwell3d.CallMethod(size_t(MAXWELL3D_REG_INDEX(launch_dma)), 0x1011, true); - maxwell3d.CallMultiMethod(size_t(MAXWELL3D_REG_INDEX(inline_data)), zero_memory.data(), needed_memory, needed_memory); - } - -private: - std::vector zero_memory; -}; - -class HLE_TransformFeedbackSetup final : public HLEMacroImpl { -public: - explicit HLE_TransformFeedbackSetup(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - - void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { - maxwell3d.RefreshParameters(); - - auto& regs = maxwell3d.regs; - regs.transform_feedback_enabled = 1; - regs.transform_feedback.buffers[0].start_offset = 0; - regs.transform_feedback.buffers[1].start_offset = 0; - regs.transform_feedback.buffers[2].start_offset = 0; - regs.transform_feedback.buffers[3].start_offset = 0; - - regs.upload.line_length_in = 4; - regs.upload.line_count = 1; - regs.upload.dest.address_high = parameters[0]; - regs.upload.dest.address_low = parameters[1]; - maxwell3d.CallMethod(size_t(MAXWELL3D_REG_INDEX(launch_dma)), 0x1011, true); - maxwell3d.CallMethod(size_t(MAXWELL3D_REG_INDEX(inline_data)), regs.transform_feedback.controls[0].stride, true); - - maxwell3d.Rasterizer().RegisterTransformFeedback(regs.upload.dest.Address()); - } -}; - -} // Anonymous namespace - -HLEMacro::HLEMacro(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {} - -HLEMacro::~HLEMacro() = default; - -std::unique_ptr HLEMacro::GetHLEProgram(u64 hash) const { - // Compiler will make you a GREAT job at making an ad-hoc hash table :) - switch (hash) { - case 0x0D61FC9FAAC9FCADULL: return std::make_unique>(maxwell3d); - case 0x8A4D173EB99A8603ULL: return std::make_unique>(maxwell3d); - case 0x771BB18C62444DA0ULL: return std::make_unique>(maxwell3d); - case 0x0217920100488FF7ULL: return std::make_unique>(maxwell3d); - case 0x3F5E74B9C9A50164ULL: return std::make_unique(maxwell3d); - case 0xEAD26C3E2109B06BULL: return 
std::make_unique(maxwell3d); - case 0xC713C83D8F63CCF3ULL: return std::make_unique(maxwell3d); - case 0xD7333D26E0A93EDEULL: return std::make_unique(maxwell3d); - case 0xEB29B2A09AA06D38ULL: return std::make_unique(maxwell3d); - case 0xDB1341DBEB4C8AF7ULL: return std::make_unique(maxwell3d); - case 0x6C97861D891EDf7EULL: return std::make_unique>(maxwell3d); - case 0xD246FDDF3A6173D7ULL: return std::make_unique>(maxwell3d); - case 0xEE4D0004BEC8ECF4ULL: return std::make_unique(maxwell3d); - case 0xFC0CF27F5FFAA661ULL: return std::make_unique(maxwell3d); - case 0xB5F74EDB717278ECULL: return std::make_unique(maxwell3d); - default: - return nullptr; + maxwell3d.draw_manager->DrawArray(topology, vertex_first, vertex_count, base_instance, instance_count); + if (extended) { + maxwell3d.regs.global_base_instance_index = 0; + maxwell3d.engine_state = Maxwell3D::EngineHint::None; + maxwell3d.replace_table.clear(); } } -namespace { -class MacroInterpreterImpl final : public CachedMacro { -public: - explicit MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_, const std::vector& code_) - : CachedMacro(maxwell3d_) - , code{code_} - {} +void HLE_DrawIndexedIndirect::Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, [[maybe_unused]] u32 method) { + auto topology = static_cast(parameters[0]); + if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) { + Fallback(maxwell3d, parameters); + return; + } - void Execute(const std::vector& params, u32 method) override; + const u32 estimate = u32(maxwell3d.EstimateIndexBufferSize()); + const u32 element_base = parameters[4]; + const u32 base_instance = parameters[5]; + maxwell3d.regs.vertex_id_base = element_base; + maxwell3d.regs.global_base_vertex_index = element_base; + maxwell3d.regs.global_base_instance_index = base_instance; + maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + if (extended) { + maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; + maxwell3d.SetHLEReplacementAttributeType(0, 
0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex); + maxwell3d.SetHLEReplacementAttributeType(0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance); + } + auto& params = maxwell3d.draw_manager->GetIndirectParams(); + params.is_byte_count = false; + params.is_indexed = true; + params.include_count = false; + params.count_start_address = 0; + params.indirect_start_address = maxwell3d.GetMacroAddress(1); + params.buffer_size = 5 * sizeof(u32); + params.max_draw_counts = 1; + params.stride = 0; + maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, estimate); + maxwell3d.regs.vertex_id_base = 0x0; + maxwell3d.regs.global_base_vertex_index = 0x0; + maxwell3d.regs.global_base_instance_index = 0x0; + if (extended) { + maxwell3d.engine_state = Maxwell3D::EngineHint::None; + maxwell3d.replace_table.clear(); + } +} +void HLE_DrawIndexedIndirect::Fallback(Engines::Maxwell3D& maxwell3d, std::span parameters) { + maxwell3d.RefreshParameters(); + const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); + const u32 element_base = parameters[4]; + const u32 base_instance = parameters[5]; + maxwell3d.regs.vertex_id_base = element_base; + maxwell3d.regs.global_base_vertex_index = element_base; + maxwell3d.regs.global_base_instance_index = base_instance; + maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + if (extended) { + maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; + maxwell3d.SetHLEReplacementAttributeType(0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex); + maxwell3d.SetHLEReplacementAttributeType(0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance); + } + maxwell3d.draw_manager->DrawIndex(Tegra::Maxwell3D::Regs::PrimitiveTopology(parameters[0]), parameters[3], parameters[1], element_base, base_instance, instance_count); + maxwell3d.regs.vertex_id_base = 0x0; + maxwell3d.regs.global_base_vertex_index = 0x0; + 
maxwell3d.regs.global_base_instance_index = 0x0; + if (extended) { + maxwell3d.engine_state = Maxwell3D::EngineHint::None; + maxwell3d.replace_table.clear(); + } +} +void HLE_MultiLayerClear::Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, [[maybe_unused]] u32 method) { + maxwell3d.RefreshParameters(); + ASSERT(parameters.size() == 1); -private: - /// Resets the execution engine state, zeroing registers, etc. - void Reset(); + const Maxwell3D::Regs::ClearSurface clear_params{parameters[0]}; + const u32 rt_index = clear_params.RT; + const u32 num_layers = maxwell3d.regs.rt[rt_index].depth; + ASSERT(clear_params.layer == 0); - /** - * Executes a single macro instruction located at the current program counter. Returns whether - * the interpreter should keep running. - * - * @param is_delay_slot Whether the current step is being executed due to a delay slot in a - * previous instruction. - */ - bool Step(bool is_delay_slot); + maxwell3d.regs.clear_surface.raw = clear_params.raw; + maxwell3d.draw_manager->Clear(num_layers); +} +void HLE_MultiDrawIndexedIndirectCount::Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, [[maybe_unused]] u32 method) { + const auto topology = Maxwell3D::Regs::PrimitiveTopology(parameters[2]); + if (!IsTopologySafe(topology)) { + Fallback(maxwell3d, parameters); + return; + } - /// Calculates the result of an ALU operation. src_a OP src_b; - u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b); + const u32 start_indirect = parameters[0]; + const u32 end_indirect = parameters[1]; + if (start_indirect >= end_indirect) { + // Nothing to do. + return; + } - /// Performs the result operation on the input result and stores it in the specified register - /// (if necessary). - void ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result); + const u32 padding = parameters[3]; // padding is in words - /// Evaluates the branch condition and returns whether the branch should be taken or not. 
- bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const; + // size of each indirect segment + const u32 indirect_words = 5 + padding; + const u32 stride = indirect_words * sizeof(u32); + const std::size_t draw_count = end_indirect - start_indirect; + const u32 estimate = static_cast(maxwell3d.EstimateIndexBufferSize()); + maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + auto& params = maxwell3d.draw_manager->GetIndirectParams(); + params.is_byte_count = false; + params.is_indexed = true; + params.include_count = true; + params.count_start_address = maxwell3d.GetMacroAddress(4); + params.indirect_start_address = maxwell3d.GetMacroAddress(5); + params.buffer_size = stride * draw_count; + params.max_draw_counts = draw_count; + params.stride = stride; + maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; + maxwell3d.SetHLEReplacementAttributeType(0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex); + maxwell3d.SetHLEReplacementAttributeType(0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance); + maxwell3d.SetHLEReplacementAttributeType(0, 0x648, Maxwell3D::HLEReplacementAttributeType::DrawID); + maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, estimate); + maxwell3d.engine_state = Maxwell3D::EngineHint::None; + maxwell3d.replace_table.clear(); +} +void HLE_MultiDrawIndexedIndirectCount::Fallback(Engines::Maxwell3D& maxwell3d, std::span parameters) { + SCOPE_EXIT { + // Clean everything. + maxwell3d.regs.vertex_id_base = 0x0; + maxwell3d.engine_state = Maxwell3D::EngineHint::None; + maxwell3d.replace_table.clear(); + }; + maxwell3d.RefreshParameters(); + const u32 start_indirect = parameters[0]; + const u32 end_indirect = parameters[1]; + if (start_indirect >= end_indirect) { + // Nothing to do. 
+ return; + } + const auto topology = static_cast(parameters[2]); + const u32 padding = parameters[3]; + const std::size_t max_draws = parameters[4]; + const u32 indirect_words = 5 + padding; + const std::size_t first_draw = start_indirect; + const std::size_t effective_draws = end_indirect - start_indirect; + const std::size_t last_draw = start_indirect + (std::min)(effective_draws, max_draws); + for (std::size_t index = first_draw; index < last_draw; index++) { + const std::size_t base = index * indirect_words + 5; + const u32 base_vertex = parameters[base + 3]; + const u32 base_instance = parameters[base + 4]; + maxwell3d.regs.vertex_id_base = base_vertex; + maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; + maxwell3d.SetHLEReplacementAttributeType(0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex); + maxwell3d.SetHLEReplacementAttributeType(0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance); + maxwell3d.CallMethod(0x8e3, 0x648, true); + maxwell3d.CallMethod(0x8e4, static_cast(index), true); + maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + maxwell3d.draw_manager->DrawIndex(topology, parameters[base + 2], parameters[base], base_vertex, base_instance, parameters[base + 1]); + } +} +void HLE_DrawIndirectByteCount::Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, [[maybe_unused]] u32 method) { + const bool force = maxwell3d.Rasterizer().HasDrawTransformFeedback(); + auto topology = Maxwell3D::Regs::PrimitiveTopology(parameters[0] & 0xFFFFU); + if (!force && (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology))) { + Fallback(maxwell3d, parameters); + return; + } + auto& params = maxwell3d.draw_manager->GetIndirectParams(); + params.is_byte_count = true; + params.is_indexed = false; + params.include_count = false; + params.count_start_address = 0; + params.indirect_start_address = maxwell3d.GetMacroAddress(2); + params.buffer_size = 4; + params.max_draw_counts = 1; + params.stride = 
parameters[1]; + maxwell3d.regs.draw.begin = parameters[0]; + maxwell3d.regs.draw_auto_stride = parameters[1]; + maxwell3d.regs.draw_auto_byte_count = parameters[2]; + maxwell3d.draw_manager->DrawArrayIndirect(topology); +} +void HLE_DrawIndirectByteCount::Fallback(Engines::Maxwell3D& maxwell3d, std::span parameters) { + maxwell3d.RefreshParameters(); - /// Reads an opcode at the current program counter location. - Macro::Opcode GetOpcode() const; + maxwell3d.regs.draw.begin = parameters[0]; + maxwell3d.regs.draw_auto_stride = parameters[1]; + maxwell3d.regs.draw_auto_byte_count = parameters[2]; - /// Returns the specified register's value. Register 0 is hardcoded to always return 0. - u32 GetRegister(u32 register_id) const; + maxwell3d.draw_manager->DrawArray( + maxwell3d.regs.draw.topology, 0, + maxwell3d.regs.draw_auto_byte_count / maxwell3d.regs.draw_auto_stride, 0, 1); +} +void HLE_C713C83D8F63CCF3::Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, [[maybe_unused]] u32 method) { + maxwell3d.RefreshParameters(); + const u32 offset = (parameters[0] & 0x3FFFFFFF) << 2; + const u32 address = maxwell3d.regs.shadow_scratch[24]; + auto& const_buffer = maxwell3d.regs.const_buffer; + const_buffer.size = 0x7000; + const_buffer.address_high = (address >> 24) & 0xFF; + const_buffer.address_low = address << 8; + const_buffer.offset = offset; +} +void HLE_D7333D26E0A93EDE::Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, [[maybe_unused]] u32 method) { + maxwell3d.RefreshParameters(); + const size_t index = parameters[0]; + const u32 address = maxwell3d.regs.shadow_scratch[42 + index]; + const u32 size = maxwell3d.regs.shadow_scratch[47 + index]; + auto& const_buffer = maxwell3d.regs.const_buffer; + const_buffer.size = size; + const_buffer.address_high = (address >> 24) & 0xFF; + const_buffer.address_low = address << 8; +} +void HLE_BindShader::Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, [[maybe_unused]] u32 method) { + 
maxwell3d.RefreshParameters(); + auto& regs = maxwell3d.regs; + const u32 index = parameters[0]; + if ((parameters[1] - regs.shadow_scratch[28 + index]) == 0) { + return; + } - /// Sets the register to the input value. - void SetRegister(u32 register_id, u32 value); + regs.pipelines[index & 0xF].offset = parameters[2]; + maxwell3d.dirty.flags[VideoCommon::Dirty::Shaders] = true; + regs.shadow_scratch[28 + index] = parameters[1]; + regs.shadow_scratch[34 + index] = parameters[2]; - /// Sets the method address to use for the next Send instruction. - void SetMethodAddress(u32 address); + const u32 address = parameters[4]; + auto& const_buffer = regs.const_buffer; + const_buffer.size = 0x10000; + const_buffer.address_high = (address >> 24) & 0xFF; + const_buffer.address_low = address << 8; - /// Calls a GPU Engine method with the input parameter. - void Send(u32 value); + const size_t bind_group_id = parameters[3] & 0x7F; + auto& bind_group = regs.bind_groups[bind_group_id]; + bind_group.raw_config = 0x11; + maxwell3d.ProcessCBBind(bind_group_id); +} +void HLE_SetRasterBoundingBox::Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, [[maybe_unused]] u32 method) { + maxwell3d.RefreshParameters(); + const u32 raster_mode = parameters[0]; + auto& regs = maxwell3d.regs; + const u32 raster_enabled = maxwell3d.regs.conservative_raster_enable; + const u32 scratch_data = maxwell3d.regs.shadow_scratch[52]; + regs.raster_bounding_box.raw = raster_mode & 0xFFFFF00F; + regs.raster_bounding_box.pad.Assign(scratch_data & raster_enabled); +} +void HLE_ClearConstBuffer::Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, [[maybe_unused]] u32 method) { + static constexpr std::array zeroes{}; //must be bigger than either 7000 or 5F00 + maxwell3d.RefreshParameters(); + auto& regs = maxwell3d.regs; + regs.const_buffer.size = u32(base_size); + regs.const_buffer.address_high = parameters[0]; + regs.const_buffer.address_low = parameters[1]; + regs.const_buffer.offset = 0; + 
maxwell3d.ProcessCBMultiData(zeroes.data(), parameters[2] * 4); +} +void HLE_ClearMemory::Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, [[maybe_unused]] u32 method) { + maxwell3d.RefreshParameters(); + const u32 needed_memory = parameters[2] / sizeof(u32); + if (needed_memory > zero_memory.size()) { + zero_memory.resize(needed_memory, 0); + } + auto& regs = maxwell3d.regs; + regs.upload.line_length_in = parameters[2]; + regs.upload.line_count = 1; + regs.upload.dest.address_high = parameters[0]; + regs.upload.dest.address_low = parameters[1]; + maxwell3d.CallMethod(size_t(MAXWELL3D_REG_INDEX(launch_dma)), 0x1011, true); + maxwell3d.CallMultiMethod(size_t(MAXWELL3D_REG_INDEX(inline_data)), zero_memory.data(), needed_memory, needed_memory); +} +void HLE_TransformFeedbackSetup::Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, [[maybe_unused]] u32 method) { + maxwell3d.RefreshParameters(); + auto& regs = maxwell3d.regs; + regs.transform_feedback_enabled = 1; + regs.transform_feedback.buffers[0].start_offset = 0; + regs.transform_feedback.buffers[1].start_offset = 0; + regs.transform_feedback.buffers[2].start_offset = 0; + regs.transform_feedback.buffers[3].start_offset = 0; + regs.upload.line_length_in = 4; + regs.upload.line_count = 1; + regs.upload.dest.address_high = parameters[0]; + regs.upload.dest.address_low = parameters[1]; + maxwell3d.CallMethod(size_t(MAXWELL3D_REG_INDEX(launch_dma)), 0x1011, true); + maxwell3d.CallMethod(size_t(MAXWELL3D_REG_INDEX(inline_data)), regs.transform_feedback.controls[0].stride, true); + maxwell3d.Rasterizer().RegisterTransformFeedback(regs.upload.dest.Address()); +} - /// Reads a GPU register located at the method address. 
- u32 Read(u32 method) const; +#define HLE_MACRO_LIST \ + HLE_MACRO_ELEM(0x0D61FC9FAAC9FCADULL, HLE_DrawArraysIndirect, (false)) \ + HLE_MACRO_ELEM(0x8A4D173EB99A8603ULL, HLE_DrawArraysIndirect, (true)) \ + HLE_MACRO_ELEM(0x771BB18C62444DA0ULL, HLE_DrawIndexedIndirect, (false)) \ + HLE_MACRO_ELEM(0x0217920100488FF7ULL, HLE_DrawIndexedIndirect, (true)) \ + HLE_MACRO_ELEM(0x3F5E74B9C9A50164ULL, HLE_MultiDrawIndexedIndirectCount, ()) \ + HLE_MACRO_ELEM(0xEAD26C3E2109B06BULL, HLE_MultiLayerClear, ()) \ + HLE_MACRO_ELEM(0xC713C83D8F63CCF3ULL, HLE_C713C83D8F63CCF3, ()) \ + HLE_MACRO_ELEM(0xD7333D26E0A93EDEULL, HLE_D7333D26E0A93EDE, ()) \ + HLE_MACRO_ELEM(0xEB29B2A09AA06D38ULL, HLE_BindShader, ()) \ + HLE_MACRO_ELEM(0xDB1341DBEB4C8AF7ULL, HLE_SetRasterBoundingBox, ()) \ + HLE_MACRO_ELEM(0x6C97861D891EDf7EULL, HLE_ClearConstBuffer, (0x5F00)) \ + HLE_MACRO_ELEM(0xD246FDDF3A6173D7ULL, HLE_ClearConstBuffer, (0x7000)) \ + HLE_MACRO_ELEM(0xEE4D0004BEC8ECF4ULL, HLE_ClearMemory, ()) \ + HLE_MACRO_ELEM(0xFC0CF27F5FFAA661ULL, HLE_TransformFeedbackSetup, ()) \ + HLE_MACRO_ELEM(0xB5F74EDB717278ECULL, HLE_DrawIndirectByteCount, ()) \ - /// Returns the next parameter in the parameter queue. - u32 FetchParameter(); +// Allocates and returns a cached macro if the hash matches a known function. +[[nodiscard]] inline AnyCachedMacro GetHLEProgram(u64 hash) noexcept { + // Compiler will make you a GREAT job at making an ad-hoc hash table :) + switch (hash) { +#define HLE_MACRO_ELEM(HASH, TY, VAL) case HASH: return TY VAL; + HLE_MACRO_LIST +#undef HLE_MACRO_ELEM + default: return std::monostate{}; + } +} +[[nodiscard]] inline bool CanBeHLEProgram(u64 hash) noexcept { + switch (hash) { +#define HLE_MACRO_ELEM(HASH, TY, VAL) case HASH: return true; + HLE_MACRO_LIST +#undef HLE_MACRO_ELEM + default: return false; + } +} - /// Current program counter - u32 pc{}; - /// Program counter to execute at after the delay slot is executed. 
- std::optional delayed_pc; - - /// General purpose macro registers. - std::array registers = {}; - - /// Method address to use for the next Send instruction. - Macro::MethodAddress method_address = {}; - - /// Input parameters of the current macro. - std::unique_ptr parameters; - std::size_t num_parameters = 0; - std::size_t parameters_capacity = 0; - /// Index of the next parameter that will be fetched by the 'parm' instruction. - u32 next_parameter_index = 0; - - bool carry_flag = false; - const std::vector& code; -}; - -void MacroInterpreterImpl::Execute(const std::vector& params, u32 method) { +void MacroInterpreterImpl::Execute(Engines::Maxwell3D& maxwell3d, std::span params, u32 method) { Reset(); registers[1] = params[0]; - num_parameters = params.size(); - - if (num_parameters > parameters_capacity) { - parameters_capacity = num_parameters; - parameters = std::make_unique(num_parameters); - } - std::memcpy(parameters.get(), params.data(), num_parameters * sizeof(u32)); + parameters.resize(params.size()); + std::memcpy(parameters.data(), params.data(), params.size() * sizeof(u32)); // Execute the code until we hit an exit condition. bool keep_executing = true; while (keep_executing) { - keep_executing = Step(false); + keep_executing = Step(maxwell3d, false); } // Assert the the macro used all the input parameters - ASSERT(next_parameter_index == num_parameters); + ASSERT(next_parameter_index == parameters.size()); } +/// Resets the execution engine state, zeroing registers, etc. void MacroInterpreterImpl::Reset() { registers = {}; pc = 0; delayed_pc = {}; method_address.raw = 0; - num_parameters = 0; + // Vector must hold its last indices otherwise wonky shit will happen // The next parameter index starts at 1, because $r1 already has the value of the first // parameter. 
next_parameter_index = 1; carry_flag = false; } -bool MacroInterpreterImpl::Step(bool is_delay_slot) { +/// @brief Executes a single macro instruction located at the current program counter. Returns whether +/// the interpreter should keep running. +/// @param is_delay_slot Whether the current step is being executed due to a delay slot in a previous instruction. +bool MacroInterpreterImpl::Step(Engines::Maxwell3D& maxwell3d, bool is_delay_slot) { u32 base_address = pc; Macro::Opcode opcode = GetOpcode(); @@ -682,14 +493,12 @@ bool MacroInterpreterImpl::Step(bool is_delay_slot) { switch (opcode.operation) { case Macro::Operation::ALU: { - u32 result = GetALUResult(opcode.alu_operation, GetRegister(opcode.src_a), - GetRegister(opcode.src_b)); - ProcessResult(opcode.result_operation, opcode.dst, result); + u32 result = GetALUResult(opcode.alu_operation, GetRegister(opcode.src_a), GetRegister(opcode.src_b)); + ProcessResult(maxwell3d, opcode.result_operation, opcode.dst, result); break; } case Macro::Operation::AddImmediate: { - ProcessResult(opcode.result_operation, opcode.dst, - GetRegister(opcode.src_a) + opcode.immediate); + ProcessResult(maxwell3d, opcode.result_operation, opcode.dst, GetRegister(opcode.src_a) + opcode.immediate); break; } case Macro::Operation::ExtractInsert: { @@ -699,7 +508,7 @@ bool MacroInterpreterImpl::Step(bool is_delay_slot) { src = (src >> opcode.bf_src_bit) & opcode.GetBitfieldMask(); dst &= ~(opcode.GetBitfieldMask() << opcode.bf_dst_bit); dst |= src << opcode.bf_dst_bit; - ProcessResult(opcode.result_operation, opcode.dst, dst); + ProcessResult(maxwell3d, opcode.result_operation, opcode.dst, dst); break; } case Macro::Operation::ExtractShiftLeftImmediate: { @@ -708,7 +517,7 @@ bool MacroInterpreterImpl::Step(bool is_delay_slot) { u32 result = ((src >> dst) & opcode.GetBitfieldMask()) << opcode.bf_dst_bit; - ProcessResult(opcode.result_operation, opcode.dst, result); + ProcessResult(maxwell3d, opcode.result_operation, opcode.dst, 
result); break; } case Macro::Operation::ExtractShiftLeftRegister: { @@ -717,12 +526,12 @@ bool MacroInterpreterImpl::Step(bool is_delay_slot) { u32 result = ((src >> opcode.bf_src_bit) & opcode.GetBitfieldMask()) << dst; - ProcessResult(opcode.result_operation, opcode.dst, result); + ProcessResult(maxwell3d, opcode.result_operation, opcode.dst, result); break; } case Macro::Operation::Read: { - u32 result = Read(GetRegister(opcode.src_a) + opcode.immediate); - ProcessResult(opcode.result_operation, opcode.dst, result); + u32 result = Read(maxwell3d, GetRegister(opcode.src_a) + opcode.immediate); + ProcessResult(maxwell3d, opcode.result_operation, opcode.dst, result); break; } case Macro::Operation::Branch: { @@ -738,7 +547,7 @@ bool MacroInterpreterImpl::Step(bool is_delay_slot) { delayed_pc = base_address + opcode.GetBranchTarget(); // Execute one more instruction due to the delay slot. - return Step(true); + return Step(maxwell3d, true); } break; } @@ -751,13 +560,13 @@ bool MacroInterpreterImpl::Step(bool is_delay_slot) { // cause an exit if it's executed inside a delay slot. if (opcode.is_exit && !is_delay_slot) { // Exit has a delay slot, execute the next instruction - Step(true); + Step(maxwell3d, true); return false; } - return true; } +/// Calculates the result of an ALU operation. src_a OP src_b; u32 MacroInterpreterImpl::GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b) { switch (operation) { case Macro::ALUOperation::Add: { @@ -797,7 +606,8 @@ u32 MacroInterpreterImpl::GetALUResult(Macro::ALUOperation operation, u32 src_a, } } -void MacroInterpreterImpl::ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result) { +/// Performs the result operation on the input result and stores it in the specified register (if necessary). 
+void MacroInterpreterImpl::ProcessResult(Engines::Maxwell3D& maxwell3d, Macro::ResultOperation operation, u32 reg, u32 result) { switch (operation) { case Macro::ResultOperation::IgnoreAndFetch: // Fetch parameter and ignore result. @@ -815,12 +625,12 @@ void MacroInterpreterImpl::ProcessResult(Macro::ResultOperation operation, u32 r case Macro::ResultOperation::FetchAndSend: // Fetch parameter and send result. SetRegister(reg, FetchParameter()); - Send(result); + Send(maxwell3d, result); break; case Macro::ResultOperation::MoveAndSend: // Move and send result. SetRegister(reg, result); - Send(result); + Send(maxwell3d, result); break; case Macro::ResultOperation::FetchAndSetMethod: // Fetch parameter and use result as Method Address. @@ -831,13 +641,13 @@ void MacroInterpreterImpl::ProcessResult(Macro::ResultOperation operation, u32 r // Move result and use as Method Address, then fetch and send parameter. SetRegister(reg, result); SetMethodAddress(result); - Send(FetchParameter()); + Send(maxwell3d, FetchParameter()); break; case Macro::ResultOperation::MoveAndSetMethodSend: // Move result and use as Method Address, then send bits 12:17 of result. SetRegister(reg, result); SetMethodAddress(result); - Send((result >> 12) & 0b111111); + Send(maxwell3d, (result >> 12) & 0b111111); break; default: UNIMPLEMENTED_MSG("Unimplemented result operation {}", operation); @@ -845,6 +655,7 @@ void MacroInterpreterImpl::ProcessResult(Macro::ResultOperation operation, u32 r } } +/// Evaluates the branch condition and returns whether the branch should be taken or not. bool MacroInterpreterImpl::EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const { switch (cond) { case Macro::BranchCondition::Zero: @@ -855,46 +666,44 @@ bool MacroInterpreterImpl::EvaluateBranchCondition(Macro::BranchCondition cond, UNREACHABLE(); } +/// Reads an opcode at the current program counter location. 
Macro::Opcode MacroInterpreterImpl::GetOpcode() const { ASSERT((pc % sizeof(u32)) == 0); ASSERT(pc < code.size() * sizeof(u32)); return {code[pc / sizeof(u32)]}; } +/// Returns the specified register's value. Register 0 is hardcoded to always return 0. u32 MacroInterpreterImpl::GetRegister(u32 register_id) const { - return registers.at(register_id); + return registers[register_id]; } +/// Sets the register to the input value. void MacroInterpreterImpl::SetRegister(u32 register_id, u32 value) { // Register 0 is hardwired as the zero register. // Ensure no writes to it actually occur. - if (register_id == 0) { + if (register_id == 0) return; - } - - registers.at(register_id) = value; + registers[register_id] = value; } -void MacroInterpreterImpl::SetMethodAddress(u32 address) { - method_address.raw = address; -} - -void MacroInterpreterImpl::Send(u32 value) { +/// Calls a GPU Engine method with the input parameter. +void MacroInterpreterImpl::Send(Engines::Maxwell3D& maxwell3d, u32 value) { maxwell3d.CallMethod(method_address.address, value, true); // Increment the method address by the method increment. - method_address.address.Assign(method_address.address.Value() + - method_address.increment.Value()); + method_address.address.Assign(method_address.address.Value() + method_address.increment.Value()); } -u32 MacroInterpreterImpl::Read(u32 method) const { +/// Reads a GPU register located at the method address. +u32 MacroInterpreterImpl::Read(Engines::Maxwell3D& maxwell3d, u32 method) const { return maxwell3d.GetRegisterValue(method); } +/// Returns the next parameter in the parameter queue. 
u32 MacroInterpreterImpl::FetchParameter() { - ASSERT(next_parameter_index < num_parameters); + ASSERT(next_parameter_index < parameters.size()); return parameters[next_parameter_index++]; } -} // Anonymous namespace #ifdef ARCHITECTURE_x86_64 namespace { @@ -930,17 +739,15 @@ static const auto default_cg_mode = Xbyak::DontSetProtectRWE; static const auto default_cg_mode = nullptr; //Allow RWE #endif -class MacroJITx64Impl final : public Xbyak::CodeGenerator, public CachedMacro { -public: - explicit MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector& code_) +struct MacroJITx64Impl final : public Xbyak::CodeGenerator, public DynamicCachedMacro { + explicit MacroJITx64Impl(std::span code_) : Xbyak::CodeGenerator(MAX_CODE_SIZE, default_cg_mode) - , CachedMacro(maxwell3d_) , code{code_} { Compile(); } - void Execute(const std::vector& parameters, u32 method) override; + void Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, u32 method) override; void Compile_ALU(Macro::Opcode opcode); void Compile_AddImmediate(Macro::Opcode opcode); @@ -950,18 +757,13 @@ public: void Compile_Read(Macro::Opcode opcode); void Compile_Branch(Macro::Opcode opcode); -private: void Optimizer_ScanFlags(); - void Compile(); bool Compile_NextInstruction(); - Xbyak::Reg32 Compile_FetchParameter(); Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst); - void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg); void Compile_Send(Xbyak::Reg32 value); - Macro::Opcode GetOpCode() const; struct JITState { @@ -981,21 +783,17 @@ private: bool enable_asserts{}; }; OptimizerState optimizer{}; - std::optional next_opcode{}; ProgramType program{nullptr}; - std::array labels; std::array delay_skip; Xbyak::Label end_of_code{}; - bool is_delay_slot{}; u32 pc{}; - - const std::vector& code; + std::span code; }; -void MacroJITx64Impl::Execute(const std::vector& parameters, u32 method) { +void MacroJITx64Impl::Execute(Engines::Maxwell3D& maxwell3d, std::span 
parameters, u32 method) { ASSERT_OR_EXECUTE(program != nullptr, { return; }); JITState state{}; state.maxwell3d = &maxwell3d; @@ -1231,7 +1029,7 @@ void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) { Compile_ProcessResult(opcode.result_operation, opcode.dst); } -void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) { +static void MacroJIT_SendThunk(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) { maxwell3d->CallMethod(method_address.address, value, true); } @@ -1240,7 +1038,7 @@ void MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { mov(Common::X64::ABI_PARAM1, qword[STATE]); mov(Common::X64::ABI_PARAM2.cvt32(), METHOD_ADDRESS); mov(Common::X64::ABI_PARAM3.cvt32(), value); - Common::X64::CallFarFunction(*this, &Send); + Common::X64::CallFarFunction(*this, &MacroJIT_SendThunk); Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); Xbyak::Label dont_process{}; @@ -1452,10 +1250,8 @@ bool MacroJITx64Impl::Compile_NextInstruction() { return true; } -static void WarnInvalidParameter(uintptr_t parameter, uintptr_t max_parameter) { - LOG_CRITICAL(HW_GPU, - "Macro JIT: invalid parameter access 0x{:x} (0x{:x} is the last parameter)", - parameter, max_parameter - sizeof(u32)); +static void MacroJIT_ErrorThunk(uintptr_t parameter, uintptr_t max_parameter) { + LOG_CRITICAL(HW_GPU, "Macro JIT: invalid parameter access 0x{:x} (0x{:x} is the last parameter)", parameter, max_parameter - sizeof(u32)); } Xbyak::Reg32 MacroJITx64Impl::Compile_FetchParameter() { @@ -1465,7 +1261,7 @@ Xbyak::Reg32 MacroJITx64Impl::Compile_FetchParameter() { Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); mov(Common::X64::ABI_PARAM1, PARAMETERS); mov(Common::X64::ABI_PARAM2, MAX_PARAMETER); - Common::X64::CallFarFunction(*this, &WarnInvalidParameter); + Common::X64::CallFarFunction(*this, &MacroJIT_ErrorThunk); Common::X64::ABI_PopRegistersAndAdjustStack(*this, 
PersistentCallerSavedRegs(), 0); L(parameter_ok); mov(eax, dword[PARAMETERS]); @@ -1574,33 +1370,42 @@ static void Dump(u64 hash, std::span code, bool decompiled = false) { macro_file.write(reinterpret_cast(code.data()), code.size_bytes()); } -MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d_, bool is_interpreted_) - : hle_macros{std::make_optional(maxwell3d_)} - , maxwell3d{maxwell3d_} - , is_interpreted{is_interpreted_} -{} - -MacroEngine::~MacroEngine() = default; - -void MacroEngine::AddCode(u32 method, u32 data) { - uploaded_macro_code[method].push_back(data); -} - -void MacroEngine::ClearCode(u32 method) { - macro_cache.erase(method); - uploaded_macro_code.erase(method); -} - -void MacroEngine::Execute(u32 method, const std::vector& parameters) { - auto compiled_macro = macro_cache.find(method); - if (compiled_macro != macro_cache.end()) { - const auto& cache_info = compiled_macro->second; - if (cache_info.has_hle_program) { - cache_info.hle_program->Execute(parameters, method); - } else { - maxwell3d.RefreshParameters(); - cache_info.lle_program->Execute(parameters, method); - } +void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method, std::span parameters) { + auto const execute_variant = [&maxwell3d, ¶meters, method](AnyCachedMacro& acm) { + if (auto a = std::get_if(&acm)) + a->Execute(maxwell3d, parameters, method); + if (auto a = std::get_if(&acm)) + a->Execute(maxwell3d, parameters, method); + if (auto a = std::get_if(&acm)) + a->Execute(maxwell3d, parameters, method); + if (auto a = std::get_if(&acm)) + a->Execute(maxwell3d, parameters, method); + if (auto a = std::get_if(&acm)) + a->Execute(maxwell3d, parameters, method); + if (auto a = std::get_if(&acm)) + a->Execute(maxwell3d, parameters, method); + if (auto a = std::get_if(&acm)) + a->Execute(maxwell3d, parameters, method); + if (auto a = std::get_if(&acm)) + a->Execute(maxwell3d, parameters, method); + if (auto a = std::get_if(&acm)) + a->Execute(maxwell3d, parameters, method); + 
if (auto a = std::get_if(&acm)) + a->Execute(maxwell3d, parameters, method); + if (auto a = std::get_if(&acm)) + a->Execute(maxwell3d, parameters, method); + if (auto a = std::get_if(&acm)) + a->Execute(maxwell3d, parameters, method); + if (auto a = std::get_if(&acm)) + a->Execute(maxwell3d, parameters, method); + if (auto a = std::get_if>(&acm)) + a->get()->Execute(maxwell3d, parameters, method); + }; + if (auto const it = macro_cache.find(method); it != macro_cache.end()) { + auto& ci = it->second; + if (!CanBeHLEProgram(ci.hash) || Settings::values.disable_macro_hle) + maxwell3d.RefreshParameters(); //LLE must reload parameters + execute_variant(ci.program); } else { // Macro not compiled, check if it's uploaded and if so, compile it std::optional mid_method; @@ -1617,51 +1422,37 @@ void MacroEngine::Execute(u32 method, const std::vector& parameters) { return; } } - auto& cache_info = macro_cache[method]; - - if (!mid_method.has_value()) { - cache_info.lle_program = Compile(macro_code->second); - cache_info.hash = Common::HashValue(macro_code->second); - } else { + auto& ci = macro_cache[method]; + if (mid_method) { const auto& macro_cached = uploaded_macro_code[mid_method.value()]; const auto rebased_method = method - mid_method.value(); auto& code = uploaded_macro_code[method]; code.resize(macro_cached.size() - rebased_method); std::memcpy(code.data(), macro_cached.data() + rebased_method, code.size() * sizeof(u32)); - cache_info.hash = Common::HashValue(code); - cache_info.lle_program = Compile(code); - } - - auto hle_program = hle_macros->GetHLEProgram(cache_info.hash); - if (!hle_program || Settings::values.disable_macro_hle) { - maxwell3d.RefreshParameters(); - cache_info.lle_program->Execute(parameters, method); + ci.hash = Common::HashValue(code); + ci.program = Compile(maxwell3d, code); } else { - cache_info.has_hle_program = true; - cache_info.hle_program = std::move(hle_program); - cache_info.hle_program->Execute(parameters, method); + ci.program = 
Compile(maxwell3d, macro_code->second); + ci.hash = Common::HashValue(macro_code->second); } - + if (CanBeHLEProgram(ci.hash) && !Settings::values.disable_macro_hle) { + ci.program = GetHLEProgram(ci.hash); + } else { + maxwell3d.RefreshParameters(); + } + execute_variant(ci.program); if (Settings::values.dump_macros) { - Dump(cache_info.hash, macro_code->second, cache_info.has_hle_program); + Dump(ci.hash, macro_code->second, !std::holds_alternative(ci.program)); } } } -std::unique_ptr MacroEngine::Compile(const std::vector& code) { +AnyCachedMacro MacroEngine::Compile(Engines::Maxwell3D& maxwell3d, std::span code) { #ifdef ARCHITECTURE_x86_64 if (!is_interpreted) - return std::make_unique(maxwell3d, code); -#endif - return std::make_unique(maxwell3d, code); -} - -std::optional GetMacroEngine(Engines::Maxwell3D& maxwell3d) { -#ifdef ARCHITECTURE_x86_64 - return std::make_optional(maxwell3d, bool(Settings::values.disable_macro_jit)); -#else - return std::make_optional(maxwell3d, true); + return std::make_unique(code); #endif + return MacroInterpreterImpl(code); } } // namespace Tegra diff --git a/src/video_core/macro.h b/src/video_core/macro.h index 9bdb4219ce..a9a8f2de04 100644 --- a/src/video_core/macro.h +++ b/src/video_core/macro.h @@ -7,8 +7,10 @@ #pragma once #include -#include +#include +#include #include +#include #include "common/bit_field.h" #include "common/common_types.h" @@ -98,62 +100,142 @@ union MethodAddress { } // namespace Macro -class CachedMacro { -public: - CachedMacro(Engines::Maxwell3D& maxwell3d_) - : maxwell3d{maxwell3d_} - {} - virtual ~CachedMacro() = default; +struct HLEMacro { +}; +/// @note: these macros have two versions, a normal and extended version, with the extended version +/// also assigning the base vertex/instance. 
+struct HLE_DrawArraysIndirect final { + HLE_DrawArraysIndirect(bool extended_) noexcept : extended{extended_} {} + void Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, [[maybe_unused]] u32 method); + void Fallback(Engines::Maxwell3D& maxwell3d, std::span parameters); + bool extended; +}; +/// @note: these macros have two versions, a normal and extended version, with the extended version +/// also assigning the base vertex/instance. +struct HLE_DrawIndexedIndirect final { + explicit HLE_DrawIndexedIndirect(bool extended_) noexcept : extended{extended_} {} + void Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, [[maybe_unused]] u32 method); + void Fallback(Engines::Maxwell3D& maxwell3d, std::span parameters); + bool extended; +}; +struct HLE_MultiLayerClear final { + void Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, [[maybe_unused]] u32 method); +}; +struct HLE_MultiDrawIndexedIndirectCount final { + void Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, [[maybe_unused]] u32 method); + void Fallback(Engines::Maxwell3D& maxwell3d, std::span parameters); +}; +struct HLE_DrawIndirectByteCount final { + void Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, [[maybe_unused]] u32 method); + void Fallback(Engines::Maxwell3D& maxwell3d, std::span parameters); +}; +struct HLE_C713C83D8F63CCF3 final { + void Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, [[maybe_unused]] u32 method); +}; +struct HLE_D7333D26E0A93EDE final { + void Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, [[maybe_unused]] u32 method); +}; +struct HLE_BindShader final { + void Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, [[maybe_unused]] u32 method); +}; +struct HLE_SetRasterBoundingBox final { + void Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, [[maybe_unused]] u32 method); +}; +struct HLE_ClearConstBuffer final { + HLE_ClearConstBuffer(size_t base_size_) noexcept : 
base_size{base_size_} {} + void Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, [[maybe_unused]] u32 method); + size_t base_size; +}; +struct HLE_ClearMemory final { + void Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, [[maybe_unused]] u32 method); + std::vector zero_memory; +}; +struct HLE_TransformFeedbackSetup final { + void Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, [[maybe_unused]] u32 method); +}; +struct MacroInterpreterImpl final { + MacroInterpreterImpl() {} + MacroInterpreterImpl(std::span code_) : code{code_} {} + void Execute(Engines::Maxwell3D& maxwell3d, std::span params, u32 method); + void Reset(); + bool Step(Engines::Maxwell3D& maxwell3d, bool is_delay_slot); + u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b); + void ProcessResult(Engines::Maxwell3D& maxwell3d, Macro::ResultOperation operation, u32 reg, u32 result); + bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const; + Macro::Opcode GetOpcode() const; + u32 GetRegister(u32 register_id) const; + void SetRegister(u32 register_id, u32 value); + /// Sets the method address to use for the next Send instruction. + [[nodiscard]] inline void SetMethodAddress(u32 address) noexcept { + method_address.raw = address; + } + void Send(Engines::Maxwell3D& maxwell3d, u32 value); + u32 Read(Engines::Maxwell3D& maxwell3d, u32 method) const; + u32 FetchParameter(); + /// General purpose macro registers. + std::array registers = {}; + /// Input parameters of the current macro. + std::vector parameters; + std::span code; + /// Program counter to execute at after the delay slot is executed. + std::optional delayed_pc; + /// Method address to use for the next Send instruction. + Macro::MethodAddress method_address = {}; + /// Current program counter + u32 pc{}; + /// Index of the next parameter that will be fetched by the 'parm' instruction. 
+ u32 next_parameter_index = 0; + bool carry_flag = false; +}; +struct DynamicCachedMacro { + virtual ~DynamicCachedMacro() = default; /// Executes the macro code with the specified input parameters. /// @param parameters The parameters of the macro /// @param method The method to execute - virtual void Execute(const std::vector& parameters, u32 method) = 0; - Engines::Maxwell3D& maxwell3d; + virtual void Execute(Engines::Maxwell3D& maxwell3d, std::span parameters, u32 method) = 0; }; -class HLEMacro { -public: - explicit HLEMacro(Engines::Maxwell3D& maxwell3d_); - ~HLEMacro(); - // Allocates and returns a cached macro if the hash matches a known function. - // Returns nullptr otherwise. - [[nodiscard]] std::unique_ptr GetHLEProgram(u64 hash) const; -private: - Engines::Maxwell3D& maxwell3d; -}; - -class MacroEngine { -public: - explicit MacroEngine(Engines::Maxwell3D& maxwell3d, bool is_interpreted); - ~MacroEngine(); +using AnyCachedMacro = std::variant< + std::monostate, + HLEMacro, + HLE_DrawArraysIndirect, + HLE_DrawIndexedIndirect, + HLE_MultiDrawIndexedIndirectCount, + HLE_MultiLayerClear, + HLE_C713C83D8F63CCF3, + HLE_D7333D26E0A93EDE, + HLE_BindShader, + HLE_SetRasterBoundingBox, + HLE_ClearConstBuffer, + HLE_ClearMemory, + HLE_TransformFeedbackSetup, + HLE_DrawIndirectByteCount, + MacroInterpreterImpl, + // Used for JIT x86 macro + std::unique_ptr +>; +struct MacroEngine { + MacroEngine(bool is_interpreted_) noexcept : is_interpreted{is_interpreted_} {} // Store the uploaded macro code to compile them when they're called. - void AddCode(u32 method, u32 data); - + inline void AddCode(u32 method, u32 data) noexcept { + uploaded_macro_code[method].push_back(data); + } // Clear the code associated with a method. 
- void ClearCode(u32 method); - + inline void ClearCode(u32 method) noexcept { + macro_cache.erase(method); + uploaded_macro_code.erase(method); + } // Compiles the macro if its not in the cache, and executes the compiled macro - void Execute(u32 method, const std::vector& parameters); - -protected: - std::unique_ptr Compile(const std::vector& code); - -private: + void Execute(Engines::Maxwell3D& maxwell3d, u32 method, std::span parameters); + AnyCachedMacro Compile(Engines::Maxwell3D& maxwell3d, std::span code); struct CacheInfo { - std::unique_ptr lle_program{}; - std::unique_ptr hle_program{}; + AnyCachedMacro program; u64 hash{}; - bool has_hle_program{}; }; - ankerl::unordered_dense::map macro_cache; ankerl::unordered_dense::map> uploaded_macro_code; - std::optional hle_macros; - Engines::Maxwell3D& maxwell3d; bool is_interpreted; }; -std::optional GetMacroEngine(Engines::Maxwell3D& maxwell3d); - } // namespace Tegra From 2ed1328c93c4739c069e7a284cb82b0a72762dac Mon Sep 17 00:00:00 2001 From: lizzie Date: Fri, 6 Mar 2026 15:05:05 +0100 Subject: [PATCH 05/68] [vk] use static_vector instead of small_vector for TFB and other bindings (#3641) MK8D is a big offender, taking up lots of time memcpy'ing and memmov'ing small_vector<> AND to add salt to the wound it doesn't even do heap allocations (no game does I think) - so basically useless waste of compute time in hot path for NO reason :^) Signed-off-by: lizzie Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3641 Reviewed-by: CamilleLaVey Reviewed-by: DraVee Co-authored-by: lizzie Co-committed-by: lizzie --- .../buffer_cache/buffer_cache_base.h | 13 +++-- .../renderer_vulkan/vk_buffer_cache.cpp | 50 +++++++++---------- 2 files changed, 33 insertions(+), 30 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index 0596329392..08524bd854 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ 
b/src/video_core/buffer_cache/buffer_cache_base.h @@ -14,9 +14,12 @@ #include #include #include -#include #include +#include +#include +#include + #include "common/common_types.h" #include "common/div_ceil.h" #include "common/literals.h" @@ -94,10 +97,10 @@ static constexpr Binding NULL_BINDING{ template struct HostBindings { - boost::container::small_vector buffers; - boost::container::small_vector offsets; - boost::container::small_vector sizes; - boost::container::small_vector strides; + boost::container::static_vector buffers; + boost::container::static_vector offsets; + boost::container::static_vector sizes; + boost::container::static_vector strides; u32 min_index{NUM_VERTEX_BUFFERS}; u32 max_index{0}; }; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index f4345262fb..c842cce709 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -10,6 +10,7 @@ #include #include +#include "video_core/buffer_cache/buffer_cache_base.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" @@ -583,18 +584,18 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset } void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings& bindings) { - boost::container::small_vector buffer_handles; - for (u32 index = 0; index < bindings.buffers.size(); ++index) { - auto handle = bindings.buffers[index]->Handle(); + boost::container::static_vector buffer_handles(bindings.buffers.size()); + for (u32 i = 0; i < bindings.buffers.size(); ++i) { + auto handle = bindings.buffers[i]->Handle(); if (handle == VK_NULL_HANDLE) { - bindings.offsets[index] = 0; - bindings.sizes[index] = VK_WHOLE_SIZE; + bindings.offsets[i] = 0; + bindings.sizes[i] = VK_WHOLE_SIZE; if (!device.HasNullDescriptor()) { ReserveNullBuffer(); handle = *null_buffer; } } - 
buffer_handles.push_back(handle); + buffer_handles[i] = handle; } const u32 device_max = device.GetMaxVertexInputBindings(); const u32 min_binding = (std::min)(bindings.min_index, device_max); @@ -604,19 +605,12 @@ void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings& bi return; } if (device.IsExtExtendedDynamicStateSupported()) { - scheduler.Record([bindings_ = std::move(bindings), - buffer_handles_ = std::move(buffer_handles), - binding_count](vk::CommandBuffer cmdbuf) { - cmdbuf.BindVertexBuffers2EXT(bindings_.min_index, binding_count, buffer_handles_.data(), - bindings_.offsets.data(), bindings_.sizes.data(), - bindings_.strides.data()); + scheduler.Record([bindings_ = std::move(bindings), buffer_handles_ = std::move(buffer_handles), binding_count](vk::CommandBuffer cmdbuf) { + cmdbuf.BindVertexBuffers2EXT(bindings_.min_index, binding_count, buffer_handles_.data(), bindings_.offsets.data(), bindings_.sizes.data(), bindings_.strides.data()); }); } else { - scheduler.Record([bindings_ = std::move(bindings), - buffer_handles_ = std::move(buffer_handles), - binding_count](vk::CommandBuffer cmdbuf) { - cmdbuf.BindVertexBuffers(bindings_.min_index, binding_count, buffer_handles_.data(), - bindings_.offsets.data()); + scheduler.Record([bindings_ = std::move(bindings), buffer_handles_ = std::move(buffer_handles), binding_count](vk::CommandBuffer cmdbuf) { + cmdbuf.BindVertexBuffers(bindings_.min_index, binding_count, buffer_handles_.data(), bindings_.offsets.data()); }); } } @@ -647,15 +641,21 @@ void BufferCacheRuntime::BindTransformFeedbackBuffers(VideoCommon::HostBindings< // Already logged in the rasterizer return; } - boost::container::small_vector buffer_handles; - for (u32 index = 0; index < bindings.buffers.size(); ++index) { - buffer_handles.push_back(bindings.buffers[index]->Handle()); + boost::container::static_vector buffer_handles(bindings.buffers.size()); + for (u32 i = 0; i < bindings.buffers.size(); ++i) { + auto handle = 
bindings.buffers[i]->Handle(); + if (handle == VK_NULL_HANDLE) { + bindings.offsets[i] = 0; + bindings.sizes[i] = VK_WHOLE_SIZE; + if (!device.HasNullDescriptor()) { + ReserveNullBuffer(); + handle = *null_buffer; + } + } + buffer_handles[i] = handle; } - scheduler.Record([bindings_ = std::move(bindings), - buffer_handles_ = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) { - cmdbuf.BindTransformFeedbackBuffersEXT(0, static_cast(buffer_handles_.size()), - buffer_handles_.data(), bindings_.offsets.data(), - bindings_.sizes.data()); + scheduler.Record([bindings_ = std::move(bindings), buffer_handles_ = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) { + cmdbuf.BindTransformFeedbackBuffersEXT(0, u32(buffer_handles_.size()), buffer_handles_.data(), bindings_.offsets.data(), bindings_.sizes.data()); }); } From b75e81af5e11cb09eae405d51c40489401794912 Mon Sep 17 00:00:00 2001 From: lizzie Date: Fri, 6 Mar 2026 15:05:39 +0100 Subject: [PATCH 06/68] [video_core/engines] implement stub NV01 timer, inline other channel engines (#3640) Signed-off-by: lizzie Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3640 Reviewed-by: CamilleLaVey Reviewed-by: DraVee Co-authored-by: lizzie Co-committed-by: lizzie --- src/video_core/control/channel_state.cpp | 15 ++++--- src/video_core/control/channel_state.h | 44 +++++++++---------- src/video_core/engines/engine_interface.h | 3 +- src/video_core/engines/maxwell_3d.h | 2 +- src/video_core/engines/nv01_timer.h | 52 +++++++++++++++++++++++ src/video_core/engines/puller.cpp | 42 +++++++++--------- src/video_core/engines/puller.h | 4 ++ 7 files changed, 109 insertions(+), 53 deletions(-) create mode 100644 src/video_core/engines/nv01_timer.h diff --git a/src/video_core/control/channel_state.cpp b/src/video_core/control/channel_state.cpp index 2539997d53..d07c7e2a83 100644 --- a/src/video_core/control/channel_state.cpp +++ b/src/video_core/control/channel_state.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 
Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: 2022 yuzu Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later @@ -19,12 +22,12 @@ ChannelState::ChannelState(s32 bind_id_) : bind_id{bind_id_}, initialized{} {} void ChannelState::Init(Core::System& system, GPU& gpu, u64 program_id_) { ASSERT(memory_manager); program_id = program_id_; - dma_pusher = std::make_unique(system, gpu, *memory_manager, *this); - maxwell_3d = std::make_unique(system, *memory_manager); - fermi_2d = std::make_unique(*memory_manager); - kepler_compute = std::make_unique(system, *memory_manager); - maxwell_dma = std::make_unique(system, *memory_manager); - kepler_memory = std::make_unique(system, *memory_manager); + dma_pusher.emplace(system, gpu, *memory_manager, *this); + maxwell_3d.emplace(system, *memory_manager); + fermi_2d.emplace(*memory_manager); + kepler_compute.emplace(system, *memory_manager); + maxwell_dma.emplace(system, *memory_manager); + kepler_memory.emplace(system, *memory_manager); initialized = true; } diff --git a/src/video_core/control/channel_state.h b/src/video_core/control/channel_state.h index b385f4939f..2984d2e09e 100644 --- a/src/video_core/control/channel_state.h +++ b/src/video_core/control/channel_state.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: 2022 yuzu Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later @@ -6,6 +9,12 @@ #include #include "common/common_types.h" +#include "video_core/engines/fermi_2d.h" +#include "video_core/engines/kepler_memory.h" +#include "video_core/engines/kepler_compute.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/engines/maxwell_dma.h" +#include "video_core/dma_pusher.h" namespace Core { class System; @@ -18,49 +27,34 @@ class RasterizerInterface; namespace Tegra { class GPU; - -namespace Engines { -class Puller; -class Fermi2D; 
-class Maxwell3D; -class MaxwellDMA; -class KeplerCompute; -class KeplerMemory; -} // namespace Engines - class MemoryManager; -class DmaPusher; namespace Control { struct ChannelState { explicit ChannelState(s32 bind_id); - ChannelState(const ChannelState& state) = delete; - ChannelState& operator=(const ChannelState&) = delete; - ChannelState(ChannelState&& other) noexcept = default; - ChannelState& operator=(ChannelState&& other) noexcept = default; void Init(Core::System& system, GPU& gpu, u64 program_id); void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); - s32 bind_id = -1; - u64 program_id = 0; /// 3D engine - std::unique_ptr maxwell_3d; + std::optional maxwell_3d; /// 2D engine - std::unique_ptr fermi_2d; + std::optional fermi_2d; /// Compute engine - std::unique_ptr kepler_compute; + std::optional kepler_compute; /// DMA engine - std::unique_ptr maxwell_dma; + std::optional maxwell_dma; /// Inline memory engine - std::unique_ptr kepler_memory; - + std::optional kepler_memory; + /// NV01 Timer + std::optional nv01_timer; + std::optional dma_pusher; std::shared_ptr memory_manager; - std::unique_ptr dma_pusher; - + s32 bind_id = -1; + u64 program_id = 0; bool initialized{}; }; diff --git a/src/video_core/engines/engine_interface.h b/src/video_core/engines/engine_interface.h index e271ecab59..bf3bd66aca 100644 --- a/src/video_core/engines/engine_interface.h +++ b/src/video_core/engines/engine_interface.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project @@ -15,6 +15,7 @@ namespace Tegra::Engines { enum class EngineTypes : u32 { + Nv01Timer, KeplerCompute, Maxwell3D, Fermi2D, diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 52546e4279..b73082b7ef 100644 --- a/src/video_core/engines/maxwell_3d.h +++ 
b/src/video_core/engines/maxwell_3d.h @@ -2258,7 +2258,7 @@ public: /// Returns whether the vertex array specified by index is supposed to be /// accessed per instance or not. bool IsInstancingEnabled(std::size_t index) const { - return is_instanced[index]; + return bool(is_instanced[index]); //FUCK YOU MSVC } }; diff --git a/src/video_core/engines/nv01_timer.h b/src/video_core/engines/nv01_timer.h new file mode 100644 index 0000000000..a8e60f9f53 --- /dev/null +++ b/src/video_core/engines/nv01_timer.h @@ -0,0 +1,52 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include "common/bit_field.h" +#include "common/common_funcs.h" +#include "common/common_types.h" +#include "common/logging/log.h" +#include "video_core/engines/engine_interface.h" +#include "video_core/engines/engine_upload.h" + +namespace Core { +class System; +} + +namespace Tegra { +class MemoryManager; +} + +namespace Tegra::Engines { +class Nv01Timer final : public EngineInterface { +public: + explicit Nv01Timer(Core::System& system_, MemoryManager& memory_manager) + : system{system_} + {} + ~Nv01Timer() override; + + /// Write the value to the register identified by method. + void CallMethod(u32 method, u32 method_argument, bool is_last_call) override { + LOG_DEBUG(HW_GPU, "method={}, argument={}, is_last_call={}", method, method_argument, is_last_call); + } + + /// Write multiple values to the register identified by method. 
+ void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) override { + LOG_DEBUG(HW_GPU, "method={}, base_start={}, amount={}, pending={}", method, fmt::ptr(base_start), amount, methods_pending); + } + + struct Regs { + // No fucking idea + INSERT_PADDING_BYTES_NOINIT(0x48); + } regs{}; +private: + void ConsumeSinkImpl() override {} + Core::System& system; +}; +} diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp index 8dd34c04ab..b5b4e5d7fa 100644 --- a/src/video_core/engines/puller.cpp +++ b/src/video_core/engines/puller.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: 2022 yuzu Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later @@ -34,24 +37,22 @@ void Puller::ProcessBindMethod(const MethodCall& method_call) { bound_engines[method_call.subchannel] = engine_id; switch (engine_id) { case EngineID::FERMI_TWOD_A: - dma_pusher.BindSubchannel(channel_state.fermi_2d.get(), method_call.subchannel, - EngineTypes::Fermi2D); + dma_pusher.BindSubchannel(&*channel_state.fermi_2d, method_call.subchannel, EngineTypes::Fermi2D); break; case EngineID::MAXWELL_B: - dma_pusher.BindSubchannel(channel_state.maxwell_3d.get(), method_call.subchannel, - EngineTypes::Maxwell3D); + dma_pusher.BindSubchannel(&*channel_state.maxwell_3d, method_call.subchannel, EngineTypes::Maxwell3D); break; case EngineID::KEPLER_COMPUTE_B: - dma_pusher.BindSubchannel(channel_state.kepler_compute.get(), method_call.subchannel, - EngineTypes::KeplerCompute); + dma_pusher.BindSubchannel(&*channel_state.kepler_compute, method_call.subchannel, EngineTypes::KeplerCompute); break; case EngineID::MAXWELL_DMA_COPY_A: - dma_pusher.BindSubchannel(channel_state.maxwell_dma.get(), method_call.subchannel, - EngineTypes::MaxwellDMA); + dma_pusher.BindSubchannel(&*channel_state.maxwell_dma, method_call.subchannel, 
EngineTypes::MaxwellDMA); break; case EngineID::KEPLER_INLINE_TO_MEMORY_B: - dma_pusher.BindSubchannel(channel_state.kepler_memory.get(), method_call.subchannel, - EngineTypes::KeplerMemory); + dma_pusher.BindSubchannel(&*channel_state.kepler_memory, method_call.subchannel, EngineTypes::KeplerMemory); + break; + case EngineID::NV01_TIMER: + dma_pusher.BindSubchannel(&*channel_state.nv01_timer, method_call.subchannel, EngineTypes::Nv01Timer); break; default: UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id); @@ -209,24 +210,22 @@ void Puller::CallEngineMethod(const MethodCall& method_call) { switch (engine) { case EngineID::FERMI_TWOD_A: - channel_state.fermi_2d->CallMethod(method_call.method, method_call.argument, - method_call.IsLastCall()); + channel_state.fermi_2d->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); break; case EngineID::MAXWELL_B: - channel_state.maxwell_3d->CallMethod(method_call.method, method_call.argument, - method_call.IsLastCall()); + channel_state.maxwell_3d->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); break; case EngineID::KEPLER_COMPUTE_B: - channel_state.kepler_compute->CallMethod(method_call.method, method_call.argument, - method_call.IsLastCall()); + channel_state.kepler_compute->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); break; case EngineID::MAXWELL_DMA_COPY_A: - channel_state.maxwell_dma->CallMethod(method_call.method, method_call.argument, - method_call.IsLastCall()); + channel_state.maxwell_dma->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); break; case EngineID::KEPLER_INLINE_TO_MEMORY_B: - channel_state.kepler_memory->CallMethod(method_call.method, method_call.argument, - method_call.IsLastCall()); + channel_state.kepler_memory->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); + break; + case EngineID::NV01_TIMER: + 
channel_state.nv01_timer->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); break; default: UNIMPLEMENTED_MSG("Unimplemented engine"); @@ -255,6 +254,9 @@ void Puller::CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_s case EngineID::KEPLER_INLINE_TO_MEMORY_B: channel_state.kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending); break; + case EngineID::NV01_TIMER: + channel_state.nv01_timer->CallMultiMethod(method, base_start, amount, methods_pending); + break; default: UNIMPLEMENTED_MSG("Unimplemented engine"); break; diff --git a/src/video_core/engines/puller.h b/src/video_core/engines/puller.h index d4175ee945..fe5102e3ed 100644 --- a/src/video_core/engines/puller.h +++ b/src/video_core/engines/puller.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: 2022 yuzu Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later @@ -20,6 +23,7 @@ class MemoryManager; class DmaPusher; enum class EngineID { + NV01_TIMER = 0x0004, FERMI_TWOD_A = 0x902D, // 2D Engine MAXWELL_B = 0xB197, // 3D Engine KEPLER_COMPUTE_B = 0xB1C0, From e4122dae1d56a8b358c46e450b3d50ca10ffcc21 Mon Sep 17 00:00:00 2001 From: crueter Date: Fri, 6 Mar 2026 16:38:21 +0100 Subject: [PATCH 07/68] [desktop] addons: open mod folder in rc menu (#3662) also fixed the multiselection being absolutely horrendous Signed-off-by: crueter Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3662 --- .../configure_per_game_addons.cpp | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/src/yuzu/configuration/configure_per_game_addons.cpp b/src/yuzu/configuration/configure_per_game_addons.cpp index bdff73a040..1d2d358672 100644 --- a/src/yuzu/configuration/configure_per_game_addons.cpp +++ b/src/yuzu/configuration/configure_per_game_addons.cpp @@ -10,13 +10,14 @@ #include +#include #include #include 
#include +#include #include #include #include -#include #include "common/common_types.h" #include "common/fs/fs.h" @@ -42,7 +43,7 @@ ConfigurePerGameAddons::ConfigurePerGameAddons(Core::System& system_, QWidget* p item_model = new QStandardItemModel(tree_view); tree_view->setModel(item_model); tree_view->setAlternatingRowColors(true); - tree_view->setSelectionMode(QHeaderView::MultiSelection); + tree_view->setSelectionMode(QHeaderView::ExtendedSelection); tree_view->setSelectionBehavior(QHeaderView::SelectRows); tree_view->setVerticalScrollMode(QHeaderView::ScrollPerPixel); tree_view->setHorizontalScrollMode(QHeaderView::ScrollPerPixel); @@ -248,8 +249,11 @@ void ConfigurePerGameAddons::AddonDeleteRequested(QList selected) { void ConfigurePerGameAddons::showContextMenu(const QPoint& pos) { const QModelIndex index = tree_view->indexAt(pos); - auto selected = tree_view->selectionModel()->selectedIndexes(); - if (index.isValid() && selected.empty()) selected = {index}; + auto selected = tree_view->selectionModel()->selectedRows(); + if (index.isValid() && selected.empty()) { + QModelIndex idx = item_model->index(index.row(), 0); + if (idx.isValid()) selected << idx; + } if (selected.empty()) return; @@ -260,6 +264,15 @@ void ConfigurePerGameAddons::showContextMenu(const QPoint& pos) { AddonDeleteRequested(selected); }); + if (selected.length() == 1) { + auto loc = selected.at(0).data(PATCH_LOCATION).toString(); + if (QFileInfo::exists(loc)) { + QAction* open = menu.addAction(tr("&Open in File Manager")); + connect(open, &QAction::triggered, this, + [selected, loc]() { QDesktopServices::openUrl(QUrl::fromLocalFile(loc)); }); + } + } + menu.exec(tree_view->viewport()->mapToGlobal(pos)); } From c062931c9bef18afd5f0cd74329e8c3e32b4b598 Mon Sep 17 00:00:00 2001 From: lizzie Date: Fri, 6 Mar 2026 16:38:39 +0100 Subject: [PATCH 08/68] [qt] add translation table entry for debug_knobs,serial_battery and serial_unit (#3682) trivial qt change Signed-off-by: lizzie Reviewed-on: 
https://git.eden-emu.dev/eden-emu/eden/pulls/3682 Reviewed-by: DraVee Co-authored-by: lizzie Co-committed-by: lizzie --- src/qt_common/config/shared_translation.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/qt_common/config/shared_translation.cpp b/src/qt_common/config/shared_translation.cpp index f49c43ee2a..d1ed32134c 100644 --- a/src/qt_common/config/shared_translation.cpp +++ b/src/qt_common/config/shared_translation.cpp @@ -425,6 +425,9 @@ std::unique_ptr InitializeTranslations(QObject* parent) "their resolution, details and supported controllers and depending on this setting.\n" "Setting to Handheld can help improve performance for low end systems.")); INSERT(Settings, current_user, QString(), QString()); + INSERT(Settings, serial_unit, tr("Unit Serial"), QString()); + INSERT(Settings, serial_battery, tr("Battery Serial"), QString()); + INSERT(Settings, debug_knobs, tr("Debug knobs"), QString()); // Controls From ddac8c8eb500918bd8c89e0c330587c591206c2c Mon Sep 17 00:00:00 2001 From: xbzk Date: Fri, 6 Mar 2026 19:52:17 +0100 Subject: [PATCH 09/68] [vk] fix crash introduced in 9a07bd0570 (#3685) Fix for current crash on master. Just reverted only the necessary stuff so that PresentManager can hold a reference to khr and resist death upon application hold/restore. @Lizzie shall judge. 
Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3685 Co-authored-by: xbzk Co-committed-by: xbzk --- src/video_core/renderer_vulkan/renderer_vulkan.cpp | 2 +- src/video_core/renderer_vulkan/vk_present_manager.cpp | 6 +++--- src/video_core/renderer_vulkan/vk_present_manager.h | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index cb1b1a5362..1725bc8ccc 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -137,7 +137,7 @@ try memory_allocator, scheduler, swapchain, - *surface) + surface) , blit_swapchain(device_memory, device, memory_allocator, diff --git a/src/video_core/renderer_vulkan/vk_present_manager.cpp b/src/video_core/renderer_vulkan/vk_present_manager.cpp index aa019a4160..80853362ad 100644 --- a/src/video_core/renderer_vulkan/vk_present_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_present_manager.cpp @@ -101,7 +101,7 @@ PresentManager::PresentManager(const vk::Instance& instance_, MemoryAllocator& memory_allocator_, Scheduler& scheduler_, Swapchain& swapchain_, - VkSurfaceKHR_T* surface_) + vk::SurfaceKHR& surface_) : instance{instance_} , render_window{render_window_} , device{device_} @@ -291,7 +291,7 @@ void PresentManager::PresentThread(std::stop_token token) { } void PresentManager::RecreateSwapchain(Frame* frame) { - swapchain.Create(surface, frame->width, frame->height); // Pass raw pointer + swapchain.Create(*surface, frame->width, frame->height); // Pass raw pointer SetImageCount(); } @@ -310,7 +310,7 @@ void PresentManager::CopyToSwapchain(Frame* frame) { // Recreate surface and swapchain if needed. 
if (requires_recreation) { #ifdef ANDROID - surface = *CreateSurface(instance, render_window.GetWindowInfo()).address(); + surface = CreateSurface(instance, render_window.GetWindowInfo()); #endif RecreateSwapchain(frame); } diff --git a/src/video_core/renderer_vulkan/vk_present_manager.h b/src/video_core/renderer_vulkan/vk_present_manager.h index 3d5cc32102..c51f8ed77f 100644 --- a/src/video_core/renderer_vulkan/vk_present_manager.h +++ b/src/video_core/renderer_vulkan/vk_present_manager.h @@ -44,7 +44,7 @@ public: MemoryAllocator& memory_allocator, Scheduler& scheduler, Swapchain& swapchain, - VkSurfaceKHR_T* surface); + vk::SurfaceKHR& surface); ~PresentManager(); /// Returns the last used presentation frame @@ -78,7 +78,7 @@ private: MemoryAllocator& memory_allocator; Scheduler& scheduler; Swapchain& swapchain; - VkSurfaceKHR_T* surface; + vk::SurfaceKHR& surface; vk::CommandPool cmdpool; std::vector frames; boost::container::deque present_queue; From 8faeffdc7e81d09e6f5167d7c7faf386408f2c92 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Sat, 14 Feb 2026 22:07:15 -0400 Subject: [PATCH 10/68] [vulkan] removal of EDS3 and VIDS --- .../features/settings/model/BooleanSetting.kt | 1 - .../settings/model/view/SettingsItem.kt | 7 - .../settings/ui/SettingsFragmentPresenter.kt | 1 - .../app/src/main/res/values-ar/strings.xml | 2 - .../app/src/main/res/values-cs/strings.xml | 2 - .../app/src/main/res/values-de/strings.xml | 2 - .../app/src/main/res/values-fr/strings.xml | 2 - .../app/src/main/res/values-pl/strings.xml | 2 - .../src/main/res/values-pt-rBR/strings.xml | 2 - .../app/src/main/res/values-ru/strings.xml | 2 - .../app/src/main/res/values-uk/strings.xml | 2 - .../src/main/res/values-zh-rCN/strings.xml | 2 - .../src/main/res/values-zh-rTW/strings.xml | 2 - .../app/src/main/res/values/arrays.xml | 2 - .../app/src/main/res/values/strings.xml | 2 - src/common/settings.h | 7 - src/common/settings_enums.h | 2 +- src/qt_common/config/shared_translation.cpp | 6 - 
.../renderer_vulkan/fixed_pipeline_state.cpp | 75 ++--- .../renderer_vulkan/fixed_pipeline_state.h | 28 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 100 ++---- .../renderer_vulkan/vk_graphics_pipeline.h | 3 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 45 +-- .../renderer_vulkan/vk_rasterizer.cpp | 292 ------------------ .../renderer_vulkan/vk_rasterizer.h | 10 +- .../renderer_vulkan/vk_state_tracker.cpp | 23 +- .../renderer_vulkan/vk_state_tracker.h | 39 +-- .../vulkan_common/vulkan_debug_callback.cpp | 8 - .../vulkan_common/vulkan_device.cpp | 123 +------- src/video_core/vulkan_common/vulkan_device.h | 63 +--- .../vulkan_common/vulkan_wrapper.cpp | 11 - src/video_core/vulkan_common/vulkan_wrapper.h | 62 ---- 32 files changed, 70 insertions(+), 860 deletions(-) diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/BooleanSetting.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/BooleanSetting.kt index 2418003904..9b9f475725 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/BooleanSetting.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/BooleanSetting.kt @@ -30,7 +30,6 @@ enum class BooleanSetting(override val key: String) : AbstractBooleanSetting { BUFFER_REORDER_DISABLE("disable_buffer_reorder"), RENDERER_DEBUG("debug"), RENDERER_PATCH_OLD_QCOM_DRIVERS("patch_old_qcom_drivers"), - RENDERER_VERTEX_INPUT_DYNAMIC_STATE("vertex_input_dynamic_state"), RENDERER_PROVOKING_VERTEX("provoking_vertex"), RENDERER_DESCRIPTOR_INDEXING("descriptor_indexing"), RENDERER_SAMPLE_SHADING("sample_shading"), diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt index a8bd44983b..bbf3674d17 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt +++ 
b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt @@ -148,13 +148,6 @@ abstract class SettingsItem( descriptionId = R.string.provoking_vertex_description ) ) - put( - SwitchSetting( - BooleanSetting.RENDERER_VERTEX_INPUT_DYNAMIC_STATE, - titleId = R.string.vertex_input_dynamic_state, - descriptionId = R.string.vertex_input_dynamic_state_description - ) - ) put( SwitchSetting( BooleanSetting.RENDERER_DESCRIPTOR_INDEXING, diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt index 77104e0614..0e9a0df977 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt @@ -291,7 +291,6 @@ class SettingsFragmentPresenter( add(HeaderSetting(R.string.extensions)) add(IntSetting.RENDERER_DYNA_STATE.key) - add(BooleanSetting.RENDERER_VERTEX_INPUT_DYNAMIC_STATE.key) add(BooleanSetting.RENDERER_PROVOKING_VERTEX.key) add(BooleanSetting.RENDERER_DESCRIPTOR_INDEXING.key) add(IntSetting.RENDERER_SAMPLE_SHADING.key) diff --git a/src/android/app/src/main/res/values-ar/strings.xml b/src/android/app/src/main/res/values-ar/strings.xml index 05f65ecf78..7f3982f49b 100644 --- a/src/android/app/src/main/res/values-ar/strings.xml +++ b/src/android/app/src/main/res/values-ar/strings.xml @@ -506,8 +506,6 @@ الحالة الديناميكية الموسعة يتحكم هذا الخيار في عدد الميزات التي يمكن استخدامها في حالة الديناميكية الموسعة. تسمح الأرقام الأعلى بمزيد من الميزات ويمكن أن تزيد من الأداء، ولكنها قد تسبب مشاكل مع بعض برامج التشغيل والأجهزة. معطل - حالة ديناميكية لإدخال الرأس - يتيح ميزة الحالة الديناميكية لإدخال الرأس لتحسين الجودة والأداء. الرأس المثير يحسن الإضاءة ومعالجة الرؤوس في بعض الألعاب. مدعوم فقط على وحدات معالجة الرسومات Vulkan 1.0+. 
فهرسة الوصف diff --git a/src/android/app/src/main/res/values-cs/strings.xml b/src/android/app/src/main/res/values-cs/strings.xml index b56c21c9b0..c3ee5e6f53 100644 --- a/src/android/app/src/main/res/values-cs/strings.xml +++ b/src/android/app/src/main/res/values-cs/strings.xml @@ -488,8 +488,6 @@ Úroveň EDS Určuje počet funkcí využívaných v rámci rozšířeného dynamického stavu API Vulkan (Extended Dynamic State). Vyšší hodnoty umožňují využít více funkcí a mohou zvýšit výkon, ale u některých ovladačů a výrobců grafických karet mohou způsobovat problémy s kompatibilitou. Vypnuto - Dynamický stav vstupu vrcholů (Vertex Input) - Aktivuje funkci dynamického stavu vstupu vrcholů (Vertex Input Dynamic State) pro lepší kvalitu a výkon. Určující vrchol Zlepšuje osvětlení a zpracování vrcholů v některých hrách. Podporováno pouze na GPU s API Vulkan 1.0+. Indexování deskriptorů diff --git a/src/android/app/src/main/res/values-de/strings.xml b/src/android/app/src/main/res/values-de/strings.xml index 7524402e6e..5c499a4080 100644 --- a/src/android/app/src/main/res/values-de/strings.xml +++ b/src/android/app/src/main/res/values-de/strings.xml @@ -486,8 +486,6 @@ Wird der Handheld-Modus verwendet, verringert es die Auflösung und erhöht die Erweiterter dynamischer Status Steuert die Anzahl der Funktionen, die im \"Vertex Input Dynamic State\" werden können. Höhere Werte ermöglichen mehr Funktionen und können die Leistung steigern, können aber bei einigen Treibern und Anbietern zu Problemen führen. Deaktiviert - Vertex Input Dynamic State - Aktiviert die Funktion \"Vertex Input Dynamic State\" für bessere Qualität und Leistung. Provokanter Vertex Verbessert die Beleuchtung und die Vertex-Verarbeitung in einigen Spielen. Wird nur von GPUs mit Vulkan 1.0+ unterstützt. 
Deskriptor-Indizierung diff --git a/src/android/app/src/main/res/values-fr/strings.xml b/src/android/app/src/main/res/values-fr/strings.xml index add275870d..4294e6d81e 100644 --- a/src/android/app/src/main/res/values-fr/strings.xml +++ b/src/android/app/src/main/res/values-fr/strings.xml @@ -436,8 +436,6 @@ Compile les shaders de manière asynchrone. Cela peut réduire les saccades mais peut aussi provoquer des problèmes graphiques. État dynamique étendu Désactivé - État dynamique d\'entrée de sommet - Active la fonctionnalité d\'état dynamique des entrées de sommets pour une meilleure qualité et de meilleures performances. Provoque des Vertex Améliore l`éclairage et la gestion des vertex dans certains jeux. Pris en charge uniquement par les GPU Vulkan 1.0+. Indexation des descripteurs diff --git a/src/android/app/src/main/res/values-pl/strings.xml b/src/android/app/src/main/res/values-pl/strings.xml index 3d69cce8f3..6954b65fe1 100644 --- a/src/android/app/src/main/res/values-pl/strings.xml +++ b/src/android/app/src/main/res/values-pl/strings.xml @@ -488,8 +488,6 @@ Rozszerzony stan dynamiczny Kontroluje liczbę funkcji, które mogą być używane w Extended Dynamic State. Wyższe wartości pozwalają na użycie większej liczby funkcji i mogą zwiększyć wydajność, ale mogą powodować problemy z niektórymi sterownikami i u niektórych producentów. Wyłączone - Dynamiczny stan wejścia wierzchołków - Włącza funkcję dynamicznego stanu wejścia wierzchołków, poprawiając jakość i wydajność. Wierzchołek prowokujący Poprawia oświetlenie i obsługę wierzchołków w niektórych grach. Obsługiwane tylko przez GPU Vulkan 1.0+. Indeksowanie deskryptorów diff --git a/src/android/app/src/main/res/values-pt-rBR/strings.xml b/src/android/app/src/main/res/values-pt-rBR/strings.xml index 08e2695d2e..5d1843fdae 100644 --- a/src/android/app/src/main/res/values-pt-rBR/strings.xml +++ b/src/android/app/src/main/res/values-pt-rBR/strings.xml @@ -471,8 +471,6 @@ Compila shaders de forma assíncrona. 
Isso pode reduzir engasgos, mas também pode introduzir falhas gráficas. Extended Dynamic State Desativado - Vertex Input Dynamic State - Ativa o recurso de vertex input dynamic state para melhor qualidade e desempenho. Provoking Vertex Vértice Provocante: Melhora a iluminação e o processamento de vértices em certos jogos. Suportado apenas em GPUs com Vulkan 1.0 ou superior. Descriptor Indexing diff --git a/src/android/app/src/main/res/values-ru/strings.xml b/src/android/app/src/main/res/values-ru/strings.xml index a9a3cceaae..de2bcea0f0 100644 --- a/src/android/app/src/main/res/values-ru/strings.xml +++ b/src/android/app/src/main/res/values-ru/strings.xml @@ -498,8 +498,6 @@ Расширенное динамическое состояние Управляет количеством функций, доступных в режиме «Расширенное динамическое состояние». Большее число позволяет задействовать больше функций и может повысить производительность, но способно вызывать проблемы с некоторыми драйверами и графикой. Отключено - Динамическое состояние ввода вершин - Включает функцию динамического состояния ввода вершин для повышения качества и производительности Определяющая вершина Улучшает освещение и обработку вершин в некоторых играх. Поддерживается только ГПУ с Vulkan 1.0+. Индексирование дескрипторов diff --git a/src/android/app/src/main/res/values-uk/strings.xml b/src/android/app/src/main/res/values-uk/strings.xml index ada2445d05..343d38103f 100644 --- a/src/android/app/src/main/res/values-uk/strings.xml +++ b/src/android/app/src/main/res/values-uk/strings.xml @@ -502,8 +502,6 @@ Розширений динамічний стан Керує кількістю функцій, які можна використовувати в «Розширеному динамічному стані». Вище число дозволяє більше функцій і може покращити продуктивність, але може спричинити проблеми з деякими драйверами й виробниками. Вимкнено - Динамічний стан введення вершин - Вмикає можливість динамічного стану введення вершин для кращих якості й продуктивності. 
Провокативна вершина Покращує освітлення та взаємодію з вершинами у деяких іграх. Лише для ГП з підтримкою Vulkan 1.0+. Індексація дескрипторів diff --git a/src/android/app/src/main/res/values-zh-rCN/strings.xml b/src/android/app/src/main/res/values-zh-rCN/strings.xml index 08b55297a7..b1da5135dc 100644 --- a/src/android/app/src/main/res/values-zh-rCN/strings.xml +++ b/src/android/app/src/main/res/values-zh-rCN/strings.xml @@ -496,8 +496,6 @@ 扩展动态状态 控制在扩展动态状态中可使用的函数数量。更高的数值允许启用更多功能,并可能提升性能,但同时也可能导致额外的图形问题。 已禁用 - 顶点输入动态状态 - 开启顶点输入动态状态功能来获得更好的质量和性能。 引发顶点 改善某些游戏中的光照和顶点处理。仅支持Vulkan 1.0+ GPU。 描述符索引 diff --git a/src/android/app/src/main/res/values-zh-rTW/strings.xml b/src/android/app/src/main/res/values-zh-rTW/strings.xml index c7061ebc03..b593f97575 100644 --- a/src/android/app/src/main/res/values-zh-rTW/strings.xml +++ b/src/android/app/src/main/res/values-zh-rTW/strings.xml @@ -467,8 +467,6 @@ 非同步編譯著色器。這可能會減少卡頓,但也可能導致圖形錯誤。 擴展動態狀態 已停用 - 頂點輸入動態狀態 - 啟用頂點輸入動態狀態以取得更佳的品質及性能 引發頂點 改善某些遊戲中的光照和頂點處理。僅支援Vulkan 1.0+ GPU。 描述符索引 diff --git a/src/android/app/src/main/res/values/arrays.xml b/src/android/app/src/main/res/values/arrays.xml index 565decb390..31709eb89b 100644 --- a/src/android/app/src/main/res/values/arrays.xml +++ b/src/android/app/src/main/res/values/arrays.xml @@ -632,14 +632,12 @@ @string/disabled ExtendedDynamicState 1 ExtendedDynamicState 2 - ExtendedDynamicState 3 0 1 2 - 3 diff --git a/src/android/app/src/main/res/values/strings.xml b/src/android/app/src/main/res/values/strings.xml index 7d094effcb..de74b6c2eb 100644 --- a/src/android/app/src/main/res/values/strings.xml +++ b/src/android/app/src/main/res/values/strings.xml @@ -531,8 +531,6 @@ Extended Dynamic State Controls the number of features that can be used in Extended Dynamic State. Higher numbers allow for more features and can increase performance, but may cause issues with some drivers and vendors. 
Disabled - Vertex Input Dynamic State - Enables vertex input dynamic state feature for better quality and performance. Provoking Vertex Improves lighting and vertex handling in certain games. Only supported on Vulkan 1.0+ GPUs. Descriptor Indexing diff --git a/src/common/settings.h b/src/common/settings.h index 7c6c0d062f..237cd7f0b3 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -601,13 +601,6 @@ struct Values { Category::RendererExtensions, Specialization::Scalar}; - SwitchableSetting vertex_input_dynamic_state{linkage, -#if defined (ANDROID) - false, -#else - true, -#endif - "vertex_input_dynamic_state", Category::RendererExtensions}; SwitchableSetting provoking_vertex{linkage, false, "provoking_vertex", Category::RendererExtensions}; SwitchableSetting descriptor_indexing{linkage, false, "descriptor_indexing", Category::RendererExtensions}; diff --git a/src/common/settings_enums.h b/src/common/settings_enums.h index 638be4127f..5de0641b69 100644 --- a/src/common/settings_enums.h +++ b/src/common/settings_enums.h @@ -154,7 +154,7 @@ ENUM(GpuUnswizzleSize, VerySmall, Small, Normal, Large, VeryLarge) ENUM(GpuUnswizzle, VeryLow, Low, Normal, Medium, High) ENUM(GpuUnswizzleChunk, VeryLow, Low, Normal, Medium, High) ENUM(TemperatureUnits, Celsius, Fahrenheit) -ENUM(ExtendedDynamicState, Disabled, EDS1, EDS2, EDS3); +ENUM(ExtendedDynamicState, Disabled, EDS1, EDS2); ENUM(GpuLogLevel, Off, Errors, Standard, Verbose, All) ENUM(GameListMode, TreeView, GridView); ENUM(SpeedMode, Standard, Turbo, Slow); diff --git a/src/qt_common/config/shared_translation.cpp b/src/qt_common/config/shared_translation.cpp index d1ed32134c..095335dc81 100644 --- a/src/qt_common/config/shared_translation.cpp +++ b/src/qt_common/config/shared_translation.cpp @@ -368,11 +368,6 @@ std::unique_ptr InitializeTranslations(QObject* parent) "Higher states allow for more features and can increase performance, but may cause " "additional graphical issues.")); - INSERT(Settings, - 
vertex_input_dynamic_state, - tr("Vertex Input Dynamic State"), - tr("Enables vertex input dynamic state feature for better quality and performance.")); - INSERT(Settings, provoking_vertex, tr("Provoking Vertex"), @@ -799,7 +794,6 @@ std::unique_ptr ComboboxEnumeration(QObject* parent) PAIR(ExtendedDynamicState, Disabled, tr("Disabled")), PAIR(ExtendedDynamicState, EDS1, tr("ExtendedDynamicState 1")), PAIR(ExtendedDynamicState, EDS2, tr("ExtendedDynamicState 2")), - PAIR(ExtendedDynamicState, EDS3, tr("ExtendedDynamicState 3")), }}); translations->insert({Settings::EnumMetadata::Index(), diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 06cbd9e6da..c74f3824a0 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project @@ -60,9 +60,9 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, DynamicFe extended_dynamic_state.Assign(features.has_extended_dynamic_state ? 1 : 0); extended_dynamic_state_2.Assign(features.has_extended_dynamic_state_2 ? 1 : 0); extended_dynamic_state_2_logic_op.Assign(features.has_extended_dynamic_state_2_logic_op ? 1 : 0); - extended_dynamic_state_3_blend.Assign(features.has_extended_dynamic_state_3_blend ? 1 : 0); - extended_dynamic_state_3_enables.Assign(features.has_extended_dynamic_state_3_enables ? 1 : 0); - dynamic_vertex_input.Assign(features.has_dynamic_vertex_input ? 
1 : 0); + reserved_dynamic_state_3_blend.Assign(0); + reserved_dynamic_state_3_enables.Assign(0); + reserved_bit_5.Assign(0); xfb_enabled.Assign(regs.transform_feedback_enabled != 0); ndc_minus_one_to_one.Assign(regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1 : 0); polygon_mode.Assign(PackPolygonMode(regs.polygon_mode_front)); @@ -103,43 +103,22 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, DynamicFe point_size = std::bit_cast(regs.point_size); if (maxwell3d.dirty.flags[Dirty::VertexInput]) { - if (features.has_dynamic_vertex_input) { - // Dirty flag will be reset by the command buffer update - static constexpr std::array LUT{ - 0u, // Invalid - 1u, // SignedNorm - 1u, // UnsignedNorm - 2u, // SignedInt - 3u, // UnsignedInt - 1u, // UnsignedScaled - 1u, // SignedScaled - 1u, // Float - }; - const auto& attrs = regs.vertex_attrib_format; - attribute_types = 0; - for (size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { - const u32 mask = attrs[i].constant != 0 ? 0 : 3; - const u32 type = LUT[static_cast(attrs[i].type.Value())]; - attribute_types |= static_cast(type & mask) << (i * 2); - } - } else { - maxwell3d.dirty.flags[Dirty::VertexInput] = false; - enabled_divisors = 0; - for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { - const bool is_enabled = regs.vertex_stream_instances.IsInstancingEnabled(index); - binding_divisors[index] = is_enabled ? regs.vertex_streams[index].frequency : 0; - enabled_divisors |= (is_enabled ? u64{1} : 0) << index; - } - for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { - const auto& input = regs.vertex_attrib_format[index]; - auto& attribute = attributes[index]; - attribute.raw = 0; - attribute.enabled.Assign(input.constant ? 
0 : 1); - attribute.buffer.Assign(input.buffer); - attribute.offset.Assign(input.offset); - attribute.type.Assign(static_cast(input.type.Value())); - attribute.size.Assign(static_cast(input.size.Value())); - } + maxwell3d.dirty.flags[Dirty::VertexInput] = false; + enabled_divisors = 0; + for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { + const bool is_enabled = regs.vertex_stream_instances.IsInstancingEnabled(index); + binding_divisors[index] = is_enabled ? regs.vertex_streams[index].frequency : 0; + enabled_divisors |= (is_enabled ? u64{1} : 0) << index; + } + for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { + const auto& input = regs.vertex_attrib_format[index]; + auto& attribute = attributes[index]; + attribute.raw = 0; + attribute.enabled.Assign(input.constant ? 0 : 1); + attribute.buffer.Assign(input.buffer); + attribute.offset.Assign(input.offset); + attribute.type.Assign(static_cast(input.type.Value())); + attribute.size.Assign(static_cast(input.size.Value())); } } if (maxwell3d.dirty.flags[Dirty::ViewportSwizzles]) { @@ -160,17 +139,13 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, DynamicFe if (!extended_dynamic_state_2_logic_op) { dynamic_state.Refresh2(regs, topology_, extended_dynamic_state_2); } - if (!extended_dynamic_state_3_blend) { - if (maxwell3d.dirty.flags[Dirty::Blending]) { - maxwell3d.dirty.flags[Dirty::Blending] = false; - for (size_t index = 0; index < attachments.size(); ++index) { - attachments[index].Refresh(regs, index); - } + if (maxwell3d.dirty.flags[Dirty::Blending]) { + maxwell3d.dirty.flags[Dirty::Blending] = false; + for (size_t index = 0; index < attachments.size(); ++index) { + attachments[index].Refresh(regs, index); } } - if (!extended_dynamic_state_3_enables) { - dynamic_state.Refresh3(regs); - } + dynamic_state.Refresh3(regs); if (xfb_enabled) { RefreshXfbState(xfb_state, regs); } diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h 
b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index c5bc14f448..030c62a883 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project @@ -25,9 +25,6 @@ struct DynamicFeatures { bool has_extended_dynamic_state_2; bool has_extended_dynamic_state_2_logic_op; bool has_extended_dynamic_state_2_patch_control_points; - bool has_extended_dynamic_state_3_blend; - bool has_extended_dynamic_state_3_enables; - bool has_dynamic_vertex_input; }; struct FixedPipelineState { @@ -191,9 +188,9 @@ struct FixedPipelineState { BitField<0, 1, u32> extended_dynamic_state; BitField<1, 1, u32> extended_dynamic_state_2; BitField<2, 1, u32> extended_dynamic_state_2_logic_op; - BitField<3, 1, u32> extended_dynamic_state_3_blend; - BitField<4, 1, u32> extended_dynamic_state_3_enables; - BitField<5, 1, u32> dynamic_vertex_input; + BitField<3, 1, u32> reserved_dynamic_state_3_blend; + BitField<4, 1, u32> reserved_dynamic_state_3_enables; + BitField<5, 1, u32> reserved_bit_5; BitField<6, 1, u32> xfb_enabled; BitField<7, 1, u32> ndc_minus_one_to_one; BitField<8, 2, u32> polygon_mode; @@ -225,10 +222,7 @@ struct FixedPipelineState { u32 point_size; std::array viewport_swizzles; - union { - u64 attribute_types; // Used with VK_EXT_vertex_input_dynamic_state - u64 enabled_divisors; - }; + u64 enabled_divisors; DynamicState dynamic_state; std::array attachments; @@ -260,14 +254,6 @@ struct FixedPipelineState { // When transform feedback is enabled, use the whole struct return sizeof(*this); } - if (dynamic_vertex_input && extended_dynamic_state_3_blend) { - // Exclude dynamic state and attributes - return offsetof(FixedPipelineState, dynamic_state); - } - if 
(dynamic_vertex_input) { - // Exclude dynamic state - return offsetof(FixedPipelineState, attributes); - } if (extended_dynamic_state) { // Exclude dynamic state return offsetof(FixedPipelineState, vertex_strides); @@ -275,10 +261,6 @@ struct FixedPipelineState { // Default return offsetof(FixedPipelineState, xfb_state); } - - u32 DynamicAttributeType(size_t index) const noexcept { - return (attribute_types >> (index * 2)) & 0b11; - } }; static_assert(std::has_unique_object_representations_v); static_assert(std::is_trivially_copyable_v); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index e989bf6b31..774d613c25 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -577,38 +577,35 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { static_vector vertex_bindings; static_vector vertex_binding_divisors; static_vector vertex_attributes; - if (!key.state.dynamic_vertex_input) { - const size_t num_vertex_arrays = (std::min)( - Maxwell::NumVertexArrays, static_cast(device.GetMaxVertexInputBindings())); - for (size_t index = 0; index < num_vertex_arrays; ++index) { - const bool instanced = key.state.binding_divisors[index] != 0; - const auto rate = - instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; - vertex_bindings.push_back({ + const size_t num_vertex_arrays = + (std::min)(Maxwell::NumVertexArrays, static_cast(device.GetMaxVertexInputBindings())); + for (size_t index = 0; index < num_vertex_arrays; ++index) { + const bool instanced = key.state.binding_divisors[index] != 0; + const auto rate = instanced ? 
VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; + vertex_bindings.push_back({ + .binding = static_cast(index), + .stride = key.state.vertex_strides[index], + .inputRate = rate, + }); + if (instanced) { + vertex_binding_divisors.push_back({ .binding = static_cast(index), - .stride = key.state.vertex_strides[index], - .inputRate = rate, - }); - if (instanced) { - vertex_binding_divisors.push_back({ - .binding = static_cast(index), - .divisor = key.state.binding_divisors[index], - }); - } - } - for (size_t index = 0; index < key.state.attributes.size(); ++index) { - const auto& attribute = key.state.attributes[index]; - if (!attribute.enabled || !stage_infos[0].loads.Generic(index)) { - continue; - } - vertex_attributes.push_back({ - .location = static_cast(index), - .binding = attribute.buffer, - .format = MaxwellToVK::VertexFormat(device, attribute.Type(), attribute.Size()), - .offset = attribute.offset, + .divisor = key.state.binding_divisors[index], }); } } + for (size_t index = 0; index < key.state.attributes.size(); ++index) { + const auto& attribute = key.state.attributes[index]; + if (!attribute.enabled || !stage_infos[0].loads.Generic(index)) { + continue; + } + vertex_attributes.push_back({ + .location = static_cast(index), + .binding = attribute.buffer, + .format = MaxwellToVK::VertexFormat(device, attribute.Type(), attribute.Size()), + .offset = attribute.offset, + }); + } ASSERT(vertex_attributes.size() <= device.GetMaxVertexInputAttributes()); VkPipelineVertexInputStateCreateInfo vertex_input_ci{ @@ -850,17 +847,7 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { }; dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end()); - // VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT is part of EDS1 - // Only use it if VIDS is not active (VIDS replaces it with full vertex input control) - if (!key.state.dynamic_vertex_input) { - dynamic_states.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT); - } - } 
- - // VK_DYNAMIC_STATE_VERTEX_INPUT_EXT (VIDS) - Independent from EDS - // Provides full dynamic vertex input control, replaces VERTEX_INPUT_BINDING_STRIDE - if (key.state.dynamic_vertex_input) { - dynamic_states.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_EXT); + dynamic_states.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT); } // EDS2 - Core (3 states) @@ -878,41 +865,6 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { dynamic_states.push_back(VK_DYNAMIC_STATE_LOGIC_OP_EXT); } - // EDS3 - Blending (composite: 3 states) - if (key.state.extended_dynamic_state_3_blend) { - static constexpr std::array extended3{ - VK_DYNAMIC_STATE_COLOR_BLEND_ENABLE_EXT, - VK_DYNAMIC_STATE_COLOR_BLEND_EQUATION_EXT, - VK_DYNAMIC_STATE_COLOR_WRITE_MASK_EXT, - }; - dynamic_states.insert(dynamic_states.end(), extended3.begin(), extended3.end()); - } - - // EDS3 - Enables (composite: per-feature) - if (key.state.extended_dynamic_state_3_enables) { - if (device.SupportsDynamicState3DepthClampEnable()) { - dynamic_states.push_back(VK_DYNAMIC_STATE_DEPTH_CLAMP_ENABLE_EXT); - } - if (device.SupportsDynamicState3LogicOpEnable()) { - dynamic_states.push_back(VK_DYNAMIC_STATE_LOGIC_OP_ENABLE_EXT); - } - if (device.SupportsDynamicState3LineRasterizationMode()) { - dynamic_states.push_back(VK_DYNAMIC_STATE_LINE_RASTERIZATION_MODE_EXT); - } - if (device.SupportsDynamicState3ConservativeRasterizationMode()) { - dynamic_states.push_back(VK_DYNAMIC_STATE_CONSERVATIVE_RASTERIZATION_MODE_EXT); - } - if (device.SupportsDynamicState3LineStippleEnable()) { - dynamic_states.push_back(VK_DYNAMIC_STATE_LINE_STIPPLE_ENABLE_EXT); - } - if (device.SupportsDynamicState3AlphaToCoverageEnable()) { - dynamic_states.push_back(VK_DYNAMIC_STATE_ALPHA_TO_COVERAGE_ENABLE_EXT); - } - if (device.SupportsDynamicState3AlphaToOneEnable()) { - dynamic_states.push_back(VK_DYNAMIC_STATE_ALPHA_TO_ONE_ENABLE_EXT); - } - } - const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ .sType = 
VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, .pNext = nullptr, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 34941d6e8d..29f5eba8dc 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project @@ -81,7 +81,6 @@ public: const GraphicsPipelineCacheKey& key, std::array stages, const std::array& infos); - bool HasDynamicVertexInput() const noexcept { return key.state.dynamic_vertex_input; } bool SupportsAlphaToCoverage() const noexcept { return fragment_has_color0_output; } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 77a4e8616a..8cf02a959c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -58,7 +58,7 @@ using VideoCommon::FileEnvironment; using VideoCommon::GenericEnvironment; using VideoCommon::GraphicsEnvironment; -constexpr u32 CACHE_VERSION = 16; +constexpr u32 CACHE_VERSION = 17; constexpr std::array VULKAN_CACHE_MAGIC_NUMBER{'y', 'u', 'z', 'u', 'v', 'k', 'c', 'h'}; template @@ -132,20 +132,6 @@ Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribut return Shader::AttributeType::Float; } -Shader::AttributeType AttributeType(const FixedPipelineState& state, size_t index) { - switch (state.DynamicAttributeType(index)) { - case 0: - return Shader::AttributeType::Disabled; - case 1: - return Shader::AttributeType::Float; - case 2: - return Shader::AttributeType::SignedInt; - case 3: - return Shader::AttributeType::UnsignedInt; - } - return Shader::AttributeType::Disabled; -} - 
Shader::RuntimeInfo MakeRuntimeInfo(std::span programs, const GraphicsPipelineCacheKey& key, const Shader::IR::Program& program, @@ -183,14 +169,8 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span program } info.convert_depth_mode = gl_ndc; } - if (key.state.dynamic_vertex_input) { - for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { - info.generic_input_types[index] = AttributeType(key.state, index); - } - } else { - std::ranges::transform(key.state.attributes, info.generic_input_types.begin(), - &CastAttributeType); - } + std::ranges::transform(key.state.attributes, info.generic_input_types.begin(), + &CastAttributeType); break; case Shader::Stage::TessellationEval: info.tess_clockwise = key.state.tessellation_clockwise != 0; @@ -469,7 +449,7 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, // Level 0: Core Dynamic States only // Level 1: Core + EDS1 // Level 2: Core + EDS1 + EDS2 (accumulative) - // Level 3: Core + EDS1 + EDS2 + EDS3 (accumulative) + // (EDS3 and VIDS were removed; Level 2 is the highest supported level) // Here we only verify if extensions were successfully loaded by the device dynamic_features.has_extended_dynamic_state = @@ -480,16 +460,6 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, dynamic_features.has_extended_dynamic_state_2_logic_op = device.IsExtExtendedDynamicState2ExtrasSupported(); dynamic_features.has_extended_dynamic_state_2_patch_control_points = false; - - dynamic_features.has_extended_dynamic_state_3_blend = - device.IsExtExtendedDynamicState3BlendingSupported(); - dynamic_features.has_extended_dynamic_state_3_enables = - device.IsExtExtendedDynamicState3EnablesSupported(); - - // VIDS: Independent toggle (not affected by dyna_state levels) - dynamic_features.has_dynamic_vertex_input = - device.IsExtVertexInputDynamicStateSupported() && - Settings::values.vertex_input_dynamic_state.GetValue(); } PipelineCache::~PipelineCache() { @@ -595,12 +565,7 @@ void
PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading (key.state.extended_dynamic_state_2 != 0) != dynamic_features.has_extended_dynamic_state_2 || (key.state.extended_dynamic_state_2_logic_op != 0) != - dynamic_features.has_extended_dynamic_state_2_logic_op || - (key.state.extended_dynamic_state_3_blend != 0) != - dynamic_features.has_extended_dynamic_state_3_blend || - (key.state.extended_dynamic_state_3_enables != 0) != - dynamic_features.has_extended_dynamic_state_3_enables || - (key.state.dynamic_vertex_input != 0) != dynamic_features.has_dynamic_vertex_input) { + dynamic_features.has_extended_dynamic_state_2_logic_op) { return; } workers.QueueWork([this, key, envs_ = std::move(envs), &state, &callback]() mutable { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 60b899a811..787ff47532 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -1044,42 +1044,6 @@ void RasterizerVulkan::UpdateDynamicStates() { UpdateLogicOp(regs); } - // EDS3 Enables: LogicOpEnable, DepthClamp, LineStipple, ConservativeRaster - if (device.IsExtExtendedDynamicState3EnablesSupported()) { - using namespace Tegra::Engines; - // AMD Workaround: LogicOp incompatible with float render targets - if (device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_OPEN_SOURCE || - device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_PROPRIETARY) { - const auto has_float = std::any_of( - regs.vertex_attrib_format.begin(), regs.vertex_attrib_format.end(), - [](const auto& attrib) { - return attrib.type == Maxwell3D::Regs::VertexAttribute::Type::Float; - } - ); - if (regs.logic_op.enable) { - regs.logic_op.enable = static_cast(!has_float); - } - } - UpdateLogicOpEnable(regs); - UpdateDepthClampEnable(regs); - UpdateLineRasterizationMode(regs); - UpdateLineStippleEnable(regs); - UpdateConservativeRasterizationMode(regs); - 
UpdateAlphaToCoverageEnable(regs); - UpdateAlphaToOneEnable(regs); - } - - // EDS3 Blending: ColorBlendEnable, ColorBlendEquation, ColorWriteMask - if (device.IsExtExtendedDynamicState3BlendingSupported()) { - UpdateBlending(regs); - } - - // Vertex Input Dynamic State: Independent from EDS levels - if (device.IsExtVertexInputDynamicStateSupported()) { - if (auto* gp = pipeline_cache.CurrentGraphicsPipeline(); gp && gp->HasDynamicVertexInput()) { - UpdateVertexInput(regs); - } - } } void RasterizerVulkan::HandleTransformFeedback() { @@ -1434,73 +1398,6 @@ void RasterizerVulkan::UpdateRasterizerDiscardEnable(Tegra::Engines::Maxwell3D:: }); } -void RasterizerVulkan::UpdateConservativeRasterizationMode(Tegra::Engines::Maxwell3D::Regs& regs) { - if (!state_tracker.TouchConservativeRasterizationMode()) { - return; - } - - if (!device.SupportsDynamicState3ConservativeRasterizationMode()) { - return; - } - - scheduler.Record([enable = regs.conservative_raster_enable](vk::CommandBuffer cmdbuf) { - cmdbuf.SetConservativeRasterizationModeEXT( - enable ? 
VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT - : VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT); - }); -} - -void RasterizerVulkan::UpdateLineStippleEnable(Tegra::Engines::Maxwell3D::Regs& regs) { - if (!state_tracker.TouchLineStippleEnable()) { - return; - } - - if (!device.SupportsDynamicState3LineStippleEnable()) { - return; - } - - scheduler.Record([enable = regs.line_stipple_enable](vk::CommandBuffer cmdbuf) { - cmdbuf.SetLineStippleEnableEXT(enable); - }); -} - -void RasterizerVulkan::UpdateLineRasterizationMode(Tegra::Engines::Maxwell3D::Regs& regs) { - if (!device.IsExtLineRasterizationSupported()) { - return; - } - if (!state_tracker.TouchLineRasterizationMode()) { - return; - } - - if (!device.SupportsDynamicState3LineRasterizationMode()) { - static std::once_flag warn_missing_rect; - std::call_once(warn_missing_rect, [] { - LOG_WARNING(Render_Vulkan, - "Driver lacks rectangular line rasterization support; skipping dynamic " - "line state updates"); - }); - return; - } - - const bool wants_smooth = regs.line_anti_alias_enable != 0; - VkLineRasterizationModeEXT mode = VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT; - if (wants_smooth) { - if (device.SupportsSmoothLines()) { - mode = VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT; - } else { - static std::once_flag warn_missing_smooth; - std::call_once(warn_missing_smooth, [] { - LOG_WARNING(Render_Vulkan, - "Line anti-aliasing requested but smoothLines feature unavailable; " - "using rectangular rasterization"); - }); - } - } - scheduler.Record([mode](vk::CommandBuffer cmdbuf) { - cmdbuf.SetLineRasterizationModeEXT(mode); - }); -} - void RasterizerVulkan::UpdateDepthBiasEnable(Tegra::Engines::Maxwell3D::Regs& regs) { if (!state_tracker.TouchDepthBiasEnable()) { return; @@ -1536,70 +1433,6 @@ void RasterizerVulkan::UpdateDepthBiasEnable(Tegra::Engines::Maxwell3D::Regs& re [enable](vk::CommandBuffer cmdbuf) { cmdbuf.SetDepthBiasEnableEXT(enable != 0); }); } -void 
RasterizerVulkan::UpdateLogicOpEnable(Tegra::Engines::Maxwell3D::Regs& regs) { - if (!state_tracker.TouchLogicOpEnable()) { - return; - } - if (!device.SupportsDynamicState3LogicOpEnable()) { - return; - } - scheduler.Record([enable = regs.logic_op.enable](vk::CommandBuffer cmdbuf) { - cmdbuf.SetLogicOpEnableEXT(enable != 0); - }); -} - -void RasterizerVulkan::UpdateDepthClampEnable(Tegra::Engines::Maxwell3D::Regs& regs) { - if (!state_tracker.TouchDepthClampEnable()) { - return; - } - if (!device.SupportsDynamicState3DepthClampEnable()) { - return; - } - bool is_enabled = !(regs.viewport_clip_control.geometry_clip == - Maxwell::ViewportClipControl::GeometryClip::Passthrough || - regs.viewport_clip_control.geometry_clip == - Maxwell::ViewportClipControl::GeometryClip::FrustumXYZ || - regs.viewport_clip_control.geometry_clip == - Maxwell::ViewportClipControl::GeometryClip::FrustumZ); - scheduler.Record( - [is_enabled](vk::CommandBuffer cmdbuf) { cmdbuf.SetDepthClampEnableEXT(is_enabled); }); -} - -void RasterizerVulkan::UpdateAlphaToCoverageEnable(Tegra::Engines::Maxwell3D::Regs& regs) { - if (!state_tracker.TouchAlphaToCoverageEnable()) { - return; - } - if (!device.SupportsDynamicState3AlphaToCoverageEnable()) { - return; - } - GraphicsPipeline* const pipeline = pipeline_cache.CurrentGraphicsPipeline(); - const bool enable = pipeline != nullptr && pipeline->SupportsAlphaToCoverage() && - regs.anti_alias_alpha_control.alpha_to_coverage != 0; - scheduler.Record([enable](vk::CommandBuffer cmdbuf) { - cmdbuf.SetAlphaToCoverageEnableEXT(enable ? 
VK_TRUE : VK_FALSE); - }); -} - -void RasterizerVulkan::UpdateAlphaToOneEnable(Tegra::Engines::Maxwell3D::Regs& regs) { - if (!state_tracker.TouchAlphaToOneEnable()) { - return; - } - if (!device.SupportsDynamicState3AlphaToOneEnable()) { - static std::once_flag warn_alpha_to_one; - std::call_once(warn_alpha_to_one, [] { - LOG_WARNING(Render_Vulkan, - "Alpha-to-one is not supported on this device; forcing it disabled"); - }); - return; - } - GraphicsPipeline* const pipeline = pipeline_cache.CurrentGraphicsPipeline(); - const bool enable = pipeline != nullptr && pipeline->SupportsAlphaToOne() && - regs.anti_alias_alpha_control.alpha_to_one != 0; - scheduler.Record([enable](vk::CommandBuffer cmdbuf) { - cmdbuf.SetAlphaToOneEnableEXT(enable ? VK_TRUE : VK_FALSE); - }); -} - void RasterizerVulkan::UpdateDepthCompareOp(Tegra::Engines::Maxwell3D::Regs& regs) { if (!state_tracker.TouchDepthCompareOp()) { return; @@ -1671,81 +1504,6 @@ void RasterizerVulkan::UpdateBlending(Tegra::Engines::Maxwell3D::Regs& regs) { if (!state_tracker.TouchBlending()) { return; } - - if (state_tracker.TouchColorMask()) { - std::array setup_masks{}; - for (size_t index = 0; index < Maxwell::NumRenderTargets; index++) { - const auto& mask = regs.color_mask[regs.color_mask_common ? 0 : index]; - auto& current = setup_masks[index]; - if (mask.R) { - current |= VK_COLOR_COMPONENT_R_BIT; - } - if (mask.G) { - current |= VK_COLOR_COMPONENT_G_BIT; - } - if (mask.B) { - current |= VK_COLOR_COMPONENT_B_BIT; - } - if (mask.A) { - current |= VK_COLOR_COMPONENT_A_BIT; - } - } - scheduler.Record([setup_masks](vk::CommandBuffer cmdbuf) { - cmdbuf.SetColorWriteMaskEXT(0, setup_masks); - }); - } - - if (state_tracker.TouchBlendEnable()) { - std::array setup_enables{}; - std::ranges::transform( - regs.blend.enable, setup_enables.begin(), - [&](const auto& is_enabled) { return is_enabled != 0 ? 
VK_TRUE : VK_FALSE; }); - scheduler.Record([setup_enables](vk::CommandBuffer cmdbuf) { - cmdbuf.SetColorBlendEnableEXT(0, setup_enables); - }); - } - - if (state_tracker.TouchBlendEquations()) { - std::array setup_blends{}; - - const auto blend_setup = [&](auto& host_blend, const auto& guest_blend) { - host_blend.srcColorBlendFactor = MaxwellToVK::BlendFactor(guest_blend.color_source); - host_blend.dstColorBlendFactor = MaxwellToVK::BlendFactor(guest_blend.color_dest); - host_blend.colorBlendOp = MaxwellToVK::BlendEquation(guest_blend.color_op); - host_blend.srcAlphaBlendFactor = MaxwellToVK::BlendFactor(guest_blend.alpha_source); - host_blend.dstAlphaBlendFactor = MaxwellToVK::BlendFactor(guest_blend.alpha_dest); - host_blend.alphaBlendOp = MaxwellToVK::BlendEquation(guest_blend.alpha_op); - }; - - // Single blend equation for all targets - if (!regs.blend_per_target_enabled) { - // Temporary workaround for games that use iterated blending - if (regs.iterated_blend.enable && Settings::values.use_squashed_iterated_blend) { - setup_blends[0].srcColorBlendFactor = VK_BLEND_FACTOR_ONE; - setup_blends[0].dstColorBlendFactor = VK_BLEND_FACTOR_ONE; - setup_blends[0].colorBlendOp = VK_BLEND_OP_ADD; - setup_blends[0].srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR; - setup_blends[0].dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO; - setup_blends[0].alphaBlendOp = VK_BLEND_OP_ADD; - } else { - blend_setup(setup_blends[0], regs.blend); - } - - // Copy first blend state to all other targets - for (size_t index = 1; index < Maxwell::NumRenderTargets; index++) { - setup_blends[index] = setup_blends[0]; - } - } else { - // Per-target blending - for (size_t index = 0; index < Maxwell::NumRenderTargets; index++) { - blend_setup(setup_blends[index], regs.blend_per_target[index]); - } - } - - scheduler.Record([setup_blends](vk::CommandBuffer cmdbuf) { - cmdbuf.SetColorBlendEquationEXT(0, setup_blends); - }); - } } void 
RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) { @@ -1763,56 +1521,6 @@ void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) return; } dirty[Dirty::VertexInput] = false; - - boost::container::static_vector bindings; - boost::container::static_vector attributes; - - // There seems to be a bug on Nvidia's driver where updating only higher attributes ends up - // generating dirty state. Track the highest dirty attribute and update all attributes until - // that one. - size_t highest_dirty_attr{}; - for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { - if (dirty[Dirty::VertexAttribute0 + index]) { - highest_dirty_attr = index; - } - } - for (size_t index = 0; index < highest_dirty_attr; ++index) { - const Maxwell::VertexAttribute attribute{regs.vertex_attrib_format[index]}; - const u32 binding{attribute.buffer}; - dirty[Dirty::VertexAttribute0 + index] = false; - dirty[Dirty::VertexBinding0 + static_cast(binding)] = true; - if (!attribute.constant) { - attributes.push_back({ - .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT, - .pNext = nullptr, - .location = static_cast(index), - .binding = binding, - .format = MaxwellToVK::VertexFormat(device, attribute.type, attribute.size), - .offset = attribute.offset, - }); - } - } - for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { - if (!dirty[Dirty::VertexBinding0 + index]) { - continue; - } - dirty[Dirty::VertexBinding0 + index] = false; - - const u32 binding{static_cast(index)}; - const auto& input_binding{regs.vertex_streams[binding]}; - const bool is_instanced{regs.vertex_stream_instances.IsInstancingEnabled(binding)}; - bindings.push_back({ - .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_BINDING_DESCRIPTION_2_EXT, - .pNext = nullptr, - .binding = binding, - .stride = input_binding.stride, - .inputRate = is_instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX, - .divisor = is_instanced ? 
input_binding.frequency : 1, - }); - } - scheduler.Record([bindings, attributes](vk::CommandBuffer cmdbuf) { - cmdbuf.SetVertexInputEXT(bindings, attributes); - }); } void RasterizerVulkan::InitializeChannel(Tegra::Control::ChannelState& channel) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index b689c6b660..2337ef644b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project @@ -176,15 +176,7 @@ private: void UpdateDepthCompareOp(Tegra::Engines::Maxwell3D::Regs& regs); void UpdatePrimitiveRestartEnable(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateRasterizerDiscardEnable(Tegra::Engines::Maxwell3D::Regs& regs); - void UpdateConservativeRasterizationMode(Tegra::Engines::Maxwell3D::Regs& regs); - void UpdateLineStippleEnable(Tegra::Engines::Maxwell3D::Regs& regs); - void UpdateLineStipple(Tegra::Engines::Maxwell3D::Regs& regs); - void UpdateLineRasterizationMode(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateDepthBiasEnable(Tegra::Engines::Maxwell3D::Regs& regs); - void UpdateLogicOpEnable(Tegra::Engines::Maxwell3D::Regs& regs); - void UpdateDepthClampEnable(Tegra::Engines::Maxwell3D::Regs& regs); - void UpdateAlphaToCoverageEnable(Tegra::Engines::Maxwell3D::Regs& regs); - void UpdateAlphaToOneEnable(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index 79967d540a..c17bc5900b 100644 --- 
a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project @@ -54,19 +54,11 @@ Flags MakeInvalidationFlags() { StateEnable, PrimitiveRestartEnable, DepthBiasEnable, - LogicOpEnable, - DepthClampEnable, - AlphaToCoverageEnable, - AlphaToOneEnable, - LineRasterizationMode, LogicOp, Blending, ColorMask, BlendEquations, BlendEnable, - ConservativeRasterizationMode, - LineStippleEnable, - LineStippleParams, }; Flags flags{}; for (const int flag : INVALIDATION_FLAGS) { @@ -149,11 +141,6 @@ void SetupDirtyStateEnable(Tables& tables) { setup(OFF(polygon_offset_point_enable), DepthBiasEnable); setup(OFF(polygon_offset_line_enable), DepthBiasEnable); setup(OFF(polygon_offset_fill_enable), DepthBiasEnable); - setup(OFF(logic_op.enable), LogicOpEnable); - setup(OFF(viewport_clip_control.geometry_clip), DepthClampEnable); - setup(OFF(line_stipple_enable), LineStippleEnable); - setup(OFF(anti_alias_alpha_control.alpha_to_coverage), AlphaToCoverageEnable); - setup(OFF(anti_alias_alpha_control.alpha_to_one), AlphaToOneEnable); } void SetupDirtyDepthCompareOp(Tables& tables) { @@ -227,13 +214,6 @@ void SetupDirtyVertexBindings(Tables& tables) { } } -void SetupRasterModes(Tables &tables) { - auto& table = tables[0]; - - table[OFF(line_stipple_params)] = LineStippleParams; - table[OFF(conservative_raster_enable)] = ConservativeRasterizationMode; - table[OFF(line_anti_alias_enable)] = LineRasterizationMode; -} } // Anonymous namespace void StateTracker::SetupTables(Tegra::Control::ChannelState& channel_state) { @@ -256,7 +236,6 @@ void StateTracker::SetupTables(Tegra::Control::ChannelState& channel_state) { SetupDirtyVertexAttributes(tables); SetupDirtyVertexBindings(tables); 
SetupDirtySpecialOps(tables); - SetupRasterModes(tables); } void StateTracker::ChangeChannel(Tegra::Control::ChannelState& channel_state) { diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index 74bae9e181..8f8db9f828 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project @@ -53,17 +53,9 @@ enum : u8 { StencilTestEnable, PrimitiveRestartEnable, RasterizerDiscardEnable, - ConservativeRasterizationMode, - LineRasterizationMode, - LineStippleEnable, - LineStippleParams, DepthBiasEnable, StateEnable, LogicOp, - LogicOpEnable, - DepthClampEnable, - AlphaToCoverageEnable, - AlphaToOneEnable, Blending, BlendEnable, @@ -213,33 +205,8 @@ public: return Exchange(Dirty::RasterizerDiscardEnable, false); } - bool TouchConservativeRasterizationMode() - { - return Exchange(Dirty::ConservativeRasterizationMode, false); - } - - bool TouchLineStippleEnable() { return Exchange(Dirty::LineStippleEnable, false); } - - bool TouchLineStipple() { return Exchange(Dirty::LineStippleParams, false); } - bool TouchDepthBiasEnable() { return Exchange(Dirty::DepthBiasEnable, false); } - bool TouchLogicOpEnable() { - return Exchange(Dirty::LogicOpEnable, false); - } - - bool TouchDepthClampEnable() { - return Exchange(Dirty::DepthClampEnable, false); - } - - bool TouchAlphaToCoverageEnable() { - return Exchange(Dirty::AlphaToCoverageEnable, false); - } - - bool TouchAlphaToOneEnable() { - return Exchange(Dirty::AlphaToOneEnable, false); - } - bool TouchDepthCompareOp() { return Exchange(Dirty::DepthCompareOp, false); } @@ -276,10 +243,6 @@ public: return Exchange(Dirty::LogicOp, false); } - bool 
TouchLineRasterizationMode() { - return Exchange(Dirty::LineRasterizationMode, false); - } - bool ChangePrimitiveTopology(Maxwell::PrimitiveTopology new_topology) { const bool has_changed = current_topology != new_topology; current_topology = new_topology; diff --git a/src/video_core/vulkan_common/vulkan_debug_callback.cpp b/src/video_core/vulkan_common/vulkan_debug_callback.cpp index cd6653c86e..cc3520f654 100644 --- a/src/video_core/vulkan_common/vulkan_debug_callback.cpp +++ b/src/video_core/vulkan_common/vulkan_debug_callback.cpp @@ -43,14 +43,6 @@ VkBool32 DebugUtilCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity, case 0xdff2e5c1u: // VUID-vkCmdSetRasterizerDiscardEnable-None-04871 case 0x0cc85f41u: // VUID-vkCmdSetPrimitiveRestartEnable-None-04866 case 0x01257b492: // VUID-vkCmdSetLogicOpEXT-None-0486 - // The below are due to incorrect reporting of vertexInputDynamicState - case 0x398e0dabu: // VUID-vkCmdSetVertexInputEXT-None-04790 - // The below are due to incorrect reporting of extendedDynamicState3 - case 0x970c11a5u: // VUID-vkCmdSetColorWriteMaskEXT-extendedDynamicState3ColorWriteMask-07364 - case 0x6b453f78u: // VUID-vkCmdSetColorBlendEnableEXT-extendedDynamicState3ColorBlendEnable-07355 - case 0xf66469d0u: // VUID-vkCmdSetColorBlendEquationEXT-extendedDynamicState3ColorBlendEquation-07356 - case 0x1d43405eu: // VUID-vkCmdSetLogicOpEnableEXT-extendedDynamicState3LogicOpEnable-07365 - case 0x638462e8u: // VUID-vkCmdSetDepthClampEnableEXT-extendedDynamicState3DepthClampEnable-07448 // Misc case 0xe0a2da61u: // VUID-vkCmdDrawIndexed-format-07753 #else diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index b51c57d380..399b5bef3a 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -665,48 +665,26 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR // Level 0 = Core Dynamic States only (Vulkan 
1.0) // Level 1 = Core + VK_EXT_extended_dynamic_state // Level 2 = Core + VK_EXT_extended_dynamic_state + VK_EXT_extended_dynamic_state2 - // Level 3 = Core + VK_EXT_extended_dynamic_state + VK_EXT_extended_dynamic_state2 + VK_EXT_extended_dynamic_state3 switch (dyna_state) { case Settings::ExtendedDynamicState::Disabled: - // Level 0: Disable all extended dynamic state extensions + // Level 0: Disable all configured extended dynamic state extensions RemoveExtensionFeature(extensions.extended_dynamic_state, features.extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME); RemoveExtensionFeature(extensions.extended_dynamic_state2, features.extended_dynamic_state2, VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME); - RemoveExtensionFeature(extensions.extended_dynamic_state3, features.extended_dynamic_state3, - VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME); - dynamic_state3_blending = false; - dynamic_state3_enables = false; break; case Settings::ExtendedDynamicState::EDS1: - // Level 1: Enable EDS1, disable EDS2 and EDS3 + // Level 1: Enable EDS1, disable EDS2 RemoveExtensionFeature(extensions.extended_dynamic_state2, features.extended_dynamic_state2, VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME); - RemoveExtensionFeature(extensions.extended_dynamic_state3, features.extended_dynamic_state3, - VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME); - dynamic_state3_blending = false; - dynamic_state3_enables = false; break; case Settings::ExtendedDynamicState::EDS2: - // Level 2: Enable EDS1 + EDS2, disable EDS3 - RemoveExtensionFeature(extensions.extended_dynamic_state3, features.extended_dynamic_state3, - VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME); - dynamic_state3_blending = false; - dynamic_state3_enables = false; - break; - case Settings::ExtendedDynamicState::EDS3: default: - // Level 3: Enable all (EDS1 + EDS2 + EDS3) + // Level 2: Enable EDS1 + EDS2 break; } - // VK_EXT_vertex_input_dynamic_state is independent from EDS - // It can be enabled 
even without extended_dynamic_state - if (!Settings::values.vertex_input_dynamic_state.GetValue()) { - RemoveExtensionFeature(extensions.vertex_input_dynamic_state, features.vertex_input_dynamic_state, VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME); - } - logical = vk::Device::Create(physical, queue_cis, ExtensionListForVulkan(loaded_extensions), first_next, dld); graphics_queue = logical.GetQueue(graphics_family); @@ -1122,41 +1100,12 @@ bool Device::GetSuitability(bool requires_swapchain) { // VK_EXT_extended_dynamic_state2 below this will appear drivers that need workarounds. - // VK_EXT_extended_dynamic_state3 below this will appear drivers that need workarounds. - - // Samsung: Broken extendedDynamicState3ColorBlendEquation - // Disable blend equation dynamic state, force static pipeline state - if (extensions.extended_dynamic_state3 && - (driver_id == VK_DRIVER_ID_SAMSUNG_PROPRIETARY)) { - LOG_WARNING(Render_Vulkan, - "Samsung: Disabling broken extendedDynamicState3ColorBlendEquation"); - features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable = false; - features.extended_dynamic_state3.extendedDynamicState3ColorBlendEquation = false; - } - - // Intel Windows < 27.20.100.0: Broken VertexInputDynamicState - // Same for NVIDIA Proprietary < 580.119.02, unknown when VIDS was first NOT broken - // Disable VertexInputDynamicState on old Intel Windows drivers - if (extensions.vertex_input_dynamic_state) { - const u32 version = (properties.properties.driverVersion << 3) >> 3; - if ((driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS && version < VK_MAKE_API_VERSION(27, 20, 100, 0)) - || (driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY && version < VK_MAKE_API_VERSION(580, 119, 02, 0))) { - LOG_WARNING(Render_Vulkan, "Disabling broken VK_EXT_vertex_input_dynamic_state"); - RemoveExtensionFeature(extensions.vertex_input_dynamic_state, features.vertex_input_dynamic_state, VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME); - } - } - if 
(u32(Settings::values.dyna_state.GetValue()) == 0) { LOG_INFO(Render_Vulkan, "Extended Dynamic State disabled by user setting, clearing all EDS features"); features.custom_border_color.customBorderColors = false; features.custom_border_color.customBorderColorWithoutFormat = false; features.extended_dynamic_state.extendedDynamicState = false; features.extended_dynamic_state2.extendedDynamicState2 = false; - features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable = false; - features.extended_dynamic_state3.extendedDynamicState3ColorBlendEquation = false; - features.extended_dynamic_state3.extendedDynamicState3ColorWriteMask = false; - features.extended_dynamic_state3.extendedDynamicState3DepthClampEnable = false; - features.extended_dynamic_state3.extendedDynamicState3LogicOpEnable = false; } // Return whether we were suitable. @@ -1208,65 +1157,6 @@ void Device::RemoveUnsuitableExtensions() { features.extended_dynamic_state2, VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME); - // VK_EXT_extended_dynamic_state3 - const bool supports_color_blend_enable = - features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable; - const bool supports_color_blend_equation = - features.extended_dynamic_state3.extendedDynamicState3ColorBlendEquation; - const bool supports_color_write_mask = - features.extended_dynamic_state3.extendedDynamicState3ColorWriteMask; - dynamic_state3_blending = supports_color_blend_enable && supports_color_blend_equation && - supports_color_write_mask; - - const bool supports_depth_clamp_enable = - features.extended_dynamic_state3.extendedDynamicState3DepthClampEnable; - const bool supports_logic_op_enable = - features.extended_dynamic_state3.extendedDynamicState3LogicOpEnable; - const bool supports_line_raster_mode = - features.extended_dynamic_state3.extendedDynamicState3LineRasterizationMode && - extensions.line_rasterization && features.line_rasterization.rectangularLines; - const bool supports_conservative_raster_mode = - 
features.extended_dynamic_state3.extendedDynamicState3ConservativeRasterizationMode && - extensions.conservative_rasterization; - const bool supports_line_stipple_enable = - features.extended_dynamic_state3.extendedDynamicState3LineStippleEnable && - extensions.line_rasterization && features.line_rasterization.stippledRectangularLines; - const bool supports_alpha_to_coverage = - features.extended_dynamic_state3.extendedDynamicState3AlphaToCoverageEnable; - const bool supports_alpha_to_one = - features.extended_dynamic_state3.extendedDynamicState3AlphaToOneEnable && - features.features.alphaToOne; - - dynamic_state3_depth_clamp_enable = supports_depth_clamp_enable; - dynamic_state3_logic_op_enable = supports_logic_op_enable; - dynamic_state3_line_raster_mode = supports_line_raster_mode; - dynamic_state3_conservative_raster_mode = supports_conservative_raster_mode; - dynamic_state3_line_stipple_enable = supports_line_stipple_enable; - dynamic_state3_alpha_to_coverage = supports_alpha_to_coverage; - dynamic_state3_alpha_to_one = supports_alpha_to_one; - - dynamic_state3_enables = dynamic_state3_depth_clamp_enable || dynamic_state3_logic_op_enable || - dynamic_state3_line_raster_mode || - dynamic_state3_conservative_raster_mode || - dynamic_state3_line_stipple_enable || - dynamic_state3_alpha_to_coverage || dynamic_state3_alpha_to_one; - - extensions.extended_dynamic_state3 = dynamic_state3_blending || dynamic_state3_enables; - if (!extensions.extended_dynamic_state3) { - dynamic_state3_blending = false; - dynamic_state3_enables = false; - dynamic_state3_depth_clamp_enable = false; - dynamic_state3_logic_op_enable = false; - dynamic_state3_line_raster_mode = false; - dynamic_state3_conservative_raster_mode = false; - dynamic_state3_line_stipple_enable = false; - dynamic_state3_alpha_to_coverage = false; - dynamic_state3_alpha_to_one = false; - } - RemoveExtensionFeatureIfUnsuitable(extensions.extended_dynamic_state3, - features.extended_dynamic_state3, - 
VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME); - // VK_EXT_robustness2 // Enable if at least one robustness2 feature is available extensions.robustness_2 = features.robustness2.robustBufferAccess2 || @@ -1335,13 +1225,6 @@ void Device::RemoveUnsuitableExtensions() { properties.transform_feedback.transformFeedbackQueries); } - // VK_EXT_vertex_input_dynamic_state - extensions.vertex_input_dynamic_state = - features.vertex_input_dynamic_state.vertexInputDynamicState; - RemoveExtensionFeatureIfUnsuitable(extensions.vertex_input_dynamic_state, - features.vertex_input_dynamic_state, - VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME); - // VK_EXT_multi_draw extensions.multi_draw = features.multi_draw.multiDraw; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index d29a8cd3f3..f5fe0516d4 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -53,7 +53,6 @@ VK_DEFINE_HANDLE(VmaAllocator) FEATURE(EXT, DepthClipControl, DEPTH_CLIP_CONTROL, depth_clip_control) \ FEATURE(EXT, ExtendedDynamicState, EXTENDED_DYNAMIC_STATE, extended_dynamic_state) \ FEATURE(EXT, ExtendedDynamicState2, EXTENDED_DYNAMIC_STATE_2, extended_dynamic_state2) \ - FEATURE(EXT, ExtendedDynamicState3, EXTENDED_DYNAMIC_STATE_3, extended_dynamic_state3) \ FEATURE(EXT, 4444Formats, 4444_FORMATS, format_a4b4g4r4) \ FEATURE(EXT, IndexTypeUint8, INDEX_TYPE_UINT8, index_type_uint8) \ FEATURE(EXT, LineRasterization, LINE_RASTERIZATION, line_rasterization) \ @@ -63,7 +62,6 @@ VK_DEFINE_HANDLE(VmaAllocator) FEATURE(EXT, ProvokingVertex, PROVOKING_VERTEX, provoking_vertex) \ FEATURE(EXT, Robustness2, ROBUSTNESS_2, robustness2) \ FEATURE(EXT, TransformFeedback, TRANSFORM_FEEDBACK, transform_feedback) \ - FEATURE(EXT, VertexInputDynamicState, VERTEX_INPUT_DYNAMIC_STATE, vertex_input_dynamic_state) \ FEATURE(EXT, SwapchainMaintenance1, SWAPCHAIN_MAINTENANCE_1, swapchain_maintenance1) \ FEATURE(KHR, 
Maintenance5, MAINTENANCE_5, maintenance5) \ FEATURE(KHR, Maintenance6, MAINTENANCE_6, maintenance6) \ @@ -125,13 +123,11 @@ VK_DEFINE_HANDLE(VmaAllocator) EXTENSION_NAME(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME) \ EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME) \ EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME) \ - EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME) \ EXTENSION_NAME(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME) \ EXTENSION_NAME(VK_EXT_4444_FORMATS_EXTENSION_NAME) \ EXTENSION_NAME(VK_EXT_IMAGE_ROBUSTNESS_EXTENSION_NAME) \ EXTENSION_NAME(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME) \ EXTENSION_NAME(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME) \ - EXTENSION_NAME(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME) \ EXTENSION_NAME(VK_NV_GEOMETRY_SHADER_PASSTHROUGH_EXTENSION_NAME) \ EXTENSION_NAME(VK_NV_VIEWPORT_ARRAY2_EXTENSION_NAME) \ EXTENSION_NAME(VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME) @@ -189,8 +185,7 @@ VK_DEFINE_HANDLE(VmaAllocator) FEATURE_NAME(shader_float16_int8, shaderInt8) \ FEATURE_NAME(timeline_semaphore, timelineSemaphore) \ FEATURE_NAME(transform_feedback, transformFeedback) \ - FEATURE_NAME(uniform_buffer_standard_layout, uniformBufferStandardLayout) \ - FEATURE_NAME(vertex_input_dynamic_state, vertexInputDynamicState) + FEATURE_NAME(uniform_buffer_standard_layout, uniformBufferStandardLayout) // These features are not required but can be helpful for drivers that can use it. #define FOR_EACH_VK_OPTIONAL_FEATURE(FEATURE_NAME) \ @@ -625,26 +620,11 @@ public: return features.extended_dynamic_state2.extendedDynamicState2LogicOp; } - /// Returns true if the device supports VK_EXT_extended_dynamic_state3. - bool IsExtExtendedDynamicState3Supported() const { - return extensions.extended_dynamic_state3; - } - /// Returns true if the device supports VK_EXT_4444_formats. 
bool IsExt4444FormatsSupported() const { return features.format_a4b4g4r4.formatA4B4G4R4; } - /// Returns true if the device supports VK_EXT_extended_dynamic_state3. - bool IsExtExtendedDynamicState3BlendingSupported() const { - return dynamic_state3_blending; - } - - /// Returns true if the device supports VK_EXT_extended_dynamic_state3. - bool IsExtExtendedDynamicState3EnablesSupported() const { - return dynamic_state3_enables; - } - /// Returns true if the device supports VK_EXT_filter_cubic bool IsExtFilterCubicSupported() const { return extensions.filter_cubic; @@ -676,38 +656,6 @@ public: return features.features.alphaToOne != VK_FALSE; } - bool SupportsDynamicState3DepthClampEnable() const { - return dynamic_state3_depth_clamp_enable; - } - - bool SupportsDynamicState3LogicOpEnable() const { - return dynamic_state3_logic_op_enable; - } - - bool SupportsDynamicState3LineRasterizationMode() const { - return dynamic_state3_line_raster_mode; - } - - bool SupportsDynamicState3ConservativeRasterizationMode() const { - return dynamic_state3_conservative_raster_mode; - } - - bool SupportsDynamicState3LineStippleEnable() const { - return dynamic_state3_line_stipple_enable; - } - - bool SupportsDynamicState3AlphaToCoverageEnable() const { - return dynamic_state3_alpha_to_coverage; - } - - bool SupportsDynamicState3AlphaToOneEnable() const { - return dynamic_state3_alpha_to_one; - } - - /// Returns true if the device supports VK_EXT_vertex_input_dynamic_state. - bool IsExtVertexInputDynamicStateSupported() const { - return extensions.vertex_input_dynamic_state; - } /// Returns true if the device supports VK_EXT_shader_demote_to_helper_invocation bool IsExtShaderDemoteToHelperInvocationSupported() const { @@ -1059,15 +1007,6 @@ private: bool supports_d24_depth{}; ///< Supports D24 depth buffers. bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting. 
bool must_emulate_scaled_formats{}; ///< Requires scaled vertex format emulation - bool dynamic_state3_blending{}; ///< Has blending features of dynamic_state3. - bool dynamic_state3_enables{}; ///< Has at least one enable feature of dynamic_state3. - bool dynamic_state3_depth_clamp_enable{}; - bool dynamic_state3_logic_op_enable{}; - bool dynamic_state3_line_raster_mode{}; - bool dynamic_state3_conservative_raster_mode{}; - bool dynamic_state3_line_stipple_enable{}; - bool dynamic_state3_alpha_to_coverage{}; - bool dynamic_state3_alpha_to_one{}; bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow. size_t sampler_heap_budget{}; ///< Sampler budget for buggy drivers (0 = unlimited). u64 device_access_memory{}; ///< Total size of device local memory in bytes. diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 926f48de89..095725b4c4 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -146,14 +146,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdSetDepthWriteEnableEXT); X(vkCmdSetPrimitiveRestartEnableEXT); X(vkCmdSetRasterizerDiscardEnableEXT); - X(vkCmdSetAlphaToCoverageEnableEXT); - X(vkCmdSetAlphaToOneEnableEXT); - X(vkCmdSetConservativeRasterizationModeEXT); - X(vkCmdSetLineRasterizationModeEXT); - X(vkCmdSetLineStippleEnableEXT); X(vkCmdSetDepthBiasEnableEXT); - X(vkCmdSetLogicOpEnableEXT); - X(vkCmdSetDepthClampEnableEXT); X(vkCmdSetFrontFaceEXT); X(vkCmdSetLogicOpEXT); X(vkCmdSetPatchControlPointsEXT); @@ -161,10 +154,6 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdSetPrimitiveTopologyEXT); X(vkCmdSetStencilOpEXT); X(vkCmdSetStencilTestEnableEXT); - X(vkCmdSetVertexInputEXT); - X(vkCmdSetColorWriteMaskEXT); - X(vkCmdSetColorBlendEnableEXT); - X(vkCmdSetColorBlendEquationEXT); X(vkCmdResolveImage); X(vkCreateBuffer); X(vkCreateBufferView); diff --git 
a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index 872fbd858e..44dffe11f3 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -241,15 +241,8 @@ struct DeviceDispatch : InstanceDispatch { PFN_vkCmdSetDepthWriteEnableEXT vkCmdSetDepthWriteEnableEXT{}; PFN_vkCmdSetPrimitiveRestartEnableEXT vkCmdSetPrimitiveRestartEnableEXT{}; PFN_vkCmdSetRasterizerDiscardEnableEXT vkCmdSetRasterizerDiscardEnableEXT{}; - PFN_vkCmdSetAlphaToCoverageEnableEXT vkCmdSetAlphaToCoverageEnableEXT{}; - PFN_vkCmdSetAlphaToOneEnableEXT vkCmdSetAlphaToOneEnableEXT{}; - PFN_vkCmdSetConservativeRasterizationModeEXT vkCmdSetConservativeRasterizationModeEXT{}; - PFN_vkCmdSetLineRasterizationModeEXT vkCmdSetLineRasterizationModeEXT{}; - PFN_vkCmdSetLineStippleEnableEXT vkCmdSetLineStippleEnableEXT{}; PFN_vkCmdSetLineStippleEXT vkCmdSetLineStippleEXT{}; PFN_vkCmdSetDepthBiasEnableEXT vkCmdSetDepthBiasEnableEXT{}; - PFN_vkCmdSetLogicOpEnableEXT vkCmdSetLogicOpEnableEXT{}; - PFN_vkCmdSetDepthClampEnableEXT vkCmdSetDepthClampEnableEXT{}; PFN_vkCmdSetEvent vkCmdSetEvent{}; PFN_vkCmdSetFrontFaceEXT vkCmdSetFrontFaceEXT{}; PFN_vkCmdSetPatchControlPointsEXT vkCmdSetPatchControlPointsEXT{}; @@ -262,11 +255,7 @@ struct DeviceDispatch : InstanceDispatch { PFN_vkCmdSetStencilReference vkCmdSetStencilReference{}; PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT{}; PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask{}; - PFN_vkCmdSetVertexInputEXT vkCmdSetVertexInputEXT{}; PFN_vkCmdSetViewport vkCmdSetViewport{}; - PFN_vkCmdSetColorWriteMaskEXT vkCmdSetColorWriteMaskEXT{}; - PFN_vkCmdSetColorBlendEnableEXT vkCmdSetColorBlendEnableEXT{}; - PFN_vkCmdSetColorBlendEquationEXT vkCmdSetColorBlendEquationEXT{}; PFN_vkCmdWaitEvents vkCmdWaitEvents{}; PFN_vkCreateBuffer vkCreateBuffer{}; PFN_vkCreateBufferView vkCreateBufferView{}; @@ -1458,21 +1447,6 @@ public: 
dld->vkCmdSetRasterizerDiscardEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); } - void SetConservativeRasterizationModeEXT(VkConservativeRasterizationModeEXT mode) const noexcept - { - dld->vkCmdSetConservativeRasterizationModeEXT(handle, mode); - } - - void SetLineRasterizationModeEXT(VkLineRasterizationModeEXT mode) const noexcept - { - dld->vkCmdSetLineRasterizationModeEXT(handle, mode); - } - - void SetLineStippleEnableEXT(bool enable) const noexcept - { - dld->vkCmdSetLineStippleEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); - } - void SetLineStippleEXT(u32 factor, u16 pattern) const noexcept { dld->vkCmdSetLineStippleEXT(handle, factor, pattern); @@ -1482,22 +1456,6 @@ public: dld->vkCmdSetDepthBiasEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); } - void SetLogicOpEnableEXT(bool enable) const noexcept { - dld->vkCmdSetLogicOpEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); - } - - void SetAlphaToCoverageEnableEXT(bool enable) const noexcept { - dld->vkCmdSetAlphaToCoverageEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); - } - - void SetAlphaToOneEnableEXT(bool enable) const noexcept { - dld->vkCmdSetAlphaToOneEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); - } - - void SetDepthClampEnableEXT(bool enable) const noexcept { - dld->vkCmdSetDepthClampEnableEXT(handle, enable ? 
VK_TRUE : VK_FALSE); - } - void SetFrontFaceEXT(VkFrontFace front_face) const noexcept { dld->vkCmdSetFrontFaceEXT(handle, front_face); } @@ -1510,19 +1468,6 @@ public: dld->vkCmdSetPatchControlPointsEXT(handle, patch_control_points); } - void SetColorWriteMaskEXT(u32 first, Span masks) const noexcept { - dld->vkCmdSetColorWriteMaskEXT(handle, first, masks.size(), masks.data()); - } - - void SetColorBlendEnableEXT(u32 first, Span enables) const noexcept { - dld->vkCmdSetColorBlendEnableEXT(handle, first, enables.size(), enables.data()); - } - - void SetColorBlendEquationEXT(u32 first, - Span equations) const noexcept { - dld->vkCmdSetColorBlendEquationEXT(handle, first, equations.size(), equations.data()); - } - void SetLineWidth(float line_width) const noexcept { dld->vkCmdSetLineWidth(handle, line_width); } @@ -1540,13 +1485,6 @@ public: dld->vkCmdSetStencilTestEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); } - void SetVertexInputEXT( - vk::Span bindings, - vk::Span attributes) const noexcept { - dld->vkCmdSetVertexInputEXT(handle, bindings.size(), bindings.data(), attributes.size(), - attributes.data()); - } - void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers, const VkDeviceSize* offsets, const VkDeviceSize* sizes) const noexcept { From 3272e1fcb51b37214534d8d82193cc3a11dce1b5 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Sat, 14 Feb 2026 22:32:49 -0400 Subject: [PATCH 11/68] [vulkan] adjusting BindVertexBuffer2EXT wrong calling in pipeline --- src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 5 ++++- src/video_core/renderer_vulkan/vk_buffer_cache.h | 6 ++++++ src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 1 + 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index c842cce709..d38a03a526 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ 
b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -564,7 +564,10 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset if (index >= device.GetMaxVertexInputBindings()) { return; } - if (device.IsExtExtendedDynamicStateSupported()) { + const bool use_dynamic_vertex_input_binding_stride = + device.IsExtExtendedDynamicStateSupported() && + use_vertex_input_binding_stride_dynamic_state; + if (use_dynamic_vertex_input_binding_stride) { scheduler.Record([index, buffer, offset, size, stride](vk::CommandBuffer cmdbuf) { const VkDeviceSize vk_offset = buffer != VK_NULL_HANDLE ? offset : 0; const VkDeviceSize vk_size = buffer != VK_NULL_HANDLE ? size : VK_WHOLE_SIZE; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index b73fcd162b..8db936ffe7 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -127,6 +127,10 @@ public: void BindVertexBuffers(VideoCommon::HostBindings& bindings); + void SetUseVertexInputBindingStrideDynamicState(bool enabled) { + use_vertex_input_binding_stride_dynamic_state = enabled; + } + void BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size); void BindTransformFeedbackBuffers(VideoCommon::HostBindings& bindings); @@ -183,6 +187,8 @@ private: std::unique_ptr uint8_pass; QuadIndexedPass quad_index_pass; + bool use_vertex_input_binding_stride_dynamic_state = true; + bool limit_dynamic_storage_buffers = false; u32 max_dynamic_storage_buffers = (std::numeric_limits::max)(); }; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 774d613c25..8b0b055641 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -461,6 +461,7 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) { bind_stage_info(4); } + 
buffer_cache.runtime.SetUseVertexInputBindingStrideDynamicState(UsesExtendedDynamicState()); buffer_cache.UpdateGraphicsBuffers(is_indexed); buffer_cache.BindHostGeometryBuffers(is_indexed); From 6190fcaaef36d3cfb789cf482b64f05a09d7f2de Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Sat, 14 Feb 2026 23:16:29 -0400 Subject: [PATCH 12/68] [vulkan] Changing ProvokingVertex enabling nature --- .../features/settings/model/BooleanSetting.kt | 1 - .../settings/model/view/SettingsItem.kt | 7 --- .../settings/ui/SettingsFragmentPresenter.kt | 1 - src/common/settings.h | 3 +- src/qt_common/config/shared_translation.cpp | 6 --- .../renderer_vulkan/vk_graphics_pipeline.cpp | 9 +++- .../vulkan_common/vulkan_device.cpp | 43 ++++--------------- src/video_core/vulkan_common/vulkan_device.h | 11 +++-- 8 files changed, 23 insertions(+), 58 deletions(-) diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/BooleanSetting.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/BooleanSetting.kt index 9b9f475725..7b98fe9b44 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/BooleanSetting.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/BooleanSetting.kt @@ -30,7 +30,6 @@ enum class BooleanSetting(override val key: String) : AbstractBooleanSetting { BUFFER_REORDER_DISABLE("disable_buffer_reorder"), RENDERER_DEBUG("debug"), RENDERER_PATCH_OLD_QCOM_DRIVERS("patch_old_qcom_drivers"), - RENDERER_PROVOKING_VERTEX("provoking_vertex"), RENDERER_DESCRIPTOR_INDEXING("descriptor_indexing"), RENDERER_SAMPLE_SHADING("sample_shading"), GPU_UNSWIZZLE_ENABLED("gpu_unswizzle_enabled"), diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt index bbf3674d17..64053518b9 100644 --- 
a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt @@ -141,13 +141,6 @@ abstract class SettingsItem( valuesId = R.array.dynaStateValues ) ) - put( - SwitchSetting( - BooleanSetting.RENDERER_PROVOKING_VERTEX, - titleId = R.string.provoking_vertex, - descriptionId = R.string.provoking_vertex_description - ) - ) put( SwitchSetting( BooleanSetting.RENDERER_DESCRIPTOR_INDEXING, diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt index 0e9a0df977..ae1f142b8b 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt @@ -291,7 +291,6 @@ class SettingsFragmentPresenter( add(HeaderSetting(R.string.extensions)) add(IntSetting.RENDERER_DYNA_STATE.key) - add(BooleanSetting.RENDERER_PROVOKING_VERTEX.key) add(BooleanSetting.RENDERER_DESCRIPTOR_INDEXING.key) add(IntSetting.RENDERER_SAMPLE_SHADING.key) diff --git a/src/common/settings.h b/src/common/settings.h index 237cd7f0b3..0f839f8873 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -586,7 +586,7 @@ struct Values { SwitchableSetting dyna_state{linkage, #if defined (ANDROID) || defined (__APPLE__) - ExtendedDynamicState::Disabled, + ExtendedDynamicState::EDS1, #else ExtendedDynamicState::EDS2, #endif @@ -601,7 +601,6 @@ struct Values { Category::RendererExtensions, Specialization::Scalar}; - SwitchableSetting provoking_vertex{linkage, false, "provoking_vertex", Category::RendererExtensions}; SwitchableSetting descriptor_indexing{linkage, false, "descriptor_indexing", Category::RendererExtensions}; Setting renderer_debug{linkage, false, "debug", 
Category::RendererDebug}; diff --git a/src/qt_common/config/shared_translation.cpp b/src/qt_common/config/shared_translation.cpp index 095335dc81..bf312a183e 100644 --- a/src/qt_common/config/shared_translation.cpp +++ b/src/qt_common/config/shared_translation.cpp @@ -368,12 +368,6 @@ std::unique_ptr InitializeTranslations(QObject* parent) "Higher states allow for more features and can increase performance, but may cause " "additional graphical issues.")); - INSERT(Settings, - provoking_vertex, - tr("Provoking Vertex"), - tr("Improves lighting and vertex handling in some games.\n" - "Only Vulkan 1.0+ devices support this extension.")); - INSERT(Settings, descriptor_indexing, tr("Descriptor Indexing"), diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 8b0b055641..3e95d8528a 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -740,10 +740,15 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { : VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT, .extraPrimitiveOverestimationSize = 0.0f, }; + const bool preserve_provoking_vertex_for_xfb = + !key.state.xfb_enabled || device.IsTransformFeedbackProvokingVertexPreserved(); + const bool use_last_provoking_vertex = + key.state.provoking_vertex_last != 0 && preserve_provoking_vertex_for_xfb; + VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_vertex{ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT, .pNext = nullptr, - .provokingVertexMode = key.state.provoking_vertex_last != 0 + .provokingVertexMode = use_last_provoking_vertex ? 
VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT : VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT, }; @@ -754,7 +759,7 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { if (device.IsExtConservativeRasterizationSupported()) { conservative_raster.pNext = std::exchange(rasterization_ci.pNext, &conservative_raster); } - if (device.IsExtProvokingVertexSupported() && Settings::values.provoking_vertex.GetValue()) { + if (device.IsExtProvokingVertexSupported()) { provoking_vertex.pNext = std::exchange(rasterization_ci.pNext, &provoking_vertex); } diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 399b5bef3a..798c864736 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -660,12 +660,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR const auto dyna_state = Settings::values.dyna_state.GetValue(); - // Base dynamic states (VIEWPORT, SCISSOR, DEPTH_BIAS, etc.) are ALWAYS active in vk_graphics_pipeline.cpp - // This slider controls EXTENDED dynamic states with accumulative levels per Vulkan specs: - // Level 0 = Core Dynamic States only (Vulkan 1.0) - // Level 1 = Core + VK_EXT_extended_dynamic_state - // Level 2 = Core + VK_EXT_extended_dynamic_state + VK_EXT_extended_dynamic_state2 - switch (dyna_state) { case Settings::ExtendedDynamicState::Disabled: // Level 0: Disable all configured extended dynamic state extensions @@ -1098,6 +1092,8 @@ bool Device::GetSuitability(bool requires_swapchain) { // Driver detection variables for workarounds in GetSuitability const VkDriverId driver_id = properties.driver.driverID; + // VK_EXT_extended_dynamic_state: drivers that need workarounds will appear below this. + // VK_EXT_extended_dynamic_state2: drivers that need workarounds will appear below this.
if (u32(Settings::values.dyna_state.GetValue()) == 0) { @@ -1114,15 +1110,9 @@ bool Device::GetSuitability(bool requires_swapchain) { void Device::RemoveUnsuitableExtensions() { // VK_EXT_custom_border_color - // Enable extension if driver supports it, then check individual features - // - customBorderColors: Required to use VK_BORDER_COLOR_FLOAT_CUSTOM_EXT - // - customBorderColorWithoutFormat: Optional, allows VK_FORMAT_UNDEFINED - // If only customBorderColors is available, we must provide a specific format if (extensions.custom_border_color) { // Verify that at least customBorderColors is available if (!features.custom_border_color.customBorderColors) { - LOG_WARNING(Render_Vulkan, - "VK_EXT_custom_border_color reported but customBorderColors feature not available, disabling"); extensions.custom_border_color = false; } } @@ -1172,20 +1162,6 @@ void Device::RemoveUnsuitableExtensions() { RemoveExtensionFeatureIfUnsuitable(extensions.image_robustness, features.image_robustness, VK_EXT_IMAGE_ROBUSTNESS_EXTENSION_NAME); - // VK_EXT_provoking_vertex - if (Settings::values.provoking_vertex.GetValue()) { - extensions.provoking_vertex = features.provoking_vertex.provokingVertexLast - && features.provoking_vertex - .transformFeedbackPreservesProvokingVertex; - RemoveExtensionFeatureIfUnsuitable(extensions.provoking_vertex, - features.provoking_vertex, - VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME); - } else { - RemoveExtensionFeature(extensions.provoking_vertex, - features.provoking_vertex, - VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME); - } - // VK_KHR_shader_atomic_int64 extensions.shader_atomic_int64 = features.shader_atomic_int64.shaderBufferInt64Atomics && features.shader_atomic_int64.shaderSharedInt64Atomics; @@ -1209,21 +1185,18 @@ void Device::RemoveUnsuitableExtensions() { VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME); // VK_EXT_transform_feedback - // We only require the basic transformFeedback feature and at least - // one transform feedback buffer. 
We keep transformFeedbackQueries as it's used by - // the streaming byte count implementation. GeometryStreams and multiple streams - // are not strictly required since we currently support only stream 0. extensions.transform_feedback = features.transform_feedback.transformFeedback && properties.transform_feedback.maxTransformFeedbackBuffers > 0 && properties.transform_feedback.transformFeedbackQueries; RemoveExtensionFeatureIfUnsuitable(extensions.transform_feedback, features.transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME); - if (extensions.transform_feedback) { - LOG_INFO(Render_Vulkan, "VK_EXT_transform_feedback enabled (buffers={}, queries={})", - properties.transform_feedback.maxTransformFeedbackBuffers, - properties.transform_feedback.transformFeedbackQueries); - } + + // VK_EXT_provoking_vertex + extensions.provoking_vertex = features.provoking_vertex.provokingVertexLast; + RemoveExtensionFeatureIfUnsuitable(extensions.provoking_vertex, + features.provoking_vertex, + VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME); // VK_EXT_multi_draw extensions.multi_draw = features.multi_draw.multiDraw; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index f5fe0516d4..a6bf263bba 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -514,13 +514,11 @@ public: } /// Returns true if the device supports VK_EXT_shader_stencil_export. - /// Note: Most Mali/NVIDIA drivers don't support this. Use hardware blits as fallback. bool IsExtShaderStencilExportSupported() const { return extensions.shader_stencil_export; } - /// Returns true if depth/stencil operations can be performed efficiently. - /// Either through shader export or hardware blits. + /// Returns true if depth/stencil operations can be performed through shader export or hardware blits.
bool CanPerformDepthStencilOperations() const { return extensions.shader_stencil_export || is_blit_depth24_stencil8_supported || is_blit_depth32_stencil8_supported; @@ -556,11 +554,16 @@ public: return extensions.transform_feedback; } - /// Returns true if the device supports VK_EXT_transform_feedback properly. + /// Returns true if the device supports transform feedback geometry streams. bool AreTransformFeedbackGeometryStreamsSupported() const { return features.transform_feedback.geometryStreams; } + /// Returns true if transform feedback preserves the provoking vertex. + bool IsTransformFeedbackProvokingVertexPreserved() const { + return features.provoking_vertex.transformFeedbackPreservesProvokingVertex; + } + /// Returns true if the device supports VK_EXT_custom_border_color. bool IsExtCustomBorderColorSupported() const { return extensions.custom_border_color; From 2bac9cec320f2f7584c755676fc04bef4c8fa90e Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Sat, 14 Feb 2026 23:46:25 -0400 Subject: [PATCH 13/68] [vulkan] Implementing layouts use for indexing descriptors --- .../ir_opt/collect_shader_info_pass.cpp | 2 +- .../renderer_vulkan/pipeline_helper.h | 43 +++++++++++++++++-- .../renderer_vulkan/vk_compute_pass.cpp | 33 +++++++++++++- .../renderer_vulkan/vk_descriptor_pool.cpp | 41 +++++++++++++----- .../renderer_vulkan/vk_descriptor_pool.h | 16 +++++-- .../renderer_vulkan/vk_graphics_pipeline.cpp | 3 +- .../vulkan_common/vulkan_device.cpp | 3 ++ 7 files changed, 120 insertions(+), 21 deletions(-) diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 2bfa3227a8..62eb8ccf5f 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index e88b27b273..8d085f4541 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -30,23 +30,58 @@ public: return device->IsKhrPushDescriptorSupported() && num_descriptors <= device->MaxPushDescriptors(); } - - // TODO(crueter): utilize layout binding flags + vk::DescriptorSetLayout CreateDescriptorSetLayout(bool use_push_descriptor) const { if (bindings.empty()) { return nullptr; } + + variable_descriptor_count = 0; + binding_flags.clear(); + + VkDescriptorSetLayoutBindingFlagsCreateInfo binding_flags_ci{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO, + .pNext = nullptr, + .bindingCount = 0, + .pBindingFlags = nullptr, + }; + + const bool use_descriptor_indexing = + !use_push_descriptor && device->isExtDescriptorIndexingSupported(); + const void* layout_next = nullptr; + if (use_descriptor_indexing) { + binding_flags.assign(bindings.size(), 0); + for (size_t i = 0; i < bindings.size(); ++i) { + if (bindings[i].descriptorCount > 1) { + binding_flags[i] |= VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT; + } + } + + if (bindings.back().descriptorCount > 1) { + binding_flags.back() |= VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT; + variable_descriptor_count = bindings.back().descriptorCount; + } + + binding_flags_ci.bindingCount = static_cast(binding_flags.size()); + binding_flags_ci.pBindingFlags = binding_flags.data(); + layout_next = &binding_flags_ci; + } + const VkDescriptorSetLayoutCreateFlags flags = use_push_descriptor ? 
VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR : 0; return device->GetLogical().CreateDescriptorSetLayout({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .pNext = nullptr, + .pNext = layout_next, .flags = flags, .bindingCount = static_cast(bindings.size()), .pBindings = bindings.data(), }); } + u32 VariableDescriptorCount() const noexcept { + return variable_descriptor_count; + } + vk::DescriptorUpdateTemplate CreateTemplate(VkDescriptorSetLayout descriptor_set_layout, VkPipelineLayout pipeline_layout, bool use_push_descriptor) const { @@ -134,8 +169,10 @@ private: bool is_compute{}; boost::container::small_vector bindings; boost::container::small_vector entries; + mutable boost::container::small_vector binding_flags; u32 binding{}; u32 num_descriptors{}; + mutable u32 variable_descriptor_count{}; size_t offset{}; }; diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index d45a57f7bb..d960fabd16 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include "video_core/renderer_vulkan/vk_texture_cache.h" @@ -237,9 +238,36 @@ ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool, vk::Span push_constants, std::span code, std::optional optional_subgroup_size) : device{device_} { + u32 variable_descriptor_count{}; + std::vector binding_flags; + VkDescriptorSetLayoutBindingFlagsCreateInfo binding_flags_ci{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO, + .pNext = nullptr, + .bindingCount = 0, + .pBindingFlags = nullptr, + }; + const bool use_descriptor_indexing = device.isExtDescriptorIndexingSupported(); + const void* layout_next = nullptr; + if (use_descriptor_indexing && !bindings.empty()) { + binding_flags.assign(bindings.size(), 0); + for (size_t i = 0; i < bindings.size(); ++i) { + if 
(bindings[i].descriptorCount > 1) { + binding_flags[i] |= VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT; + } + } + if (bindings.back().descriptorCount > 1) { + binding_flags.back() |= VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT; + variable_descriptor_count = bindings.back().descriptorCount; + } + + binding_flags_ci.bindingCount = static_cast(binding_flags.size()); + binding_flags_ci.pBindingFlags = binding_flags.data(); + layout_next = &binding_flags_ci; + } + descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .pNext = nullptr, + .pNext = layout_next, .flags = 0, .bindingCount = bindings.size(), .pBindings = bindings.data(), @@ -266,7 +294,8 @@ ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool, .pipelineLayout = *layout, .set = 0, }); - descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, bank_info); + descriptor_allocator = + descriptor_pool.Allocator(*descriptor_set_layout, bank_info, variable_descriptor_count); } if (code.empty()) { return; diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp index 3af9758a31..97f51988bc 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project @@ -88,9 +88,10 @@ static void AllocatePool(const Device& device, DescriptorBank& bank) { } DescriptorAllocator::DescriptorAllocator(const Device& device_, MasterSemaphore& master_semaphore_, - DescriptorBank& bank_, VkDescriptorSetLayout layout_) + DescriptorBank& bank_, VkDescriptorSetLayout layout_, + u32 variable_descriptor_count_) : 
ResourcePool(master_semaphore_, SETS_GROW_RATE), device{&device_}, bank{&bank_}, - layout{layout_} {} + layout{layout_}, variable_descriptor_count{variable_descriptor_count_} {} VkDescriptorSet DescriptorAllocator::Commit() { const size_t index = CommitResource(); @@ -103,9 +104,25 @@ void DescriptorAllocator::Allocate(size_t begin, size_t end) { vk::DescriptorSets DescriptorAllocator::AllocateDescriptors(size_t count) { const std::vector layouts(count, layout); + + std::vector variable_descriptor_counts; + VkDescriptorSetVariableDescriptorCountAllocateInfo variable_descriptor_count_info{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO, + .pNext = nullptr, + .descriptorSetCount = 0, + .pDescriptorCounts = nullptr, + }; + const void* allocate_next = nullptr; + if (variable_descriptor_count != 0) { + variable_descriptor_counts.assign(count, variable_descriptor_count); + variable_descriptor_count_info.descriptorSetCount = static_cast(count); + variable_descriptor_count_info.pDescriptorCounts = variable_descriptor_counts.data(); + allocate_next = &variable_descriptor_count_info; + } + VkDescriptorSetAllocateInfo allocate_info{ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, - .pNext = nullptr, + .pNext = allocate_next, .descriptorPool = *bank->pools.back(), .descriptorSetCount = static_cast(count), .pSetLayouts = layouts.data(), @@ -131,18 +148,22 @@ DescriptorPool::DescriptorPool(const Device& device_, Scheduler& scheduler) DescriptorPool::~DescriptorPool() = default; DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout, - std::span infos) { - return Allocator(layout, MakeBankInfo(infos)); + std::span infos, + u32 variable_descriptor_count) { + return Allocator(layout, MakeBankInfo(infos), variable_descriptor_count); } DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout, - const Shader::Info& info) { - return Allocator(layout, MakeBankInfo(std::array{info})); + const 
Shader::Info& info, + u32 variable_descriptor_count) { + return Allocator(layout, MakeBankInfo(std::array{info}), variable_descriptor_count); } DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout, - const DescriptorBankInfo& info) { - return DescriptorAllocator(device, master_semaphore, Bank(info), layout); + const DescriptorBankInfo& info, + u32 variable_descriptor_count) { + return DescriptorAllocator(device, master_semaphore, Bank(info), layout, + variable_descriptor_count); } DescriptorBank& DescriptorPool::Bank(const DescriptorBankInfo& reqs) { diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h index 4aada5a006..fed43a45f8 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -47,7 +50,8 @@ public: private: explicit DescriptorAllocator(const Device& device_, MasterSemaphore& master_semaphore_, - DescriptorBank& bank_, VkDescriptorSetLayout layout_); + DescriptorBank& bank_, VkDescriptorSetLayout layout_, + u32 variable_descriptor_count_); void Allocate(size_t begin, size_t end) override; @@ -56,6 +60,7 @@ private: const Device* device{}; DescriptorBank* bank{}; VkDescriptorSetLayout layout{}; + u32 variable_descriptor_count{}; std::vector sets; }; @@ -69,9 +74,12 @@ public: DescriptorPool(const DescriptorPool&) = delete; DescriptorAllocator Allocator(VkDescriptorSetLayout layout, - std::span infos); - DescriptorAllocator Allocator(VkDescriptorSetLayout layout, const Shader::Info& info); - DescriptorAllocator Allocator(VkDescriptorSetLayout layout, const DescriptorBankInfo& info); + std::span infos, + u32 variable_descriptor_count = 0); + DescriptorAllocator 
Allocator(VkDescriptorSetLayout layout, const Shader::Info& info, + u32 variable_descriptor_count = 0); + DescriptorAllocator Allocator(VkDescriptorSetLayout layout, const DescriptorBankInfo& info, + u32 variable_descriptor_count = 0); private: DescriptorBank& Bank(const DescriptorBankInfo& reqs); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 3e95d8528a..7321da76a9 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -273,7 +273,8 @@ GraphicsPipeline::GraphicsPipeline( descriptor_set_layout = builder.CreateDescriptorSetLayout(uses_push_descriptor); if (!uses_push_descriptor) { - descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos); + descriptor_allocator = descriptor_pool.Allocator( + *descriptor_set_layout, stage_infos, builder.VariableDescriptorCount()); } const VkDescriptorSetLayout set_layout{*descriptor_set_layout}; diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 798c864736..92ce4b63f0 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -475,6 +475,9 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR if (extensions.descriptor_indexing && Settings::values.descriptor_indexing.GetValue()) { first_next = &descriptor_indexing; + } else { + RemoveExtension(extensions.descriptor_indexing, + VK_EXT_DESCRIPTOR_INDEXING_EXTENSION_NAME); } is_blit_depth24_stencil8_supported = TestDepthStencilBlits(VK_FORMAT_D24_UNORM_S8_UINT); From c8e4818b01665e61d12f0a9e70d1fb16a9cc486a Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Sun, 15 Feb 2026 01:02:02 -0400 Subject: [PATCH 14/68] [vulkan] Fixing inconsistences within VK_EXT_extended_dynamic_state1 handling --- .../renderer_vulkan/fixed_pipeline_state.cpp | 36 
++++++++++++- .../renderer_vulkan/vk_graphics_pipeline.cpp | 7 ++- .../renderer_vulkan/vk_rasterizer.cpp | 50 ++++++++++++++++--- .../renderer_vulkan/vk_rasterizer.h | 1 + .../vulkan_common/vulkan_device.cpp | 18 +++---- .../vulkan_common/vulkan_wrapper.cpp | 11 ++++ src/video_core/vulkan_common/vulkan_wrapper.h | 10 ++++ 7 files changed, 112 insertions(+), 21 deletions(-) diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index c74f3824a0..b07f5927b3 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -50,6 +50,38 @@ void RefreshXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& }); state.varyings = regs.stream_out_layout; } + +Maxwell::PrimitiveTopology NormalizeDynamicTopologyClass(Maxwell::PrimitiveTopology topology) { + switch (topology) { + case Maxwell::PrimitiveTopology::Points: + return Maxwell::PrimitiveTopology::Points; + + case Maxwell::PrimitiveTopology::Lines: + case Maxwell::PrimitiveTopology::LineStrip: + return Maxwell::PrimitiveTopology::Lines; + + case Maxwell::PrimitiveTopology::Triangles: + case Maxwell::PrimitiveTopology::TriangleStrip: + case Maxwell::PrimitiveTopology::TriangleFan: + case Maxwell::PrimitiveTopology::Quads: + case Maxwell::PrimitiveTopology::QuadStrip: + case Maxwell::PrimitiveTopology::Polygon: + case Maxwell::PrimitiveTopology::LineLoop: + return Maxwell::PrimitiveTopology::Triangles; + + case Maxwell::PrimitiveTopology::LinesAdjacency: + case Maxwell::PrimitiveTopology::LineStripAdjacency: + return Maxwell::PrimitiveTopology::LinesAdjacency; + + case Maxwell::PrimitiveTopology::TrianglesAdjacency: + case Maxwell::PrimitiveTopology::TriangleStripAdjacency: + return Maxwell::PrimitiveTopology::TrianglesAdjacency; + + case Maxwell::PrimitiveTopology::Patches: + return Maxwell::PrimitiveTopology::Patches; + } + return topology; +} } // Anonymous namespace 
void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, DynamicFeatures& features) { @@ -71,7 +103,9 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, DynamicFe tessellation_clockwise.Assign(regs.tessellation.params.output_primitives.Value() == Maxwell::Tessellation::OutputPrimitives::Triangles_CW); patch_control_points_minus_one.Assign(regs.patch_vertices - 1); - topology.Assign(topology_); + const bool can_normalize_topology = + features.has_extended_dynamic_state && features.has_extended_dynamic_state_2; + topology.Assign(can_normalize_topology ? NormalizeDynamicTopologyClass(topology_) : topology_); msaa_mode.Assign(regs.anti_alias_samples_mode); raw2 = 0; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 7321da76a9..21d48e8441 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -835,7 +835,6 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .blendConstants = {} }; static_vector dynamic_states{ - VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, @@ -843,8 +842,11 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { }; if (key.state.extended_dynamic_state) { static constexpr std::array extended{ + VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT_EXT, + VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT_EXT, VK_DYNAMIC_STATE_CULL_MODE_EXT, VK_DYNAMIC_STATE_FRONT_FACE_EXT, + VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT, VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT, VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT, VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT, @@ -855,6 +857,9 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { 
dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end()); dynamic_states.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT); + } else { + dynamic_states.push_back(VK_DYNAMIC_STATE_VIEWPORT); + dynamic_states.push_back(VK_DYNAMIC_STATE_SCISSOR); } // EDS2 - Core (3 states) diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 787ff47532..0e7b78b125 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -1023,6 +1023,7 @@ void RasterizerVulkan::UpdateDynamicStates() { UpdateCullMode(regs); UpdateDepthCompareOp(regs); UpdateFrontFace(regs); + UpdatePrimitiveTopology(regs); UpdateStencilOp(regs); if (state_tracker.TouchStateEnable()) { UpdateDepthBoundsTestEnable(regs); @@ -1097,8 +1098,14 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg .minDepth = 0.0f, .maxDepth = 1.0f, }; - scheduler.Record([viewport](vk::CommandBuffer cmdbuf) { - cmdbuf.SetViewport(0, viewport); + const bool use_viewport_with_count = device.IsExtExtendedDynamicStateSupported(); + scheduler.Record([viewport, use_viewport_with_count](vk::CommandBuffer cmdbuf) { + if (use_viewport_with_count) { + std::array viewports{viewport}; + cmdbuf.SetViewportWithCountEXT(viewports); + } else { + cmdbuf.SetViewport(0, viewport); + } }); return; } @@ -1114,10 +1121,15 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg GetViewportState(device, regs, 12, scale), GetViewportState(device, regs, 13, scale), GetViewportState(device, regs, 14, scale), GetViewportState(device, regs, 15, scale), }; - scheduler.Record([this, viewport_list](vk::CommandBuffer cmdbuf) { + const bool use_viewport_with_count = device.IsExtExtendedDynamicStateSupported(); + scheduler.Record([this, viewport_list, use_viewport_with_count](vk::CommandBuffer cmdbuf) { const u32 num_viewports = 
std::min(device.GetMaxViewports(), Maxwell::NumViewports); const vk::Span viewports(viewport_list.data(), num_viewports); - cmdbuf.SetViewport(0, viewports); + if (use_viewport_with_count) { + cmdbuf.SetViewportWithCountEXT(viewports); + } else { + cmdbuf.SetViewport(0, viewports); + } }); } @@ -1138,8 +1150,14 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs scissor.offset.y = static_cast(y); scissor.extent.width = width; scissor.extent.height = height; - scheduler.Record([scissor](vk::CommandBuffer cmdbuf) { - cmdbuf.SetScissor(0, scissor); + const bool use_scissor_with_count = device.IsExtExtendedDynamicStateSupported(); + scheduler.Record([scissor, use_scissor_with_count](vk::CommandBuffer cmdbuf) { + if (use_scissor_with_count) { + std::array scissors{scissor}; + cmdbuf.SetScissorWithCountEXT(scissors); + } else { + cmdbuf.SetScissor(0, scissor); + } }); return; } @@ -1167,10 +1185,15 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs GetScissorState(regs, 14, up_scale, down_shift), GetScissorState(regs, 15, up_scale, down_shift), }; - scheduler.Record([this, scissor_list](vk::CommandBuffer cmdbuf) { + const bool use_scissor_with_count = device.IsExtExtendedDynamicStateSupported(); + scheduler.Record([this, scissor_list, use_scissor_with_count](vk::CommandBuffer cmdbuf) { const u32 num_scissors = std::min(device.GetMaxViewports(), Maxwell::NumViewports); const vk::Span scissors(scissor_list.data(), num_scissors); - cmdbuf.SetScissor(0, scissors); + if (use_scissor_with_count) { + cmdbuf.SetScissorWithCountEXT(scissors); + } else { + cmdbuf.SetScissor(0, scissors); + } }); } @@ -1442,6 +1465,17 @@ void RasterizerVulkan::UpdateDepthCompareOp(Tegra::Engines::Maxwell3D::Regs& reg }); } +void RasterizerVulkan::UpdatePrimitiveTopology([[maybe_unused]] Tegra::Engines::Maxwell3D::Regs& regs) { + const auto topology = maxwell3d->draw_manager->GetDrawState().topology; + if 
(!state_tracker.ChangePrimitiveTopology(topology)) { + return; + } + const auto vk_topology = MaxwellToVK::PrimitiveTopology(device, topology); + scheduler.Record([vk_topology](vk::CommandBuffer cmdbuf) { + cmdbuf.SetPrimitiveTopologyEXT(vk_topology); + }); +} + void RasterizerVulkan::UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs) { if (!state_tracker.TouchFrontFace()) { return; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 2337ef644b..c8f50cd99b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -174,6 +174,7 @@ private: void UpdateDepthTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateDepthWriteEnable(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateDepthCompareOp(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdatePrimitiveTopology(Tegra::Engines::Maxwell3D::Regs& regs); void UpdatePrimitiveRestartEnable(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateRasterizerDiscardEnable(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateDepthBiasEnable(Tegra::Engines::Maxwell3D::Regs& regs); diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 92ce4b63f0..44cde01376 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -1101,8 +1101,6 @@ bool Device::GetSuitability(bool requires_swapchain) { if (u32(Settings::values.dyna_state.GetValue()) == 0) { LOG_INFO(Render_Vulkan, "Extended Dynamic State disabled by user setting, clearing all EDS features"); - features.custom_border_color.customBorderColors = false; - features.custom_border_color.customBorderColorWithoutFormat = false; features.extended_dynamic_state.extendedDynamicState = false; features.extended_dynamic_state2.extendedDynamicState2 = false; } @@ -1138,7 +1136,7 @@ void Device::RemoveUnsuitableExtensions() { 
RemoveExtensionFeatureIfUnsuitable(extensions.depth_clip_control, features.depth_clip_control, VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME); - /* */ // VK_EXT_extended_dynamic_state + // VK_EXT_extended_dynamic_state extensions.extended_dynamic_state = features.extended_dynamic_state.extendedDynamicState; RemoveExtensionFeatureIfUnsuitable(extensions.extended_dynamic_state, features.extended_dynamic_state, @@ -1151,7 +1149,6 @@ void Device::RemoveUnsuitableExtensions() { VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME); // VK_EXT_robustness2 - // Enable if at least one robustness2 feature is available extensions.robustness_2 = features.robustness2.robustBufferAccess2 || features.robustness2.robustImageAccess2 || features.robustness2.nullDescriptor; @@ -1160,7 +1157,6 @@ void Device::RemoveUnsuitableExtensions() { VK_EXT_ROBUSTNESS_2_EXTENSION_NAME); // VK_EXT_image_robustness - // Enable if robustImageAccess is available extensions.image_robustness = features.image_robustness.robustImageAccess; RemoveExtensionFeatureIfUnsuitable(extensions.image_robustness, features.image_robustness, VK_EXT_IMAGE_ROBUSTNESS_EXTENSION_NAME); @@ -1256,15 +1252,15 @@ void Device::RemoveUnsuitableExtensions() { RemoveExtensionFeatureIfUnsuitable(extensions.swapchain_maintenance1, features.swapchain_maintenance1, VK_EXT_SWAPCHAIN_MAINTENANCE_1_EXTENSION_NAME); - // VK_KHR_maintenance1 (core in Vulkan 1.1, no features) + // VK_KHR_maintenance1 extensions.maintenance1 = loaded_extensions.contains(VK_KHR_MAINTENANCE_1_EXTENSION_NAME); RemoveExtensionIfUnsuitable(extensions.maintenance1, VK_KHR_MAINTENANCE_1_EXTENSION_NAME); - // VK_KHR_maintenance2 (core in Vulkan 1.1, no features) + // VK_KHR_maintenance2 extensions.maintenance2 = loaded_extensions.contains(VK_KHR_MAINTENANCE_2_EXTENSION_NAME); RemoveExtensionIfUnsuitable(extensions.maintenance2, VK_KHR_MAINTENANCE_2_EXTENSION_NAME); - // VK_KHR_maintenance3 (core in Vulkan 1.1, no features) + // VK_KHR_maintenance3 extensions.maintenance3 = 
loaded_extensions.contains(VK_KHR_MAINTENANCE_3_EXTENSION_NAME); RemoveExtensionIfUnsuitable(extensions.maintenance3, VK_KHR_MAINTENANCE_3_EXTENSION_NAME); @@ -1294,15 +1290,15 @@ void Device::RemoveUnsuitableExtensions() { RemoveExtensionFeatureIfUnsuitable(extensions.maintenance6, features.maintenance6, VK_KHR_MAINTENANCE_6_EXTENSION_NAME); - // VK_KHR_maintenance7 (proposed for Vulkan 1.4, no features) + // VK_KHR_maintenance7 extensions.maintenance7 = loaded_extensions.contains(VK_KHR_MAINTENANCE_7_EXTENSION_NAME); RemoveExtensionIfUnsuitable(extensions.maintenance7, VK_KHR_MAINTENANCE_7_EXTENSION_NAME); - // VK_KHR_maintenance8 (proposed for Vulkan 1.4, no features) + // VK_KHR_maintenance8 extensions.maintenance8 = loaded_extensions.contains(VK_KHR_MAINTENANCE_8_EXTENSION_NAME); RemoveExtensionIfUnsuitable(extensions.maintenance8, VK_KHR_MAINTENANCE_8_EXTENSION_NAME); - // VK_KHR_maintenance9 (proposed for Vulkan 1.4, no features) + // VK_KHR_maintenance9 extensions.maintenance9 = loaded_extensions.contains(VK_KHR_MAINTENANCE_9_EXTENSION_NAME); RemoveExtensionIfUnsuitable(extensions.maintenance9, VK_KHR_MAINTENANCE_9_EXTENSION_NAME); } diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 095725b4c4..b1f1d22fbb 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -133,10 +133,12 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdSetDepthBounds); X(vkCmdSetEvent); X(vkCmdSetScissor); + X(vkCmdSetScissorWithCountEXT); X(vkCmdSetStencilCompareMask); X(vkCmdSetStencilReference); X(vkCmdSetStencilWriteMask); X(vkCmdSetViewport); + X(vkCmdSetViewportWithCountEXT); X(vkCmdWaitEvents); X(vkCmdBindVertexBuffers2EXT); X(vkCmdSetCullModeEXT); @@ -243,6 +245,15 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { Proc(dld.vkCmdDrawIndirectCount, dld, "vkCmdDrawIndirectCountKHR", device); 
Proc(dld.vkCmdDrawIndexedIndirectCount, dld, "vkCmdDrawIndexedIndirectCountKHR", device); } + if (!dld.vkCmdSetPrimitiveTopologyEXT) { + Proc(dld.vkCmdSetPrimitiveTopologyEXT, dld, "vkCmdSetPrimitiveTopology", device); + } + if (!dld.vkCmdSetViewportWithCountEXT) { + Proc(dld.vkCmdSetViewportWithCountEXT, dld, "vkCmdSetViewportWithCount", device); + } + if (!dld.vkCmdSetScissorWithCountEXT) { + Proc(dld.vkCmdSetScissorWithCountEXT, dld, "vkCmdSetScissorWithCount", device); + } #undef X } diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index 44dffe11f3..4d5e0bc1ec 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -250,12 +250,14 @@ struct DeviceDispatch : InstanceDispatch { PFN_vkCmdSetLineWidth vkCmdSetLineWidth{}; PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT{}; PFN_vkCmdSetScissor vkCmdSetScissor{}; + PFN_vkCmdSetScissorWithCountEXT vkCmdSetScissorWithCountEXT{}; PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask{}; PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT{}; PFN_vkCmdSetStencilReference vkCmdSetStencilReference{}; PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT{}; PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask{}; PFN_vkCmdSetViewport vkCmdSetViewport{}; + PFN_vkCmdSetViewportWithCountEXT vkCmdSetViewportWithCountEXT{}; PFN_vkCmdWaitEvents vkCmdWaitEvents{}; PFN_vkCreateBuffer vkCreateBuffer{}; PFN_vkCreateBufferView vkCreateBufferView{}; @@ -1363,6 +1365,14 @@ public: dld->vkCmdSetScissor(handle, first, scissors.size(), scissors.data()); } + void SetViewportWithCountEXT(Span viewports) const noexcept { + dld->vkCmdSetViewportWithCountEXT(handle, viewports.size(), viewports.data()); + } + + void SetScissorWithCountEXT(Span scissors) const noexcept { + dld->vkCmdSetScissorWithCountEXT(handle, scissors.size(), scissors.data()); + } + void SetBlendConstants(const float blend_constants[4]) const 
noexcept { dld->vkCmdSetBlendConstants(handle, blend_constants); } From 8d031532d834bdba4d22836e6baf0624fede244e Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Sun, 15 Feb 2026 01:12:47 -0400 Subject: [PATCH 15/68] [vulkan] Dead code removal --- .../vulkan_common/vulkan_device.cpp | 56 +++---------------- src/video_core/vulkan_common/vulkan_device.h | 6 -- .../vulkan_common/vulkan_instance.cpp | 8 --- 3 files changed, 7 insertions(+), 63 deletions(-) diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 44cde01376..1cd3ac7cf7 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -501,13 +501,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR CollectToolingInfo(); if (is_qualcomm) { - // Qualcomm Adreno GPUs doesn't handle scaled vertex attributes; keep emulation enabled must_emulate_scaled_formats = true; - LOG_WARNING(Render_Vulkan, - "Qualcomm drivers require scaled vertex format emulation; forcing fallback"); - - LOG_WARNING(Render_Vulkan, - "Disabling shader float controls and 64-bit integer features on Qualcomm proprietary drivers"); RemoveExtension(extensions.shader_float_controls, VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME); RemoveExtensionFeature(extensions.shader_atomic_int64, features.shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME); @@ -526,9 +520,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR bool should_patch_bcn = api_level >= 28; const bool bcn_debug_override = Settings::values.patch_old_qcom_drivers.GetValue(); if (bcn_debug_override != should_patch_bcn) { - LOG_WARNING(Render_Vulkan, - "BCn patch debug override active: {} (auto-detected: {})", - bcn_debug_override, should_patch_bcn); should_patch_bcn = bcn_debug_override; } @@ -543,11 +534,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR } else { 
LOG_ERROR(Render_Vulkan, "BCn patch failed! Driver code may now crash"); } - } else { - LOG_WARNING(Render_Vulkan, - "BCn texture patching skipped for stability (Android API {} < 28). " - "Driver version {}.{} would support patching, but may crash on older Android.", - api_level, major, minor); } } else if (patch_status == ADRENOTOOLS_BCN_BLOB) { LOG_INFO(Render_Vulkan, "Adreno driver supports BCn textures natively (no patch needed)"); @@ -567,6 +553,13 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR } // Mali/ NVIDIA proprietary drivers: Shader stencil export not supported + if (nv_major_version >= 510) { + LOG_WARNING(Render_Vulkan, + "NVIDIA Drivers >= 510 do not support MSAA->MSAA image blits. " + "MSAA scaling will use 3D helpers. MSAA resolves work normally."); + cant_blit_msaa = true; + } + // Use hardware depth/stencil blits instead when available if (!extensions.shader_stencil_export) { LOG_INFO(Render_Vulkan, @@ -613,10 +606,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR const size_t derived_budget = (std::max)(MIN_SAMPLER_BUDGET, sampler_limit - reserved); sampler_heap_budget = derived_budget; - LOG_WARNING(Render_Vulkan, - "Qualcomm driver reports max {} samplers; reserving {} (25%) and " - "allowing Eden to use {} (75%) to avoid heap exhaustion", - sampler_limit, reserved, sampler_heap_budget); } } @@ -1232,26 +1221,6 @@ void Device::RemoveUnsuitableExtensions() { features.workgroup_memory_explicit_layout, VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME); - // VK_EXT_swapchain_maintenance1 (extension only, has features) - // Requires VK_EXT_surface_maintenance1 instance extension - extensions.swapchain_maintenance1 = features.swapchain_maintenance1.swapchainMaintenance1; - if (extensions.swapchain_maintenance1) { - // Check if VK_EXT_surface_maintenance1 instance extension is available - const auto instance_extensions = vk::EnumerateInstanceExtensionProperties(dld); - const 
bool has_surface_maintenance1 = instance_extensions && std::ranges::any_of(*instance_extensions, - [](const VkExtensionProperties& prop) { - return std::strcmp(prop.extensionName, VK_EXT_SURFACE_MAINTENANCE_1_EXTENSION_NAME) == 0; - }); - if (!has_surface_maintenance1) { - LOG_WARNING(Render_Vulkan, - "VK_EXT_swapchain_maintenance1 requires VK_EXT_surface_maintenance1, disabling"); - extensions.swapchain_maintenance1 = false; - features.swapchain_maintenance1.swapchainMaintenance1 = false; - } - } - RemoveExtensionFeatureIfUnsuitable(extensions.swapchain_maintenance1, features.swapchain_maintenance1, - VK_EXT_SWAPCHAIN_MAINTENANCE_1_EXTENSION_NAME); - // VK_KHR_maintenance1 extensions.maintenance1 = loaded_extensions.contains(VK_KHR_MAINTENANCE_1_EXTENSION_NAME); RemoveExtensionIfUnsuitable(extensions.maintenance1, VK_KHR_MAINTENANCE_1_EXTENSION_NAME); @@ -1271,17 +1240,6 @@ void Device::RemoveUnsuitableExtensions() { // VK_KHR_maintenance5 extensions.maintenance5 = features.maintenance5.maintenance5; - - if (extensions.maintenance5) { - LOG_INFO(Render_Vulkan, "VK_KHR_maintenance5 properties: polygonModePointSize={} " - "depthStencilSwizzleOne={} earlyFragmentTests={} nonStrictWideLines={}", - properties.maintenance5.polygonModePointSize, - properties.maintenance5.depthStencilSwizzleOneSupport, - properties.maintenance5.earlyFragmentMultisampleCoverageAfterSampleCounting && - properties.maintenance5.earlyFragmentSampleMaskTestBeforeSampleCounting, - properties.maintenance5.nonStrictWideLinesUseParallelogram); - } - RemoveExtensionFeatureIfUnsuitable(extensions.maintenance5, features.maintenance5, VK_KHR_MAINTENANCE_5_EXTENSION_NAME); diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index a6bf263bba..cd933bb786 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -62,7 +62,6 @@ VK_DEFINE_HANDLE(VmaAllocator) FEATURE(EXT, ProvokingVertex, 
PROVOKING_VERTEX, provoking_vertex) \ FEATURE(EXT, Robustness2, ROBUSTNESS_2, robustness2) \ FEATURE(EXT, TransformFeedback, TRANSFORM_FEEDBACK, transform_feedback) \ - FEATURE(EXT, SwapchainMaintenance1, SWAPCHAIN_MAINTENANCE_1, swapchain_maintenance1) \ FEATURE(KHR, Maintenance5, MAINTENANCE_5, maintenance5) \ FEATURE(KHR, Maintenance6, MAINTENANCE_6, maintenance6) \ FEATURE(KHR, PipelineExecutableProperties, PIPELINE_EXECUTABLE_PROPERTIES, \ @@ -473,11 +472,6 @@ public: return extensions.swapchain_mutable_format; } - /// Returns true if VK_EXT_swapchain_maintenance1 is enabled. - bool IsExtSwapchainMaintenance1Enabled() const { - return extensions.swapchain_maintenance1; - } - /// Returns true if VK_KHR_shader_float_controls is enabled. bool IsKhrShaderFloatControlsSupported() const { return extensions.shader_float_controls; diff --git a/src/video_core/vulkan_common/vulkan_instance.cpp b/src/video_core/vulkan_common/vulkan_instance.cpp index 47e18dd6a5..9cf65ad252 100644 --- a/src/video_core/vulkan_common/vulkan_instance.cpp +++ b/src/video_core/vulkan_common/vulkan_instance.cpp @@ -81,14 +81,6 @@ namespace { #endif if (enable_validation && AreExtensionsSupported(dld, *properties, std::array{VK_EXT_DEBUG_UTILS_EXTENSION_NAME})) extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); - // VK_EXT_surface_maintenance1 is required for VK_EXT_swapchain_maintenance1 - if (window_type != Core::Frontend::WindowSystemType::Headless && AreExtensionsSupported(dld, *properties, std::array{VK_EXT_SURFACE_MAINTENANCE_1_EXTENSION_NAME})) { - extensions.push_back(VK_EXT_SURFACE_MAINTENANCE_1_EXTENSION_NAME); - // Some(which?) drivers dont like being told to load this extension(why?) 
- // NVIDIA on FreeBSD is totally fine with this through - if (AreExtensionsSupported(dld, *properties, std::array{VK_KHR_GET_SURFACE_CAPABILITIES_2_EXTENSION_NAME})) - extensions.push_back(VK_KHR_GET_SURFACE_CAPABILITIES_2_EXTENSION_NAME); - } } return extensions; } From 69678d02b642256e5d5b5772f840c8f3ee5ced12 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Sun, 15 Feb 2026 01:18:39 -0400 Subject: [PATCH 16/68] [android] Removing unneeded setting --- .../features/settings/model/view/SettingsItem.kt | 9 --------- .../features/settings/ui/SettingsFragmentPresenter.kt | 1 - src/common/settings.h | 2 +- 3 files changed, 1 insertion(+), 11 deletions(-) diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt index 64053518b9..cabea73353 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt @@ -335,15 +335,6 @@ abstract class SettingsItem( valuesId = R.array.astcDecodingMethodValues ) ) - put( - SingleChoiceSetting( - IntSetting.RENDERER_ASTC_RECOMPRESSION, - titleId = R.string.astc_recompression, - descriptionId = R.string.astc_recompression_description, - choicesId = R.array.astcRecompressionMethodNames, - valuesId = R.array.astcRecompressionMethodValues - ) - ) put( SingleChoiceSetting( IntSetting.RENDERER_VRAM_USAGE_MODE, diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt index ae1f142b8b..c43de4d5c7 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt @@ 
-271,7 +271,6 @@ class SettingsFragmentPresenter( add(IntSetting.MAX_ANISOTROPY.key) add(IntSetting.RENDERER_VRAM_USAGE_MODE.key) add(IntSetting.RENDERER_ASTC_DECODE_METHOD.key) - add(IntSetting.RENDERER_ASTC_RECOMPRESSION.key) add(BooleanSetting.SYNC_MEMORY_OPERATIONS.key) add(BooleanSetting.RENDERER_USE_DISK_SHADER_CACHE.key) diff --git a/src/common/settings.h b/src/common/settings.h index 0f839f8873..ff32ee42c3 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -453,7 +453,7 @@ struct Values { Category::RendererAdvanced}; SwitchableSetting accelerate_astc{linkage, #ifdef ANDROID - AstcDecodeMode::Cpu, + AstcDecodeMode::Gpu, #else AstcDecodeMode::Gpu, #endif From ecd5c751f80b3b07a987a47d68a9c42d6463d0c5 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Sun, 15 Feb 2026 01:40:34 -0400 Subject: [PATCH 17/68] fix license headers --- src/video_core/renderer_vulkan/vk_buffer_cache.h | 2 +- src/video_core/renderer_vulkan/vk_compute_pass.cpp | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 8db936ffe7..7ae0c28759 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index d960fabd16..3484fb7026 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -255,9 +255,10 @@ ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool, binding_flags[i] |= VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT; } } - if 
(bindings.back().descriptorCount > 1) { - binding_flags.back() |= VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT; - variable_descriptor_count = bindings.back().descriptorCount; + const size_t last_binding = bindings.size() - 1; + if (bindings[last_binding].descriptorCount > 1) { + binding_flags[last_binding] |= VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT; + variable_descriptor_count = bindings[last_binding].descriptorCount; } binding_flags_ci.bindingCount = static_cast(binding_flags.size()); From a1fdbef129d77870094f0c413ced52bc41d01323 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Sun, 15 Feb 2026 01:46:54 -0400 Subject: [PATCH 18/68] [vulkan] removing dead code for driverID detection under EDS handling/ban --- src/video_core/vulkan_common/vulkan_device.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 1cd3ac7cf7..8b86d80bc8 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -1081,9 +1081,6 @@ bool Device::GetSuitability(bool requires_swapchain) { // VK_DYNAMIC_STATE - // Driver detection variables for workarounds in GetSuitability - const VkDriverId driver_id = properties.driver.driverID; - // VK_EXT_extended_dynamic_state below this will appear drivers that need workarounds. // VK_EXT_extended_dynamic_state2 below this will appear drivers that need workarounds. 
From 051522b54efbc399f685680b78ba296a17717c78 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Sun, 15 Feb 2026 17:58:16 -0400 Subject: [PATCH 19/68] [vulkan] Fix conditional rendering enable --- src/video_core/renderer_vulkan/vk_query_cache.cpp | 9 +++++++-- src/video_core/vulkan_common/vulkan_device.cpp | 6 ++++++ src/video_core/vulkan_common/vulkan_device.h | 1 - 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 7cdb3acadd..ee1e70fe9e 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -1205,13 +1205,18 @@ struct QueryCacheRuntimeImpl { conditional_resolve_pass = std::make_unique( device, scheduler, descriptor_pool, compute_pass_descriptor_queue); + VkBufferUsageFlags conditional_usage = + VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + if (device.IsExtConditionalRendering()) { + conditional_usage |= VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT; + } + const VkBufferCreateInfo buffer_ci = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, .size = sizeof(u32), - .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | - VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT, + .usage = conditional_usage, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 8b86d80bc8..7305002401 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -1183,6 +1183,12 @@ void Device::RemoveUnsuitableExtensions() { features.provoking_vertex, VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME); + // VK_EXT_conditional_rendering + extensions.conditional_rendering = features.conditional_rendering.conditionalRendering; + 
RemoveExtensionFeatureIfUnsuitable(extensions.conditional_rendering, + features.conditional_rendering, + VK_EXT_CONDITIONAL_RENDERING_EXTENSION_NAME); + // VK_EXT_multi_draw extensions.multi_draw = features.multi_draw.multiDraw; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index cd933bb786..2b49504d9b 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -73,7 +73,6 @@ VK_DEFINE_HANDLE(VmaAllocator) // Define miscellaneous extensions which may be used by the implementation here. #define FOR_EACH_VK_EXTENSION(EXTENSION) \ - EXTENSION(EXT, CONDITIONAL_RENDERING, conditional_rendering) \ EXTENSION(EXT, CONSERVATIVE_RASTERIZATION, conservative_rasterization) \ EXTENSION(EXT, DEPTH_RANGE_UNRESTRICTED, depth_range_unrestricted) \ EXTENSION(EXT, MEMORY_BUDGET, memory_budget) \ From 712c505cd13308de7deffea9f69a86fe62323a68 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Sun, 15 Feb 2026 19:37:02 -0400 Subject: [PATCH 20/68] [vulkan] Extending conversative rasterization detection and handling --- .../renderer_vulkan/vk_graphics_pipeline.cpp | 16 +++++++++++++--- src/video_core/vulkan_common/vulkan_device.cpp | 5 +++++ src/video_core/vulkan_common/vulkan_device.h | 18 +++++++++++++----- 3 files changed, 31 insertions(+), 8 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 21d48e8441..f0392470f6 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -736,11 +736,21 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_CONSERVATIVE_STATE_CREATE_INFO_EXT, .pNext = nullptr, .flags = 0, - .conservativeRasterizationMode = key.state.conservative_raster_enable != 0 - ? 
VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT - : VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT, + .conservativeRasterizationMode = VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT, .extraPrimitiveOverestimationSize = 0.0f, }; + const bool conservative_requested = key.state.conservative_raster_enable != 0; + if (conservative_requested) { + const bool is_point_topology = input_assembly_topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST; + const bool is_line_topology = IsLine(input_assembly_topology); + const bool needs_point_or_line_support = is_point_topology || is_line_topology; + const bool supports_requested_topology = + !needs_point_or_line_support || device.SupportsConservativePointAndLineRasterization(); + + conservative_raster.conservativeRasterizationMode = + supports_requested_topology ? VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT + : VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT; + } const bool preserve_provoking_vertex_for_xfb = !key.state.xfb_enabled || device.IsTransformFeedbackProvokingVertexPreserved(); const bool use_last_provoking_vertex = diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 7305002401..8c0e658a10 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -1027,6 +1027,11 @@ bool Device::GetSuitability(bool requires_swapchain) { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR; SetNext(next, properties.push_descriptor); } + if (extensions.conservative_rasterization) { + properties.conservative_rasterization.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT; + SetNext(next, properties.conservative_rasterization); + } if (extensions.subgroup_size_control || features.subgroup_size_control.subgroupSizeControl) { properties.subgroup_size_control.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES; diff --git 
a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 2b49504d9b..fbb20ad058 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -441,7 +441,7 @@ public: return extensions.viewport_array2; } - /// Returns true if the device supporst VK_EXT_DESCRIPTOR_INDEXING + /// Returns true if the device supporst VK_EXT_DESCRIPTOR_INDEXING. bool isExtDescriptorIndexingSupported() const { return extensions.descriptor_indexing; } @@ -621,12 +621,12 @@ public: return features.format_a4b4g4r4.formatA4B4G4R4; } - /// Returns true if the device supports VK_EXT_filter_cubic + /// Returns true if the device supports VK_EXT_filter_cubic. bool IsExtFilterCubicSupported() const { return extensions.filter_cubic; } - /// Returns true if the device supports VK_QCOM_filter_cubic_weights + /// Returns true if the device supports VK_QCOM_filter_cubic_weights. bool IsQcomFilterCubicWeightsSupported() const { return extensions.filter_cubic_weights; } @@ -653,7 +653,7 @@ public: } - /// Returns true if the device supports VK_EXT_shader_demote_to_helper_invocation + /// Returns true if the device supports VK_EXT_shader_demote_to_helper_invocation. bool IsExtShaderDemoteToHelperInvocationSupported() const { return extensions.shader_demote_to_helper_invocation; } @@ -662,6 +662,12 @@ public: bool IsExtConservativeRasterizationSupported() const { return extensions.conservative_rasterization; } + + /// Returns true if the device supports conservative rasterization for points and lines. + bool SupportsConservativePointAndLineRasterization() const { + return extensions.conservative_rasterization && + properties.conservative_rasterization.conservativePointAndLineRasterization; + } /// Returns true if the device supports VK_EXT_provoking_vertex. 
bool IsExtProvokingVertexSupported() const { @@ -673,7 +679,8 @@ public: return extensions.shader_atomic_int64; } - bool IsExtConditionalRendering() const { + /// Returns true if the device supports VK_EXT_conditional_rendering. + bool IsExtConditionalRenderingSupported() const { return extensions.conditional_rendering; } @@ -971,6 +978,7 @@ private: VkPhysicalDeviceSubgroupProperties subgroup_properties{}; VkPhysicalDeviceFloatControlsProperties float_controls{}; VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor{}; + VkPhysicalDeviceConservativeRasterizationPropertiesEXT conservative_rasterization{}; VkPhysicalDeviceSubgroupSizeControlProperties subgroup_size_control{}; VkPhysicalDeviceTransformFeedbackPropertiesEXT transform_feedback{}; VkPhysicalDeviceMaintenance5PropertiesKHR maintenance5{}; From 5c0e12fb0e1007ca34e17ca638da67b74cdc21b7 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Sun, 15 Feb 2026 20:32:00 -0400 Subject: [PATCH 21/68] [vulkan] Rework line rasterization handle --- .../renderer_vulkan/fixed_pipeline_state.cpp | 7 ++ .../renderer_vulkan/fixed_pipeline_state.h | 7 ++ .../renderer_vulkan/vk_graphics_pipeline.cpp | 78 +++++++++++++++---- .../renderer_vulkan/vk_rasterizer.cpp | 77 ++++++++++++++++++ .../renderer_vulkan/vk_rasterizer.h | 1 + .../renderer_vulkan/vk_state_tracker.cpp | 9 +++ .../renderer_vulkan/vk_state_tracker.h | 5 ++ .../vulkan_common/vulkan_device.cpp | 8 ++ src/video_core/vulkan_common/vulkan_device.h | 23 ++++-- .../vulkan_common/vulkan_wrapper.cpp | 1 + 10 files changed, 196 insertions(+), 20 deletions(-) diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index b07f5927b3..fe6730bd13 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -127,6 +127,13 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, DynamicFe depth_bounds_min = 
static_cast(regs.depth_bounds[0]); depth_bounds_max = static_cast(regs.depth_bounds[1]); + depth_bias = std::bit_cast(regs.depth_bias); + depth_bias_clamp = std::bit_cast(regs.depth_bias_clamp); + slope_scale_depth_bias = std::bit_cast(regs.slope_scale_depth_bias); + + line_width_smooth = std::bit_cast(regs.line_width_smooth); + line_width_aliased = std::bit_cast(regs.line_width_aliased); + line_stipple_factor = regs.line_stipple_params.factor; line_stipple_pattern = regs.line_stipple_params.pattern; diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 030c62a883..efe4e57960 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -236,6 +236,13 @@ struct FixedPipelineState { u32 depth_bounds_min; u32 depth_bounds_max; + u32 depth_bias; + u32 depth_bias_clamp; + u32 slope_scale_depth_bias; + + u32 line_width_smooth; + u32 line_width_aliased; + u32 line_stipple_factor; u32 line_stipple_pattern; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index f0392470f6..2c305f5497 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -5,6 +5,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include #include @@ -711,24 +712,68 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), .depthBiasEnable = (dynamic.depth_bias_enable != 0 ? 
VK_TRUE : VK_FALSE), - .depthBiasConstantFactor = 0.0f, - .depthBiasClamp = 0.0f, - .depthBiasSlopeFactor = 0.0f, - .lineWidth = 1.0f, - // TODO(alekpop): Transfer from regs + .depthBiasConstantFactor = std::bit_cast(key.state.depth_bias) / 2.0f, + .depthBiasClamp = std::bit_cast(key.state.depth_bias_clamp), + .depthBiasSlopeFactor = std::bit_cast(key.state.slope_scale_depth_bias), + .lineWidth = key.state.smooth_lines != 0 + ? std::bit_cast(key.state.line_width_smooth) + : std::bit_cast(key.state.line_width_aliased), }; - const bool smooth_lines_supported = - device.IsExtLineRasterizationSupported() && device.SupportsSmoothLines(); - const bool stippled_lines_supported = - device.IsExtLineRasterizationSupported() && device.SupportsStippledRectangularLines(); + const bool line_rasterization_supported = device.IsExtLineRasterizationSupported(); + const bool any_stippled_lines_supported = + line_rasterization_supported && + (device.SupportsStippledRectangularLines() || device.SupportsStippledBresenhamLines() || + device.SupportsStippledSmoothLines()); + const bool line_stipple_dynamic_state_supported = + IsLine(input_assembly_topology) && any_stippled_lines_supported; + const bool supports_rectangular_lines = + line_rasterization_supported && device.SupportsRectangularLines(); + const bool supports_bresenham_lines = + line_rasterization_supported && device.SupportsBresenhamLines(); + const bool supports_smooth_lines = line_rasterization_supported && device.SupportsSmoothLines(); + + VkLineRasterizationModeEXT line_rasterization_mode = VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT; + if (line_rasterization_supported) { + if (key.state.smooth_lines != 0) { + if (supports_smooth_lines) { + line_rasterization_mode = VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT; + } else if (supports_rectangular_lines) { + line_rasterization_mode = VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT; + } else if (supports_bresenham_lines) { + line_rasterization_mode = 
VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT; + } + } else { + if (supports_rectangular_lines) { + line_rasterization_mode = VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT; + } else if (supports_bresenham_lines) { + line_rasterization_mode = VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT; + } else if (supports_smooth_lines) { + line_rasterization_mode = VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT; + } + } + } + + const bool stippled_lines_supported = [&]() { + if (!line_rasterization_supported || !dynamic.line_stipple_enable) { + return false; + } + switch (line_rasterization_mode) { + case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT: + return device.SupportsStippledRectangularLines(); + case VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT: + return device.SupportsStippledBresenhamLines(); + case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT: + return device.SupportsStippledSmoothLines(); + default: + return false; + } + }(); + VkPipelineRasterizationLineStateCreateInfoEXT line_state{ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT, .pNext = nullptr, - .lineRasterizationMode = key.state.smooth_lines != 0 && smooth_lines_supported - ? VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT - : VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT, - .stippledLineEnable = - (dynamic.line_stipple_enable && stippled_lines_supported) ? VK_TRUE : VK_FALSE, + .lineRasterizationMode = line_rasterization_mode, + .stippledLineEnable = stippled_lines_supported ? 
VK_TRUE : VK_FALSE, .lineStippleFactor = key.state.line_stipple_factor, .lineStipplePattern = static_cast(key.state.line_stipple_pattern), }; @@ -844,12 +889,15 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .pAttachments = cb_attachments.data(), .blendConstants = {} }; - static_vector dynamic_states{ + static_vector dynamic_states{ VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, VK_DYNAMIC_STATE_LINE_WIDTH, }; + if (line_stipple_dynamic_state_supported) { + dynamic_states.push_back(VK_DYNAMIC_STATE_LINE_STIPPLE_EXT); + } if (key.state.extended_dynamic_state) { static constexpr std::array extended{ VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT_EXT, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 0e7b78b125..bc86b4bb6a 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -173,6 +173,55 @@ DrawParams MakeDrawParams(const MaxwellDrawState& draw_state, u32 num_instances, } return params; } + +bool IsLineRasterizationTopology(const Device& device, Maxwell::PrimitiveTopology topology) { + const VkPrimitiveTopology vk_topology = MaxwellToVK::PrimitiveTopology(device, topology); + return vk_topology == VK_PRIMITIVE_TOPOLOGY_LINE_LIST || + vk_topology == VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; +} + +VkLineRasterizationModeEXT SelectLineRasterizationMode(const Device& device, bool smooth_lines) { + const bool supports_rectangular_lines = device.SupportsRectangularLines(); + const bool supports_bresenham_lines = device.SupportsBresenhamLines(); + const bool supports_smooth_lines = device.SupportsSmoothLines(); + + if (smooth_lines) { + if (supports_smooth_lines) { + return VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT; + } + if (supports_rectangular_lines) { + return 
VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT; + } + if (supports_bresenham_lines) { + return VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT; + } + } else { + if (supports_rectangular_lines) { + return VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT; + } + if (supports_bresenham_lines) { + return VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT; + } + if (supports_smooth_lines) { + return VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT; + } + } + + return VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT; +} + +bool SupportsStippleForMode(const Device& device, VkLineRasterizationModeEXT mode) { + switch (mode) { + case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT: + return device.SupportsStippledRectangularLines(); + case VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT: + return device.SupportsStippledBresenhamLines(); + case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT: + return device.SupportsStippledSmoothLines(); + default: + return false; + } +} } // Anonymous namespace RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, @@ -1017,6 +1066,7 @@ void RasterizerVulkan::UpdateDynamicStates() { UpdateDepthBounds(regs); UpdateStencilFaces(regs); UpdateLineWidth(regs); + UpdateLineStipple(regs); // EDS1: CullMode, DepthCompare, FrontFace, StencilOp, DepthBoundsTest, DepthTest, DepthWrite, StencilTest if (device.IsExtExtendedDynamicStateSupported()) { @@ -1361,6 +1411,33 @@ void RasterizerVulkan::UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs) { scheduler.Record([width](vk::CommandBuffer cmdbuf) { cmdbuf.SetLineWidth(width); }); } +void RasterizerVulkan::UpdateLineStipple(Tegra::Engines::Maxwell3D::Regs& regs) { + if (!state_tracker.TouchLineStipple()) { + return; + } + if (!device.IsExtLineRasterizationSupported()) { + return; + } + + const auto topology = maxwell3d->draw_manager->GetDrawState().topology; + if (!IsLineRasterizationTopology(device, topology)) { + return; + } + + const VkLineRasterizationModeEXT mode = + SelectLineRasterizationMode(device, 
regs.line_anti_alias_enable != 0); + + if (regs.line_stipple_enable == 0 || !SupportsStippleForMode(device, mode)) { + return; + } + + scheduler.Record( + [factor = regs.line_stipple_params.factor, + pattern = static_cast(regs.line_stipple_params.pattern)](vk::CommandBuffer cmdbuf) { + cmdbuf.SetLineStippleEXT(factor, pattern); + }); +} + void RasterizerVulkan::UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs) { if (!state_tracker.TouchCullMode()) { return; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index c8f50cd99b..bdea6510c0 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -168,6 +168,7 @@ private: void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateLineStipple(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index c17bc5900b..19133d3255 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -40,6 +40,7 @@ Flags MakeInvalidationFlags() { StencilWriteMask, StencilCompare, LineWidth, + LineStipple, CullMode, DepthBoundsEnable, DepthTestEnable, @@ -119,6 +120,13 @@ void SetupDirtyStencilProperties(Tables& tables) { void SetupDirtyLineWidth(Tables& tables) { tables[0][OFF(line_width_smooth)] = LineWidth; tables[0][OFF(line_width_aliased)] = LineWidth; + tables[0][OFF(line_anti_alias_enable)] = LineWidth; +} + +void SetupDirtyLineStipple(Tables& tables) { + tables[0][OFF(line_stipple_enable)] = LineStipple; + FillBlock(tables[0], OFF(line_stipple_params), 
NUM(line_stipple_params), LineStipple); + tables[1][OFF(line_anti_alias_enable)] = LineStipple; } void SetupDirtyCullMode(Tables& tables) { @@ -226,6 +234,7 @@ void StateTracker::SetupTables(Tegra::Control::ChannelState& channel_state) { SetupDirtyDepthBounds(tables); SetupDirtyStencilProperties(tables); SetupDirtyLineWidth(tables); + SetupDirtyLineStipple(tables); SetupDirtyCullMode(tables); SetupDirtyStateEnable(tables); SetupDirtyDepthCompareOp(tables); diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index 8f8db9f828..7282bc1f44 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h @@ -42,6 +42,7 @@ enum : u8 { StencilWriteMask, StencilCompare, LineWidth, + LineStipple, CullMode, DepthBoundsEnable, @@ -177,6 +178,10 @@ public: return Exchange(Dirty::LineWidth, false); } + bool TouchLineStipple() const { + return Exchange(Dirty::LineStipple, false); + } + bool TouchCullMode() { return Exchange(Dirty::CullMode, false); } diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 8c0e658a10..2710321bd7 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -1188,6 +1188,14 @@ void Device::RemoveUnsuitableExtensions() { features.provoking_vertex, VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME); + // VK_EXT_line_rasterization + extensions.line_rasterization = features.line_rasterization.rectangularLines || + features.line_rasterization.bresenhamLines || + features.line_rasterization.smoothLines; + RemoveExtensionFeatureIfUnsuitable(extensions.line_rasterization, + features.line_rasterization, + VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME); + // VK_EXT_conditional_rendering extensions.conditional_rendering = features.conditional_rendering.conditionalRendering; RemoveExtensionFeatureIfUnsuitable(extensions.conditional_rendering, diff 
--git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index fbb20ad058..240775bd12 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -441,7 +441,7 @@ public: return extensions.viewport_array2; } - /// Returns true if the device supporst VK_EXT_DESCRIPTOR_INDEXING. + /// Returns true if the device supporst VK_EXT_descriptor_indexing. bool isExtDescriptorIndexingSupported() const { return extensions.descriptor_indexing; } @@ -637,19 +637,32 @@ public: } bool SupportsRectangularLines() const { - return features.line_rasterization.rectangularLines != VK_FALSE; + return features.line_rasterization.rectangularLines; + } + + bool SupportsBresenhamLines() const { + return features.line_rasterization.bresenhamLines; } bool SupportsSmoothLines() const { - return features.line_rasterization.smoothLines != VK_FALSE; + return features.line_rasterization.smoothLines; } bool SupportsStippledRectangularLines() const { - return features.line_rasterization.stippledRectangularLines != VK_FALSE; + return features.line_rasterization.stippledRectangularLines; } + bool SupportsStippledBresenhamLines() const { + return features.line_rasterization.stippledBresenhamLines; + } + + bool SupportsStippledSmoothLines() const { + return features.line_rasterization.stippledSmoothLines; + } + + /// Returns true if the device supports AlphaToOne. 
bool SupportsAlphaToOne() const { - return features.features.alphaToOne != VK_FALSE; + return features.features.alphaToOne; } diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index b1f1d22fbb..17ea3804e9 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -152,6 +152,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdSetFrontFaceEXT); X(vkCmdSetLogicOpEXT); X(vkCmdSetPatchControlPointsEXT); + X(vkCmdSetLineStippleEXT); X(vkCmdSetLineWidth); X(vkCmdSetPrimitiveTopologyEXT); X(vkCmdSetStencilOpEXT); From 651a999017b4409450854db2f3474f64c5464083 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Sun, 15 Feb 2026 22:15:58 -0400 Subject: [PATCH 22/68] [vulkan] Unique representation logic fix. --- .../renderer_vulkan/fixed_pipeline_state.cpp | 7 ------- src/video_core/renderer_vulkan/fixed_pipeline_state.h | 7 ------- .../renderer_vulkan/vk_graphics_pipeline.cpp | 11 ++++------- src/video_core/vulkan_common/vulkan_device.h | 1 + 4 files changed, 5 insertions(+), 21 deletions(-) diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index fe6730bd13..b07f5927b3 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -127,13 +127,6 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, DynamicFe depth_bounds_min = static_cast(regs.depth_bounds[0]); depth_bounds_max = static_cast(regs.depth_bounds[1]); - depth_bias = std::bit_cast(regs.depth_bias); - depth_bias_clamp = std::bit_cast(regs.depth_bias_clamp); - slope_scale_depth_bias = std::bit_cast(regs.slope_scale_depth_bias); - - line_width_smooth = std::bit_cast(regs.line_width_smooth); - line_width_aliased = std::bit_cast(regs.line_width_aliased); - line_stipple_factor = regs.line_stipple_params.factor; 
line_stipple_pattern = regs.line_stipple_params.pattern; diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index efe4e57960..030c62a883 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -236,13 +236,6 @@ struct FixedPipelineState { u32 depth_bounds_min; u32 depth_bounds_max; - u32 depth_bias; - u32 depth_bias_clamp; - u32 slope_scale_depth_bias; - - u32 line_width_smooth; - u32 line_width_aliased; - u32 line_stipple_factor; u32 line_stipple_pattern; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 2c305f5497..a36fde905f 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -5,7 +5,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include -#include #include #include @@ -712,12 +711,10 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), .depthBiasEnable = (dynamic.depth_bias_enable != 0 ? VK_TRUE : VK_FALSE), - .depthBiasConstantFactor = std::bit_cast(key.state.depth_bias) / 2.0f, - .depthBiasClamp = std::bit_cast(key.state.depth_bias_clamp), - .depthBiasSlopeFactor = std::bit_cast(key.state.slope_scale_depth_bias), - .lineWidth = key.state.smooth_lines != 0 - ? 
std::bit_cast(key.state.line_width_smooth) - : std::bit_cast(key.state.line_width_aliased), + .depthBiasConstantFactor = 0.0f, + .depthBiasClamp = 0.0f, + .depthBiasSlopeFactor = 0.0f, + .lineWidth = 1.0f, }; const bool line_rasterization_supported = device.IsExtLineRasterizationSupported(); const bool any_stippled_lines_supported = diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 240775bd12..780198d8ee 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -48,6 +48,7 @@ VK_DEFINE_HANDLE(VmaAllocator) // Define all features which may be used by the implementation and require an extension here. #define FOR_EACH_VK_FEATURE_EXT(FEATURE) \ + FEATURE(EXT, ConditionalRendering, CONDITIONAL_RENDERING, conditional_rendering) \ FEATURE(EXT, CustomBorderColor, CUSTOM_BORDER_COLOR, custom_border_color) \ FEATURE(EXT, DepthBiasControl, DEPTH_BIAS_CONTROL, depth_bias_control) \ FEATURE(EXT, DepthClipControl, DEPTH_CLIP_CONTROL, depth_clip_control) \ From 24d07ab28c988bebd8c3bcb78e7330a7d526d69a Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Sun, 15 Feb 2026 22:23:46 -0400 Subject: [PATCH 23/68] fix build --- src/video_core/vulkan_common/vulkan_device.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 780198d8ee..8b321e0ea3 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -698,6 +698,10 @@ public: return extensions.conditional_rendering; } + bool IsExtConditionalRendering() const { + return IsExtConditionalRenderingSupported(); + } + bool HasTimelineSemaphore() const; /// Returns the minimum supported version of SPIR-V. 
From 1ae76d44c18421c69cd654b38a3c0299bb4cce06 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Mon, 16 Feb 2026 00:34:08 -0400 Subject: [PATCH 24/68] [vulkan, rasterizer] Filling missing byte count handling when TFB is not available --- .../renderer_vulkan/vk_rasterizer.cpp | 17 ++++++++++++----- src/video_core/vulkan_common/vulkan_device.h | 4 +--- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index bc86b4bb6a..3eb05f3286 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -354,11 +354,18 @@ void RasterizerVulkan::DrawIndirect() { const auto& buffer = indirect_buffer.first; const auto& offset = indirect_buffer.second; if (params.is_byte_count) { - scheduler.Record([buffer_obj = buffer->Handle(), offset, - stride = params.stride](vk::CommandBuffer cmdbuf) { - cmdbuf.DrawIndirectByteCountEXT(1, 0, buffer_obj, offset, 0, - static_cast(stride)); - }); + if (!device.IsExtTransformFeedbackSupported()) { + scheduler.Record([buffer_obj = buffer->Handle(), offset, + stride = params.stride](vk::CommandBuffer cmdbuf) { + cmdbuf.DrawIndirect(buffer_obj, offset, 1, static_cast(stride)); + }); + } else { + scheduler.Record([buffer_obj = buffer->Handle(), offset, + stride = params.stride](vk::CommandBuffer cmdbuf) { + cmdbuf.DrawIndirectByteCountEXT(1, 0, buffer_obj, offset, 0, + static_cast(stride)); + }); + } return; } if (params.include_count) { diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 8b321e0ea3..80854e5c98 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -548,12 +548,10 @@ public: return extensions.transform_feedback; } - /// Returns true if the device supports VK_EXT_transform_feedback. 
bool AreTransformFeedbackGeometryStreamsSupported() const { return features.transform_feedback.geometryStreams; } - /// Returns true if transform feedback preserves provoking vertex. bool IsTransformFeedbackProvokingVertexPreserved() const { return features.provoking_vertex.transformFeedbackPreservesProvokingVertex; } @@ -720,7 +718,7 @@ public: return has_renderdoc || has_nsight_graphics || has_radeon_gpu_profiler; } - /// @returns True if compute pipelines can cause crashing. + /// Returns true if compute pipelines can cause crashing. bool HasBrokenCompute() const { return has_broken_compute; } From 646542a3979153dd8b4562723ea810b52c9a3820 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Mon, 16 Feb 2026 00:52:00 -0400 Subject: [PATCH 25/68] [vulkan] Query custom border color properties based on device report --- src/video_core/vulkan_common/vulkan_device.cpp | 14 ++++++++++++++ src/video_core/vulkan_common/vulkan_device.h | 14 ++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 2710321bd7..6c5f4afcd1 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -1062,6 +1062,20 @@ bool Device::GetSuitability(bool requires_swapchain) { // Unload extensions if feature support is insufficient. RemoveUnsuitableExtensions(); + // Query VK_EXT_custom_border_color properties if the extension is enabled. 
+ if (extensions.custom_border_color) { + const auto fp = reinterpret_cast( + dld.vkGetInstanceProcAddr(instance, "vkGetPhysicalDeviceCustomBorderColorPropertiesEXT")); + if (fp != nullptr) { + custom_border_color_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT; + custom_border_color_properties.pNext = nullptr; + fp(physical, &custom_border_color_properties); + has_custom_border_color_properties = true; + } else { + has_custom_border_color_properties = false; + } + } + // Check limits. struct Limit { u32 minimum; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 80854e5c98..5b1000f053 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -601,6 +601,16 @@ public: return features.custom_border_color.customBorderColorWithoutFormat; } + /// Returns true if physical device custom border color properties were queried. + bool HasCustomBorderColorProperties() const { + return has_custom_border_color_properties; + } + + /// Returns the queried VkPhysicalDeviceCustomBorderColorPropertiesEXT. + const VkPhysicalDeviceCustomBorderColorPropertiesEXT& GetCustomBorderColorProperties() const { + return custom_border_color_properties; + } + /// Returns true if the device supports VK_EXT_extended_dynamic_state. bool IsExtExtendedDynamicStateSupported() const { return extensions.extended_dynamic_state; @@ -625,6 +635,10 @@ public: return extensions.filter_cubic; } + /// Custom border color properties retrieved from the physical device (if available). + VkPhysicalDeviceCustomBorderColorPropertiesEXT custom_border_color_properties{}; + bool has_custom_border_color_properties = false; + /// Returns true if the device supports VK_QCOM_filter_cubic_weights. 
bool IsQcomFilterCubicWeightsSupported() const { return extensions.filter_cubic_weights; From ffae2350ca916167f3da3a0a65aef2006f870ff6 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Mon, 16 Feb 2026 01:08:23 -0400 Subject: [PATCH 26/68] [vulkan] fix custom border color query struct --- src/video_core/vulkan_common/vulkan_device.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 6c5f4afcd1..a8355119d3 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -1064,12 +1064,14 @@ bool Device::GetSuitability(bool requires_swapchain) { // Query VK_EXT_custom_border_color properties if the extension is enabled. if (extensions.custom_border_color) { - const auto fp = reinterpret_cast( - dld.vkGetInstanceProcAddr(instance, "vkGetPhysicalDeviceCustomBorderColorPropertiesEXT")); - if (fp != nullptr) { + auto proc = dld.vkGetInstanceProcAddr(instance, "vkGetPhysicalDeviceCustomBorderColorPropertiesEXT"); + if (proc != nullptr) { + auto vkGetPhysicalDeviceCustomBorderColorPropertiesEXT = + reinterpret_cast( + proc); custom_border_color_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT; custom_border_color_properties.pNext = nullptr; - fp(physical, &custom_border_color_properties); + vkGetPhysicalDeviceCustomBorderColorPropertiesEXT(physical, &custom_border_color_properties); has_custom_border_color_properties = true; } else { has_custom_border_color_properties = false; From 72ced6b9471c096d5b032c071b29a751244f8570 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Mon, 16 Feb 2026 02:10:14 -0400 Subject: [PATCH 27/68] [vulkan] Set always vertex strides --- src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp 
b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index b07f5927b3..62fa2a10c6 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -166,10 +166,10 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, DynamicFe dynamic_state.raw2 = 0; if (!extended_dynamic_state) { dynamic_state.Refresh(regs); - std::ranges::transform(regs.vertex_streams, vertex_strides.begin(), [](const auto& array) { - return static_cast(array.stride.Value()); - }); } + std::ranges::transform(regs.vertex_streams, vertex_strides.begin(), [](const auto& array) { + return static_cast(array.stride.Value()); + }); if (!extended_dynamic_state_2_logic_op) { dynamic_state.Refresh2(regs, topology_, extended_dynamic_state_2); } From c9eb764d2a36d36ba2fea19610faa4d6d4e35fe3 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Mon, 16 Feb 2026 03:36:09 -0400 Subject: [PATCH 28/68] [vulkan] Fixing some incongruences with pipeline keys and dynamic state flags --- .../renderer_vulkan/vk_graphics_pipeline.cpp | 4 ++-- src/video_core/renderer_vulkan/vk_graphics_pipeline.h | 6 ++++++ src/video_core/renderer_vulkan/vk_rasterizer.cpp | 11 ++++++++--- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index a36fde905f..9a50d94897 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -686,9 +686,9 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .viewportCount = num_viewports, + .viewportCount = key.state.extended_dynamic_state ? 0u : num_viewports, .pViewports = nullptr, - .scissorCount = num_viewports, + .scissorCount = key.state.extended_dynamic_state ? 
0u : num_viewports, .pScissors = nullptr, }; if (device.IsNvViewportSwizzleSupported()) { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 29f5eba8dc..418a4cfd26 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -92,6 +92,12 @@ public: bool UsesExtendedDynamicState() const noexcept { return key.state.extended_dynamic_state != 0; } + bool UsesExtendedDynamicState2() const noexcept { + return key.state.extended_dynamic_state_2 != 0; + } + bool UsesExtendedDynamicState2LogicOp() const noexcept { + return key.state.extended_dynamic_state_2_logic_op != 0; + } GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete; GraphicsPipeline(GraphicsPipeline&&) noexcept = delete; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 3eb05f3286..c18027b5cf 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -277,6 +277,10 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) { if (!pipeline->Configure(is_indexed)) return; + if (pipeline->UsesExtendedDynamicState()) { + state_tracker.InvalidateStateEnableFlag(); + } + UpdateDynamicStates(); HandleTransformFeedback(); @@ -1064,6 +1068,7 @@ bool AccelerateDMA::BufferToImage(const Tegra::DMA::ImageCopy& copy_info, void RasterizerVulkan::UpdateDynamicStates() { auto& regs = maxwell3d->regs; + GraphicsPipeline* pipeline = pipeline_cache.CurrentGraphicsPipeline(); // Core Dynamic States (Vulkan 1.0) - Always active regardless of dyna_state setting UpdateViewportsState(regs); @@ -1076,7 +1081,7 @@ void RasterizerVulkan::UpdateDynamicStates() { UpdateLineStipple(regs); // EDS1: CullMode, DepthCompare, FrontFace, StencilOp, DepthBoundsTest, DepthTest, DepthWrite, StencilTest - if (device.IsExtExtendedDynamicStateSupported()) 
{ + if (device.IsExtExtendedDynamicStateSupported() && pipeline && pipeline->UsesExtendedDynamicState()) { UpdateCullMode(regs); UpdateDepthCompareOp(regs); UpdateFrontFace(regs); @@ -1091,14 +1096,14 @@ void RasterizerVulkan::UpdateDynamicStates() { } // EDS2: PrimitiveRestart, RasterizerDiscard, DepthBias enable/disable - if (device.IsExtExtendedDynamicState2Supported()) { + if (device.IsExtExtendedDynamicState2Supported() && pipeline && pipeline->UsesExtendedDynamicState2()) { UpdatePrimitiveRestartEnable(regs); UpdateRasterizerDiscardEnable(regs); UpdateDepthBiasEnable(regs); } // EDS2 Extras: LogicOp operation selection - if (device.IsExtExtendedDynamicState2ExtrasSupported()) { + if (device.IsExtExtendedDynamicState2ExtrasSupported() && pipeline && pipeline->UsesExtendedDynamicState2LogicOp()) { UpdateLogicOp(regs); } From 9eacaf2444c2f2ffbe5f50f1c1ffdf17b0de6753 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Mon, 16 Feb 2026 04:36:40 -0400 Subject: [PATCH 29/68] [vulkan] Adjustment for Viewport and Scissor counts within EDS --- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index c18027b5cf..c3d4ad798a 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -1160,7 +1160,9 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg .minDepth = 0.0f, .maxDepth = 1.0f, }; - const bool use_viewport_with_count = device.IsExtExtendedDynamicStateSupported(); + GraphicsPipeline* pipeline = pipeline_cache.CurrentGraphicsPipeline(); + const bool use_viewport_with_count = device.IsExtExtendedDynamicStateSupported() && + (!pipeline || pipeline->UsesExtendedDynamicState()); scheduler.Record([viewport, use_viewport_with_count](vk::CommandBuffer cmdbuf) { if (use_viewport_with_count) { std::array 
viewports{viewport}; @@ -1183,7 +1185,9 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg GetViewportState(device, regs, 12, scale), GetViewportState(device, regs, 13, scale), GetViewportState(device, regs, 14, scale), GetViewportState(device, regs, 15, scale), }; - const bool use_viewport_with_count = device.IsExtExtendedDynamicStateSupported(); + GraphicsPipeline* pipeline = pipeline_cache.CurrentGraphicsPipeline(); + const bool use_viewport_with_count = device.IsExtExtendedDynamicStateSupported() && + (!pipeline || pipeline->UsesExtendedDynamicState()); scheduler.Record([this, viewport_list, use_viewport_with_count](vk::CommandBuffer cmdbuf) { const u32 num_viewports = std::min(device.GetMaxViewports(), Maxwell::NumViewports); const vk::Span viewports(viewport_list.data(), num_viewports); @@ -1212,7 +1216,9 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs scissor.offset.y = static_cast(y); scissor.extent.width = width; scissor.extent.height = height; - const bool use_scissor_with_count = device.IsExtExtendedDynamicStateSupported(); + GraphicsPipeline* pipeline = pipeline_cache.CurrentGraphicsPipeline(); + const bool use_scissor_with_count = device.IsExtExtendedDynamicStateSupported() && + (!pipeline || pipeline->UsesExtendedDynamicState()); scheduler.Record([scissor, use_scissor_with_count](vk::CommandBuffer cmdbuf) { if (use_scissor_with_count) { std::array scissors{scissor}; @@ -1247,7 +1253,9 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs GetScissorState(regs, 14, up_scale, down_shift), GetScissorState(regs, 15, up_scale, down_shift), }; - const bool use_scissor_with_count = device.IsExtExtendedDynamicStateSupported(); + GraphicsPipeline* pipeline = pipeline_cache.CurrentGraphicsPipeline(); + const bool use_scissor_with_count = device.IsExtExtendedDynamicStateSupported() && + (!pipeline || pipeline->UsesExtendedDynamicState()); scheduler.Record([this, 
scissor_list, use_scissor_with_count](vk::CommandBuffer cmdbuf) { const u32 num_scissors = std::min(device.GetMaxViewports(), Maxwell::NumViewports); const vk::Span scissors(scissor_list.data(), num_scissors); From 2081d659d2441950a5ff83124d2836c4545d96e0 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Mon, 16 Feb 2026 05:32:17 -0400 Subject: [PATCH 30/68] [vulkan] Dead code removal from VertexInputDynamicState --- .../renderer_vulkan/vk_buffer_cache.cpp | 41 ++++++------------- .../renderer_vulkan/vk_buffer_cache.h | 7 +--- .../renderer_vulkan/vk_graphics_pipeline.cpp | 3 -- 3 files changed, 14 insertions(+), 37 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index d38a03a526..8030a29a7c 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -564,26 +564,14 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset if (index >= device.GetMaxVertexInputBindings()) { return; } - const bool use_dynamic_vertex_input_binding_stride = - device.IsExtExtendedDynamicStateSupported() && - use_vertex_input_binding_stride_dynamic_state; - if (use_dynamic_vertex_input_binding_stride) { - scheduler.Record([index, buffer, offset, size, stride](vk::CommandBuffer cmdbuf) { - const VkDeviceSize vk_offset = buffer != VK_NULL_HANDLE ? offset : 0; - const VkDeviceSize vk_size = buffer != VK_NULL_HANDLE ? 
size : VK_WHOLE_SIZE; - const VkDeviceSize vk_stride = stride; - cmdbuf.BindVertexBuffers2EXT(index, 1, &buffer, &vk_offset, &vk_size, &vk_stride); - }); - } else { - if (!device.HasNullDescriptor() && buffer == VK_NULL_HANDLE) { - ReserveNullBuffer(); - buffer = *null_buffer; - offset = 0; - } - scheduler.Record([index, buffer, offset](vk::CommandBuffer cmdbuf) { - cmdbuf.BindVertexBuffer(index, buffer, offset); - }); + if (!device.HasNullDescriptor() && buffer == VK_NULL_HANDLE) { + ReserveNullBuffer(); + buffer = *null_buffer; + offset = 0; } + scheduler.Record([index, buffer, offset](vk::CommandBuffer cmdbuf) { + cmdbuf.BindVertexBuffer(index, buffer, offset); + }); } void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings& bindings) { @@ -607,15 +595,12 @@ void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings& bi if (binding_count == 0) { return; } - if (device.IsExtExtendedDynamicStateSupported()) { - scheduler.Record([bindings_ = std::move(bindings), buffer_handles_ = std::move(buffer_handles), binding_count](vk::CommandBuffer cmdbuf) { - cmdbuf.BindVertexBuffers2EXT(bindings_.min_index, binding_count, buffer_handles_.data(), bindings_.offsets.data(), bindings_.sizes.data(), bindings_.strides.data()); - }); - } else { - scheduler.Record([bindings_ = std::move(bindings), buffer_handles_ = std::move(buffer_handles), binding_count](vk::CommandBuffer cmdbuf) { - cmdbuf.BindVertexBuffers(bindings_.min_index, binding_count, buffer_handles_.data(), bindings_.offsets.data()); - }); - } + scheduler.Record([bindings_ = std::move(bindings), + buffer_handles_ = std::move(buffer_handles), + binding_count](vk::CommandBuffer cmdbuf) { + cmdbuf.BindVertexBuffers(bindings_.min_index, binding_count, buffer_handles_.data(), + bindings_.offsets.data()); + }); } void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset, diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h 
b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 7ae0c28759..5b0b876364 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -127,9 +127,6 @@ public: void BindVertexBuffers(VideoCommon::HostBindings& bindings); - void SetUseVertexInputBindingStrideDynamicState(bool enabled) { - use_vertex_input_binding_stride_dynamic_state = enabled; - } void BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size); @@ -185,9 +182,7 @@ private: vk::Buffer null_buffer; std::unique_ptr uint8_pass; - QuadIndexedPass quad_index_pass; - - bool use_vertex_input_binding_stride_dynamic_state = true; + QuadIndexedPass quad_index_pass; bool limit_dynamic_storage_buffers = false; u32 max_dynamic_storage_buffers = (std::numeric_limits::max)(); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 9a50d94897..918bd148cf 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -462,7 +462,6 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) { bind_stage_info(4); } - buffer_cache.runtime.SetUseVertexInputBindingStrideDynamicState(UsesExtendedDynamicState()); buffer_cache.UpdateGraphicsBuffers(is_indexed); buffer_cache.BindHostGeometryBuffers(is_indexed); @@ -910,8 +909,6 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { VK_DYNAMIC_STATE_STENCIL_OP_EXT, }; dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end()); - - dynamic_states.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT); } else { dynamic_states.push_back(VK_DYNAMIC_STATE_VIEWPORT); dynamic_states.push_back(VK_DYNAMIC_STATE_SCISSOR); From 88b9393b4473bc4a4061fdd930f4c3442a737cff Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Mon, 16 Feb 2026 05:55:57 -0400 Subject: [PATCH 31/68] [vulkan] Adding guards per dynamic states setters --- 
.../vulkan_common/vulkan_debug_callback.cpp | 4 +- .../vulkan_common/vulkan_wrapper.cpp | 1 - src/video_core/vulkan_common/vulkan_wrapper.h | 115 ++++++++++++------ 3 files changed, 79 insertions(+), 41 deletions(-) diff --git a/src/video_core/vulkan_common/vulkan_debug_callback.cpp b/src/video_core/vulkan_common/vulkan_debug_callback.cpp index cc3520f654..c3aba89179 100644 --- a/src/video_core/vulkan_common/vulkan_debug_callback.cpp +++ b/src/video_core/vulkan_common/vulkan_debug_callback.cpp @@ -45,9 +45,7 @@ VkBool32 DebugUtilCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity, case 0x01257b492: // VUID-vkCmdSetLogicOpEXT-None-0486 // Misc case 0xe0a2da61u: // VUID-vkCmdDrawIndexed-format-07753 -#else - case 0x682a878au: // VUID-vkCmdBindVertexBuffers2EXT-pBuffers-parameter - case 0x99fb7dfdu: // UNASSIGNED-RequiredParameter (vkCmdBindVertexBuffers2EXT pBuffers[0]) +#else case 0xe8616bf2u: // Bound VkDescriptorSet 0x0[] was destroyed. Likely push_descriptor related case 0x1608dec0u: // Image layout in vkUpdateDescriptorSet doesn't match descriptor use case 0x55362756u: // Descriptor binding and framebuffer attachment overlap diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 17ea3804e9..4f967f0e04 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -140,7 +140,6 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdSetViewport); X(vkCmdSetViewportWithCountEXT); X(vkCmdWaitEvents); - X(vkCmdBindVertexBuffers2EXT); X(vkCmdSetCullModeEXT); X(vkCmdSetDepthBoundsTestEnableEXT); X(vkCmdSetDepthCompareOpEXT); diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index 4d5e0bc1ec..0f0e1f2745 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -200,7 +200,6 @@ struct DeviceDispatch : InstanceDispatch { 
PFN_vkCmdBindPipeline vkCmdBindPipeline{}; PFN_vkCmdBindTransformFeedbackBuffersEXT vkCmdBindTransformFeedbackBuffersEXT{}; PFN_vkCmdBindVertexBuffers vkCmdBindVertexBuffers{}; - PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT{}; PFN_vkCmdBlitImage vkCmdBlitImage{}; PFN_vkCmdClearAttachments vkCmdClearAttachments{}; PFN_vkCmdClearColorImage vkCmdClearColorImage{}; @@ -1366,11 +1365,15 @@ public: } void SetViewportWithCountEXT(Span viewports) const noexcept { - dld->vkCmdSetViewportWithCountEXT(handle, viewports.size(), viewports.data()); + if (dld && dld->vkCmdSetViewportWithCountEXT) { + dld->vkCmdSetViewportWithCountEXT(handle, viewports.size(), viewports.data()); + } } void SetScissorWithCountEXT(Span scissors) const noexcept { - dld->vkCmdSetScissorWithCountEXT(handle, scissors.size(), scissors.data()); + if (dld && dld->vkCmdSetScissorWithCountEXT) { + dld->vkCmdSetScissorWithCountEXT(handle, scissors.size(), scissors.data()); + } } void SetBlendConstants(const float blend_constants[4]) const noexcept { @@ -1402,7 +1405,9 @@ public: .depthBiasClamp = clamp, .depthBiasSlopeFactor = slope_factor, }; - dld->vkCmdSetDepthBias2EXT(handle, &info); + if (dld && dld->vkCmdSetDepthBias2EXT) { + dld->vkCmdSetDepthBias2EXT(handle, &info); + } } void SetDepthBounds(float min_depth_bounds, float max_depth_bounds) const noexcept { @@ -1422,60 +1427,76 @@ public: buffer_barriers.data(), image_barriers.size(), image_barriers.data()); } - void BindVertexBuffers2EXT(u32 first_binding, u32 binding_count, const VkBuffer* buffers, - const VkDeviceSize* offsets, const VkDeviceSize* sizes, - const VkDeviceSize* strides) const noexcept { - dld->vkCmdBindVertexBuffers2EXT(handle, first_binding, binding_count, buffers, offsets, - sizes, strides); - } - void SetCullModeEXT(VkCullModeFlags cull_mode) const noexcept { - dld->vkCmdSetCullModeEXT(handle, cull_mode); + if (dld && dld->vkCmdSetCullModeEXT) { + dld->vkCmdSetCullModeEXT(handle, cull_mode); + } } void 
SetDepthBoundsTestEnableEXT(bool enable) const noexcept { - dld->vkCmdSetDepthBoundsTestEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); + if (dld && dld->vkCmdSetDepthBoundsTestEnableEXT) { + dld->vkCmdSetDepthBoundsTestEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); + } } void SetDepthCompareOpEXT(VkCompareOp compare_op) const noexcept { - dld->vkCmdSetDepthCompareOpEXT(handle, compare_op); + if (dld && dld->vkCmdSetDepthCompareOpEXT) { + dld->vkCmdSetDepthCompareOpEXT(handle, compare_op); + } } void SetDepthTestEnableEXT(bool enable) const noexcept { - dld->vkCmdSetDepthTestEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); + if (dld && dld->vkCmdSetDepthTestEnableEXT) { + dld->vkCmdSetDepthTestEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); + } } void SetDepthWriteEnableEXT(bool enable) const noexcept { - dld->vkCmdSetDepthWriteEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); + if (dld && dld->vkCmdSetDepthWriteEnableEXT) { + dld->vkCmdSetDepthWriteEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); + } } void SetPrimitiveRestartEnableEXT(bool enable) const noexcept { - dld->vkCmdSetPrimitiveRestartEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); + if (dld && dld->vkCmdSetPrimitiveRestartEnableEXT) { + dld->vkCmdSetPrimitiveRestartEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); + } } void SetRasterizerDiscardEnableEXT(bool enable) const noexcept { - dld->vkCmdSetRasterizerDiscardEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); + if (dld && dld->vkCmdSetRasterizerDiscardEnableEXT) { + dld->vkCmdSetRasterizerDiscardEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); + } } - void SetLineStippleEXT(u32 factor, u16 pattern) const noexcept - { - dld->vkCmdSetLineStippleEXT(handle, factor, pattern); + void SetLineStippleEXT(u32 factor, u16 pattern) const noexcept { + if (dld && dld->vkCmdSetLineStippleEXT) { + dld->vkCmdSetLineStippleEXT(handle, factor, pattern); + } } void SetDepthBiasEnableEXT(bool enable) const noexcept { - dld->vkCmdSetDepthBiasEnableEXT(handle, enable ? 
VK_TRUE : VK_FALSE); + if (dld && dld->vkCmdSetDepthBiasEnableEXT) { + dld->vkCmdSetDepthBiasEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); + } } void SetFrontFaceEXT(VkFrontFace front_face) const noexcept { - dld->vkCmdSetFrontFaceEXT(handle, front_face); + if (dld && dld->vkCmdSetFrontFaceEXT) { + dld->vkCmdSetFrontFaceEXT(handle, front_face); + } } void SetLogicOpEXT(VkLogicOp logic_op) const noexcept { - dld->vkCmdSetLogicOpEXT(handle, logic_op); + if (dld && dld->vkCmdSetLogicOpEXT) { + dld->vkCmdSetLogicOpEXT(handle, logic_op); + } } void SetPatchControlPointsEXT(uint32_t patch_control_points) const noexcept { - dld->vkCmdSetPatchControlPointsEXT(handle, patch_control_points); + if (dld && dld->vkCmdSetPatchControlPointsEXT) { + dld->vkCmdSetPatchControlPointsEXT(handle, patch_control_points); + } } void SetLineWidth(float line_width) const noexcept { @@ -1483,45 +1504,61 @@ public: } void SetPrimitiveTopologyEXT(VkPrimitiveTopology primitive_topology) const noexcept { - dld->vkCmdSetPrimitiveTopologyEXT(handle, primitive_topology); + if (dld && dld->vkCmdSetPrimitiveTopologyEXT) { + dld->vkCmdSetPrimitiveTopologyEXT(handle, primitive_topology); + } } void SetStencilOpEXT(VkStencilFaceFlags face_mask, VkStencilOp fail_op, VkStencilOp pass_op, VkStencilOp depth_fail_op, VkCompareOp compare_op) const noexcept { - dld->vkCmdSetStencilOpEXT(handle, face_mask, fail_op, pass_op, depth_fail_op, compare_op); + if (dld && dld->vkCmdSetStencilOpEXT) { + dld->vkCmdSetStencilOpEXT(handle, face_mask, fail_op, pass_op, depth_fail_op, compare_op); + } } void SetStencilTestEnableEXT(bool enable) const noexcept { - dld->vkCmdSetStencilTestEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); + if (dld && dld->vkCmdSetStencilTestEnableEXT) { + dld->vkCmdSetStencilTestEnableEXT(handle, enable ? 
VK_TRUE : VK_FALSE); + } } void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers, const VkDeviceSize* offsets, const VkDeviceSize* sizes) const noexcept { - dld->vkCmdBindTransformFeedbackBuffersEXT(handle, first, count, buffers, offsets, sizes); + if (dld && dld->vkCmdBindTransformFeedbackBuffersEXT) { + dld->vkCmdBindTransformFeedbackBuffersEXT(handle, first, count, buffers, offsets, sizes); + } } void BeginTransformFeedbackEXT(u32 first_counter_buffer, u32 counter_buffers_count, const VkBuffer* counter_buffers, const VkDeviceSize* counter_buffer_offsets) const noexcept { - dld->vkCmdBeginTransformFeedbackEXT(handle, first_counter_buffer, counter_buffers_count, - counter_buffers, counter_buffer_offsets); + if (dld && dld->vkCmdBeginTransformFeedbackEXT) { + dld->vkCmdBeginTransformFeedbackEXT(handle, first_counter_buffer, counter_buffers_count, + counter_buffers, counter_buffer_offsets); + } } void EndTransformFeedbackEXT(u32 first_counter_buffer, u32 counter_buffers_count, const VkBuffer* counter_buffers, const VkDeviceSize* counter_buffer_offsets) const noexcept { - dld->vkCmdEndTransformFeedbackEXT(handle, first_counter_buffer, counter_buffers_count, - counter_buffers, counter_buffer_offsets); + if (dld && dld->vkCmdEndTransformFeedbackEXT) { + dld->vkCmdEndTransformFeedbackEXT(handle, first_counter_buffer, counter_buffers_count, + counter_buffers, counter_buffer_offsets); + } } void BeginConditionalRenderingEXT( const VkConditionalRenderingBeginInfoEXT& info) const noexcept { - dld->vkCmdBeginConditionalRenderingEXT(handle, &info); + if (dld && dld->vkCmdBeginConditionalRenderingEXT) { + dld->vkCmdBeginConditionalRenderingEXT(handle, &info); + } } void EndConditionalRenderingEXT() const noexcept { - dld->vkCmdEndConditionalRenderingEXT(handle); + if (dld && dld->vkCmdEndConditionalRenderingEXT) { + dld->vkCmdEndConditionalRenderingEXT(handle); + } } void BeginDebugUtilsLabelEXT(const char* label, std::span color) const noexcept { 
@@ -1531,11 +1568,15 @@ public: .pLabelName = label, .color{color[0], color[1], color[2], color[3]}, }; - dld->vkCmdBeginDebugUtilsLabelEXT(handle, &label_info); + if (dld && dld->vkCmdBeginDebugUtilsLabelEXT) { + dld->vkCmdBeginDebugUtilsLabelEXT(handle, &label_info); + } } void EndDebugUtilsLabelEXT() const noexcept { - dld->vkCmdEndDebugUtilsLabelEXT(handle); + if (dld && dld->vkCmdEndDebugUtilsLabelEXT) { + dld->vkCmdEndDebugUtilsLabelEXT(handle); + } } private: From a24e7e81434ee96489214cb0fd5dd1f8132f5e9c Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Mon, 16 Feb 2026 16:48:08 -0400 Subject: [PATCH 32/68] [maxwell] Adding storage flags to some surface format --- src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 024c72b38e..64bc4bed30 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project @@ -173,7 +173,7 @@ FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with SURFACE_FORMAT_ELEM(VK_FORMAT_R16G16_SINT, usage_attachable | usage_storage, R16G16_SINT) \ SURFACE_FORMAT_ELEM(VK_FORMAT_R16G16_SNORM, usage_attachable | usage_storage, R16G16_SNORM) \ SURFACE_FORMAT_ELEM(VK_FORMAT_R32G32B32_SFLOAT, 0, R32G32B32_FLOAT) \ - SURFACE_FORMAT_ELEM(VK_FORMAT_A8B8G8R8_SRGB_PACK32, usage_attachable, A8B8G8R8_SRGB) \ + SURFACE_FORMAT_ELEM(VK_FORMAT_A8B8G8R8_SRGB_PACK32, usage_attachable | usage_storage, A8B8G8R8_SRGB) \ SURFACE_FORMAT_ELEM(VK_FORMAT_R8G8_UNORM, usage_attachable | usage_storage, R8G8_UNORM) \ SURFACE_FORMAT_ELEM(VK_FORMAT_R8G8_SNORM, 
usage_attachable | usage_storage, R8G8_SNORM) \ SURFACE_FORMAT_ELEM(VK_FORMAT_R8G8_SINT, usage_attachable | usage_storage, R8G8_SINT) \ @@ -185,7 +185,7 @@ FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with SURFACE_FORMAT_ELEM(VK_FORMAT_ASTC_8x8_UNORM_BLOCK, 0, ASTC_2D_8X8_UNORM) \ SURFACE_FORMAT_ELEM(VK_FORMAT_ASTC_8x5_UNORM_BLOCK, 0, ASTC_2D_8X5_UNORM) \ SURFACE_FORMAT_ELEM(VK_FORMAT_ASTC_5x4_UNORM_BLOCK, 0, ASTC_2D_5X4_UNORM) \ - SURFACE_FORMAT_ELEM(VK_FORMAT_B8G8R8A8_SRGB, usage_attachable, B8G8R8A8_SRGB) \ + SURFACE_FORMAT_ELEM(VK_FORMAT_B8G8R8A8_SRGB, usage_attachable | usage_storage, B8G8R8A8_SRGB) \ SURFACE_FORMAT_ELEM(VK_FORMAT_BC1_RGBA_SRGB_BLOCK, 0, BC1_RGBA_SRGB) \ SURFACE_FORMAT_ELEM(VK_FORMAT_BC2_SRGB_BLOCK, 0, BC2_SRGB) \ SURFACE_FORMAT_ELEM(VK_FORMAT_BC3_SRGB_BLOCK, 0, BC3_SRGB) \ From 6b8115f27a921748c45ab4c1545a915bd556f400 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Mon, 16 Feb 2026 17:35:41 -0400 Subject: [PATCH 33/68] fix meow --- src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 64bc4bed30..0538102e4a 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -173,7 +173,7 @@ FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with SURFACE_FORMAT_ELEM(VK_FORMAT_R16G16_SINT, usage_attachable | usage_storage, R16G16_SINT) \ SURFACE_FORMAT_ELEM(VK_FORMAT_R16G16_SNORM, usage_attachable | usage_storage, R16G16_SNORM) \ SURFACE_FORMAT_ELEM(VK_FORMAT_R32G32B32_SFLOAT, 0, R32G32B32_FLOAT) \ - SURFACE_FORMAT_ELEM(VK_FORMAT_A8B8G8R8_SRGB_PACK32, usage_attachable | usage_storage, A8B8G8R8_SRGB) \ + SURFACE_FORMAT_ELEM(VK_FORMAT_A8B8G8R8_SRGB_PACK32, usage_attachable, A8B8G8R8_SRGB) \ SURFACE_FORMAT_ELEM(VK_FORMAT_R8G8_UNORM, usage_attachable | usage_storage, 
R8G8_UNORM) \ SURFACE_FORMAT_ELEM(VK_FORMAT_R8G8_SNORM, usage_attachable | usage_storage, R8G8_SNORM) \ SURFACE_FORMAT_ELEM(VK_FORMAT_R8G8_SINT, usage_attachable | usage_storage, R8G8_SINT) \ @@ -185,7 +185,7 @@ FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with SURFACE_FORMAT_ELEM(VK_FORMAT_ASTC_8x8_UNORM_BLOCK, 0, ASTC_2D_8X8_UNORM) \ SURFACE_FORMAT_ELEM(VK_FORMAT_ASTC_8x5_UNORM_BLOCK, 0, ASTC_2D_8X5_UNORM) \ SURFACE_FORMAT_ELEM(VK_FORMAT_ASTC_5x4_UNORM_BLOCK, 0, ASTC_2D_5X4_UNORM) \ - SURFACE_FORMAT_ELEM(VK_FORMAT_B8G8R8A8_SRGB, usage_attachable | usage_storage, B8G8R8A8_SRGB) \ + SURFACE_FORMAT_ELEM(VK_FORMAT_B8G8R8A8_SRGB, usage_attachable, B8G8R8A8_SRGB) \ SURFACE_FORMAT_ELEM(VK_FORMAT_BC1_RGBA_SRGB_BLOCK, 0, BC1_RGBA_SRGB) \ SURFACE_FORMAT_ELEM(VK_FORMAT_BC2_SRGB_BLOCK, 0, BC2_SRGB) \ SURFACE_FORMAT_ELEM(VK_FORMAT_BC3_SRGB_BLOCK, 0, BC3_SRGB) \ From e3e880e8791a994b01d6af8c02b9771fbdd5a202 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Tue, 17 Feb 2026 00:59:10 -0400 Subject: [PATCH 34/68] [vulkan] Added flag to detect last mode from provokingVertex --- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 3 ++- src/video_core/vulkan_common/vulkan_device.h | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 918bd148cf..e56a941769 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -795,7 +795,8 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { const bool preserve_provoking_vertex_for_xfb = !key.state.xfb_enabled || device.IsTransformFeedbackProvokingVertexPreserved(); const bool use_last_provoking_vertex = - key.state.provoking_vertex_last != 0 && preserve_provoking_vertex_for_xfb; + key.state.provoking_vertex_last != 0 && preserve_provoking_vertex_for_xfb && + 
device.IsProvokingVertexLastSupported(); VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_vertex{ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT, diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 5b1000f053..67ab01752d 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -700,6 +700,11 @@ public: return extensions.provoking_vertex; } + /// Returns true if the device supports provoking-vertex LAST mode. + bool IsProvokingVertexLastSupported() const { + return features.provoking_vertex.provokingVertexLast; + } + /// Returns true if the device supports VK_KHR_shader_atomic_int64. bool IsExtShaderAtomicInt64Supported() const { return extensions.shader_atomic_int64; From d273fc4ad6feeb7c1c44690f818030b7824009ad Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Tue, 17 Feb 2026 01:18:23 -0400 Subject: [PATCH 35/68] [vulkan] Changed UpdateDynamicState order --- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index c3d4ad798a..6d8cb3737c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -281,12 +281,11 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) { state_tracker.InvalidateStateEnableFlag(); } - UpdateDynamicStates(); - HandleTransformFeedback(); query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, maxwell3d->regs.zpass_pixel_count_enable); + UpdateDynamicStates(); draw_func(); } From 5615ea9ced6003ad880a4b9ef92a6977d5a4a476 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Tue, 17 Feb 2026 02:16:40 -0400 Subject: [PATCH 36/68] [vulkan] re-cast vkCmdSet for dynamic states during binding --- 
.../renderer_vulkan/vk_graphics_pipeline.cpp | 36 ++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index e56a941769..d280c0901a 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -530,12 +530,46 @@ void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling, } const void* const descriptor_data{guest_descriptor_queue.UpdateData()}; + const auto dynamic_state = dynamic; scheduler.Record([this, descriptor_data, bind_pipeline, rescaling_data = rescaling.Data(), is_rescaling, update_rescaling, uses_render_area = render_area.uses_render_area, - render_area_data = render_area.words](vk::CommandBuffer cmdbuf) { + render_area_data = render_area.words, dynamic_state](vk::CommandBuffer cmdbuf) { if (bind_pipeline) { cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); + + if (UsesExtendedDynamicState()) { + cmdbuf.SetDepthCompareOpEXT(MaxwellToVK::ComparisonOp(dynamic_state.DepthTestFunc())); + cmdbuf.SetFrontFaceEXT(MaxwellToVK::FrontFace(dynamic_state.FrontFace())); + VkCullModeFlags cull_mode = dynamic_state.cull_enable + ? 
MaxwellToVK::CullFace(dynamic_state.CullFace()) + : VK_CULL_MODE_NONE; + cmdbuf.SetCullModeEXT(cull_mode); + cmdbuf.SetDepthTestEnableEXT(dynamic_state.depth_test_enable != 0); + cmdbuf.SetDepthWriteEnableEXT(dynamic_state.depth_write_enable != 0); + cmdbuf.SetDepthBoundsTestEnableEXT(dynamic_state.depth_bounds_enable != 0); + cmdbuf.SetStencilTestEnableEXT(dynamic_state.stencil_enable != 0); + if (dynamic_state.stencil_enable) { + if (false) { + } else { + cmdbuf.SetStencilOpEXT(VK_STENCIL_FACE_FRONT_AND_BACK, + MaxwellToVK::StencilOp(dynamic_state.front.ActionStencilFail()), + MaxwellToVK::StencilOp(dynamic_state.front.ActionDepthPass()), + MaxwellToVK::StencilOp(dynamic_state.front.ActionDepthFail()), + MaxwellToVK::ComparisonOp(dynamic_state.front.TestFunc())); + } + } + } + + if (UsesExtendedDynamicState2()) { + cmdbuf.SetPrimitiveRestartEnableEXT(dynamic_state.primitive_restart_enable != 0); + cmdbuf.SetRasterizerDiscardEnableEXT(dynamic_state.rasterize_enable == 0); + cmdbuf.SetDepthBiasEnableEXT(dynamic_state.depth_bias_enable != 0); + } + + if (UsesExtendedDynamicState2LogicOp()) { + cmdbuf.SetLogicOpEXT(static_cast(dynamic_state.logic_op.Value())); + } } cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS, RESCALING_LAYOUT_WORDS_OFFSET, sizeof(rescaling_data), From 35ab33de6aa0f1cc0f32f627782b8e91ed8197b0 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Tue, 17 Feb 2026 02:41:31 -0400 Subject: [PATCH 37/68] Fix build --- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index d280c0901a..10383d48d0 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -530,7 +530,12 @@ void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling, } const void* const 
descriptor_data{guest_descriptor_queue.UpdateData()}; - const auto dynamic_state = dynamic; + FixedPipelineState::DynamicState dynamic_state{}; + if (!key.state.extended_dynamic_state) { + dynamic_state = key.state.dynamic_state; + } else { + dynamic_state.raw1 = key.state.dynamic_state.raw1; + } scheduler.Record([this, descriptor_data, bind_pipeline, rescaling_data = rescaling.Data(), is_rescaling, update_rescaling, uses_render_area = render_area.uses_render_area, From 9b915c86594e93cad58f1c1c49551d543cfe80ff Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Tue, 17 Feb 2026 03:38:10 -0400 Subject: [PATCH 38/68] [vulkan] Adjusting re-cast for EDS support when bind happens --- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 10383d48d0..6aabb486ea 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -543,7 +543,7 @@ void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling, if (bind_pipeline) { cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); - if (UsesExtendedDynamicState()) { + if (device.IsExtExtendedDynamicStateSupported() && UsesExtendedDynamicState()) { cmdbuf.SetDepthCompareOpEXT(MaxwellToVK::ComparisonOp(dynamic_state.DepthTestFunc())); cmdbuf.SetFrontFaceEXT(MaxwellToVK::FrontFace(dynamic_state.FrontFace())); VkCullModeFlags cull_mode = dynamic_state.cull_enable @@ -566,13 +566,13 @@ void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling, } } - if (UsesExtendedDynamicState2()) { + if (device.IsExtExtendedDynamicState2Supported() && UsesExtendedDynamicState2()) { cmdbuf.SetPrimitiveRestartEnableEXT(dynamic_state.primitive_restart_enable != 0); cmdbuf.SetRasterizerDiscardEnableEXT(dynamic_state.rasterize_enable == 0); 
cmdbuf.SetDepthBiasEnableEXT(dynamic_state.depth_bias_enable != 0); } - if (UsesExtendedDynamicState2LogicOp()) { + if (device.IsExtExtendedDynamicState2ExtrasSupported() && UsesExtendedDynamicState2LogicOp()) { cmdbuf.SetLogicOpEXT(static_cast(dynamic_state.logic_op.Value())); } } From e310f0b1513596db5fd666e7adb5128399035b7b Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Tue, 17 Feb 2026 04:02:29 -0400 Subject: [PATCH 39/68] [vulkan] Indirect draw for dstStageMask --- .../renderer_vulkan/vk_buffer_cache.cpp | 29 +++++++++++++++++-- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 8030a29a7c..a630b21a8f 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -347,7 +347,7 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& m driver_id == VK_DRIVER_ID_ARM_PROPRIETARY; if (limit_dynamic_storage_buffers) { max_dynamic_storage_buffers = device.GetMaxDescriptorSetStorageBuffersDynamic(); - } + } if (device.SupportsUint8Indices()) { uint8_pass = std::make_unique(device, scheduler, descriptor_pool, staging_pool, compute_pass_descriptor_queue); @@ -450,6 +450,14 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer, } cmdbuf.CopyBuffer(src_buffer, dst_buffer, VideoCommon::FixSmallVectorADL(vk_copies)); if (barrier) { + // Buffer reads can go to vertex input, shaders, or compute + const VkPipelineStageFlags dst_stages = + VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | + VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER); } @@ -479,7 +487,14 @@ void 
BufferCacheRuntime::PostCopyBarrier() { }; scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([](vk::CommandBuffer cmdbuf) { - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + const VkPipelineStageFlags dst_stages = + VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | + VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stages, 0, WRITE_BARRIER); }); } @@ -506,7 +521,15 @@ void BufferCacheRuntime::ClearBuffer(VkBuffer dest_buffer, u32 offset, size_t si cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, READ_BARRIER); cmdbuf.FillBuffer(dest_buffer, offset, size, value); - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + // Buffer reads can go to vertex input, shaders, or compute + const VkPipelineStageFlags dst_stages_clear = + VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | + VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stages_clear, 0, WRITE_BARRIER); }); } From 43ebdb1ffcdb746d750eda1c9e84578f09bf0dff Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Tue, 17 Feb 2026 04:14:17 -0400 Subject: [PATCH 40/68] [vulkan] Adjusted DYNAMIC_STATES setting --- src/video_core/renderer_vulkan/blit_image.cpp | 10 ++++++++-- src/video_core/renderer_vulkan/present/util.cpp | 3 +++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 789f4da2ed..b4aab57b97 100644 --- 
a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -170,8 +170,14 @@ constexpr VkPipelineMultisampleStateCreateInfo PIPELINE_MULTISAMPLE_STATE_CREATE .alphaToCoverageEnable = VK_FALSE, .alphaToOneEnable = VK_FALSE, }; -constexpr std::array DYNAMIC_STATES{VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, - VK_DYNAMIC_STATE_BLEND_CONSTANTS}; +constexpr std::array DYNAMIC_STATES{ + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + VK_DYNAMIC_STATE_BLEND_CONSTANTS, + VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, + VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, + VK_DYNAMIC_STATE_STENCIL_REFERENCE, +}; constexpr VkPipelineDynamicStateCreateInfo PIPELINE_DYNAMIC_STATE_CREATE_INFO{ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, .pNext = nullptr, diff --git a/src/video_core/renderer_vulkan/present/util.cpp b/src/video_core/renderer_vulkan/present/util.cpp index a2c4727703..383e4205c1 100644 --- a/src/video_core/renderer_vulkan/present/util.cpp +++ b/src/video_core/renderer_vulkan/present/util.cpp @@ -461,6 +461,9 @@ static vk::Pipeline CreateWrappedPipelineImpl( constexpr std::array dynamic_states{ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, + VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, + VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, + VK_DYNAMIC_STATE_STENCIL_REFERENCE, }; const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ From 4bb853d52aaeb88dbda174ee03466b8537bb95ae Mon Sep 17 00:00:00 2001 From: lizzie Date: Thu, 5 Mar 2026 02:55:45 +0000 Subject: [PATCH 41/68] Merge fix --- src/video_core/vulkan_common/vulkan_device.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index a8355119d3..85e402ec2b 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -552,11 +552,11 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice 
physical_, VkSurfaceKHR features.shader_float16_int8.shaderFloat16 = false; } - // Mali/ NVIDIA proprietary drivers: Shader stencil export not supported - if (nv_major_version >= 510) { + // NVIDIA proprietary drivers: Shader stencil export not supported + if (properties.properties.driverVersion >= VK_MAKE_API_VERSION(510, 0, 0, 0)) { LOG_WARNING(Render_Vulkan, - "NVIDIA Drivers >= 510 do not support MSAA->MSAA image blits. " - "MSAA scaling will use 3D helpers. MSAA resolves work normally."); + "NVIDIA Drivers >= 510 do not support MSAA->MSAA image blits. " + "MSAA scaling will use 3D helpers. MSAA resolves work normally."); cant_blit_msaa = true; } From d82a6a273d92a1f5fca1f80a4e7ad50229bf0be9 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Thu, 5 Mar 2026 02:24:43 -0400 Subject: [PATCH 42/68] fix build --- src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index a630b21a8f..fef315e035 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -459,7 +459,7 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER); + dst_stages, 0, WRITE_BARRIER); } }); } From aabc47031445ab7c20c0ad09fd429e1389f5d161 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Thu, 5 Mar 2026 03:19:05 -0400 Subject: [PATCH 43/68] First meow in honor of meowly --- .../renderer_vulkan/fixed_pipeline_state.cpp | 2 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 33 ------------------- 2 files changed, 1 insertion(+), 34 deletions(-) diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp 
b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 62fa2a10c6..fa25d99016 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -170,7 +170,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, DynamicFe std::ranges::transform(regs.vertex_streams, vertex_strides.begin(), [](const auto& array) { return static_cast(array.stride.Value()); }); - if (!extended_dynamic_state_2_logic_op) { + if (!extended_dynamic_state_2) { dynamic_state.Refresh2(regs, topology_, extended_dynamic_state_2); } if (maxwell3d.dirty.flags[Dirty::Blending]) { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 6aabb486ea..9235c50a01 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -542,39 +542,6 @@ void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling, render_area_data = render_area.words, dynamic_state](vk::CommandBuffer cmdbuf) { if (bind_pipeline) { cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); - - if (device.IsExtExtendedDynamicStateSupported() && UsesExtendedDynamicState()) { - cmdbuf.SetDepthCompareOpEXT(MaxwellToVK::ComparisonOp(dynamic_state.DepthTestFunc())); - cmdbuf.SetFrontFaceEXT(MaxwellToVK::FrontFace(dynamic_state.FrontFace())); - VkCullModeFlags cull_mode = dynamic_state.cull_enable - ? 
MaxwellToVK::CullFace(dynamic_state.CullFace()) - : VK_CULL_MODE_NONE; - cmdbuf.SetCullModeEXT(cull_mode); - cmdbuf.SetDepthTestEnableEXT(dynamic_state.depth_test_enable != 0); - cmdbuf.SetDepthWriteEnableEXT(dynamic_state.depth_write_enable != 0); - cmdbuf.SetDepthBoundsTestEnableEXT(dynamic_state.depth_bounds_enable != 0); - cmdbuf.SetStencilTestEnableEXT(dynamic_state.stencil_enable != 0); - if (dynamic_state.stencil_enable) { - if (false) { - } else { - cmdbuf.SetStencilOpEXT(VK_STENCIL_FACE_FRONT_AND_BACK, - MaxwellToVK::StencilOp(dynamic_state.front.ActionStencilFail()), - MaxwellToVK::StencilOp(dynamic_state.front.ActionDepthPass()), - MaxwellToVK::StencilOp(dynamic_state.front.ActionDepthFail()), - MaxwellToVK::ComparisonOp(dynamic_state.front.TestFunc())); - } - } - } - - if (device.IsExtExtendedDynamicState2Supported() && UsesExtendedDynamicState2()) { - cmdbuf.SetPrimitiveRestartEnableEXT(dynamic_state.primitive_restart_enable != 0); - cmdbuf.SetRasterizerDiscardEnableEXT(dynamic_state.rasterize_enable == 0); - cmdbuf.SetDepthBiasEnableEXT(dynamic_state.depth_bias_enable != 0); - } - - if (device.IsExtExtendedDynamicState2ExtrasSupported() && UsesExtendedDynamicState2LogicOp()) { - cmdbuf.SetLogicOpEXT(static_cast(dynamic_state.logic_op.Value())); - } } cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS, RESCALING_LAYOUT_WORDS_OFFSET, sizeof(rescaling_data), From 33f1fb1cf48eb01d698f91256efcf808919e76a2 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Thu, 5 Mar 2026 03:27:24 -0400 Subject: [PATCH 44/68] fix build --- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 9235c50a01..0e98276c74 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -539,7 +539,7 @@ void 
GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling, scheduler.Record([this, descriptor_data, bind_pipeline, rescaling_data = rescaling.Data(), is_rescaling, update_rescaling, uses_render_area = render_area.uses_render_area, - render_area_data = render_area.words, dynamic_state](vk::CommandBuffer cmdbuf) { + render_area_data = render_area.words](vk::CommandBuffer cmdbuf) { if (bind_pipeline) { cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); } From c931de057048e0d4cf08b9612f40b46f54adbe96 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Thu, 5 Mar 2026 20:47:08 -0400 Subject: [PATCH 45/68] [vulkan] Adjusted QueryReset's --- src/video_core/renderer_vulkan/vk_query_cache.cpp | 6 ++++-- src/video_core/vulkan_common/vulkan_wrapper.cpp | 1 + src/video_core/vulkan_common/vulkan_wrapper.h | 5 +++++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index ee1e70fe9e..64458a23d5 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -60,8 +60,6 @@ public: void Reset() override { ASSERT(references == 0); VideoCommon::BankBase::Reset(); - const auto& dev = device.GetLogical(); - dev.ResetQueryPool(*query_pool, 0, BANK_SIZE); host_results.fill(0ULL); next_bank = 0; } @@ -441,6 +439,10 @@ private: } current_bank = &bank_pool.GetBank(current_bank_id); current_query_pool = current_bank->GetInnerPool(); + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([query_pool = current_query_pool](vk::CommandBuffer cmdbuf) { + cmdbuf.ResetQueryPool(query_pool, 0, SamplesQueryBank::BANK_SIZE); + }); } size_t ReserveBankSlot() { diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 4f967f0e04..f1854d4792 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ 
b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -127,6 +127,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdPipelineBarrier); X(vkCmdPushConstants); X(vkCmdPushDescriptorSetWithTemplateKHR); + X(vkCmdResetQueryPool); X(vkCmdSetBlendConstants); X(vkCmdSetDepthBias); X(vkCmdSetDepthBias2EXT); diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index 0f0e1f2745..cc92d5de16 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -228,6 +228,7 @@ struct DeviceDispatch : InstanceDispatch { PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier{}; PFN_vkCmdPushConstants vkCmdPushConstants{}; PFN_vkCmdPushDescriptorSetWithTemplateKHR vkCmdPushDescriptorSetWithTemplateKHR{}; + PFN_vkCmdResetQueryPool vkCmdResetQueryPool{}; PFN_vkCmdResolveImage vkCmdResolveImage{}; PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants{}; PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT{}; @@ -1172,6 +1173,10 @@ public: dld->vkCmdPushDescriptorSetWithTemplateKHR(handle, update_template, layout, set, data); } + void ResetQueryPool(VkQueryPool query_pool, u32 first, u32 count) const noexcept { + dld->vkCmdResetQueryPool(handle, query_pool, first, count); + } + void BindPipeline(VkPipelineBindPoint bind_point, VkPipeline pipeline) const noexcept { dld->vkCmdBindPipeline(handle, bind_point, pipeline); } From ee1ffbaf2e337de59377b3e010f4458240bf19f4 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Thu, 5 Mar 2026 21:09:50 -0400 Subject: [PATCH 46/68] [vulkan] Adjustments to wrong access of image-memory barrier on depth fragments + blending extended enabling method --- .../renderer_vulkan/vk_graphics_pipeline.cpp | 4 +++- src/video_core/renderer_vulkan/vk_scheduler.cpp | 12 +++++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 
0e98276c74..925e931c0d 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -23,6 +23,7 @@ #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/surface.h" #include "video_core/shader_notify.h" #include "video_core/texture_cache/texture_cache.h" #include "video_core/vulkan_common/vulkan_device.h" @@ -866,13 +867,14 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { VK_COLOR_COMPONENT_A_BIT, }; const auto& blend{key.state.attachments[index]}; + const bool supports_blending = !VideoCore::Surface::IsPixelFormatInteger(key.color_formats[index]); const std::array mask{blend.Mask()}; VkColorComponentFlags write_mask{}; for (size_t i = 0; i < mask_table.size(); ++i) { write_mask |= mask[i] ? mask_table[i] : 0; } cb_attachments.push_back({ - .blendEnable = blend.enable != 0, + .blendEnable = supports_blending && blend.enable != 0, .srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()), .dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()), .colorBlendOp = MaxwellToVK::BlendEquation(blend.EquationRGB()), diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 947de6a80e..710bfddc74 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -337,6 +337,13 @@ void Scheduler::EndRenderPass() images = renderpass_images, ranges = renderpass_image_ranges](vk::CommandBuffer cmdbuf) { std::array barriers; + constexpr VkPipelineStageFlags src_stages = + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + constexpr VkPipelineStageFlags dst_stages = + VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT | + 
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; for (size_t i = 0; i < num_images; ++i) { const VkImageSubresourceRange& range = ranges[i]; const bool is_color = (range.aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) != 0; @@ -372,9 +379,8 @@ void Scheduler::EndRenderPass() }; } cmdbuf.EndRenderPass(); - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 0, nullptr, nullptr, vk::Span(barriers.data(), num_images)); + cmdbuf.PipelineBarrier(src_stages, dst_stages, 0, nullptr, nullptr, + vk::Span(barriers.data(), num_images)); }); state.renderpass = VkRenderPass{}; From 186c0b0cc7dbca4d651fa00231b1f8de3cd098a4 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Thu, 5 Mar 2026 21:45:38 -0400 Subject: [PATCH 47/68] [vulkan] Replaced old logic for DescriptorType for a numeric handling per type to avoid mismatches during format binding --- .../backend/spirv/emit_spirv_image.cpp | 70 +++++++++++++------ .../backend/spirv/spirv_emit_context.cpp | 35 +++++++--- .../backend/spirv/spirv_emit_context.h | 8 ++- src/shader_recompiler/ir_opt/texture_pass.cpp | 25 +++++-- src/shader_recompiler/shader_info.h | 12 +++- .../renderer_vulkan/vk_graphics_pipeline.cpp | 3 +- 6 files changed, 112 insertions(+), 41 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 4bff810547..2fd0f3bd1a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -14,6 +14,25 @@ namespace Shader::Backend::SPIRV { namespace { +Id GetResultType(EmitContext& ctx, NumericType numeric_type) { + switch (numeric_type) { + case NumericType::Float: + return ctx.F32[4]; + case NumericType::SignedInt: + return ctx.S32[4]; + case NumericType::UnsignedInt: + return 
ctx.U32[4]; + } + throw LogicError("Invalid numeric type {}", static_cast(numeric_type)); +} + +NumericType GetTextureNumericType(EmitContext& ctx, const IR::TextureInstInfo& info) { + if (info.type == TextureType::Buffer) { + return ctx.texture_buffers.at(info.descriptor_index).numeric_type; + } + return ctx.textures.at(info.descriptor_index).numeric_type; +} + class ImageOperands { public: [[maybe_unused]] static constexpr bool ImageSampleOffsetAllowed = false; @@ -201,10 +220,10 @@ Id TextureImage(EmitContext& ctx, IR::TextureInstInfo info, const IR::Value& ind const TextureBufferDefinition& def{ctx.texture_buffers.at(info.descriptor_index)}; if (def.count > 1) { const Id idx{index.IsImmediate() ? ctx.Const(index.U32()) : ctx.Def(index)}; - const Id ptr{ctx.OpAccessChain(ctx.image_buffer_type, def.id, idx)}; - return ctx.OpLoad(ctx.image_buffer_type, ptr); + const Id ptr{ctx.OpAccessChain(def.pointer_type, def.id, idx)}; + return ctx.OpLoad(def.image_type, ptr); } - return ctx.OpLoad(ctx.image_buffer_type, def.id); + return ctx.OpLoad(def.image_type, def.id); } else { const TextureDefinition& def{ctx.textures.at(info.descriptor_index)}; if (def.count > 1) { @@ -216,23 +235,24 @@ Id TextureImage(EmitContext& ctx, IR::TextureInstInfo info, const IR::Value& ind } } -std::pair Image(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo info) { +std::pair Image(EmitContext& ctx, const IR::Value& index, + IR::TextureInstInfo info) { if (info.type == TextureType::Buffer) { const ImageBufferDefinition def{ctx.image_buffers.at(info.descriptor_index)}; if (def.count > 1) { const Id idx{index.IsImmediate() ? 
ctx.Const(index.U32()) : ctx.Def(index)}; const Id ptr{ctx.OpAccessChain(def.pointer_type, def.id, idx)}; - return {ctx.OpLoad(def.image_type, ptr), def.is_integer}; + return {ctx.OpLoad(def.image_type, ptr), def.numeric_type}; } - return {ctx.OpLoad(def.image_type, def.id), def.is_integer}; + return {ctx.OpLoad(def.image_type, def.id), def.numeric_type}; } else { const ImageDefinition def{ctx.images.at(info.descriptor_index)}; if (def.count > 1) { const Id idx{index.IsImmediate() ? ctx.Const(index.U32()) : ctx.Def(index)}; const Id ptr{ctx.OpAccessChain(def.pointer_type, def.id, idx)}; - return {ctx.OpLoad(def.image_type, ptr), def.is_integer}; + return {ctx.OpLoad(def.image_type, ptr), def.numeric_type}; } - return {ctx.OpLoad(def.image_type, def.id), def.is_integer}; + return {ctx.OpLoad(def.image_type, def.id), def.numeric_type}; } } @@ -461,8 +481,9 @@ Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& if (ctx.stage == Stage::Fragment) { const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, bias_lc, offset); + const Id result_type{GetResultType(ctx, GetTextureNumericType(ctx, info))}; return Emit(&EmitContext::OpImageSparseSampleImplicitLod, - &EmitContext::OpImageSampleImplicitLod, ctx, inst, ctx.F32[4], + &EmitContext::OpImageSampleImplicitLod, ctx, inst, result_type, Texture(ctx, info, index), coords, operands.MaskOptional(), operands.Span()); } else { // We can't use implicit lods on non-fragment stages on SPIR-V. 
Maxwell hardware behaves as @@ -470,8 +491,9 @@ Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& // derivatives const Id lod{ctx.Const(0.0f)}; const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod, offset); + const Id result_type{GetResultType(ctx, GetTextureNumericType(ctx, info))}; return Emit(&EmitContext::OpImageSparseSampleExplicitLod, - &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], + &EmitContext::OpImageSampleExplicitLod, ctx, inst, result_type, Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); } } @@ -480,12 +502,14 @@ Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& Id lod, const IR::Value& offset) { const auto info{inst->Flags()}; const ImageOperands operands(ctx, false, true, false, lod, offset); + const NumericType numeric_type{GetTextureNumericType(ctx, info)}; + const Id result_type{GetResultType(ctx, numeric_type)}; Id result = Emit(&EmitContext::OpImageSparseSampleExplicitLod, - &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], + &EmitContext::OpImageSampleExplicitLod, ctx, inst, result_type, Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); #ifdef ANDROID - if (Settings::values.fix_bloom_effects.GetValue()) { + if (numeric_type == NumericType::Float && Settings::values.fix_bloom_effects.GetValue()) { result = ctx.OpVectorTimesScalar(ctx.F32[4], result, ctx.Const(0.98f)); } #endif @@ -529,8 +553,9 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id if (ctx.profile.need_gather_subpixel_offset) { coords = ImageGatherSubpixelOffset(ctx, info, TextureImage(ctx, info, index), coords); } + const Id result_type{GetResultType(ctx, GetTextureNumericType(ctx, info))}; return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst, - ctx.F32[4], Texture(ctx, info, index), coords, ctx.Const(info.gather_component), + result_type, Texture(ctx, info, index), 
coords, ctx.Const(info.gather_component), operands.MaskOptional(), operands.Span()); } @@ -558,8 +583,10 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id c lod = Id{}; } const ImageOperands operands(lod, ms); - return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4], - TextureImage(ctx, info, index), coords, operands.MaskOptional(), operands.Span()); + const Id result_type{GetResultType(ctx, GetTextureNumericType(ctx, info))}; + return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, + result_type, TextureImage(ctx, info, index), coords, operands.MaskOptional(), + operands.Span()); } Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod, @@ -609,8 +636,9 @@ Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I ctx.Def(offset), {}, lod_clamp) : ImageOperands(ctx, info.has_lod_clamp != 0, derivatives, info.num_derivatives, offset, lod_clamp); + const Id result_type{GetResultType(ctx, GetTextureNumericType(ctx, info))}; return Emit(&EmitContext::OpImageSparseSampleExplicitLod, - &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], + &EmitContext::OpImageSampleExplicitLod, ctx, inst, result_type, Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); } @@ -620,11 +648,11 @@ Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id co LOG_WARNING(Shader_SPIRV, "Typeless image read not supported by host"); return ctx.ConstantNull(ctx.U32[4]); } - const auto [image, is_integer] = Image(ctx, index, info); - const Id result_type{is_integer ? 
ctx.U32[4] : ctx.F32[4]}; + const auto [image, numeric_type] = Image(ctx, index, info); + const Id result_type{GetResultType(ctx, numeric_type)}; Id color{Emit(&EmitContext::OpImageSparseRead, &EmitContext::OpImageRead, ctx, inst, result_type, image, coords, std::nullopt, std::span{})}; - if (!is_integer) { + if (numeric_type == NumericType::Float) { color = ctx.OpBitcast(ctx.U32[4], color); } return color; @@ -632,8 +660,8 @@ Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id co void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color) { const auto info{inst->Flags()}; - const auto [image, is_integer] = Image(ctx, index, info); - if (!is_integer) { + const auto [image, numeric_type] = Image(ctx, index, info); + if (numeric_type == NumericType::Float) { color = ctx.OpBitcast(ctx.F32[4], color); } ctx.OpImageWrite(image, coords, color); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index b9a24496c9..68b9cad859 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -28,9 +28,21 @@ enum class Operation { FPMax, }; +Id GetNumericTypeId(EmitContext& ctx, NumericType numeric_type) { + switch (numeric_type) { + case NumericType::Float: + return ctx.F32[1]; + case NumericType::SignedInt: + return ctx.S32[1]; + case NumericType::UnsignedInt: + return ctx.U32[1]; + } + throw InvalidArgument("Invalid numeric type {}", static_cast(numeric_type)); +} + Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) { const spv::ImageFormat format{spv::ImageFormat::Unknown}; - const Id type{ctx.F32[1]}; + const Id type{GetNumericTypeId(ctx, desc.numeric_type)}; const bool depth{desc.is_depth}; const bool ms{desc.is_multisample}; switch (desc.type) { @@ -1304,22 +1316,26 @@ void EmitContext::DefineTextureBuffers(const Info& info, u32& binding) { 
if (info.texture_buffer_descriptors.empty()) { return; } - const spv::ImageFormat format{spv::ImageFormat::Unknown}; - image_buffer_type = TypeImage(F32[1], spv::Dim::Buffer, 0U, false, false, 1, format); - - const Id type{TypePointer(spv::StorageClass::UniformConstant, image_buffer_type)}; texture_buffers.reserve(info.texture_buffer_descriptors.size()); for (const TextureBufferDescriptor& desc : info.texture_buffer_descriptors) { if (desc.count != 1) { throw NotImplementedException("Array of texture buffers"); } + const spv::ImageFormat format{spv::ImageFormat::Unknown}; + const Id image_type{ + TypeImage(GetNumericTypeId(*this, desc.numeric_type), spv::Dim::Buffer, 0U, false, + false, 1, format)}; + const Id type{TypePointer(spv::StorageClass::UniformConstant, image_type)}; const Id id{AddGlobalVariable(type, spv::StorageClass::UniformConstant)}; Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::DescriptorSet, 0U); Name(id, NameOf(stage, desc, "texbuf")); texture_buffers.push_back({ .id = id, + .image_type = image_type, + .pointer_type = type, .count = desc.count, + .numeric_type = desc.numeric_type, }); if (profile.supported_spirv >= 0x00010400) { interfaces.push_back(id); @@ -1332,7 +1348,7 @@ void EmitContext::DefineImageBuffers(const Info& info, u32& binding) { image_buffers.reserve(info.image_buffer_descriptors.size()); for (const ImageBufferDescriptor& desc : info.image_buffer_descriptors) { const spv::ImageFormat format{GetImageFormat(desc.format)}; - const Id sampled_type{desc.is_integer ? 
U32[1] : F32[1]}; + const Id sampled_type{GetNumericTypeId(*this, desc.numeric_type)}; const Id image_type{ TypeImage(sampled_type, spv::Dim::Buffer, false, false, false, 2, format)}; const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)}; @@ -1345,7 +1361,7 @@ void EmitContext::DefineImageBuffers(const Info& info, u32& binding) { .image_type = image_type, .pointer_type = pointer_type, .count = desc.count, - .is_integer = desc.is_integer, + .numeric_type = desc.numeric_type, }); if (profile.supported_spirv >= 0x00010400) { interfaces.push_back(id); @@ -1372,6 +1388,7 @@ void EmitContext::DefineTextures(const Info& info, u32& binding, u32& scaling_in .image_type = image_type, .count = desc.count, .is_multisample = desc.is_multisample, + .numeric_type = desc.numeric_type, }); if (profile.supported_spirv >= 0x00010400) { interfaces.push_back(id); @@ -1387,7 +1404,7 @@ void EmitContext::DefineTextures(const Info& info, u32& binding, u32& scaling_in void EmitContext::DefineImages(const Info& info, u32& binding, u32& scaling_index) { images.reserve(info.image_descriptors.size()); for (const ImageDescriptor& desc : info.image_descriptors) { - const Id sampled_type{desc.is_integer ? 
U32[1] : F32[1]}; + const Id sampled_type{GetNumericTypeId(*this, desc.numeric_type)}; const Id image_type{ImageType(*this, desc, sampled_type)}; const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)}; const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; @@ -1399,7 +1416,7 @@ void EmitContext::DefineImages(const Info& info, u32& binding, u32& scaling_inde .image_type = image_type, .pointer_type = pointer_type, .count = desc.count, - .is_integer = desc.is_integer, + .numeric_type = desc.numeric_type, }); if (profile.supported_spirv >= 0x00010400) { interfaces.push_back(id); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index de56809a98..396022eddf 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -41,11 +41,15 @@ struct TextureDefinition { Id image_type; u32 count; bool is_multisample; + NumericType numeric_type; }; struct TextureBufferDefinition { Id id; + Id image_type; + Id pointer_type; u32 count; + NumericType numeric_type; }; struct ImageBufferDefinition { @@ -53,7 +57,7 @@ struct ImageBufferDefinition { Id image_type; Id pointer_type; u32 count; - bool is_integer; + NumericType numeric_type; }; struct ImageDefinition { @@ -61,7 +65,7 @@ struct ImageDefinition { Id image_type; Id pointer_type; u32 count; - bool is_integer; + NumericType numeric_type; }; struct UniformDefinitions { diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 20b8591072..5fc8f0f4a9 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -19,6 +19,7 @@ #include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/ir_opt/passes.h" #include "shader_recompiler/shader_info.h" +#include "video_core/surface.h" namespace 
Shader::Optimization { namespace { @@ -33,6 +34,16 @@ using TextureInstVector = boost::container::small_vector; constexpr u32 DESCRIPTOR_SIZE = 8; constexpr u32 DESCRIPTOR_SIZE_SHIFT = static_cast(std::countr_zero(DESCRIPTOR_SIZE)); +NumericType GetNumericType(TexturePixelFormat format) { + const auto pixel_format = static_cast(format); + if (!VideoCore::Surface::IsPixelFormatInteger(pixel_format)) { + return NumericType::Float; + } + return VideoCore::Surface::IsPixelFormatSignedInteger(pixel_format) + ? NumericType::SignedInt + : NumericType::UnsignedInt; +} + IR::Opcode IndexedInstruction(const IR::Inst& inst) { switch (inst.GetOpcode()) { case IR::Opcode::BindlessImageSampleImplicitLod: @@ -430,7 +441,8 @@ public: u32 Add(const TextureBufferDescriptor& desc) { return Add(texture_buffer_descriptors, desc, [&desc](const auto& existing) { - return desc.cbuf_index == existing.cbuf_index && + return desc.numeric_type == existing.numeric_type && + desc.cbuf_index == existing.cbuf_index && desc.cbuf_offset == existing.cbuf_offset && desc.shift_left == existing.shift_left && desc.secondary_cbuf_index == existing.secondary_cbuf_index && @@ -449,13 +461,13 @@ public: })}; image_buffer_descriptors[index].is_written |= desc.is_written; image_buffer_descriptors[index].is_read |= desc.is_read; - image_buffer_descriptors[index].is_integer |= desc.is_integer; return index; } u32 Add(const TextureDescriptor& desc) { const u32 index{Add(texture_descriptors, desc, [&desc](const auto& existing) { return desc.type == existing.type && desc.is_depth == existing.is_depth && + desc.numeric_type == existing.numeric_type && desc.has_secondary == existing.has_secondary && desc.cbuf_index == existing.cbuf_index && desc.cbuf_offset == existing.cbuf_offset && @@ -479,7 +491,6 @@ public: })}; image_descriptors[index].is_written |= desc.is_written; image_descriptors[index].is_read |= desc.is_read; - image_descriptors[index].is_integer |= desc.is_integer; return index; } @@ -651,13 +662,13 @@ 
void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo } const bool is_written{inst->GetOpcode() != IR::Opcode::ImageRead}; const bool is_read{inst->GetOpcode() != IR::Opcode::ImageWrite}; - const bool is_integer{IsTexturePixelFormatIntegerCached(env, cbuf)}; + const NumericType numeric_type{GetNumericType(ReadTexturePixelFormatCached(env, cbuf))}; if (flags.type == TextureType::Buffer) { index = descriptors.Add(ImageBufferDescriptor{ .format = flags.image_format, .is_written = is_written, .is_read = is_read, - .is_integer = is_integer, + .numeric_type = numeric_type, .cbuf_index = cbuf.index, .cbuf_offset = cbuf.offset, .count = cbuf.count, @@ -669,7 +680,7 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo .format = flags.image_format, .is_written = is_written, .is_read = is_read, - .is_integer = is_integer, + .numeric_type = numeric_type, .cbuf_index = cbuf.index, .cbuf_offset = cbuf.offset, .count = cbuf.count, @@ -681,6 +692,7 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo default: if (flags.type == TextureType::Buffer) { index = descriptors.Add(TextureBufferDescriptor{ + .numeric_type = GetNumericType(ReadTexturePixelFormatCached(env, cbuf)), .has_secondary = cbuf.has_secondary, .cbuf_index = cbuf.index, .cbuf_offset = cbuf.offset, @@ -696,6 +708,7 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo .type = flags.type, .is_depth = flags.is_depth != 0, .is_multisample = is_multisample, + .numeric_type = GetNumericType(ReadTexturePixelFormatCached(env, cbuf)), .has_secondary = cbuf.has_secondary, .cbuf_index = cbuf.index, .cbuf_offset = cbuf.offset, diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index dfacc06802..87dd14fa46 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -38,6 +38,12 @@ enum class TextureType : u32 { }; constexpr u32 NUM_TEXTURE_TYPES 
= 9; +enum class NumericType : u8 { + Float, + SignedInt, + UnsignedInt, +}; + enum class TexturePixelFormat { A8B8G8R8_UNORM, A8B8G8R8_SNORM, @@ -177,6 +183,7 @@ struct StorageBufferDescriptor { }; struct TextureBufferDescriptor { + NumericType numeric_type; bool has_secondary; u32 cbuf_index; u32 cbuf_offset; @@ -195,7 +202,7 @@ struct ImageBufferDescriptor { ImageFormat format; bool is_written; bool is_read; - bool is_integer; + NumericType numeric_type; u32 cbuf_index; u32 cbuf_offset; u32 count; @@ -209,6 +216,7 @@ struct TextureDescriptor { TextureType type; bool is_depth; bool is_multisample; + NumericType numeric_type; bool has_secondary; u32 cbuf_index; u32 cbuf_offset; @@ -228,7 +236,7 @@ struct ImageDescriptor { ImageFormat format; bool is_written; bool is_read; - bool is_integer; + NumericType numeric_type; u32 cbuf_index; u32 cbuf_offset; u32 count; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 925e931c0d..f60fe20b9a 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -867,7 +867,8 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { VK_COLOR_COMPONENT_A_BIT, }; const auto& blend{key.state.attachments[index]}; - const bool supports_blending = !VideoCore::Surface::IsPixelFormatInteger(key.color_formats[index]); + const PixelFormat color_format{DecodeFormat(key.state.color_formats[index])}; + const bool supports_blending = !VideoCore::Surface::IsPixelFormatInteger(color_format); const std::array mask{blend.Mask()}; VkColorComponentFlags write_mask{}; for (size_t i = 0; i < mask_table.size(); ++i) { From 646aea7fbf2ff927867001dea996662e4e389c06 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Thu, 5 Mar 2026 21:52:42 -0400 Subject: [PATCH 48/68] [vulkan] Removed unused helper in texture pass --- src/shader_recompiler/ir_opt/texture_pass.cpp | 5 ----- 1 file changed, 5 
deletions(-) diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 5fc8f0f4a9..a1405b225f 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -210,11 +210,6 @@ static inline TexturePixelFormat ReadTexturePixelFormatCached(Environment& env, const ConstBufferAddr& cbuf) { return env.ReadTexturePixelFormat(GetTextureHandleCached(env, cbuf)); } -static inline bool IsTexturePixelFormatIntegerCached(Environment& env, - const ConstBufferAddr& cbuf) { - return env.IsTexturePixelFormatInteger(GetTextureHandleCached(env, cbuf)); -} - std::optional Track(const IR::Value& value, Environment& env); static inline std::optional TrackCached(const IR::Value& v, Environment& env) { From 12fdd88a584dc058ebebc6097137a2bf20d3e4e0 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Thu, 5 Mar 2026 22:41:34 -0400 Subject: [PATCH 49/68] smol fix for query enable --- src/video_core/renderer_vulkan/vk_query_cache.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 64458a23d5..64fb71fc7a 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -143,6 +143,7 @@ public: scheduler.Record([buffer = *accumulation_buffer](vk::CommandBuffer cmdbuf) { cmdbuf.FillBuffer(buffer, 0, 8, 0); }); + ReserveBank(); } ~SamplesStreamer() = default; From 9630da580d524e67e693e271369b669409a27870 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Thu, 5 Mar 2026 23:17:15 -0400 Subject: [PATCH 50/68] [vulkan] Removed counter enable for ZPassPixelCount64 in Clear method and added initial layout transition for images in RefreshContents --- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 1 - src/video_core/texture_cache/texture_cache.h | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git 
a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 6d8cb3737c..82ffa07579 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -481,7 +481,6 @@ void RasterizerVulkan::Clear(u32 layer_count) { scheduler.RequestRenderpass(framebuffer); query_cache.NotifySegment(true); - query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, maxwell3d->regs.zpass_pixel_count_enable); u32 up_scale = 1; u32 down_shift = 0; if (texture_cache.IsRescaling()) { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 71210ffe6e..a92f44cc30 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1191,7 +1191,7 @@ void TextureCache

::DownloadImageIntoBuffer(typename TextureCache

::Image* i template void TextureCache

::RefreshContents(Image& image, ImageId image_id) { if (False(image.flags & ImageFlagBits::CpuModified)) { - // Only upload modified images + runtime.TransitionImageLayout(image); return; } From 6b87b0052a873c4a408a6eb08ffb67bf7cd567fa Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Fri, 6 Mar 2026 00:02:30 -0400 Subject: [PATCH 51/68] [test] shader float control returned to Adreno --- src/video_core/vulkan_common/vulkan_device.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 85e402ec2b..fac640a27c 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -502,7 +502,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR if (is_qualcomm) { must_emulate_scaled_formats = true; - RemoveExtension(extensions.shader_float_controls, VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME); RemoveExtensionFeature(extensions.shader_atomic_int64, features.shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME); features.shader_atomic_int64.shaderBufferInt64Atomics = false; From ce15cf7cd352ae9fcd3422c2fdeca392d5092597 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Fri, 6 Mar 2026 00:40:27 -0400 Subject: [PATCH 52/68] [vulkan] Adjusted image view usage flags to ensure compatibility with image format in TextureCache --- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 1 + src/video_core/renderer_vulkan/vk_texture_cache.cpp | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 82ffa07579..26006802ae 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -286,6 +286,7 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) { maxwell3d->regs.zpass_pixel_count_enable); 
UpdateDynamicStates(); + scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); draw_func(); } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index d51564dcb3..18bc37fbd5 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -2112,7 +2112,10 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI } } const auto format_info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format); - if (ImageUsageFlags(format_info, format) != image.UsageFlags()) { + const VkImageUsageFlags desired_view_usage = ImageUsageFlags(format_info, format); + const VkImageUsageFlags image_usage = image.UsageFlags(); + const VkImageUsageFlags view_usage = desired_view_usage & image_usage; + if (desired_view_usage != image_usage) { LOG_WARNING(Render_Vulkan, "Image view format {} has different usage flags than image format {}", format, image.info.format); @@ -2120,7 +2123,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI const VkImageViewUsageCreateInfo image_view_usage{ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO, .pNext = nullptr, - .usage = ImageUsageFlags(format_info, format), + .usage = view_usage, }; const VkImageViewCreateInfo create_info{ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, From d10080b757b28f1242547082fe9430ac8ea9e5f2 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Fri, 6 Mar 2026 01:37:49 -0400 Subject: [PATCH 53/68] [test] Histogram debug - shader float control -> initial target: Adreno --- .../frontend/maxwell/translate_program.cpp | 168 ++++++++++++++++++ 1 file changed, 168 insertions(+) diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index f156192c13..096aaf6a6a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ 
b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -5,7 +5,9 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include +#include #include #include @@ -22,6 +24,165 @@ namespace Shader::Maxwell { namespace { +struct FpControlHistogram { + std::array total{}; + std::array no_contraction{}; + std::array, 2> rounding{}; + std::array, 2> fmz{}; + std::array, 5>, 2> combos{}; +}; + +[[nodiscard]] constexpr std::string_view StageName(Stage stage) noexcept { + switch (stage) { + case Stage::VertexA: + return "VertexA"; + case Stage::VertexB: + return "VertexB"; + case Stage::TessellationControl: + return "TessellationControl"; + case Stage::TessellationEval: + return "TessellationEval"; + case Stage::Geometry: + return "Geometry"; + case Stage::Fragment: + return "Fragment"; + case Stage::Compute: + return "Compute"; + } + return "Unknown"; +} + +[[nodiscard]] constexpr std::string_view RoundingName(IR::FpRounding rounding) noexcept { + switch (rounding) { + case IR::FpRounding::DontCare: + return "DontCare"; + case IR::FpRounding::RN: + return "RN"; + case IR::FpRounding::RM: + return "RM"; + case IR::FpRounding::RP: + return "RP"; + case IR::FpRounding::RZ: + return "RZ"; + } + return "Unknown"; +} + +[[nodiscard]] constexpr std::string_view FmzName(IR::FmzMode fmz_mode) noexcept { + switch (fmz_mode) { + case IR::FmzMode::DontCare: + return "DontCare"; + case IR::FmzMode::FTZ: + return "FTZ"; + case IR::FmzMode::FMZ: + return "FMZ"; + case IR::FmzMode::None: + return "None"; + } + return "Unknown"; +} + +[[nodiscard]] constexpr std::optional FpControlBucket(const IR::Opcode opcode) noexcept { + switch (opcode) { + case IR::Opcode::FPAdd16: + case IR::Opcode::FPFma16: + case IR::Opcode::FPMul16: + case IR::Opcode::FPRoundEven16: + case IR::Opcode::FPFloor16: + case IR::Opcode::FPCeil16: + case IR::Opcode::FPTrunc16: + return 0; + case IR::Opcode::FPAdd32: + case IR::Opcode::FPFma32: + case IR::Opcode::FPMul32: + case 
IR::Opcode::FPRoundEven32: + case IR::Opcode::FPFloor32: + case IR::Opcode::FPCeil32: + case IR::Opcode::FPTrunc32: + case IR::Opcode::FPOrdEqual32: + case IR::Opcode::FPUnordEqual32: + case IR::Opcode::FPOrdNotEqual32: + case IR::Opcode::FPUnordNotEqual32: + case IR::Opcode::FPOrdLessThan32: + case IR::Opcode::FPUnordLessThan32: + case IR::Opcode::FPOrdGreaterThan32: + case IR::Opcode::FPUnordGreaterThan32: + case IR::Opcode::FPOrdLessThanEqual32: + case IR::Opcode::FPUnordLessThanEqual32: + case IR::Opcode::FPOrdGreaterThanEqual32: + case IR::Opcode::FPUnordGreaterThanEqual32: + case IR::Opcode::ConvertF16F32: + case IR::Opcode::ConvertF64F32: + return 1; + default: + return std::nullopt; + } +} + +FpControlHistogram CollectFpControlHistogram(const IR::Program& program) { + FpControlHistogram histogram{}; + for (const IR::Block* const block : program.post_order_blocks) { + for (const IR::Inst& inst : block->Instructions()) { + const std::optional bucket{FpControlBucket(inst.GetOpcode())}; + if (!bucket) { + continue; + } + const auto flags{inst.Flags()}; + ++histogram.total[*bucket]; + if (flags.no_contraction) { + ++histogram.no_contraction[*bucket]; + } + ++histogram.rounding[*bucket][static_cast(flags.rounding)]; + ++histogram.fmz[*bucket][static_cast(flags.fmz_mode)]; + ++histogram.combos[*bucket][static_cast(flags.rounding)] + [static_cast(flags.fmz_mode)]; + } + } + return histogram; +} + +void LogFpControlHistogram(const IR::Program& program) { + const FpControlHistogram histogram{CollectFpControlHistogram(program)}; + if (histogram.total[0] == 0 && histogram.total[1] == 0) { + return; + } + + LOG_DEBUG(Shader, "FP control histogram for {} shader: blocks={} post_order_blocks={}", + StageName(program.stage), program.blocks.size(), program.post_order_blocks.size()); + + constexpr std::array precision_names{"fp16", "fp32"}; + for (size_t bucket = 0; bucket < precision_names.size(); ++bucket) { + if (histogram.total[bucket] == 0) { + continue; + } + + 
LOG_DEBUG(Shader, + " {} total={} no_contraction={} rounding[DontCare={}, RN={}, RM={}, RP={}, RZ={}] fmz[DontCare={}, FTZ={}, FMZ={}, None={}]", + precision_names[bucket], histogram.total[bucket], histogram.no_contraction[bucket], + histogram.rounding[bucket][static_cast(IR::FpRounding::DontCare)], + histogram.rounding[bucket][static_cast(IR::FpRounding::RN)], + histogram.rounding[bucket][static_cast(IR::FpRounding::RM)], + histogram.rounding[bucket][static_cast(IR::FpRounding::RP)], + histogram.rounding[bucket][static_cast(IR::FpRounding::RZ)], + histogram.fmz[bucket][static_cast(IR::FmzMode::DontCare)], + histogram.fmz[bucket][static_cast(IR::FmzMode::FTZ)], + histogram.fmz[bucket][static_cast(IR::FmzMode::FMZ)], + histogram.fmz[bucket][static_cast(IR::FmzMode::None)]); + + for (size_t rounding = 0; rounding < histogram.combos[bucket].size(); ++rounding) { + for (size_t fmz = 0; fmz < histogram.combos[bucket][rounding].size(); ++fmz) { + const u32 count{histogram.combos[bucket][rounding][fmz]}; + if (count == 0) { + continue; + } + LOG_DEBUG(Shader, " {} combo {} / {} = {}", precision_names[bucket], + RoundingName(static_cast(rounding)), + FmzName(static_cast(fmz)), count); + } + } + } +} + IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { size_t num_syntax_blocks{}; for (const auto& node : syntax_list) { @@ -315,6 +476,10 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool Date: Fri, 6 Mar 2026 02:13:47 -0400 Subject: [PATCH 54/68] [debug] fix logging entries for histogram --- .../frontend/maxwell/translate_program.cpp | 35 ++++++++++--------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 096aaf6a6a..d7cf548c63 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -11,6 +11,7 @@ #include 
#include +#include "common/logging/log.h" #include "common/settings.h" #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/basic_block.h" @@ -147,8 +148,8 @@ void LogFpControlHistogram(const IR::Program& program) { return; } - LOG_DEBUG(Shader, "FP control histogram for {} shader: blocks={} post_order_blocks={}", - StageName(program.stage), program.blocks.size(), program.post_order_blocks.size()); + LOG_INFO(Shader, "FP_HIST {} shader blocks={} post_order_blocks={}", + StageName(program.stage), program.blocks.size(), program.post_order_blocks.size()); constexpr std::array precision_names{"fp16", "fp32"}; for (size_t bucket = 0; bucket < precision_names.size(); ++bucket) { @@ -156,18 +157,18 @@ void LogFpControlHistogram(const IR::Program& program) { continue; } - LOG_DEBUG(Shader, - " {} total={} no_contraction={} rounding[DontCare={}, RN={}, RM={}, RP={}, RZ={}] fmz[DontCare={}, FTZ={}, FMZ={}, None={}]", - precision_names[bucket], histogram.total[bucket], histogram.no_contraction[bucket], - histogram.rounding[bucket][static_cast(IR::FpRounding::DontCare)], - histogram.rounding[bucket][static_cast(IR::FpRounding::RN)], - histogram.rounding[bucket][static_cast(IR::FpRounding::RM)], - histogram.rounding[bucket][static_cast(IR::FpRounding::RP)], - histogram.rounding[bucket][static_cast(IR::FpRounding::RZ)], - histogram.fmz[bucket][static_cast(IR::FmzMode::DontCare)], - histogram.fmz[bucket][static_cast(IR::FmzMode::FTZ)], - histogram.fmz[bucket][static_cast(IR::FmzMode::FMZ)], - histogram.fmz[bucket][static_cast(IR::FmzMode::None)]); + LOG_INFO(Shader, + "FP_HIST {} total={} no_contraction={} rounding[DontCare={}, RN={}, RM={}, RP={}, RZ={}] fmz[DontCare={}, FTZ={}, FMZ={}, None={}]", + precision_names[bucket], histogram.total[bucket], histogram.no_contraction[bucket], + histogram.rounding[bucket][static_cast(IR::FpRounding::DontCare)], + histogram.rounding[bucket][static_cast(IR::FpRounding::RN)], + 
histogram.rounding[bucket][static_cast(IR::FpRounding::RM)], + histogram.rounding[bucket][static_cast(IR::FpRounding::RP)], + histogram.rounding[bucket][static_cast(IR::FpRounding::RZ)], + histogram.fmz[bucket][static_cast(IR::FmzMode::DontCare)], + histogram.fmz[bucket][static_cast(IR::FmzMode::FTZ)], + histogram.fmz[bucket][static_cast(IR::FmzMode::FMZ)], + histogram.fmz[bucket][static_cast(IR::FmzMode::None)]); for (size_t rounding = 0; rounding < histogram.combos[bucket].size(); ++rounding) { for (size_t fmz = 0; fmz < histogram.combos[bucket][rounding].size(); ++fmz) { @@ -175,9 +176,9 @@ void LogFpControlHistogram(const IR::Program& program) { if (count == 0) { continue; } - LOG_DEBUG(Shader, " {} combo {} / {} = {}", precision_names[bucket], - RoundingName(static_cast(rounding)), - FmzName(static_cast(fmz)), count); + LOG_INFO(Shader, "FP_HIST {} combo {} / {} = {}", precision_names[bucket], + RoundingName(static_cast(rounding)), + FmzName(static_cast(fmz)), count); } } } From dee102cf92614da3231016ddfa570b8ff5a08052 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Fri, 6 Mar 2026 18:30:18 -0400 Subject: [PATCH 55/68] [debug] Added extra parametters for histogram track info -> shader info for RZ --- .../frontend/maxwell/translate_program.cpp | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index d7cf548c63..f52a3e72de 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -142,6 +142,55 @@ FpControlHistogram CollectFpControlHistogram(const IR::Program& program) { return histogram; } +void LogRzFpControlTrace(Environment& env, const IR::Program& program) { + std::array totals{}; + for (const IR::Block* const block : program.post_order_blocks) { + for (const IR::Inst& inst : block->Instructions()) { + const std::optional 
bucket{FpControlBucket(inst.GetOpcode())}; + if (!bucket) { + continue; + } + const auto flags{inst.Flags()}; + if (flags.rounding != IR::FpRounding::RZ) { + continue; + } + ++totals[*bucket]; + } + } + + if (totals[0] == 0 && totals[1] == 0) { + return; + } + + constexpr std::array precision_names{"fp16", "fp32"}; + LOG_INFO(Shader, + "FP_RZ {} shader start={:#010x} blocks={} post_order_blocks={} fp16={} fp32={}", + StageName(program.stage), env.StartAddress(), program.blocks.size(), + program.post_order_blocks.size(), totals[0], totals[1]); + + for (const IR::Block* const block : program.post_order_blocks) { + u32 inst_index{}; + for (const IR::Inst& inst : block->Instructions()) { + const std::optional bucket{FpControlBucket(inst.GetOpcode())}; + if (!bucket) { + ++inst_index; + continue; + } + const auto flags{inst.Flags()}; + if (flags.rounding != IR::FpRounding::RZ) { + ++inst_index; + continue; + } + LOG_INFO(Shader, + "FP_RZ {} start={:#010x} block_order={} inst_index={} precision={} opcode={} no_contraction={} fmz={}", + StageName(program.stage), env.StartAddress(), block->GetOrder(), inst_index, + precision_names[*bucket], inst.GetOpcode(), flags.no_contraction, + FmzName(flags.fmz_mode)); + ++inst_index; + } + } +} + void LogFpControlHistogram(const IR::Program& program) { const FpControlHistogram histogram{CollectFpControlHistogram(program)}; if (histogram.total[0] == 0 && histogram.total[1] == 0) { @@ -479,6 +528,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool Date: Fri, 6 Mar 2026 18:37:37 -0400 Subject: [PATCH 56/68] [vulkan] Maintenance9 removal --- src/video_core/vulkan_common/vulkan.h | 5 +---- src/video_core/vulkan_common/vulkan_device.cpp | 4 ---- src/video_core/vulkan_common/vulkan_device.h | 6 ------ 3 files changed, 1 insertion(+), 14 deletions(-) diff --git a/src/video_core/vulkan_common/vulkan.h b/src/video_core/vulkan_common/vulkan.h index 2cc0f0d7f0..018438e36d 100644 --- a/src/video_core/vulkan_common/vulkan.h +++ 
b/src/video_core/vulkan_common/vulkan.h @@ -22,16 +22,13 @@ #include -// Define maintenance 7-9 extension names (not yet in official Vulkan headers) +// Define maintenance 7-8 extension names #ifndef VK_KHR_MAINTENANCE_7_EXTENSION_NAME #define VK_KHR_MAINTENANCE_7_EXTENSION_NAME "VK_KHR_maintenance7" #endif #ifndef VK_KHR_MAINTENANCE_8_EXTENSION_NAME #define VK_KHR_MAINTENANCE_8_EXTENSION_NAME "VK_KHR_maintenance8" #endif -#ifndef VK_KHR_MAINTENANCE_9_EXTENSION_NAME -#define VK_KHR_MAINTENANCE_9_EXTENSION_NAME "VK_KHR_maintenance9" -#endif // Sanitize macros #undef CreateEvent diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index fac640a27c..f2fe444e05 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -1286,10 +1286,6 @@ void Device::RemoveUnsuitableExtensions() { // VK_KHR_maintenance8 extensions.maintenance8 = loaded_extensions.contains(VK_KHR_MAINTENANCE_8_EXTENSION_NAME); RemoveExtensionIfUnsuitable(extensions.maintenance8, VK_KHR_MAINTENANCE_8_EXTENSION_NAME); - - // VK_KHR_maintenance9 - extensions.maintenance9 = loaded_extensions.contains(VK_KHR_MAINTENANCE_9_EXTENSION_NAME); - RemoveExtensionIfUnsuitable(extensions.maintenance9, VK_KHR_MAINTENANCE_9_EXTENSION_NAME); } void Device::SetupFamilies(VkSurfaceKHR surface) { diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 67ab01752d..2afb57def5 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -97,7 +97,6 @@ VK_DEFINE_HANDLE(VmaAllocator) EXTENSION(KHR, MAINTENANCE_3, maintenance3) \ EXTENSION(KHR, MAINTENANCE_7, maintenance7) \ EXTENSION(KHR, MAINTENANCE_8, maintenance8) \ - EXTENSION(KHR, MAINTENANCE_9, maintenance9) \ EXTENSION(NV, DEVICE_DIAGNOSTICS_CONFIG, device_diagnostics_config) \ EXTENSION(NV, GEOMETRY_SHADER_PASSTHROUGH, geometry_shader_passthrough) \ 
EXTENSION(NV, VIEWPORT_ARRAY2, viewport_array2) \ @@ -884,11 +883,6 @@ public: return extensions.maintenance8; } - /// Returns true if the device supports VK_KHR_maintenance9. - bool IsKhrMaintenance9Supported() const { - return extensions.maintenance9; - } - /// Returns true if the device supports UINT8 index buffer conversion via compute shader. bool SupportsUint8Indices() const { return features.bit8_storage.storageBuffer8BitAccess && From 3db45f3c46b37e5dd72ebaab956bf574a129eb6e Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Fri, 6 Mar 2026 18:51:45 -0400 Subject: [PATCH 57/68] [vulkan] Implemented active color output tracking in runtime info and update fragment color handling --- .../backend/spirv/emit_spirv_context_get_set.cpp | 3 +++ .../backend/spirv/spirv_emit_context.cpp | 6 ++++-- src/shader_recompiler/runtime_info.h | 3 +++ src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 11 +++++++++++ 4 files changed, 21 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index db11def7b2..beab29ec8a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -491,6 +491,9 @@ void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) { } void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) { + if (!ctx.runtime_info.active_color_outputs[index]) { + return; + } const Id component_id{ctx.Const(component)}; const AttributeType type{ctx.runtime_info.color_output_types[index]}; if (type == AttributeType::Float) { diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 68b9cad859..fb66a7962e 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -1688,8 
+1688,10 @@ void EmitContext::DefineOutputs(const IR::Program& program) { case Stage::Fragment: for (u32 index = 0; index < 8; ++index) { const bool need_dual_source = runtime_info.dual_source_blend && index <= 1; - if (!need_dual_source && !info.stores_frag_color[index] && - !profile.need_declared_frag_colors) { + const bool should_declare = runtime_info.active_color_outputs[index] && + (info.stores_frag_color[index] || + profile.need_declared_frag_colors); + if (!need_dual_source && !should_declare) { continue; } const Id type{GetAttributeType(*this, runtime_info.color_output_types[index])}; diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index be10a9bb08..b8888504bb 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -111,6 +111,9 @@ struct RuntimeInfo { /// Output types for each color attachment std::array color_output_types{}; + /// Fragment color outputs that are active for the current pipeline. + std::array active_color_outputs{true, true, true, true, true, true, true, true}; + /// Dual source blending bool dual_source_blend{}; }; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 8cf02a959c..e838dc0b43 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -249,6 +249,17 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span program dst_a == F::Source1Alpha_GL || dst_a == F::OneMinusSource1Alpha_GL; } + for (size_t i = 0; i < info.active_color_outputs.size(); ++i) { + const auto format = static_cast(key.state.color_formats[i]); + info.active_color_outputs[i] = format != Tegra::RenderTargetFormat::NONE; + } + if (info.dual_source_blend && info.active_color_outputs[0]) { + info.active_color_outputs[1] = true; + } + if (info.alpha_test_func && *info.alpha_test_func != Shader::CompareFunction::Always) { + info.active_color_outputs[0] = 
true; + } + if (device.IsMoltenVK()) { for (size_t i = 0; i < 8; ++i) { const auto format = static_cast(key.state.color_formats[i]); From 47fe86be7b573e22c98f659f1783b74554e90ccb Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Fri, 6 Mar 2026 19:23:25 -0400 Subject: [PATCH 58/68] [vulkan] Extended 3D image handling for subresource range calculations --- src/video_core/renderer_vulkan/blit_image.cpp | 9 ++- .../renderer_vulkan/vk_texture_cache.cpp | 77 ++++++++++++------- 2 files changed, 59 insertions(+), 27 deletions(-) diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index b4aab57b97..e22cf72c2a 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -56,16 +56,23 @@ namespace { [[nodiscard]] VkImageSubresourceRange SubresourceRangeFromView(const ImageView& image_view) { auto range = image_view.range; + const bool is_3d_image = image_view.type == VideoCommon::ImageViewType::e3D || + (image_view.flags & VideoCommon::ImageViewFlagBits::Slice) != + VideoCommon::ImageViewFlagBits{}; if ((image_view.flags & VideoCommon::ImageViewFlagBits::Slice) != VideoCommon::ImageViewFlagBits{}) { range.base.layer = 0; range.extent.layers = 1; } + u32 layer_count = static_cast(range.extent.layers); + if (is_3d_image && layer_count == 1) { + layer_count = VK_REMAINING_ARRAY_LAYERS; + } return VkImageSubresourceRange{ .aspectMask = AspectMaskFromFormat(image_view.format), .baseMipLevel = static_cast(range.base.level), .levelCount = static_cast(range.extent.levels), .baseArrayLayer = static_cast(range.base.layer), - .layerCount = static_cast(range.extent.layers), + .layerCount = layer_count, }; } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 18bc37fbd5..61932ea941 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ 
-446,15 +446,24 @@ TransformBufferCopies(std::span copies, size_t bu }; } +[[nodiscard]] VkImageSubresourceRange MakeBarrierSubresourceRange( + VkImageAspectFlags aspect_mask, const SubresourceRange& range, bool is_3d_image) { + VkImageSubresourceRange subresource_range = MakeSubresourceRange(aspect_mask, range); + if (is_3d_image && subresource_range.layerCount == 1) { + subresource_range.layerCount = VK_REMAINING_ARRAY_LAYERS; + } + return subresource_range; +} + [[nodiscard]] VkImageSubresourceRange MakeSubresourceRange(const ImageView* image_view) { SubresourceRange range = image_view->range; + const bool is_3d_image = image_view->type == VideoCommon::ImageViewType::e3D || + True(image_view->flags & VideoCommon::ImageViewFlagBits::Slice); if (True(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) { - // Slice image views always affect a single layer, but their subresource range corresponds - // to the slice. Override the value to affect a single layer. range.base.layer = 0; range.extent.layers = 1; } - return MakeSubresourceRange(ImageAspectMask(image_view->format), range); + return MakeBarrierSubresourceRange(ImageAspectMask(image_view->format), range, is_3d_image); } [[nodiscard]] VkImageSubresourceLayers MakeSubresourceLayers(const ImageView* image_view) { @@ -524,18 +533,23 @@ struct RangedBarrierRange { max_layer = (std::max)(max_layer, layers.baseArrayLayer + layers.layerCount); } - VkImageSubresourceRange SubresourceRange(VkImageAspectFlags aspect_mask) const noexcept { - return VkImageSubresourceRange{ - .aspectMask = aspect_mask, - .baseMipLevel = min_mip, - .levelCount = max_mip - min_mip, - .baseArrayLayer = min_layer, - .layerCount = max_layer - min_layer, + VkImageSubresourceRange SubresourceRange(VkImageAspectFlags aspect_mask, + bool is_3d_image) const noexcept { + const SubresourceRange range{ + .base = { + .level = static_cast(min_mip), + .layer = static_cast(min_layer), + }, + .extent = { + .levels = static_cast(max_mip - min_mip), + 
.layers = static_cast(max_layer - min_layer), + }, }; + return MakeBarrierSubresourceRange(aspect_mask, range, is_3d_image); } }; void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image, - VkImageAspectFlags aspect_mask, bool is_initialized, + VkImageAspectFlags aspect_mask, bool is_initialized, bool is_3d_image, std::span copies) { static constexpr VkAccessFlags WRITE_ACCESS_FLAGS = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | @@ -549,7 +563,8 @@ void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage im for (const auto& region : copies) { range.AddLayers(region.imageSubresource); } - const VkImageSubresourceRange subresource_range = range.SubresourceRange(aspect_mask); + const VkImageSubresourceRange subresource_range = + range.SubresourceRange(aspect_mask, is_3d_image); const VkImageMemoryBarrier read_barrier{ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, @@ -1006,9 +1021,12 @@ void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, const VkBuffer copy_buffer = GetTemporaryBuffer(total_size); const VkImage dst_image = dst.Handle(); const VkImage src_image = src.Handle(); + const bool dst_is_3d = dst.info.type == ImageType::e3D; + const bool src_is_3d = src.info.type == ImageType::e3D; scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([dst_image, src_image, copy_buffer, src_aspect_mask, dst_aspect_mask, - vk_in_copies, vk_out_copies](vk::CommandBuffer cmdbuf) { + dst_is_3d, src_is_3d, vk_in_copies, + vk_out_copies](vk::CommandBuffer cmdbuf) { RangedBarrierRange dst_range; RangedBarrierRange src_range; for (const VkBufferImageCopy& copy : vk_in_copies) { @@ -1042,7 +1060,7 @@ void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .image = src_image, - .subresourceRange = src_range.SubresourceRange(src_aspect_mask), + .subresourceRange = 
src_range.SubresourceRange(src_aspect_mask, src_is_3d), }, }; const std::array middle_in_barrier{ @@ -1056,7 +1074,7 @@ void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .image = src_image, - .subresourceRange = src_range.SubresourceRange(src_aspect_mask), + .subresourceRange = src_range.SubresourceRange(src_aspect_mask, src_is_3d), }, }; const std::array middle_out_barrier{ @@ -1072,7 +1090,7 @@ void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .image = dst_image, - .subresourceRange = dst_range.SubresourceRange(dst_aspect_mask), + .subresourceRange = dst_range.SubresourceRange(dst_aspect_mask, dst_is_3d), }, }; const std::array post_barriers{ @@ -1091,7 +1109,7 @@ void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .image = dst_image, - .subresourceRange = dst_range.SubresourceRange(dst_aspect_mask), + .subresourceRange = dst_range.SubresourceRange(dst_aspect_mask, dst_is_3d), }, }; cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, @@ -1440,6 +1458,8 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src, boost::container::small_vector vk_copies(copies.size()); const VkImageAspectFlags aspect_mask = dst.AspectMask(); ASSERT(aspect_mask == src.AspectMask()); + const bool dst_is_3d = dst.info.type == ImageType::e3D; + const bool src_is_3d = src.info.type == ImageType::e3D; std::ranges::transform(copies, vk_copies.begin(), [aspect_mask](const auto& copy) { return MakeImageCopy(copy, aspect_mask); @@ -1447,7 +1467,8 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src, const VkImage dst_image = dst.Handle(); const VkImage src_image = src.Handle(); 
scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([dst_image, src_image, aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) { + scheduler.Record([dst_image, src_image, aspect_mask, dst_is_3d, src_is_3d, + vk_copies](vk::CommandBuffer cmdbuf) { RangedBarrierRange dst_range; RangedBarrierRange src_range; for (const VkImageCopy& copy : vk_copies) { @@ -1467,7 +1488,7 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .image = src_image, - .subresourceRange = src_range.SubresourceRange(aspect_mask), + .subresourceRange = src_range.SubresourceRange(aspect_mask, src_is_3d), }, VkImageMemoryBarrier{ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, @@ -1481,7 +1502,7 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .image = dst_image, - .subresourceRange = dst_range.SubresourceRange(aspect_mask), + .subresourceRange = dst_range.SubresourceRange(aspect_mask, dst_is_3d), }, }; const std::array post_barriers{ @@ -1495,7 +1516,7 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .image = src_image, - .subresourceRange = src_range.SubresourceRange(aspect_mask), + .subresourceRange = src_range.SubresourceRange(aspect_mask, src_is_3d), }, VkImageMemoryBarrier{ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, @@ -1512,7 +1533,7 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .image = dst_image, - .subresourceRange = dst_range.SubresourceRange(aspect_mask), + .subresourceRange = dst_range.SubresourceRange(aspect_mask, dst_is_3d), }, }; cmdbuf.PipelineBarrier( @@ -1691,10 +1712,12 @@ void Image::UploadMemory(VkBuffer buffer, 
VkDeviceSize offset, const VkBuffer src_buffer = buffer; const VkImage temp_vk_image = *temp_wrapper->original_image; const VkImageAspectFlags vk_aspect_mask = temp_wrapper->aspect_mask; + const bool temp_is_3d = temp_info.type == ImageType::e3D; - scheduler->Record([src_buffer, temp_vk_image, vk_aspect_mask, vk_copies, + scheduler->Record([src_buffer, temp_vk_image, vk_aspect_mask, temp_is_3d, vk_copies, keep = temp_wrapper](vk::CommandBuffer cmdbuf) { - CopyBufferToImage(cmdbuf, src_buffer, temp_vk_image, vk_aspect_mask, false, VideoCommon::FixSmallVectorADL(vk_copies)); + CopyBufferToImage(cmdbuf, src_buffer, temp_vk_image, vk_aspect_mask, false, + temp_is_3d, VideoCommon::FixSmallVectorADL(vk_copies)); }); // Use MSAACopyPass to convert from non-MSAA to MSAA @@ -1730,10 +1753,12 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset, const VkImage vk_image = *original_image; const VkImageAspectFlags vk_aspect_mask = aspect_mask; const bool was_initialized = std::exchange(initialized, true); + const bool is_3d_image = info.type == ImageType::e3D; - scheduler->Record([src_buffer, vk_image, vk_aspect_mask, was_initialized, + scheduler->Record([src_buffer, vk_image, vk_aspect_mask, was_initialized, is_3d_image, vk_copies](vk::CommandBuffer cmdbuf) { - CopyBufferToImage(cmdbuf, src_buffer, vk_image, vk_aspect_mask, was_initialized, VideoCommon::FixSmallVectorADL(vk_copies)); + CopyBufferToImage(cmdbuf, src_buffer, vk_image, vk_aspect_mask, was_initialized, + is_3d_image, VideoCommon::FixSmallVectorADL(vk_copies)); }); if (is_rescaled) { From 15d575aa31d77213abf587986375ea28fad66659 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Fri, 6 Mar 2026 20:29:20 -0400 Subject: [PATCH 59/68] [test] Change forcerd order for CompareMask + forced refresh/ emit --- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 6 +++++- src/video_core/renderer_vulkan/vk_state_tracker.h | 4 ++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git 
a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 26006802ae..e01bf3a11a 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -1075,7 +1075,6 @@ void RasterizerVulkan::UpdateDynamicStates() { UpdateDepthBias(regs); UpdateBlendConstants(regs); UpdateDepthBounds(regs); - UpdateStencilFaces(regs); UpdateLineWidth(regs); UpdateLineStipple(regs); @@ -1094,6 +1093,8 @@ void RasterizerVulkan::UpdateDynamicStates() { } } + UpdateStencilFaces(regs); + // EDS2: PrimitiveRestart, RasterizerDiscard, DepthBias enable/disable if (device.IsExtExtendedDynamicState2Supported() && pipeline && pipeline->UsesExtendedDynamicState2()) { UpdatePrimitiveRestartEnable(regs); @@ -1640,6 +1641,9 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& if (!state_tracker.TouchStencilTestEnable()) { return; } + if (regs.stencil_enable != 0) { + state_tracker.ResetStencilState(); + } scheduler.Record([enable = regs.stencil_enable](vk::CommandBuffer cmdbuf) { cmdbuf.SetStencilTestEnableEXT(enable); }); diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index 7282bc1f44..81cd4d9631 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h @@ -170,6 +170,10 @@ public: return ExchangeCheck(back.compare_mask, new_value) || stencil_reset; } + void ResetStencilState() { + stencil_reset = true; + } + void ClearStencilReset() { stencil_reset = false; } From 5b12a7725da9a3cf42143c2144c9c4a6ba50d7cd Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Fri, 6 Mar 2026 20:43:33 -0400 Subject: [PATCH 60/68] fix build --- src/video_core/renderer_vulkan/vk_texture_cache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp 
index 61932ea941..5cd4273c2d 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -535,7 +535,7 @@ struct RangedBarrierRange { VkImageSubresourceRange SubresourceRange(VkImageAspectFlags aspect_mask, bool is_3d_image) const noexcept { - const SubresourceRange range{ + const VideoCommon::SubresourceRange range{ .base = { .level = static_cast(min_mip), .layer = static_cast(min_layer), From 108bb3d28b94b10fef052476155241278a88bbf9 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Fri, 6 Mar 2026 23:24:18 -0400 Subject: [PATCH 61/68] [debug] Added extra logging/ address for shader info -> FP32Mul Optimize Path --- .../backend/spirv/emit_spirv.cpp | 109 +++++++++++++++++- .../spirv/emit_spirv_floating_point.cpp | 45 +++++++- .../backend/spirv/spirv_emit_context.cpp | 39 ++++++- .../backend/spirv/spirv_emit_context.h | 2 + src/shader_recompiler/frontend/ir/program.h | 4 + .../frontend/maxwell/translate_program.cpp | 2 + 6 files changed, 198 insertions(+), 3 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 313a1deb30..719d7a2744 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project @@ -11,6 +11,7 @@ #include #include +#include "common/logging/log.h" #include "common/settings.h" #include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" @@ -20,6 +21,100 @@ namespace Shader::Backend::SPIRV { namespace { +[[nodiscard]] constexpr std::string_view StageName(Stage stage) noexcept { + switch (stage) { + case Stage::VertexA: + return 
"VertexA"; + case Stage::VertexB: + return "VertexB"; + case Stage::TessellationControl: + return "TessellationControl"; + case Stage::TessellationEval: + return "TessellationEval"; + case Stage::Geometry: + return "Geometry"; + case Stage::Fragment: + return "Fragment"; + case Stage::Compute: + return "Compute"; + } + return "Unknown"; +} + +[[nodiscard]] constexpr std::string_view DenormModeName(bool flush, bool preserve) noexcept { + if (flush && preserve) { + return "Flush+Preserve"; + } + if (flush) { + return "Flush"; + } + if (preserve) { + return "Preserve"; + } + return "None"; +} + +void LogRzBackendSummary(const Profile& profile, const IR::Program& program, bool optimize) { + if (!Settings::values.renderer_debug) { + return; + } + u32 rz_count{}; + for (const IR::Block* const block : program.post_order_blocks) { + for (const IR::Inst& inst : block->Instructions()) { + switch (inst.GetOpcode()) { + case IR::Opcode::FPAdd16: + case IR::Opcode::FPFma16: + case IR::Opcode::FPMul16: + case IR::Opcode::FPRoundEven16: + case IR::Opcode::FPFloor16: + case IR::Opcode::FPCeil16: + case IR::Opcode::FPTrunc16: + case IR::Opcode::FPAdd32: + case IR::Opcode::FPFma32: + case IR::Opcode::FPMul32: + case IR::Opcode::FPRoundEven32: + case IR::Opcode::FPFloor32: + case IR::Opcode::FPCeil32: + case IR::Opcode::FPTrunc32: + case IR::Opcode::FPOrdEqual32: + case IR::Opcode::FPUnordEqual32: + case IR::Opcode::FPOrdNotEqual32: + case IR::Opcode::FPUnordNotEqual32: + case IR::Opcode::FPOrdLessThan32: + case IR::Opcode::FPUnordLessThan32: + case IR::Opcode::FPOrdGreaterThan32: + case IR::Opcode::FPUnordGreaterThan32: + case IR::Opcode::FPOrdLessThanEqual32: + case IR::Opcode::FPUnordLessThanEqual32: + case IR::Opcode::FPOrdGreaterThanEqual32: + case IR::Opcode::FPUnordGreaterThanEqual32: + case IR::Opcode::ConvertF16F32: + case IR::Opcode::ConvertF64F32: + rz_count += inst.Flags().rounding == IR::FpRounding::RZ ? 
1U : 0U; + break; + default: + break; + } + } + } + if (rz_count == 0) { + return; + } + + LOG_INFO(Shader_SPIRV, + "SPV_RZ {} start={:#010x} optimize={} support_float_controls={} separate_denorm_behavior={} broken_fp16_float_controls={} fp16_denorm={} fp32_denorm={} signed_nan16={} signed_nan32={} signed_nan64={} rz_inst_count={}", + StageName(program.stage), program.start_address, optimize, + profile.support_float_controls, profile.support_separate_denorm_behavior, + profile.has_broken_fp16_float_controls, + DenormModeName(program.info.uses_fp16_denorms_flush, + program.info.uses_fp16_denorms_preserve), + DenormModeName(program.info.uses_fp32_denorms_flush, + program.info.uses_fp32_denorms_preserve), + profile.support_fp16_signed_zero_nan_preserve, + profile.support_fp32_signed_zero_nan_preserve, + profile.support_fp64_signed_zero_nan_preserve, rz_count); +} + template struct FuncTraits {}; thread_local std::unique_ptr thread_optimizer; @@ -503,6 +598,7 @@ void PatchPhiNodes(IR::Program& program, EmitContext& ctx) { std::vector EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program, Bindings& bindings, bool optimize) { + LogRzBackendSummary(profile, program, optimize); EmitContext ctx{profile, runtime_info, program, bindings}; const Id main{DefineMain(ctx, program)}; DefineEntryPoint(program, ctx, main); @@ -516,6 +612,12 @@ std::vector EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_in PatchPhiNodes(program, ctx); if (!optimize) { + if (Settings::values.renderer_debug && ctx.log_rz_fp_controls) { + const std::vector spirv{ctx.Assemble()}; + LOG_INFO(Shader_SPIRV, "SPV_RZ {} start={:#010x} assembled_words={} optimized_words={} validator_run=false", + StageName(program.stage), program.start_address, spirv.size(), spirv.size()); + return spirv; + } return ctx.Assemble(); } else { std::vector spirv = ctx.Assemble(); @@ -535,6 +637,11 @@ std::vector EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_in "Failed 
to optimize SPIRV shader output, continuing without optimization"); result = std::move(spirv); } + if (Settings::values.renderer_debug && ctx.log_rz_fp_controls) { + LOG_INFO(Shader_SPIRV, + "SPV_RZ {} start={:#010x} assembled_words={} optimized_words={} validator_run=false", + StageName(program.stage), program.start_address, spirv.size(), result.size()); + } return result; } } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index d921913b4a..d09b2238e5 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -1,16 +1,59 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "common/logging/log.h" #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/backend/spirv/spirv_emit_context.h" #include "shader_recompiler/frontend/ir/modifiers.h" namespace Shader::Backend::SPIRV { namespace { +[[nodiscard]] constexpr std::string_view StageName(Stage stage) noexcept { + switch (stage) { + case Stage::VertexA: + return "VertexA"; + case Stage::VertexB: + return "VertexB"; + case Stage::TessellationControl: + return "TessellationControl"; + case Stage::TessellationEval: + return "TessellationEval"; + case Stage::Geometry: + return "Geometry"; + case Stage::Fragment: + return "Fragment"; + case Stage::Compute: + return "Compute"; + } + return "Unknown"; +} + +[[nodiscard]] constexpr std::string_view FmzName(IR::FmzMode fmz_mode) noexcept { + switch (fmz_mode) { + case IR::FmzMode::DontCare: + return "DontCare"; + case IR::FmzMode::FTZ: + return "FTZ"; + case IR::FmzMode::FMZ: + return "FMZ"; + case 
IR::FmzMode::None: + return "None"; + } + return "Unknown"; +} + Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) { const auto flags{inst->Flags()}; + if (Settings::values.renderer_debug && ctx.log_rz_fp_controls && + flags.rounding == IR::FpRounding::RZ) { + LOG_INFO(Shader_SPIRV, + "SPV_RZ_EMIT {} start={:#010x} ir_opcode={} spirv_op=OpFMul result_id={} no_contraction={} fmz={} float_controls_ext={}", + StageName(ctx.stage), ctx.start_address, inst->GetOpcode(), op, + flags.no_contraction, FmzName(flags.fmz_mode), ctx.profile.support_float_controls); + } if (flags.no_contraction) { ctx.Decorate(op, spv::Decoration::NoContraction); } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index fb66a7962e..7b0181249c 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -473,7 +473,44 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_, IR::Program& program, Bindings& bindings) : Sirit::Module(profile_.supported_spirv), profile{profile_}, runtime_info{runtime_info_}, - stage{program.stage}, texture_rescaling_index{bindings.texture_scaling_index}, + stage{program.stage}, start_address{program.start_address}, + log_rz_fp_controls{std::ranges::any_of(program.post_order_blocks, [](const IR::Block* block) { + return std::ranges::any_of(block->Instructions(), [](const IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::FPAdd16: + case IR::Opcode::FPFma16: + case IR::Opcode::FPMul16: + case IR::Opcode::FPRoundEven16: + case IR::Opcode::FPFloor16: + case IR::Opcode::FPCeil16: + case IR::Opcode::FPTrunc16: + case IR::Opcode::FPAdd32: + case IR::Opcode::FPFma32: + case IR::Opcode::FPMul32: + case IR::Opcode::FPRoundEven32: + case IR::Opcode::FPFloor32: + case IR::Opcode::FPCeil32: + 
case IR::Opcode::FPTrunc32: + case IR::Opcode::FPOrdEqual32: + case IR::Opcode::FPUnordEqual32: + case IR::Opcode::FPOrdNotEqual32: + case IR::Opcode::FPUnordNotEqual32: + case IR::Opcode::FPOrdLessThan32: + case IR::Opcode::FPUnordLessThan32: + case IR::Opcode::FPOrdGreaterThan32: + case IR::Opcode::FPUnordGreaterThan32: + case IR::Opcode::FPOrdLessThanEqual32: + case IR::Opcode::FPUnordLessThanEqual32: + case IR::Opcode::FPOrdGreaterThanEqual32: + case IR::Opcode::FPUnordGreaterThanEqual32: + case IR::Opcode::ConvertF16F32: + case IR::Opcode::ConvertF64F32: + return inst.Flags().rounding == IR::FpRounding::RZ; + default: + return false; + } + }); + })}, texture_rescaling_index{bindings.texture_scaling_index}, image_rescaling_index{bindings.image_scaling_index} { const bool is_unified{profile.unified_descriptor_binding}; u32& uniform_binding{is_unified ? bindings.unified : bindings.uniform_buffer}; diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 396022eddf..21151bab38 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -216,6 +216,8 @@ public: const Profile& profile; const RuntimeInfo& runtime_info; Stage stage{}; + u32 start_address{}; + bool log_rz_fp_controls{}; Id void_id{}; Id U1{}; diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h index 6b4a05c598..1836a18bd3 100644 --- a/src/shader_recompiler/frontend/ir/program.h +++ b/src/shader_recompiler/frontend/ir/program.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -20,6 +23,7 @@ struct Program { BlockList post_order_blocks; Info info; Stage stage{}; + u32 start_address{}; std::array workgroup_size{}; 
OutputTopology output_topology{}; u32 output_vertices{}; diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index f52a3e72de..6cca023330 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -458,6 +458,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool Date: Fri, 6 Mar 2026 23:31:27 -0400 Subject: [PATCH 62/68] [vulkan] Added no depth vs depth compare support --- .../renderer_vulkan/pipeline_helper.h | 11 ++++++++-- .../renderer_vulkan/vk_texture_cache.cpp | 16 +++++++++++---- .../renderer_vulkan/vk_texture_cache.h | 20 ++++++++++++++++--- 3 files changed, 38 insertions(+), 9 deletions(-) diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index 8d085f4541..f5c6ca78b5 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -15,6 +15,7 @@ #include "shader_recompiler/shader_info.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/surface.h" #include "video_core/texture_cache/types.h" #include "video_core/vulkan_common/vulkan_device.h" @@ -232,10 +233,16 @@ inline void PushImageDescriptors(TextureCache& texture_cache, ImageView& image_view{texture_cache.GetImageView(image_view_id)}; const VkImageView vk_image_view{image_view.Handle(desc.type)}; const Sampler& sampler{texture_cache.GetSampler(sampler_id)}; + const auto surface_type{VideoCore::Surface::GetFormatType(image_view.format)}; + const bool allow_depth_compare = + desc.is_depth && (surface_type == VideoCore::Surface::SurfaceType::Depth || + surface_type == VideoCore::Surface::SurfaceType::DepthStencil); const bool use_fallback_sampler{sampler.HasAddedAnisotropy() && !image_view.SupportsAnisotropy()}; - const 
VkSampler vk_sampler{use_fallback_sampler ? sampler.HandleWithDefaultAnisotropy() - : sampler.Handle()}; + const VkSampler vk_sampler{use_fallback_sampler + ? sampler.HandleWithDefaultAnisotropy( + allow_depth_compare) + : sampler.Handle(allow_depth_compare)}; guest_descriptor_queue.AddSampledImage(vk_image_view, vk_sampler); rescaling.PushTexture(texture_cache.IsRescaling(image_view)); } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 5cd4273c2d..a289865102 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -2314,6 +2314,7 @@ vk::ImageView ImageView::MakeView(VkFormat vk_format, VkImageAspectFlags aspect_ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& tsc) { const auto& device = runtime.device; + has_depth_compare = tsc.depth_compare_enabled != 0; // Check if custom border colors are supported const bool has_custom_border_colors = runtime.device.IsCustomBorderColorsSupported(); const bool has_format_undefined = runtime.device.IsCustomBorderColorWithoutFormatSupported(); @@ -2354,7 +2355,7 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t // Some games have samplers with garbage. Sanitize them here. const f32 max_anisotropy = std::clamp(tsc.MaxAnisotropy(), 1.0f, 16.0f); - const auto create_sampler = [&](const f32 anisotropy) { + const auto create_sampler = [&](const f32 anisotropy, bool enable_depth_compare) { return device.GetLogical().CreateSampler(VkSamplerCreateInfo{ .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, .pNext = pnext, @@ -2368,7 +2369,7 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t .mipLodBias = tsc.LodBias(), .anisotropyEnable = static_cast(anisotropy > 1.0f ? 
VK_TRUE : VK_FALSE), .maxAnisotropy = anisotropy, - .compareEnable = tsc.depth_compare_enabled, + .compareEnable = enable_depth_compare, .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.MinLod(), .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.MaxLod(), @@ -2378,11 +2379,18 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t }); }; - sampler = create_sampler(max_anisotropy); + sampler = create_sampler(max_anisotropy, has_depth_compare); + if (has_depth_compare) { + sampler_no_compare = create_sampler(max_anisotropy, false); + } const f32 max_anisotropy_default = static_cast(1U << tsc.max_anisotropy); if (max_anisotropy > max_anisotropy_default) { - sampler_default_anisotropy = create_sampler(max_anisotropy_default); + sampler_default_anisotropy = create_sampler(max_anisotropy_default, has_depth_compare); + if (has_depth_compare) { + sampler_default_anisotropy_no_compare = + create_sampler(max_anisotropy_default, false); + } } } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 4bb9687ab0..ee1c842852 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project @@ -396,11 +396,18 @@ class Sampler { public: explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&); - [[nodiscard]] VkSampler Handle() const noexcept { + [[nodiscard]] VkSampler Handle(bool enable_depth_compare = true) const noexcept { + if (!enable_depth_compare && sampler_no_compare) { + return *sampler_no_compare; + } return *sampler; } - [[nodiscard]] 
VkSampler HandleWithDefaultAnisotropy() const noexcept { + [[nodiscard]] VkSampler HandleWithDefaultAnisotropy( + bool enable_depth_compare = true) const noexcept { + if (!enable_depth_compare && sampler_default_anisotropy_no_compare) { + return *sampler_default_anisotropy_no_compare; + } return *sampler_default_anisotropy; } @@ -408,9 +415,16 @@ public: return static_cast(sampler_default_anisotropy); } + [[nodiscard]] bool HasDepthCompareEnabled() const noexcept { + return has_depth_compare; + } + private: vk::Sampler sampler; + vk::Sampler sampler_no_compare; vk::Sampler sampler_default_anisotropy; + vk::Sampler sampler_default_anisotropy_no_compare; + bool has_depth_compare = false; }; struct TextureCacheParams { From c028d925bc8e8f1931b04760bf17120906f07d7c Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Fri, 6 Mar 2026 23:38:42 -0400 Subject: [PATCH 63/68] fix build --- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 719d7a2744..d52a5aa971 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -17,6 +17,7 @@ #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/backend/spirv/spirv_emit_context.h" #include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/modifiers.h" #include "shader_recompiler/frontend/ir/program.h" namespace Shader::Backend::SPIRV { From bb768ad57056b45f095666eb9ff64ee7206b92cd Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Sat, 7 Mar 2026 00:02:39 -0400 Subject: [PATCH 64/68] fix building 2 --- .../backend/spirv/emit_spirv_floating_point.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp 
b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index d09b2238e5..ff09e6cae5 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -5,6 +5,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/logging/log.h" +#include "common/settings.h" #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/backend/spirv/spirv_emit_context.h" #include "shader_recompiler/frontend/ir/modifiers.h" @@ -51,7 +52,7 @@ Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) { flags.rounding == IR::FpRounding::RZ) { LOG_INFO(Shader_SPIRV, "SPV_RZ_EMIT {} start={:#010x} ir_opcode={} spirv_op=OpFMul result_id={} no_contraction={} fmz={} float_controls_ext={}", - StageName(ctx.stage), ctx.start_address, inst->GetOpcode(), op, + StageName(ctx.stage), ctx.start_address, inst->GetOpcode(), static_cast(op), flags.no_contraction, FmzName(flags.fmz_mode), ctx.profile.support_float_controls); } if (flags.no_contraction) { From ccb518dc05f27b08c21738cfd94363175fdab7e8 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Sat, 7 Mar 2026 00:13:54 -0400 Subject: [PATCH 65/68] Changed logging context --- .../backend/spirv/emit_spirv_floating_point.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index ff09e6cae5..1957c26df9 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -51,8 +51,8 @@ Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) { if (Settings::values.renderer_debug && ctx.log_rz_fp_controls && flags.rounding == IR::FpRounding::RZ) { LOG_INFO(Shader_SPIRV, - "SPV_RZ_EMIT {} start={:#010x} ir_opcode={} spirv_op=OpFMul result_id={} no_contraction={} fmz={} 
float_controls_ext={}", - StageName(ctx.stage), ctx.start_address, inst->GetOpcode(), static_cast(op), + "SPV_RZ_EMIT {} start={:#010x} ir_opcode={} spirv_op=OpFMul no_contraction={} fmz={} float_controls_ext={}", + StageName(ctx.stage), ctx.start_address, inst->GetOpcode(), flags.no_contraction, FmzName(flags.fmz_mode), ctx.profile.support_float_controls); } if (flags.no_contraction) { From a80e0f10bac108dc85c999e89a2c96b9edddc76c Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Sat, 7 Mar 2026 00:28:14 -0400 Subject: [PATCH 66/68] Just meow --- src/shader_recompiler/backend/spirv/spirv_emit_context.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 7b0181249c..7b422a4a41 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -17,6 +17,7 @@ #include "common/div_ceil.h" #include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/backend/spirv/spirv_emit_context.h" +#include "shader_recompiler/frontend/ir/modifiers.h" namespace Shader::Backend::SPIRV { namespace { From 21c77d5dceef8d4caafd9c7ad303bd695c867e90 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Sat, 7 Mar 2026 01:27:06 -0400 Subject: [PATCH 67/68] [vulkan] Added conservative path for RoundingModeRTZ + instrumentalization for shaders use --- .../backend/spirv/emit_spirv.cpp | 123 ++++++++++++------ src/shader_recompiler/profile.h | 4 + .../renderer_opengl/gl_shader_cache.cpp | 1 + .../renderer_vulkan/vk_pipeline_cache.cpp | 1 + 4 files changed, 92 insertions(+), 37 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index d52a5aa971..97124c0bfa 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -55,57 +55,78 @@ namespace 
{ return "None"; } -void LogRzBackendSummary(const Profile& profile, const IR::Program& program, bool optimize) { - if (!Settings::values.renderer_debug) { - return; +[[nodiscard]] constexpr bool IsFp32RoundingRelevantOpcode(IR::Opcode opcode) noexcept { + switch (opcode) { + case IR::Opcode::FPAdd32: + case IR::Opcode::FPFma32: + case IR::Opcode::FPMul32: + case IR::Opcode::FPRoundEven32: + case IR::Opcode::FPFloor32: + case IR::Opcode::FPCeil32: + case IR::Opcode::FPTrunc32: + case IR::Opcode::FPOrdEqual32: + case IR::Opcode::FPUnordEqual32: + case IR::Opcode::FPOrdNotEqual32: + case IR::Opcode::FPUnordNotEqual32: + case IR::Opcode::FPOrdLessThan32: + case IR::Opcode::FPUnordLessThan32: + case IR::Opcode::FPOrdGreaterThan32: + case IR::Opcode::FPUnordGreaterThan32: + case IR::Opcode::FPOrdLessThanEqual32: + case IR::Opcode::FPUnordLessThanEqual32: + case IR::Opcode::FPOrdGreaterThanEqual32: + case IR::Opcode::FPUnordGreaterThanEqual32: + case IR::Opcode::ConvertF16F32: + case IR::Opcode::ConvertF64F32: + return true; + default: + return false; } +} + +struct Fp32RoundingUsage { u32 rz_count{}; + bool has_conflicting_rounding{}; +}; + +Fp32RoundingUsage CollectFp32RoundingUsage(const IR::Program& program) { + Fp32RoundingUsage usage{}; for (const IR::Block* const block : program.post_order_blocks) { for (const IR::Inst& inst : block->Instructions()) { - switch (inst.GetOpcode()) { - case IR::Opcode::FPAdd16: - case IR::Opcode::FPFma16: - case IR::Opcode::FPMul16: - case IR::Opcode::FPRoundEven16: - case IR::Opcode::FPFloor16: - case IR::Opcode::FPCeil16: - case IR::Opcode::FPTrunc16: - case IR::Opcode::FPAdd32: - case IR::Opcode::FPFma32: - case IR::Opcode::FPMul32: - case IR::Opcode::FPRoundEven32: - case IR::Opcode::FPFloor32: - case IR::Opcode::FPCeil32: - case IR::Opcode::FPTrunc32: - case IR::Opcode::FPOrdEqual32: - case IR::Opcode::FPUnordEqual32: - case IR::Opcode::FPOrdNotEqual32: - case IR::Opcode::FPUnordNotEqual32: - case IR::Opcode::FPOrdLessThan32: - 
case IR::Opcode::FPUnordLessThan32: - case IR::Opcode::FPOrdGreaterThan32: - case IR::Opcode::FPUnordGreaterThan32: - case IR::Opcode::FPOrdLessThanEqual32: - case IR::Opcode::FPUnordLessThanEqual32: - case IR::Opcode::FPOrdGreaterThanEqual32: - case IR::Opcode::FPUnordGreaterThanEqual32: - case IR::Opcode::ConvertF16F32: - case IR::Opcode::ConvertF64F32: - rz_count += inst.Flags().rounding == IR::FpRounding::RZ ? 1U : 0U; + if (!IsFp32RoundingRelevantOpcode(inst.GetOpcode())) { + continue; + } + switch (inst.Flags().rounding) { + case IR::FpRounding::RZ: + ++usage.rz_count; break; - default: + case IR::FpRounding::RN: + case IR::FpRounding::RM: + case IR::FpRounding::RP: + usage.has_conflicting_rounding = true; + break; + case IR::FpRounding::DontCare: break; } } } - if (rz_count == 0) { + return usage; +} + +void LogRzBackendSummary(const Profile& profile, const IR::Program& program, bool optimize) { + if (!Settings::values.renderer_debug) { + return; + } + const Fp32RoundingUsage usage{CollectFp32RoundingUsage(program)}; + if (usage.rz_count == 0) { return; } LOG_INFO(Shader_SPIRV, - "SPV_RZ {} start={:#010x} optimize={} support_float_controls={} separate_denorm_behavior={} broken_fp16_float_controls={} fp16_denorm={} fp32_denorm={} signed_nan16={} signed_nan32={} signed_nan64={} rz_inst_count={}", + "SPV_RZ {} start={:#010x} optimize={} support_float_controls={} separate_denorm_behavior={} separate_rounding_mode={} support_fp32_rounding_rtz={} broken_fp16_float_controls={} fp16_denorm={} fp32_denorm={} signed_nan16={} signed_nan32={} signed_nan64={} rz_inst_count={} mixed_fp32_rounding={}", StageName(program.stage), program.start_address, optimize, profile.support_float_controls, profile.support_separate_denorm_behavior, + profile.support_separate_rounding_mode, profile.support_fp32_rounding_rtz, profile.has_broken_fp16_float_controls, DenormModeName(program.info.uses_fp16_denorms_flush, program.info.uses_fp16_denorms_preserve), @@ -113,7 +134,34 @@ void 
LogRzBackendSummary(const Profile& profile, const IR::Program& program, boo program.info.uses_fp32_denorms_preserve), profile.support_fp16_signed_zero_nan_preserve, profile.support_fp32_signed_zero_nan_preserve, - profile.support_fp64_signed_zero_nan_preserve, rz_count); + profile.support_fp64_signed_zero_nan_preserve, usage.rz_count, + usage.has_conflicting_rounding); +} + +void SetupRoundingControl(const Profile& profile, const IR::Program& program, EmitContext& ctx, + Id main_func) { + const Fp32RoundingUsage usage{CollectFp32RoundingUsage(program)}; + if (usage.rz_count == 0) { + return; + } + if (usage.has_conflicting_rounding) { + if (Settings::values.renderer_debug) { + LOG_INFO(Shader_SPIRV, + "SPV_RZ {} start={:#010x} skipping_fp32_rtz_execution_mode reason=mixed_rounding", + StageName(program.stage), program.start_address); + } + return; + } + if (!profile.support_fp32_rounding_rtz) { + if (Settings::values.renderer_debug) { + LOG_INFO(Shader_SPIRV, + "SPV_RZ {} start={:#010x} skipping_fp32_rtz_execution_mode reason=unsupported_fp32_rtz", + StageName(program.stage), program.start_address); + } + return; + } + ctx.AddCapability(spv::Capability::RoundingModeRTZ); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::RoundingModeRTZ, 32U); } template @@ -606,6 +654,7 @@ std::vector EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_in if (profile.support_float_controls) { ctx.AddExtension("SPV_KHR_float_controls"); SetupDenormControl(profile, program, ctx, main); + SetupRoundingControl(profile, program, ctx, main); SetupSignedNanCapabilities(profile, program, ctx, main); } SetupCapabilities(profile, program.info, ctx); diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 90e46bb1ba..5a8993834c 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // 
SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -18,6 +21,7 @@ struct Profile { bool support_float_controls{}; bool support_separate_denorm_behavior{}; bool support_separate_rounding_mode{}; + bool support_fp32_rounding_rtz{}; bool support_fp16_denorm_preserve{}; bool support_fp32_denorm_preserve{}; bool support_fp16_denorm_flush{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 70a13d6a69..a1d01317ba 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -194,6 +194,7 @@ ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, .support_float_controls = false, .support_separate_denorm_behavior = false, .support_separate_rounding_mode = false, + .support_fp32_rounding_rtz = false, .support_fp16_denorm_preserve = false, .support_fp32_denorm_preserve = false, .support_fp16_denorm_flush = false, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index e838dc0b43..cf20e53175 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -379,6 +379,7 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, float_control.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL, .support_separate_rounding_mode = float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL, + .support_fp32_rounding_rtz = float_control.shaderRoundingModeRTZFloat32 != VK_FALSE, .support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE, .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE, .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE, From 
d06eb3f52fb48954549b0d7a72c93db5ff8855b4 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Sat, 7 Mar 2026 01:32:27 -0400 Subject: [PATCH 68/68] [vulkan] Changed info.color_output_type gate --- .../renderer_vulkan/vk_pipeline_cache.cpp | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index cf20e53175..85234838b8 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -260,19 +260,17 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span program info.active_color_outputs[0] = true; } - if (device.IsMoltenVK()) { - for (size_t i = 0; i < 8; ++i) { - const auto format = static_cast(key.state.color_formats[i]); - const auto pixel_format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(format); - if (VideoCore::Surface::IsPixelFormatInteger(pixel_format)) { - if (VideoCore::Surface::IsPixelFormatSignedInteger(pixel_format)) { - info.color_output_types[i] = Shader::AttributeType::SignedInt; - } else { - info.color_output_types[i] = Shader::AttributeType::UnsignedInt; - } + for (size_t i = 0; i < 8; ++i) { + const auto format = static_cast(key.state.color_formats[i]); + const auto pixel_format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(format); + if (VideoCore::Surface::IsPixelFormatInteger(pixel_format)) { + if (VideoCore::Surface::IsPixelFormatSignedInteger(pixel_format)) { + info.color_output_types[i] = Shader::AttributeType::SignedInt; } else { - info.color_output_types[i] = Shader::AttributeType::Float; + info.color_output_types[i] = Shader::AttributeType::UnsignedInt; } + } else { + info.color_output_types[i] = Shader::AttributeType::Float; } } break;