mirror of
https://git.eden-emu.dev/eden-emu/eden
synced 2026-05-25 15:27:02 +02:00
[vk, host shaders, qcom] MSAA Handling by Native QCOM Shader Resolve
This commit is contained in:
parent
c52fda760a
commit
aea945b671
9 changed files with 122 additions and 25 deletions
|
|
@ -76,6 +76,7 @@ set(SHADER_FILES
|
||||||
vulkan_quad_indexed.comp
|
vulkan_quad_indexed.comp
|
||||||
vulkan_turbo_mode.comp
|
vulkan_turbo_mode.comp
|
||||||
vulkan_uint8.comp
|
vulkan_uint8.comp
|
||||||
|
vulkan_qcom_msaa_resolve.frag
|
||||||
convert_rgba8_to_bgra8.frag
|
convert_rgba8_to_bgra8.frag
|
||||||
convert_yuv420_to_rgb.comp
|
convert_yuv420_to_rgb.comp
|
||||||
convert_rgb_to_yuv420.comp
|
convert_rgb_to_yuv420.comp
|
||||||
|
|
|
||||||
39
src/video_core/host_shaders/vulkan_qcom_msaa_resolve.frag
Normal file
39
src/video_core/host_shaders/vulkan_qcom_msaa_resolve.frag
Normal file
|
|
@ -0,0 +1,39 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later

#version 450

// MSAA resolve fragment shader intended for VK_QCOM_render_pass_shader_resolve.
// Resolves a multisampled image to a single-sample color output by averaging its samples.
// The host side is expected to set VK_SUBPASS_DESCRIPTION_SHADER_RESOLVE_BIT_QCOM on the subpass.

// The multisampled source is bound as a combined image sampler rather than an input attachment,
// which allows resolving MSAA textures produced by previous rendering.
layout(set = 0, binding = 0) uniform sampler2DMS msaa_texture;

layout(location = 0) out vec4 color_output;

// Not read by this stage; declared so the block matches the layout the host code binds.
// NOTE(review): presumably consumed by the full-screen vertex stage - confirm.
layout(push_constant) uniform PushConstants {
    vec2 tex_scale;
    vec2 tex_offset;
} push_constants;

// Custom MSAA resolve using a box filter (plain average of every sample of the texel).
// The sample count is queried from the bound image instead of being assumed to be 4, so
// 2x sources are not fetched out of range and 8x sources are averaged completely.
void main() {
    // Keep the fetch inside the source image even if the target is larger than the source.
    ivec2 max_coord = textureSize(msaa_texture) - ivec2(1);
    ivec2 coord = clamp(ivec2(gl_FragCoord.xy), ivec2(0), max_coord);

    // textureSamples() is core in GLSL 4.50 and returns the sample count of the bound image.
    int sample_count = textureSamples(msaa_texture);

    vec4 accumulated_color = vec4(0.0);
    for (int i = 0; i < sample_count; ++i) {
        accumulated_color += texelFetch(msaa_texture, coord, i);
    }

    color_output = accumulated_color / float(sample_count);
}
|
||||||
|
|
@ -40,6 +40,7 @@
|
||||||
#include "video_core/host_shaders/convert_rgba16f_to_rgba8_frag_spv.h"
|
#include "video_core/host_shaders/convert_rgba16f_to_rgba8_frag_spv.h"
|
||||||
#include "video_core/host_shaders/dither_temporal_frag_spv.h"
|
#include "video_core/host_shaders/dither_temporal_frag_spv.h"
|
||||||
#include "video_core/host_shaders/dynamic_resolution_scale_comp_spv.h"
|
#include "video_core/host_shaders/dynamic_resolution_scale_comp_spv.h"
|
||||||
|
#include "video_core/host_shaders/vulkan_qcom_msaa_resolve_frag_spv.h"
|
||||||
|
|
||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
|
|
||||||
|
|
@ -545,6 +546,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_,
|
||||||
convert_rgba16f_to_rgba8_frag(BuildShader(device, CONVERT_RGBA16F_TO_RGBA8_FRAG_SPV)),
|
convert_rgba16f_to_rgba8_frag(BuildShader(device, CONVERT_RGBA16F_TO_RGBA8_FRAG_SPV)),
|
||||||
dither_temporal_frag(BuildShader(device, DITHER_TEMPORAL_FRAG_SPV)),
|
dither_temporal_frag(BuildShader(device, DITHER_TEMPORAL_FRAG_SPV)),
|
||||||
dynamic_resolution_scale_comp(BuildShader(device, DYNAMIC_RESOLUTION_SCALE_COMP_SPV)),
|
dynamic_resolution_scale_comp(BuildShader(device, DYNAMIC_RESOLUTION_SCALE_COMP_SPV)),
|
||||||
|
qcom_msaa_resolve_frag(BuildShader(device, VULKAN_QCOM_MSAA_RESOLVE_FRAG_SPV)),
|
||||||
linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_LINEAR>)),
|
linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_LINEAR>)),
|
||||||
nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_NEAREST>)) {}
|
nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_NEAREST>)) {}
|
||||||
|
|
||||||
|
|
@ -1240,4 +1242,30 @@ void BlitImageHelper::ApplyDynamicResolutionScale(const Framebuffer* dst_framebu
|
||||||
Convert(*dynamic_resolution_scale_pipeline, dst_framebuffer, src_image_view);
|
Convert(*dynamic_resolution_scale_pipeline, dst_framebuffer, src_image_view);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void BlitImageHelper::ResolveMSAAQcom(const Framebuffer* dst_framebuffer,
|
||||||
|
const ImageView& src_image_view) {
|
||||||
|
// VK_QCOM_render_pass_shader_resolve implementation
|
||||||
|
// This must be used within a render pass with VK_SUBPASS_DESCRIPTION_SHADER_RESOLVE_BIT_QCOM
|
||||||
|
ConvertPipeline(qcom_msaa_resolve_pipeline,
|
||||||
|
dst_framebuffer->RenderPass(),
|
||||||
|
false);
|
||||||
|
|
||||||
|
RecordShaderReadBarrier(scheduler, src_image_view);
|
||||||
|
scheduler.RequestRenderpass(dst_framebuffer);
|
||||||
|
|
||||||
|
const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D);
|
||||||
|
const VkPipelineLayout layout = *one_texture_pipeline_layout;
|
||||||
|
const VkPipeline pipeline = *qcom_msaa_resolve_pipeline;
|
||||||
|
|
||||||
|
scheduler.Record([this, src_view, layout, pipeline](vk::CommandBuffer cmdbuf) {
|
||||||
|
const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
|
||||||
|
UpdateOneTextureDescriptorSet(device, descriptor_set, *nearest_sampler, src_view);
|
||||||
|
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
|
||||||
|
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, nullptr);
|
||||||
|
cmdbuf.Draw(3, 1, 0, 0);
|
||||||
|
});
|
||||||
|
|
||||||
|
scheduler.InvalidateState();
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
|
|
||||||
|
|
@ -95,6 +95,8 @@ public:
|
||||||
void ConvertRGBA16FtoRGBA8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
|
void ConvertRGBA16FtoRGBA8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
|
||||||
void ApplyDitherTemporal(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
|
void ApplyDitherTemporal(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
|
||||||
void ApplyDynamicResolutionScale(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
|
void ApplyDynamicResolutionScale(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
|
||||||
|
|
||||||
|
void ResolveMSAAQcom(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
|
void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
|
||||||
|
|
@ -159,6 +161,7 @@ private:
|
||||||
vk::ShaderModule convert_rgba16f_to_rgba8_frag;
|
vk::ShaderModule convert_rgba16f_to_rgba8_frag;
|
||||||
vk::ShaderModule dither_temporal_frag;
|
vk::ShaderModule dither_temporal_frag;
|
||||||
vk::ShaderModule dynamic_resolution_scale_comp;
|
vk::ShaderModule dynamic_resolution_scale_comp;
|
||||||
|
vk::ShaderModule qcom_msaa_resolve_frag;
|
||||||
vk::Sampler linear_sampler;
|
vk::Sampler linear_sampler;
|
||||||
vk::Sampler nearest_sampler;
|
vk::Sampler nearest_sampler;
|
||||||
|
|
||||||
|
|
@ -188,6 +191,7 @@ private:
|
||||||
vk::Pipeline convert_rgba16f_to_rgba8_pipeline;
|
vk::Pipeline convert_rgba16f_to_rgba8_pipeline;
|
||||||
vk::Pipeline dither_temporal_pipeline;
|
vk::Pipeline dither_temporal_pipeline;
|
||||||
vk::Pipeline dynamic_resolution_scale_pipeline;
|
vk::Pipeline dynamic_resolution_scale_pipeline;
|
||||||
|
vk::Pipeline qcom_msaa_resolve_pipeline;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
|
|
||||||
|
|
@ -156,8 +156,15 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
|
||||||
descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples,
|
descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples,
|
||||||
key.tbdr_will_clear, key.tbdr_discard_after));
|
key.tbdr_will_clear, key.tbdr_discard_after));
|
||||||
}
|
}
|
||||||
|
VkSubpassDescriptionFlags subpass_flags = 0;
|
||||||
|
if (key.qcom_shader_resolve) {
|
||||||
|
// VK_QCOM_render_pass_shader_resolve: enables custom shader resolve in fragment shader
|
||||||
|
// This must be the last subpass in the dependency chain
|
||||||
|
subpass_flags |= 0x00000004; // VK_SUBPASS_DESCRIPTION_SHADER_RESOLVE_BIT_QCOM
|
||||||
|
}
|
||||||
|
|
||||||
const VkSubpassDescription subpass{
|
const VkSubpassDescription subpass{
|
||||||
.flags = 0,
|
.flags = subpass_flags,
|
||||||
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
|
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
|
||||||
.inputAttachmentCount = 0,
|
.inputAttachmentCount = 0,
|
||||||
.pInputAttachments = nullptr,
|
.pInputAttachments = nullptr,
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,9 @@ struct RenderPassKey {
|
||||||
// These flags indicate the expected usage pattern to optimize load/store operations
|
// These flags indicate the expected usage pattern to optimize load/store operations
|
||||||
bool tbdr_will_clear{false}; // Attachment will be cleared with vkCmdClearAttachments
|
bool tbdr_will_clear{false}; // Attachment will be cleared with vkCmdClearAttachments
|
||||||
bool tbdr_discard_after{false}; // Attachment won't be read after render pass
|
bool tbdr_discard_after{false}; // Attachment won't be read after render pass
|
||||||
|
|
||||||
|
// VK_QCOM_render_pass_shader_resolve support
|
||||||
|
bool qcom_shader_resolve{false}; // Use shader resolve instead of fixed-function (last subpass)
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
|
|
||||||
|
|
@ -160,43 +160,39 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Emergency fallback for MSAA with HDR formats: degrade to non-MSAA if driver doesn't support
|
/// Emergency fallback: degrade MSAA to non-MSAA for HDR formats when no resolve support exists
|
||||||
/// shaderStorageImageMultisample (required for msaa_copy_pass)
|
|
||||||
[[nodiscard]] ImageInfo AdjustMSAAForHDRFormats(const Device& device, ImageInfo info) {
|
[[nodiscard]] ImageInfo AdjustMSAAForHDRFormats(const Device& device, ImageInfo info) {
|
||||||
// Only apply emergency fallback if MSAA is requested
|
|
||||||
if (info.num_samples <= 1) {
|
if (info.num_samples <= 1) {
|
||||||
return info;
|
return info;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if this is an HDR format that commonly fails with MSAA
|
|
||||||
const auto vk_format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal,
|
const auto vk_format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal,
|
||||||
false, info.format).format;
|
false, info.format).format;
|
||||||
const bool is_hdr_format = vk_format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 ||
|
const bool is_hdr_format = vk_format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 ||
|
||||||
vk_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32;
|
vk_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32;
|
||||||
|
|
||||||
if (!is_hdr_format) {
|
if (!is_hdr_format) {
|
||||||
return info; // Not an HDR format, no adjustment needed
|
|
||||||
}
|
|
||||||
|
|
||||||
// If driver doesn't support shader storage image multisample, MSAACopyPass will fail
|
|
||||||
// Emergency fallback: degrade to non-MSAA (1 sample) to avoid texture corruption
|
|
||||||
if (!device.IsStorageImageMultisampleSupported()) {
|
|
||||||
LOG_ERROR(Render_Vulkan,
|
|
||||||
"EMERGENCY MSAA FALLBACK: Driver doesn't support shaderStorageImageMultisample. "
|
|
||||||
"Degrading HDR format {} from {}x MSAA to 1x (non-MSAA) to prevent texture corruption. "
|
|
||||||
"This will cause visual quality loss but prevents black textures.",
|
|
||||||
vk_format, info.num_samples);
|
|
||||||
|
|
||||||
// Degrade to non-MSAA
|
|
||||||
// NOTE: We only change num_samples, NOT dimensions. The ImageInfo dimensions are already
|
|
||||||
// in "logical" space (full resolution), and MakeImageCreateInfo will handle the conversion
|
|
||||||
// to physical GPU dimensions based on num_samples automatically.
|
|
||||||
info.num_samples = 1;
|
|
||||||
|
|
||||||
return info;
|
return info;
|
||||||
}
|
}
|
||||||
|
|
||||||
return info; // Driver supports MSAA storage images, no adjustment needed
|
// Qualcomm: VK_QCOM_render_pass_shader_resolve handles HDR+MSAA
|
||||||
|
if (device.GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY) {
|
||||||
|
if (device.IsQcomRenderPassShaderResolveSupported()) {
|
||||||
|
return info;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Other vendors: shaderStorageImageMultisample handles HDR+MSAA
|
||||||
|
if (device.IsStorageImageMultisampleSupported()) {
|
||||||
|
return info;
|
||||||
|
}
|
||||||
|
|
||||||
|
// No suitable resolve method - degrade to non-MSAA
|
||||||
|
LOG_WARNING(Render_Vulkan, "HDR format {} with MSAA not supported, degrading to 1x samples",
|
||||||
|
vk_format);
|
||||||
|
info.num_samples = 1;
|
||||||
|
|
||||||
|
return info;
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] vk::Image MakeImage(const Device& device, const MemoryAllocator& allocator,
|
[[nodiscard]] vk::Image MakeImage(const Device& device, const MemoryAllocator& allocator,
|
||||||
|
|
@ -896,6 +892,9 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, Scheduler& sched
|
||||||
astc_decoder_pass.emplace(device, scheduler, descriptor_pool, staging_buffer_pool,
|
astc_decoder_pass.emplace(device, scheduler, descriptor_pool, staging_buffer_pool,
|
||||||
compute_pass_descriptor_queue, memory_allocator);
|
compute_pass_descriptor_queue, memory_allocator);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// MSAA copy support via compute shader (only for non-Qualcomm with shaderStorageImageMultisample)
|
||||||
|
// Qualcomm uses VK_QCOM_render_pass_shader_resolve (fragment shader in render pass)
|
||||||
if (device.IsStorageImageMultisampleSupported()) {
|
if (device.IsStorageImageMultisampleSupported()) {
|
||||||
msaa_copy_pass = std::make_unique<MSAACopyPass>(
|
msaa_copy_pass = std::make_unique<MSAACopyPass>(
|
||||||
device, scheduler, descriptor_pool, staging_buffer_pool, compute_pass_descriptor_queue);
|
device, scheduler, descriptor_pool, staging_buffer_pool, compute_pass_descriptor_queue);
|
||||||
|
|
|
||||||
|
|
@ -549,7 +549,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
||||||
LOG_INFO(Render_Vulkan, "Shader Float Controls FORCE ENABLED by user (Eden Veil/Extensions)");
|
LOG_INFO(Render_Vulkan, "Shader Float Controls FORCE ENABLED by user (Eden Veil/Extensions)");
|
||||||
|
|
||||||
// Log driver capabilities
|
// Log driver capabilities
|
||||||
const auto& fc = float_control;
|
const auto& fc = properties.float_controls;
|
||||||
LOG_INFO(Render_Vulkan, "Driver Float Controls Capabilities:");
|
LOG_INFO(Render_Vulkan, "Driver Float Controls Capabilities:");
|
||||||
LOG_INFO(Render_Vulkan, " - Denorm Flush FP32: {}", fc.shaderDenormFlushToZeroFloat32 ? "YES" : "NO");
|
LOG_INFO(Render_Vulkan, " - Denorm Flush FP32: {}", fc.shaderDenormFlushToZeroFloat32 ? "YES" : "NO");
|
||||||
LOG_INFO(Render_Vulkan, " - Denorm Preserve FP32: {}", fc.shaderDenormPreserveFloat32 ? "YES" : "NO");
|
LOG_INFO(Render_Vulkan, " - Denorm Preserve FP32: {}", fc.shaderDenormPreserveFloat32 ? "YES" : "NO");
|
||||||
|
|
|
||||||
|
|
@ -95,6 +95,7 @@ VK_DEFINE_HANDLE(VmaAllocator)
|
||||||
EXTENSION(EXT, DESCRIPTOR_INDEXING, descriptor_indexing) \
|
EXTENSION(EXT, DESCRIPTOR_INDEXING, descriptor_indexing) \
|
||||||
EXTENSION(EXT, FILTER_CUBIC, filter_cubic) \
|
EXTENSION(EXT, FILTER_CUBIC, filter_cubic) \
|
||||||
EXTENSION(QCOM, FILTER_CUBIC_WEIGHTS, filter_cubic_weights) \
|
EXTENSION(QCOM, FILTER_CUBIC_WEIGHTS, filter_cubic_weights) \
|
||||||
|
EXTENSION(QCOM, RENDER_PASS_SHADER_RESOLVE, render_pass_shader_resolve) \
|
||||||
EXTENSION(KHR, MAINTENANCE_1, maintenance1) \
|
EXTENSION(KHR, MAINTENANCE_1, maintenance1) \
|
||||||
EXTENSION(KHR, MAINTENANCE_2, maintenance2) \
|
EXTENSION(KHR, MAINTENANCE_2, maintenance2) \
|
||||||
EXTENSION(KHR, MAINTENANCE_3, maintenance3) \
|
EXTENSION(KHR, MAINTENANCE_3, maintenance3) \
|
||||||
|
|
@ -582,6 +583,21 @@ public:
|
||||||
return extensions.filter_cubic_weights;
|
return extensions.filter_cubic_weights;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true if the device supports VK_QCOM_render_pass_shader_resolve
|
||||||
|
bool IsQcomRenderPassShaderResolveSupported() const {
|
||||||
|
return extensions.render_pass_shader_resolve;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true if MSAA copy operations are supported via compute shader (upload/download)
|
||||||
|
/// Qualcomm uses render pass shader resolve instead, so this returns false for Qualcomm
|
||||||
|
bool CanUploadMSAA() const {
|
||||||
|
return IsStorageImageMultisampleSupported();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool CanDownloadMSAA() const {
|
||||||
|
return CanUploadMSAA();
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns true if the device supports VK_EXT_line_rasterization.
|
/// Returns true if the device supports VK_EXT_line_rasterization.
|
||||||
bool IsExtLineRasterizationSupported() const {
|
bool IsExtLineRasterizationSupported() const {
|
||||||
return extensions.line_rasterization;
|
return extensions.line_rasterization;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue