From 1199034fd1c6d4137085ab9fc321baccf37da2fb Mon Sep 17 00:00:00 2001 From: lizzie Date: Fri, 15 May 2026 10:40:05 +0000 Subject: [PATCH] simplify sgsr1 shader --- .../host_shaders/sgsr1_shader_mobile.frag | 157 ++++++++---------- .../sgsr1_shader_mobile_edge_direction.frag | 55 ++---- .../renderer_vulkan/present/sgsr.cpp | 54 +++--- src/video_core/renderer_vulkan/present/sgsr.h | 10 +- 4 files changed, 113 insertions(+), 163 deletions(-) diff --git a/src/video_core/host_shaders/sgsr1_shader_mobile.frag b/src/video_core/host_shaders/sgsr1_shader_mobile.frag index 31c94851da..dcac17ce62 100644 --- a/src/video_core/host_shaders/sgsr1_shader_mobile.frag +++ b/src/video_core/host_shaders/sgsr1_shader_mobile.frag @@ -3,107 +3,82 @@ #version 460 core -//precision mediump float; -//precision highp int; -#define highp -#define mediump +precision highp float; +precision highp int; // Operation modes: RGBA -> 1, RGBY -> 3, LERP -> 4 -#define OperationMode 1 -#define EdgeThreshold 8.0/255.0 -// #define EdgeSharpness 2.0 +#define OPERATION_MODE 1 +#define EDGE_THRESHOLD (8.0 / 255.0) layout( push_constant ) uniform constants { - highp vec4 ViewportInfo[1]; - highp vec2 ResizeFactor; - highp float EdgeSharpness; + vec4 viewport[1]; + vec2 resize_factor; + float edge_sharpness; }; -layout(set = 0, binding = 0) uniform mediump sampler2D ps0; -layout(location=0) in highp vec2 in_TEXCOORD0; -layout(location=0) out vec4 out_Target0; +layout(set = 0, binding = 0) uniform sampler2D sampler0; +layout(location=0) in vec2 texcoord; +layout(location=0) out vec4 frag_color; -float fastLanczos2(float x) { - float wA = x-4.0; - float wB = x*wA-wA; - wA *= wA; - return wB*wA; -} - -vec2 weightY(float dx, float dy,float c, float std) { - float x = ((dx*dx)+(dy* dy))* 0.55 + clamp(abs(c)*std, 0.0, 1.0); - float w = fastLanczos2(x); - return vec2(w, w * c); +vec4 weightY(vec4 dx, vec4 dy, vec4 std) { + vec4 x = ((dx * dx) + (dy * dy)) * 0.55f + std; + return (x - 1.f) * (x - 4.f) * 3.8125f; // 
approx. of (x - 1) * (x - 4)^3 } void main() { - vec4 color; - if(OperationMode == 1) - color.xyz = textureLod(ps0, in_TEXCOORD0.xy, 0.0).xyz; - else - color.xyzw = textureLod(ps0, in_TEXCOORD0.xy, 0.0).xyzw; + vec4 color = textureLod(sampler0, texcoord.xy, 0.0f); + // image coord + vec2 icoord = ((texcoord.xy * viewport[0].zw) + vec2(-0.5f, 0.5f)); + vec2 icoord_pixel = floor(icoord); + vec2 coord = icoord_pixel * viewport[0].xy; + vec2 pl = icoord - icoord_pixel; + vec4 left = textureGather(sampler0, coord, 1); + float edgeVote = abs(left.z - left.y) + abs(color[1] - left.y) + abs(color[1] - left.z); + if (edgeVote > EDGE_THRESHOLD) { + coord.x += viewport[0].x; + vec4 right = textureGather(sampler0, coord + vec2(viewport[0].x, 0.0f), 1); + vec4 upDown = vec4( + textureGather(sampler0, coord + vec2(0.0f, -viewport[0].y), 1).wz, + textureGather(sampler0, coord + vec2(0.0f, +viewport[0].y), 1).yx + ); + float mean = (left.y + left.z + right.x + right.w) * 0.25f; + left = left - vec4(mean); + right = right - vec4(mean); + upDown = upDown - vec4(mean); + color.w = color[1] - mean; - highp float xCenter = abs(in_TEXCOORD0.x + -0.5); - highp float yCenter = abs(in_TEXCOORD0.y + -0.5); + vec4 sum = abs(left) + abs(right) + abs(upDown); + float std = 2.181818f / (sum.x + sum.y + sum.z + sum.w); - //todo: config the SR region based on needs - //if ( OperationMode!=4 && xCenter*xCenter+yCenter*yCenter<=0.4 * 0.4) - if ( OperationMode!=4) { - highp vec2 imgCoord = ((in_TEXCOORD0.xy * ViewportInfo[0].zw) + vec2(-0.5, 0.5)); - highp vec2 imgCoordPixel = floor(imgCoord); - highp vec2 coord = imgCoordPixel * ViewportInfo[0].xy; - vec2 pl = imgCoord - imgCoordPixel; - vec4 left = textureGather(ps0, coord, OperationMode); - float edgeVote = abs(left.z - left.y) + abs(color[OperationMode] - left.y) + abs(color[OperationMode] - left.z) ; - if(edgeVote > EdgeThreshold) { - coord.x += ViewportInfo[0].x; - - highp vec2 IR_highp_vec2_0 = coord + vec2(ViewportInfo[0].x, 0.0); - vec4 
right = textureGather(ps0, IR_highp_vec2_0, OperationMode); - vec4 upDown; - highp vec2 IR_highp_vec2_1 = coord + vec2(0.0, -ViewportInfo[0].y); - upDown.xy = textureGather(ps0, IR_highp_vec2_1, OperationMode).wz; - highp vec2 IR_highp_vec2_2 = coord + vec2(0.0, ViewportInfo[0].y); - upDown.zw = textureGather(ps0, IR_highp_vec2_2, OperationMode).yx; - - float mean = (left.y+left.z+right.x+right.w)*0.25; - left = left - vec4(mean); - right = right - vec4(mean); - upDown = upDown - vec4(mean); - color.w =color[OperationMode] - mean; - - float sum = (((((abs(left.x)+abs(left.y))+abs(left.z))+abs(left.w))+(((abs(right.x)+abs(right.y))+abs(right.z))+abs(right.w)))+(((abs(upDown.x)+abs(upDown.y))+abs(upDown.z))+abs(upDown.w))); - float std = 2.181818/sum; - - vec2 aWY = weightY(pl.x, pl.y+1.0, upDown.x,std); - aWY += weightY(pl.x-1.0, pl.y+1.0, upDown.y,std); - aWY += weightY(pl.x-1.0, pl.y-2.0, upDown.z,std); - aWY += weightY(pl.x, pl.y-2.0, upDown.w,std); - aWY += weightY(pl.x+1.0, pl.y-1.0, left.x,std); - aWY += weightY(pl.x, pl.y-1.0, left.y,std); - aWY += weightY(pl.x, pl.y, left.z,std); - aWY += weightY(pl.x+1.0, pl.y, left.w,std); - aWY += weightY(pl.x-1.0, pl.y-1.0, right.x,std); - aWY += weightY(pl.x-2.0, pl.y-1.0, right.y,std); - aWY += weightY(pl.x-2.0, pl.y, right.z,std); - aWY += weightY(pl.x-1.0, pl.y, right.w,std); - - float finalY = aWY.y/aWY.x; - - float maxY = max(max(left.y,left.z),max(right.x,right.w)); - float minY = min(min(left.y,left.z),min(right.x,right.w)); - finalY = clamp(EdgeSharpness*finalY, minY, maxY); - - float deltaY = finalY -color.w; - - //smooth high contrast input - deltaY = clamp(deltaY, -23.0 / 255.0, 23.0 / 255.0); - - color.x = clamp((color.x+deltaY),0.0,1.0); - color.y = clamp((color.y+deltaY),0.0,1.0); - color.z = clamp((color.z+deltaY),0.0,1.0); - } + vec4 w_ud = weightY( + pl.xxxx + vec4(+0.0f, -1.0f, -1.0f, +0.0f), + pl.yyyy + vec4(+1.0f, +1.0f, -2.0f, -2.0f), + clamp(abs(upDown) * std, 0.0f, 1.0f) + ); + vec4 w_l = weightY( + 
pl.xxxx + vec4(+1.0f, +0.0f, +0.0f, +1.0f), + pl.yyyy + vec4(-1.0f, -1.0f, +0.0f, +0.0f), + clamp(abs(left) * std, 0.0f, 1.0f) + ); + vec4 w_r = weightY( + pl.xxxx + vec4(-1.0f, -2.0f, -2.0f, -1.0f), + pl.yyyy + vec4(-1.0f, -1.0f, +0.0f, +0.0f), + clamp(abs(right) * std, 0.0f, 1.0f) + ); + mat2x4 w = mat2x4(w_ud + w_l + w_r, w_ud * upDown + w_l * left + w_r * right); // [0] = weights, [1] = weights * samples (numerator must stay weighted) + // compute final y with bounds + vec2 yb = vec2( + min(min(left.y, left.z), min(right.x, right.w)), // min + max(max(left.y, left.z), max(right.x, right.w)) // max + ); + vec2 fvy = vec2( + w[0].x + w[0].y + w[0].z + w[0].w, + w[1].x + w[1].y + w[1].z + w[1].w + ); + float fy = clamp((fvy.y / fvy.x) * edge_sharpness, yb[0], yb[1]); + // Smooth high contrast input + float dy = clamp(fy - color.w, -23.0f / 255.0f, 23.0f / 255.0f); + color = clamp(color + dy, 0.0f, 1.0f); } - - color.w = 1.0; //assume alpha channel is not used - out_Target0.xyzw = color; + color.w = 1.0f; //assume alpha channel is not used + frag_color.xyzw = color; } \ No newline at end of file diff --git a/src/video_core/host_shaders/sgsr1_shader_mobile_edge_direction.frag b/src/video_core/host_shaders/sgsr1_shader_mobile_edge_direction.frag index 477b73fe63..10a8ff8e00 100644 --- a/src/video_core/host_shaders/sgsr1_shader_mobile_edge_direction.frag +++ b/src/video_core/host_shaders/sgsr1_shader_mobile_edge_direction.frag @@ -3,26 +3,20 @@ #version 460 core -//precision mediump float; -//precision highp int; -#define highp -#define mediump +//precision float; +//precision int; // Operation modes: RGBA -> 1, RGBY -> 3, LERP -> 4 #define OperationMode 1 -// If set, will use edge direction to improve visual quality -// Expect a minimal cost increase -#define UseEdgeDirection 1 #define EdgeThreshold 8.0/255.0 -// #define EdgeSharpness 2.0 layout( push_constant ) uniform constants { - highp vec4 ViewportInfo[1]; - highp vec2 ResizeFactor; - highp float EdgeSharpness; + vec4 ViewportInfo[1]; + vec2 ResizeFactor; + float EdgeSharpness; }; -layout(set = 0, binding = 0) uniform mediump sampler2D ps0; 
-layout(location=0) in highp vec2 in_TEXCOORD0; +layout(set = 0, binding = 0) uniform sampler2D ps0; +layout(location=0) in vec2 in_TEXCOORD0; layout(location=0) out vec4 out_Target0; float fastLanczos2(float x) { @@ -32,23 +26,11 @@ float fastLanczos2(float x) { return wB*wA; } -#if defined(UseEdgeDirection) -vec2 weightY(float dx, float dy, float c, vec3 data) -#else -vec2 weightY(float dx, float dy, float c, float data) -#endif -{ -#if defined(UseEdgeDirection) +vec2 weightY(float dx, float dy, float c, vec3 data) { float std = data.x; vec2 dir = data.yz; - float edgeDis = ((dx*dir.y)+(dy*dir.x)); float x = (((dx*dx)+(dy*dy))+((edgeDis*edgeDis)*((clamp(((c*c)*std),0.0,1.0)*0.7)+-1.0))); -#else - float std = data; - float x = ((dx*dx)+(dy* dy))* 0.55 + clamp(abs(c)*std, 0.0, 1.0); -#endif - float w = fastLanczos2(x); return vec2(w, w * c); } @@ -73,27 +55,22 @@ void main() { else color.xyzw = textureLod(ps0, in_TEXCOORD0.xy, 0.0).xyzw; - highp float xCenter = abs(in_TEXCOORD0.x + -0.5); - highp float yCenter = abs(in_TEXCOORD0.y + -0.5); - - //todo: config the SR region based on needs - //if ( OperationMode!=4 && xCenter*xCenter+yCenter*yCenter<=0.4 * 0.4) if ( OperationMode!=4) { - highp vec2 imgCoord = ((in_TEXCOORD0.xy*ViewportInfo[0].zw)+vec2(-0.5,0.5)); - highp vec2 imgCoordPixel = floor(imgCoord); - highp vec2 coord = (imgCoordPixel*ViewportInfo[0].xy); + vec2 imgCoord = ((in_TEXCOORD0.xy*ViewportInfo[0].zw)+vec2(-0.5,0.5)); + vec2 imgCoordPixel = floor(imgCoord); + vec2 coord = (imgCoordPixel*ViewportInfo[0].xy); vec2 pl = imgCoord - imgCoordPixel; vec4 left = textureGather(ps0, coord, OperationMode); float edgeVote = abs(left.z - left.y) + abs(color[OperationMode] - left.y) + abs(color[OperationMode] - left.z) ; if(edgeVote > EdgeThreshold) { coord.x += ViewportInfo[0].x; - highp vec2 IR_highp_vec2_0 = coord + vec2(ViewportInfo[0].x, 0.0); + vec2 IR_highp_vec2_0 = coord + vec2(ViewportInfo[0].x, 0.0); vec4 right = textureGather(ps0, IR_highp_vec2_0, 
OperationMode); vec4 upDown; - highp vec2 IR_highp_vec2_1 = coord + vec2(0.0, -ViewportInfo[0].y); + vec2 IR_highp_vec2_1 = coord + vec2(0.0, -ViewportInfo[0].y); upDown.xy = textureGather(ps0, IR_highp_vec2_1, OperationMode).wz; - highp vec2 IR_highp_vec2_2 = coord + vec2(0.0, ViewportInfo[0].y); + vec2 IR_highp_vec2_2 = coord + vec2(0.0, ViewportInfo[0].y); upDown.zw = textureGather(ps0, IR_highp_vec2_2, OperationMode).yx; float mean = (left.y+left.z+right.x+right.w)*0.25; @@ -106,11 +83,7 @@ void main() { float sumMean = 1.014185e+01/sum; float std = (sumMean*sumMean); -#if defined(UseEdgeDirection) vec3 data = vec3(std, edgeDirection(left, right)); -#else - float data = std; -#endif vec2 aWY = weightY(pl.x, pl.y+1.0, upDown.x,data); aWY += weightY(pl.x-1.0, pl.y+1.0, upDown.y,data); aWY += weightY(pl.x-1.0, pl.y-2.0, upDown.z,data); diff --git a/src/video_core/renderer_vulkan/present/sgsr.cpp b/src/video_core/renderer_vulkan/present/sgsr.cpp index 301a23c45f..3b1d017801 100644 --- a/src/video_core/renderer_vulkan/present/sgsr.cpp +++ b/src/video_core/renderer_vulkan/present/sgsr.cpp @@ -20,33 +20,35 @@ namespace Vulkan { using PushConstants = std::array; SGSR::SGSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count, VkExtent2D extent, bool edge_dir) - : m_device{device}, m_memory_allocator{memory_allocator} - , m_image_count{image_count}, m_extent{extent} + : m_device{device} + , m_memory_allocator{memory_allocator} + , m_image_count{image_count} + , m_extent{extent} , m_edge_dir{edge_dir} { // Not finished yet initializing at ctor time? 
m_dynamic_images.resize(m_image_count); for (auto& images : m_dynamic_images) { - images.images[0] = CreateWrappedImage(m_memory_allocator, m_extent, VK_FORMAT_B8G8R8A8_UNORM); - images.image_views[0] = CreateWrappedImageView(m_device, images.images[0], VK_FORMAT_B8G8R8A8_UNORM); + images.image = CreateWrappedImage(m_memory_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT); + images.image_view = CreateWrappedImageView(m_device, images.image, VK_FORMAT_R16G16B16A16_SFLOAT); } - m_renderpass = CreateWrappedRenderPass(m_device, VK_FORMAT_B8G8R8A8_UNORM); + m_renderpass = CreateWrappedRenderPass(m_device, VK_FORMAT_R16G16B16A16_SFLOAT); for (auto& images : m_dynamic_images) - images.framebuffers[0] = CreateWrappedFramebuffer(m_device, m_renderpass, images.image_views[0], m_extent); + images.framebuffer = CreateWrappedFramebuffer(m_device, m_renderpass, images.image_view, m_extent); m_sampler = CreateBilinearSampler(m_device); m_vert_shader = BuildShader(m_device, SGSR1_SHADER_VERT_SPV); - m_stage_shader[0] = m_edge_dir + m_stage_shader = m_edge_dir ? 
BuildShader(m_device, SGSR1_SHADER_MOBILE_EDGE_DIRECTION_FRAG_SPV) : BuildShader(m_device, SGSR1_SHADER_MOBILE_FRAG_SPV); // 2 descriptors, 2 descriptor sets per invocation - m_descriptor_pool = CreateWrappedDescriptorPool(m_device, SGSR_STAGE_COUNT * m_image_count, SGSR_STAGE_COUNT * m_image_count); + m_descriptor_pool = CreateWrappedDescriptorPool(m_device, m_image_count, m_image_count); m_descriptor_set_layout = CreateWrappedDescriptorSetLayout(m_device, {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER}); - std::vector layouts(SGSR_STAGE_COUNT, *m_descriptor_set_layout); + VkDescriptorSetLayout layout = *m_descriptor_set_layout; for (auto& images : m_dynamic_images) - images.descriptor_sets = CreateWrappedDescriptorSets(m_descriptor_pool, layouts); + images.descriptor_sets = CreateWrappedDescriptorSets(m_descriptor_pool, layout); const VkPushConstantRange range{ .stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, @@ -63,7 +65,7 @@ SGSR::SGSR(const Device& device, MemoryAllocator& memory_allocator, size_t image .pPushConstantRanges = &range, }; m_pipeline_layout = m_device.GetLogical().CreatePipelineLayout(ci); - m_stage_pipeline[0] = CreateWrappedPipeline(m_device, m_renderpass, m_pipeline_layout, std::tie(m_vert_shader, m_stage_shader[0])); + m_stage_pipeline = CreateWrappedPipeline(m_device, m_renderpass, m_pipeline_layout, std::tie(m_vert_shader, m_stage_shader)); } void SGSR::UpdateDescriptorSets(VkImageView image_view, size_t image_index) { @@ -79,7 +81,7 @@ void SGSR::UploadImages(Scheduler& scheduler) { if (!m_images_ready) { scheduler.Record([&](vk::CommandBuffer cmdbuf) { for (auto& image : m_dynamic_images) - ClearColorImage(cmdbuf, *image.images[0]); + ClearColorImage(cmdbuf, *image.image); }); scheduler.Finish(); m_images_ready = true; @@ -88,19 +90,19 @@ void SGSR::UploadImages(Scheduler& scheduler) { VkImageView SGSR::Draw(Scheduler& scheduler, size_t image_index, VkImage source_image, VkImageView source_image_view, VkExtent2D 
input_image_extent, const Common::Rectangle& crop_rect) { Images& images = m_dynamic_images[image_index]; - auto const stage0_image = *images.images[0]; - auto const stage0_descriptor_set = images.descriptor_sets[0]; - auto const stage0_framebuffer = *images.framebuffers[0]; - auto const stage0_pipeline = *m_stage_pipeline[0]; + auto const output_image = *images.image; + auto const descriptor_set = images.descriptor_sets[0]; + auto const framebuffer = *images.framebuffer; + auto const pipeline = *m_stage_pipeline; - VkPipelineLayout pipeline_layout = *m_pipeline_layout; + VkPipelineLayout layout = *m_pipeline_layout; VkRenderPass renderpass = *m_renderpass; VkExtent2D extent = m_extent; const f32 input_image_width = f32(input_image_extent.width); const f32 input_image_height = f32(input_image_extent.height); - const f32 viewport_width = (crop_rect.right - crop_rect.left) * input_image_width; - const f32 viewport_height = (crop_rect.bottom - crop_rect.top) * input_image_height; + const f32 viewport_width = f32(crop_rect.right - crop_rect.left) * input_image_width; + const f32 viewport_height = f32(crop_rect.bottom - crop_rect.top) * input_image_height; // expected [0, 2] const f32 sharpening = f32(Settings::values.fsr_sharpening_slider.GetValue()) / 100.0f; @@ -126,16 +128,16 @@ VkImageView SGSR::Draw(Scheduler& scheduler, size_t image_index, VkImage source_ scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([=](vk::CommandBuffer cmdbuf) { TransitionImageLayout(cmdbuf, source_image, VK_IMAGE_LAYOUT_GENERAL); - TransitionImageLayout(cmdbuf, stage0_image, VK_IMAGE_LAYOUT_GENERAL); - BeginRenderPass(cmdbuf, renderpass, stage0_framebuffer, extent); - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, stage0_pipeline); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, stage0_descriptor_set, {}); - cmdbuf.PushConstants(pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, viewport_con); + 
TransitionImageLayout(cmdbuf, output_image, VK_IMAGE_LAYOUT_GENERAL); + BeginRenderPass(cmdbuf, renderpass, framebuffer, extent); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, {}); + cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, viewport_con); cmdbuf.Draw(3, 1, 0, 0); cmdbuf.EndRenderPass(); - TransitionImageLayout(cmdbuf, stage0_image, VK_IMAGE_LAYOUT_GENERAL); + TransitionImageLayout(cmdbuf, output_image, VK_IMAGE_LAYOUT_GENERAL); }); - return *images.image_views[0]; + return *images.image_view; } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/present/sgsr.h b/src/video_core/renderer_vulkan/present/sgsr.h index d6362c361d..67b25be75d 100644 --- a/src/video_core/renderer_vulkan/present/sgsr.h +++ b/src/video_core/renderer_vulkan/present/sgsr.h @@ -31,16 +31,16 @@ private: vk::DescriptorSetLayout m_descriptor_set_layout; vk::PipelineLayout m_pipeline_layout; vk::ShaderModule m_vert_shader; - std::array m_stage_shader; - std::array m_stage_pipeline; + vk::ShaderModule m_stage_shader; + vk::Pipeline m_stage_pipeline; vk::RenderPass m_renderpass; vk::Sampler m_sampler; struct Images { vk::DescriptorSets descriptor_sets; - std::array images; - std::array image_views; - std::array framebuffers; + vk::Image image; + vk::ImageView image_view; + vk::Framebuffer framebuffer; }; std::vector m_dynamic_images; bool m_images_ready{};