simplify sgsr1 shader

This commit is contained in:
lizzie 2026-05-15 10:40:05 +00:00
parent 42149e1fd9
commit 1199034fd1
4 changed files with 113 additions and 163 deletions

View file

@ -3,107 +3,82 @@
#version 460 core
//precision mediump float;
//precision highp int;
#define highp
#define mediump
precision highp float;
precision highp int;
// Operation modes: RGBA -> 1, RGBY -> 3, LERP -> 4
#define OperationMode 1
#define EdgeThreshold 8.0/255.0
// #define EdgeSharpness 2.0
#define OPERATION_MODE 1
#define EDGE_THRESHOLD (8.0 / 255.0)
layout( push_constant ) uniform constants {
highp vec4 ViewportInfo[1];
highp vec2 ResizeFactor;
highp float EdgeSharpness;
vec4 viewport[1];
vec2 resize_factor;
float edge_sharpness;
};
layout(set = 0, binding = 0) uniform mediump sampler2D ps0;
layout(location=0) in highp vec2 in_TEXCOORD0;
layout(location=0) out vec4 out_Target0;
layout(set = 0, binding = 0) uniform sampler2D sampler0;
layout(location=0) in vec2 texcoord;
layout(location=0) out vec4 frag_color;
// Polynomial weight evaluated by the vec2-returning weightY() that follows.
// Algebraically the result is (x - 1) * (x - 4)^3:
//   wA = x - 4
//   wB = x*wA - wA = (x - 1) * (x - 4)
//   wA is then squared, and the product wB * wA is returned.
float fastLanczos2(float x) {
float wA = x-4.0;
float wB = x*wA-wA;
wA *= wA; // wA now holds (x - 4)^2
return wB*wA;
}
vec2 weightY(float dx, float dy,float c, float std) {
float x = ((dx*dx)+(dy* dy))* 0.55 + clamp(abs(c)*std, 0.0, 1.0);
float w = fastLanczos2(x);
return vec2(w, w * c);
// Vectorised weight: evaluates four samples per call, component-wise.
//   dx, dy : per-sample offsets; combined as (dx^2 + dy^2) * 0.55
//   std    : per-sample term added to the scaled squared distance; the call
//            sites in main() pass clamp(abs(sample) * std, 0, 1)
// Returns the four weights only.
// The constant 3.8125 stands in for the (x - 4)^2 factor of the exact
// polynomial (x - 1) * (x - 4)^3.
// NOTE(review): the vec2 weightY() variant returns (w, w * c), i.e. it also
// carries the weight multiplied by the sample value. This variant does not,
// so the caller has to apply the weights to the samples itself. In main() the
// second column of `w` is the plain sum `upDown + left + right` -- confirm
// that dropping the per-sample weighting is intended.
vec4 weightY(vec4 dx, vec4 dy, vec4 std) {
vec4 x = ((dx * dx) + (dy * dy)) * 0.55f + std;
return (x - 1.f) * (x - 4.f) * 3.8125f; // approx. of (x - 1) * (x - 4)^3
}
// Fragment entry point. Samples the input texture at the interpolated
// texture coordinate, computes an "edge vote" from gathered texels of one
// channel and, when the vote exceeds the threshold, adds a clamped delta
// derived from 12 gathered neighbours to the colour. Alpha is forced to 1.
// NOTE(review): this span interleaves two spellings of the same logic
// (ps0 / in_TEXCOORD0 / ViewportInfo / out_Target0 versus
// sampler0 / texcoord / viewport / frag_color). It reads like a rendered diff
// whose +/- markers were stripped -- confirm before treating it as compilable
// source.
void main() {
vec4 color;
if(OperationMode == 1)
color.xyz = textureLod(ps0, in_TEXCOORD0.xy, 0.0).xyz;
else
color.xyzw = textureLod(ps0, in_TEXCOORD0.xy, 0.0).xyzw;
vec4 color = textureLod(sampler0, texcoord.xy, 0.0f);
// image coord
vec2 icoord = ((texcoord.xy * viewport[0].zw) + vec2(-0.5f, 0.5f));
vec2 icoord_pixel = floor(icoord);
vec2 coord = icoord_pixel * viewport[0].xy;
// Fractional position inside the texel cell; feeds the weightY() offsets.
vec2 pl = icoord - icoord_pixel;
// Gather component 1 of the 2x2 footprint at `coord`.
vec4 left = textureGather(sampler0, coord, 1);
// Sum of absolute differences between two gathered texels and the centre sample.
float edgeVote = abs(left.z - left.y) + abs(color[1] - left.y) + abs(color[1] - left.z);
if (edgeVote > EDGE_THRESHOLD) {
// coord.x is advanced by one viewport[0].x step here and the gather below
// adds another, so `right` is fetched two steps from the `left` footprint.
coord.x += viewport[0].x;
vec4 right = textureGather(sampler0, coord + vec2(viewport[0].x, 0.0f), 1);
vec4 upDown = vec4(
textureGather(sampler0, coord + vec2(0.0f, -viewport[0].y), 1).wz,
textureGather(sampler0, coord + vec2(0.0f, +viewport[0].y), 1).yx
);
// Mean of four of the gathered texels; all samples are re-centred on it.
float mean = (left.y + left.z + right.x + right.w) * 0.25f;
left = left - vec4(mean);
right = right - vec4(mean);
upDown = upDown - vec4(mean);
// color.w is reused as scratch for the re-centred centre sample; it is
// overwritten with 1.0 before the output is written.
color.w = color[1] - mean;
highp float xCenter = abs(in_TEXCOORD0.x + -0.5);
highp float yCenter = abs(in_TEXCOORD0.y + -0.5);
vec4 sum = abs(left) + abs(right) + abs(upDown);
// Scale factor inversely proportional to the summed absolute deviation.
float std = 2.181818f / (sum.x + sum.y + sum.z + sum.w);
//todo: config the SR region based on needs
//if ( OperationMode!=4 && xCenter*xCenter+yCenter*yCenter<=0.4 * 0.4)
if ( OperationMode!=4) {
highp vec2 imgCoord = ((in_TEXCOORD0.xy * ViewportInfo[0].zw) + vec2(-0.5, 0.5));
highp vec2 imgCoordPixel = floor(imgCoord);
highp vec2 coord = imgCoordPixel * ViewportInfo[0].xy;
vec2 pl = imgCoord - imgCoordPixel;
vec4 left = textureGather(ps0, coord, OperationMode);
float edgeVote = abs(left.z - left.y) + abs(color[OperationMode] - left.y) + abs(color[OperationMode] - left.z) ;
if(edgeVote > EdgeThreshold) {
coord.x += ViewportInfo[0].x;
highp vec2 IR_highp_vec2_0 = coord + vec2(ViewportInfo[0].x, 0.0);
vec4 right = textureGather(ps0, IR_highp_vec2_0, OperationMode);
vec4 upDown;
highp vec2 IR_highp_vec2_1 = coord + vec2(0.0, -ViewportInfo[0].y);
upDown.xy = textureGather(ps0, IR_highp_vec2_1, OperationMode).wz;
highp vec2 IR_highp_vec2_2 = coord + vec2(0.0, ViewportInfo[0].y);
upDown.zw = textureGather(ps0, IR_highp_vec2_2, OperationMode).yx;
float mean = (left.y+left.z+right.x+right.w)*0.25;
left = left - vec4(mean);
right = right - vec4(mean);
upDown = upDown - vec4(mean);
color.w =color[OperationMode] - mean;
float sum = (((((abs(left.x)+abs(left.y))+abs(left.z))+abs(left.w))+(((abs(right.x)+abs(right.y))+abs(right.z))+abs(right.w)))+(((abs(upDown.x)+abs(upDown.y))+abs(upDown.z))+abs(upDown.w)));
float std = 2.181818/sum;
// Scalar path: each call contributes (w, w * c), so aWY.x accumulates the
// weights and aWY.y accumulates the weighted samples.
vec2 aWY = weightY(pl.x, pl.y+1.0, upDown.x,std);
aWY += weightY(pl.x-1.0, pl.y+1.0, upDown.y,std);
aWY += weightY(pl.x-1.0, pl.y-2.0, upDown.z,std);
aWY += weightY(pl.x, pl.y-2.0, upDown.w,std);
aWY += weightY(pl.x+1.0, pl.y-1.0, left.x,std);
aWY += weightY(pl.x, pl.y-1.0, left.y,std);
aWY += weightY(pl.x, pl.y, left.z,std);
aWY += weightY(pl.x+1.0, pl.y, left.w,std);
aWY += weightY(pl.x-1.0, pl.y-1.0, right.x,std);
aWY += weightY(pl.x-2.0, pl.y-1.0, right.y,std);
aWY += weightY(pl.x-2.0, pl.y, right.z,std);
aWY += weightY(pl.x-1.0, pl.y, right.w,std);
// Weighted average of the re-centred samples.
float finalY = aWY.y/aWY.x;
float maxY = max(max(left.y,left.z),max(right.x,right.w));
float minY = min(min(left.y,left.z),min(right.x,right.w));
finalY = clamp(EdgeSharpness*finalY, minY, maxY);
float deltaY = finalY -color.w;
//smooth high contrast input
deltaY = clamp(deltaY, -23.0 / 255.0, 23.0 / 255.0);
color.x = clamp((color.x+deltaY),0.0,1.0);
color.y = clamp((color.y+deltaY),0.0,1.0);
color.z = clamp((color.z+deltaY),0.0,1.0);
}
// Vectorised path: column 0 is the component-wise sum of the three weightY()
// results, column 1 is the plain sum of the re-centred samples.
// NOTE(review): the scalar path above accumulates (w, w * c) per sample, so
// its ratio is a weighted average. Here the numerator is never multiplied by
// the weights, so fvy.y / fvy.x is sum(samples) / sum(weights) -- confirm
// this difference is intended.
mat2x4 w = mat2x4(
weightY(
pl.xxxx + vec4(+0.0f, -1.0f, -1.0f, +0.0f),
pl.yyyy + vec4(+1.0f, +1.0f, -2.0f, -2.0f),
clamp(abs(upDown) * std, 0.0f, 1.0f)
) + weightY(
pl.xxxx + vec4(+1.0f, +0.0f, +0.0f, +1.0f),
pl.yyyy + vec4(-1.0f, -1.0f, +0.0f, +0.0f),
clamp(abs(left) * std, 0.0f, 1.0f)
) + weightY(
pl.xxxx + vec4(-1.0f, -2.0f, -2.0f, -1.0f),
pl.yyyy + vec4(-1.0f, -1.0f, +0.0f, +0.0f),
clamp(abs(right) * std, 0.0f, 1.0f)
),
upDown + left + right
);
// compute final y with bounds
vec2 yb = vec2(
min(min(left.y, left.z), min(right.x, right.w)), // min
max(max(left.y, left.z), max(right.x, right.w)) // max
);
// Horizontal sums of each column of `w`: x = total weight, y = total sample.
vec2 fvy = vec2(
w[0].x + w[0].y + w[0].z + w[0].w,
w[1].x + w[1].y + w[1].z + w[1].w
);
float fy = clamp((fvy.y / fvy.x) * edge_sharpness, yb[0], yb[1]);
// Smooth high contrast input
float dy = clamp(fy - color.w, -23.0f / 255.0f, 23.0f / 255.0f);
// dy is added to all four components; .w is reset to 1.0 below.
color = clamp(color + dy, 0.0f, 1.0f);
}
color.w = 1.0; //assume alpha channel is not used
out_Target0.xyzw = color;
color.w = 1.0f; //assume alpha channel is not used
frag_color.xyzw = color;
}

View file

@ -3,26 +3,20 @@
#version 460 core
//precision mediump float;
//precision highp int;
#define highp
#define mediump
//precision float;
//precision int;
// Operation modes: RGBA -> 1, RGBY -> 3, LERP -> 4
#define OperationMode 1
// If set, will use edge direction to improve visual quality
// Expect a minimal cost increase
#define UseEdgeDirection 1
#define EdgeThreshold 8.0/255.0
// #define EdgeSharpness 2.0
layout( push_constant ) uniform constants {
highp vec4 ViewportInfo[1];
highp vec2 ResizeFactor;
highp float EdgeSharpness;
vec4 ViewportInfo[1];
vec2 ResizeFactor;
float EdgeSharpness;
};
layout(set = 0, binding = 0) uniform mediump sampler2D ps0;
layout(location=0) in highp vec2 in_TEXCOORD0;
layout(set = 0, binding = 0) uniform sampler2D ps0;
layout(location=0) in vec2 in_TEXCOORD0;
layout(location=0) out vec4 out_Target0;
float fastLanczos2(float x) {
@ -32,23 +26,11 @@ float fastLanczos2(float x) {
return wB*wA;
}
#if defined(UseEdgeDirection)
vec2 weightY(float dx, float dy, float c, vec3 data)
#else
vec2 weightY(float dx, float dy, float c, float data)
#endif
{
#if defined(UseEdgeDirection)
vec2 weightY(float dx, float dy, float c, vec3 data) {
float std = data.x;
vec2 dir = data.yz;
float edgeDis = ((dx*dir.y)+(dy*dir.x));
float x = (((dx*dx)+(dy*dy))+((edgeDis*edgeDis)*((clamp(((c*c)*std),0.0,1.0)*0.7)+-1.0)));
#else
float std = data;
float x = ((dx*dx)+(dy* dy))* 0.55 + clamp(abs(c)*std, 0.0, 1.0);
#endif
float w = fastLanczos2(x);
return vec2(w, w * c);
}
@ -73,27 +55,22 @@ void main() {
else
color.xyzw = textureLod(ps0, in_TEXCOORD0.xy, 0.0).xyzw;
highp float xCenter = abs(in_TEXCOORD0.x + -0.5);
highp float yCenter = abs(in_TEXCOORD0.y + -0.5);
//todo: config the SR region based on needs
//if ( OperationMode!=4 && xCenter*xCenter+yCenter*yCenter<=0.4 * 0.4)
if ( OperationMode!=4) {
highp vec2 imgCoord = ((in_TEXCOORD0.xy*ViewportInfo[0].zw)+vec2(-0.5,0.5));
highp vec2 imgCoordPixel = floor(imgCoord);
highp vec2 coord = (imgCoordPixel*ViewportInfo[0].xy);
vec2 imgCoord = ((in_TEXCOORD0.xy*ViewportInfo[0].zw)+vec2(-0.5,0.5));
vec2 imgCoordPixel = floor(imgCoord);
vec2 coord = (imgCoordPixel*ViewportInfo[0].xy);
vec2 pl = imgCoord - imgCoordPixel;
vec4 left = textureGather(ps0, coord, OperationMode);
float edgeVote = abs(left.z - left.y) + abs(color[OperationMode] - left.y) + abs(color[OperationMode] - left.z) ;
if(edgeVote > EdgeThreshold) {
coord.x += ViewportInfo[0].x;
highp vec2 IR_highp_vec2_0 = coord + vec2(ViewportInfo[0].x, 0.0);
vec2 IR_highp_vec2_0 = coord + vec2(ViewportInfo[0].x, 0.0);
vec4 right = textureGather(ps0, IR_highp_vec2_0, OperationMode);
vec4 upDown;
highp vec2 IR_highp_vec2_1 = coord + vec2(0.0, -ViewportInfo[0].y);
vec2 IR_highp_vec2_1 = coord + vec2(0.0, -ViewportInfo[0].y);
upDown.xy = textureGather(ps0, IR_highp_vec2_1, OperationMode).wz;
highp vec2 IR_highp_vec2_2 = coord + vec2(0.0, ViewportInfo[0].y);
vec2 IR_highp_vec2_2 = coord + vec2(0.0, ViewportInfo[0].y);
upDown.zw = textureGather(ps0, IR_highp_vec2_2, OperationMode).yx;
float mean = (left.y+left.z+right.x+right.w)*0.25;
@ -106,11 +83,7 @@ void main() {
float sumMean = 1.014185e+01/sum;
float std = (sumMean*sumMean);
#if defined(UseEdgeDirection)
vec3 data = vec3(std, edgeDirection(left, right));
#else
float data = std;
#endif
vec2 aWY = weightY(pl.x, pl.y+1.0, upDown.x,data);
aWY += weightY(pl.x-1.0, pl.y+1.0, upDown.y,data);
aWY += weightY(pl.x-1.0, pl.y-2.0, upDown.z,data);

View file

@ -20,33 +20,35 @@ namespace Vulkan {
using PushConstants = std::array<u32, 4 + 2 + 1>;
SGSR::SGSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count, VkExtent2D extent, bool edge_dir)
: m_device{device}, m_memory_allocator{memory_allocator}
, m_image_count{image_count}, m_extent{extent}
: m_device{device}
, m_memory_allocator{memory_allocator}
, m_image_count{image_count}
, m_extent{extent}
, m_edge_dir{edge_dir}
{
// Not finished yet initializing at ctor time?
m_dynamic_images.resize(m_image_count);
for (auto& images : m_dynamic_images) {
images.images[0] = CreateWrappedImage(m_memory_allocator, m_extent, VK_FORMAT_B8G8R8A8_UNORM);
images.image_views[0] = CreateWrappedImageView(m_device, images.images[0], VK_FORMAT_B8G8R8A8_UNORM);
images.image = CreateWrappedImage(m_memory_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT);
images.image_view = CreateWrappedImageView(m_device, images.image, VK_FORMAT_R16G16B16A16_SFLOAT);
}
m_renderpass = CreateWrappedRenderPass(m_device, VK_FORMAT_B8G8R8A8_UNORM);
m_renderpass = CreateWrappedRenderPass(m_device, VK_FORMAT_R16G16B16A16_SFLOAT);
for (auto& images : m_dynamic_images)
images.framebuffers[0] = CreateWrappedFramebuffer(m_device, m_renderpass, images.image_views[0], m_extent);
images.framebuffer = CreateWrappedFramebuffer(m_device, m_renderpass, images.image_view, m_extent);
m_sampler = CreateBilinearSampler(m_device);
m_vert_shader = BuildShader(m_device, SGSR1_SHADER_VERT_SPV);
m_stage_shader[0] = m_edge_dir
m_stage_shader = m_edge_dir
? BuildShader(m_device, SGSR1_SHADER_MOBILE_EDGE_DIRECTION_FRAG_SPV)
: BuildShader(m_device, SGSR1_SHADER_MOBILE_FRAG_SPV);
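// NOTE(review): this comment used to read "2 descriptors, 2 descriptor sets per invocation", but the pool below is sized from m_image_count and the layout is a single combined image sampler -- confirm the intended counts and update.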
m_descriptor_pool = CreateWrappedDescriptorPool(m_device, SGSR_STAGE_COUNT * m_image_count, SGSR_STAGE_COUNT * m_image_count);
m_descriptor_pool = CreateWrappedDescriptorPool(m_device, m_image_count, m_image_count);
m_descriptor_set_layout = CreateWrappedDescriptorSetLayout(m_device, {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER});
std::vector<VkDescriptorSetLayout> layouts(SGSR_STAGE_COUNT, *m_descriptor_set_layout);
VkDescriptorSetLayout layout = *m_descriptor_set_layout;
for (auto& images : m_dynamic_images)
images.descriptor_sets = CreateWrappedDescriptorSets(m_descriptor_pool, layouts);
images.descriptor_sets = CreateWrappedDescriptorSets(m_descriptor_pool, layout);
const VkPushConstantRange range{
.stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
@ -63,7 +65,7 @@ SGSR::SGSR(const Device& device, MemoryAllocator& memory_allocator, size_t image
.pPushConstantRanges = &range,
};
m_pipeline_layout = m_device.GetLogical().CreatePipelineLayout(ci);
m_stage_pipeline[0] = CreateWrappedPipeline(m_device, m_renderpass, m_pipeline_layout, std::tie(m_vert_shader, m_stage_shader[0]));
m_stage_pipeline = CreateWrappedPipeline(m_device, m_renderpass, m_pipeline_layout, std::tie(m_vert_shader, m_stage_shader));
}
void SGSR::UpdateDescriptorSets(VkImageView image_view, size_t image_index) {
@ -79,7 +81,7 @@ void SGSR::UploadImages(Scheduler& scheduler) {
if (!m_images_ready) {
scheduler.Record([&](vk::CommandBuffer cmdbuf) {
for (auto& image : m_dynamic_images)
ClearColorImage(cmdbuf, *image.images[0]);
ClearColorImage(cmdbuf, *image.image);
});
scheduler.Finish();
m_images_ready = true;
@ -88,19 +90,19 @@ void SGSR::UploadImages(Scheduler& scheduler) {
VkImageView SGSR::Draw(Scheduler& scheduler, size_t image_index, VkImage source_image, VkImageView source_image_view, VkExtent2D input_image_extent, const Common::Rectangle<f32>& crop_rect) {
Images& images = m_dynamic_images[image_index];
auto const stage0_image = *images.images[0];
auto const stage0_descriptor_set = images.descriptor_sets[0];
auto const stage0_framebuffer = *images.framebuffers[0];
auto const stage0_pipeline = *m_stage_pipeline[0];
auto const output_image = *images.image;
auto const descriptor_set = images.descriptor_sets[0];
auto const framebuffer = *images.framebuffer;
auto const pipeline = *m_stage_pipeline;
VkPipelineLayout pipeline_layout = *m_pipeline_layout;
VkPipelineLayout layout = *m_pipeline_layout;
VkRenderPass renderpass = *m_renderpass;
VkExtent2D extent = m_extent;
const f32 input_image_width = f32(input_image_extent.width);
const f32 input_image_height = f32(input_image_extent.height);
const f32 viewport_width = (crop_rect.right - crop_rect.left) * input_image_width;
const f32 viewport_height = (crop_rect.bottom - crop_rect.top) * input_image_height;
const f32 viewport_width = f32(crop_rect.right - crop_rect.left) * input_image_width;
const f32 viewport_height = f32(crop_rect.bottom - crop_rect.top) * input_image_height;
// expected [0, 2]
const f32 sharpening = f32(Settings::values.fsr_sharpening_slider.GetValue()) / 100.0f;
@ -126,16 +128,16 @@ VkImageView SGSR::Draw(Scheduler& scheduler, size_t image_index, VkImage source_
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([=](vk::CommandBuffer cmdbuf) {
TransitionImageLayout(cmdbuf, source_image, VK_IMAGE_LAYOUT_GENERAL);
TransitionImageLayout(cmdbuf, stage0_image, VK_IMAGE_LAYOUT_GENERAL);
BeginRenderPass(cmdbuf, renderpass, stage0_framebuffer, extent);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, stage0_pipeline);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, stage0_descriptor_set, {});
cmdbuf.PushConstants(pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, viewport_con);
TransitionImageLayout(cmdbuf, output_image, VK_IMAGE_LAYOUT_GENERAL);
BeginRenderPass(cmdbuf, renderpass, framebuffer, extent);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, {});
cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, viewport_con);
cmdbuf.Draw(3, 1, 0, 0);
cmdbuf.EndRenderPass();
TransitionImageLayout(cmdbuf, stage0_image, VK_IMAGE_LAYOUT_GENERAL);
TransitionImageLayout(cmdbuf, output_image, VK_IMAGE_LAYOUT_GENERAL);
});
return *images.image_views[0];
return *images.image_view;
}
} // namespace Vulkan

View file

@ -31,16 +31,16 @@ private:
vk::DescriptorSetLayout m_descriptor_set_layout;
vk::PipelineLayout m_pipeline_layout;
vk::ShaderModule m_vert_shader;
std::array<vk::ShaderModule, SGSR_STAGE_COUNT> m_stage_shader;
std::array<vk::Pipeline, SGSR_STAGE_COUNT> m_stage_pipeline;
vk::ShaderModule m_stage_shader;
vk::Pipeline m_stage_pipeline;
vk::RenderPass m_renderpass;
vk::Sampler m_sampler;
// Resources held by each element of m_dynamic_images: the descriptor sets
// plus the image, image view and framebuffer.
// NOTE(review): both the SGSR_STAGE_COUNT-sized arrays (images / image_views /
// framebuffers) and the single-object members (image / image_view /
// framebuffer) are listed here. They look like two alternative layouts of
// the same data rather than members meant to coexist -- confirm which set is
// current.
struct Images {
vk::DescriptorSets descriptor_sets;
std::array<vk::Image, SGSR_STAGE_COUNT> images;
std::array<vk::ImageView, SGSR_STAGE_COUNT> image_views;
std::array<vk::Framebuffer, SGSR_STAGE_COUNT> framebuffers;
vk::Image image;
vk::ImageView image_view;
vk::Framebuffer framebuffer;
};
std::vector<Images> m_dynamic_images;
bool m_images_ready{};