[video_core/host_shaders] unroll lanczos loop for slightly better perf (#3754)

Some drivers (Mali in particular) seem reluctant to unroll loops with more than 7 constant iterations (?); manually unrolling the loop is therefore potentially beneficial, since it avoids the extra branching and keeps the runtime uniform.

Signed-off-by: lizzie <lizzie@eden-emu.dev>

Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3754
Reviewed-by: Maufeat <sahyno1996@gmail.com>
Reviewed-by: crueter <crueter@eden-emu.dev>
Co-authored-by: lizzie <lizzie@eden-emu.dev>
Co-committed-by: lizzie <lizzie@eden-emu.dev>
This commit is contained in:
lizzie 2026-04-06 19:14:13 +02:00 committed by crueter
parent 028050cf04
commit 876884e783
No known key found for this signature in database
GPG key ID: 425ACD2D4830EBC6

View file

@ -24,13 +24,23 @@ vec4 textureLanczos(sampler2D textureSampler, vec2 p) {
vec2 cc = floor(p * res) / res; vec2 cc = floor(p * res) / res;
// kernel size = (2r + 1)^2 // kernel size = (2r + 1)^2
const int r = 3; //radius (1 = 3 steps) const int r = 3; //radius (1 = 3 steps)
for (int x = -r; x <= r; x++) #define LANCZOS_LOOP_STEP(x, y) \
for (int y = -r; y <= r; y++) { { \
vec2 kp = 0.5f * (vec2(x, y) / res); // 0.5 = half-pixel level resampling vec2 kp = 0.5f * (vec2(x, y) / res); /* 0.5 = half-pixel level resampling */ \
vec2 uv = cc + kp; vec2 uv = cc + kp; \
float w = lanczos(kp, float(r)); float w = lanczos(kp, float(r)); \
c_sum += w * texture(textureSampler, p + kp).rgb; c_sum += w * texture(textureSampler, p + kp).rgb; \
w_sum += w; w_sum += w; \
}
for (int y = -r; y <= r; ++y) {
LANCZOS_LOOP_STEP(-3, y);
LANCZOS_LOOP_STEP(-2, y);
LANCZOS_LOOP_STEP(-1, y);
LANCZOS_LOOP_STEP(-0, y);
LANCZOS_LOOP_STEP(+1, y);
LANCZOS_LOOP_STEP(+2, y);
LANCZOS_LOOP_STEP(+3, y);
} }
return vec4(c_sum / w_sum, 1.0f); return vec4(c_sum / w_sum, 1.0f);
} }