mirror of
https://git.eden-emu.dev/eden-emu/eden
synced 2026-04-10 03:18:55 +02:00
[video_core/host_shaders] unroll lanczos loop for slightly better perf (#3754)
Some drivers (Mali in particular) are reluctant to unroll loops with more than 7 constant iterations (?); manual unrolling is therefore potentially beneficial, because it avoids extra branching and meets the uniform runtime expectations.
Signed-off-by: lizzie <lizzie@eden-emu.dev>
Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3754
Reviewed-by: Maufeat <sahyno1996@gmail.com>
Reviewed-by: crueter <crueter@eden-emu.dev>
Co-authored-by: lizzie <lizzie@eden-emu.dev>
Co-committed-by: lizzie <lizzie@eden-emu.dev>
This commit is contained in:
parent
028050cf04
commit
876884e783
1 changed file with 18 additions and 8 deletions
|
|
@ -24,13 +24,23 @@ vec4 textureLanczos(sampler2D textureSampler, vec2 p) {
|
|||
vec2 cc = floor(p * res) / res;
|
||||
// kernel size = (2r + 1)^2
|
||||
const int r = 3; //radius (1 = 3 steps)
|
||||
for (int x = -r; x <= r; x++)
|
||||
for (int y = -r; y <= r; y++) {
|
||||
vec2 kp = 0.5f * (vec2(x, y) / res); // 0.5 = half-pixel level resampling
|
||||
vec2 uv = cc + kp;
|
||||
float w = lanczos(kp, float(r));
|
||||
c_sum += w * texture(textureSampler, p + kp).rgb;
|
||||
w_sum += w;
|
||||
#define LANCZOS_LOOP_STEP(x, y) \
|
||||
{ \
|
||||
vec2 kp = 0.5f * (vec2(x, y) / res); /* 0.5 = half-pixel level resampling */ \
|
||||
vec2 uv = cc + kp; \
|
||||
float w = lanczos(kp, float(r)); \
|
||||
c_sum += w * texture(textureSampler, p + kp).rgb; \
|
||||
w_sum += w; \
|
||||
}
|
||||
|
||||
for (int y = -r; y <= r; ++y) {
|
||||
LANCZOS_LOOP_STEP(-3, y);
|
||||
LANCZOS_LOOP_STEP(-2, y);
|
||||
LANCZOS_LOOP_STEP(-1, y);
|
||||
LANCZOS_LOOP_STEP(-0, y);
|
||||
LANCZOS_LOOP_STEP(+1, y);
|
||||
LANCZOS_LOOP_STEP(+2, y);
|
||||
LANCZOS_LOOP_STEP(+3, y);
|
||||
}
|
||||
return vec4(c_sum / w_sum, 1.0f);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue