| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306 |
- /* EASU stage
- *
- * This takes a reduced resolution source, and scales it up while preserving detail.
- *
- * Updates:
- * stretch definition fixed. Thanks nehon for the bug report!
- */
- vec3 FsrEasuCF(vec2 p); // Input callback prototype (no definition is visible in this section; expected to be supplied by the including shader). FsrEasuF() calls it once per kernel tap with a position built from con1 (pixel position scaled by 1/inputSizeInPixels) and reads .r/.g/.b of the result.
- /**** EASU ****/
- // Builds the four constant vectors consumed by FsrEasuF().
- void FsrEasuCon(
-     out vec4 con0,
-     out vec4 con1,
-     out vec4 con2,
-     out vec4 con3,
-     // Resolution of the rendered image that is being upscaled.
-     vec2 inputViewportInPixels,
-     // Resolution of the resource containing the input image (useful for dynamic resolution).
-     vec2 inputSizeInPixels,
-     // Display resolution which the input image gets upscaled to.
-     vec2 outputSizeInPixels
- )
- {
-     // Every remaining constant is "some pixel offset" divided by the input resource size.
-     vec4 inputSize4 = inputSizeInPixels.xyxy;
-     // con0: output integer position -> pixel position in the input viewport.
-     //   xy = input/output scale, zw = half-pixel correction.
-     con0 = vec4(
-         inputViewportInPixels / outputSizeInPixels,
-         .5 * inputViewportInPixels / outputSizeInPixels - .5
-     );
-     // con1: viewport pixel position -> normalized image space.
-     // This is used to get the upper-left of the 'F' tap.
-     con1 = vec4(1., 1., 1., -1.) / inputSize4;
-     // con2/con3: centers of the gather4 footprints, first offset from upper-left of 'F'.
-     //      +---+---+
-     //      |   |   |
-     //      +--(0)--+
-     //      | b | c |
-     //  +---F---+---+---+
-     //  | e | f | g | h |
-     //  +--(1)--+--(2)--+
-     //  | i | j | k | l |
-     //  +---+---+---+---+
-     //      | n | o |
-     //      +--(3)--+
-     //      |   |   |
-     //      +---+---+
-     // These offsets are measured from (0) instead of from 'F'.
-     con2 = vec4(-1., 2., 1., 2.) / inputSize4;
-     con3 = vec4(0., 4., 0., 0.) / inputSize4;
- }
- // Filtering for a given tap: computes the kernel weight of one tap and adds the tap to the running sums.
- void FsrEasuTapF(
-     inout vec3 aC, // Accumulated color, with negative lobe.
-     inout float aW, // Accumulated weight.
-     vec2 off, // Pixel offset from resolve position to tap.
-     vec2 dir, // Gradient direction.
-     vec2 len, // Length.
-     float lob, // Negative lobe strength.
-     float clp, // Clipping point.
-     vec3 c // Tap color.
- )
- {
-     // Rotate the offset into the frame of the gradient direction, then apply the anisotropy.
-     vec2 perp = vec2(-dir.y, dir.x);
-     vec2 rotated = vec2(dot(off, dir), dot(off, perp)) * len;
-     // Squared distance, limited to the window: at a corner, 2 taps can easily be outside.
-     float distSq = min(dot(rotated, rotated), clp);
-     // Approximation of Lanczos-2 without sin() or rcp(), or sqrt() to get x.
-     //  (25/16 * (2/5 * x^2 - 1)^2 - (25/16 - 1)) * (1/4 * x^2 - 1)^2
-     //  |_______________________________________|   |_______________|
-     //                   base                             window
-     // The general form of the 'base' is,
-     //  (a*(b*x^2-1)^2-(a-1))
-     // Where 'a=1/(2*b-b^2)' and 'b' moves around the negative lobe.
-     float baseTerm = .4 * distSq - 1.;
-     float windowTerm = lob * distSq - 1.;
-     float base = 1.5625 * (baseTerm * baseTerm) - .5625;
-     float window = windowTerm * windowTerm;
-     float weight = base * window;
-     // Contribute to the weighted average.
-     aC += c * weight;
-     aW += weight;
- }
- //------------------------------------------------------------------------------------------------------------------------------
- // Accumulate direction and length.
- //   dir, len : running sums that this call adds to.
- //   w        : weight applied to this call's contribution.
- //   lA..lE   : lumas laid out as a '+' around the centre 'c' (lA above, lB left, lC centre, lD right, lE below).
- void FsrEasuSetF(
-     inout vec2 dir,
-     inout float len,
-     float w,
-     float lA,float lB,float lC,float lD,float lE
- )
- {
-     // Direction is the '+' diff.
-     //    a
-     //  b c d
-     //    e
-     // Then takes magnitude from abs average of both sides of 'c'.
-     // Length converts gradient reversal to 0, smoothly to non-reversal at 1, shaped, then adding horz and vert terms.
-     float lenX = max(abs(lD - lC), abs(lC - lB));
-     float dirX = lD - lB;
-     dir.x += dirX * w;
-     // lenX == 0 means lB == lC == lD, hence dirX == 0 as well: the ratio would be 0/0,
-     // whose result is unspecified (typically NaN) and would leak into every tap weight.
-     // A flat row has no edge, so it contributes 0.
-     lenX = (lenX > 0.) ? clamp(abs(dirX) / lenX, 0., 1.) : 0.;
-     lenX *= lenX;
-     len += lenX * w;
-     // Repeat for the y axis (same flat-column guard).
-     float lenY = max(abs(lE - lC), abs(lC - lA));
-     float dirY = lE - lA;
-     dir.y += dirY * w;
-     lenY = (lenY > 0.) ? clamp(abs(dirY) / lenY, 0., 1.) : 0.;
-     lenY *= lenY;
-     len += lenY * w;
- }
- //------------------------------------------------------------------------------------------------------------------------------
- // EASU filter: resolves one output pixel from a 12-tap neighbourhood of the input image.
- void FsrEasuF(
-     out vec3 pix,
-     vec2 ip, // Integer pixel position in output.
-     // Constants generated by FsrEasuCon().
-     vec4 con0, // xy = output to input scale, zw = first pixel offset correction
-     vec4 con1,
-     vec4 con2,
-     vec4 con3
- )
- {
-     //------------------------------------------------------------------------------------------------------------------------------
-     // Get position of 'f': map the output pixel into the input image, then split it
-     // into the whole source pixel and the sub-pixel remainder.
-     vec2 srcPos = ip * con0.xy + con0.zw;
-     vec2 srcBase = floor(srcPos);
-     vec2 sub = srcPos - srcBase;
-     //------------------------------------------------------------------------------------------------------------------------------
-     // 12-tap kernel.
-     //    b c
-     //  e f g h
-     //  i j k l
-     //    n o
-     // Gather 4 ordering.
-     //  a b
-     //  r g
-     vec2 g0 = srcBase * con1.xy + con1.zw;
-     // The other three centers are all offsets from g0.
-     vec2 g1 = g0 + con2.xy;
-     vec2 g2 = g0 + con2.zw;
-     vec2 g3 = g0 + con3.xy;
-     // TextureGather is not available on WebGL2, so each gather becomes individual
-     // fetches at +/- half a texel around its center.
-     // hp: x=west y=east z=north w=south
-     vec4 hp = vec4(-.5, .5, -.5, .5) * con1.xxyy;
-     vec2 sw = hp.xw;
-     vec2 se = hp.yw;
-     vec2 ne = hp.yz;
-     vec2 nw = hp.xz;
-     vec3 bC = FsrEasuCF(g0 + sw);
-     vec3 cC = FsrEasuCF(g0 + se);
-     vec3 iC = FsrEasuCF(g1 + sw);
-     vec3 jC = FsrEasuCF(g1 + se);
-     vec3 fC = FsrEasuCF(g1 + ne);
-     vec3 eC = FsrEasuCF(g1 + nw);
-     vec3 kC = FsrEasuCF(g2 + sw);
-     vec3 lC = FsrEasuCF(g2 + se);
-     vec3 hC = FsrEasuCF(g2 + ne);
-     vec3 gC = FsrEasuCF(g2 + nw);
-     vec3 oC = FsrEasuCF(g3 + ne);
-     vec3 nC = FsrEasuCF(g3 + nw);
-     //------------------------------------------------------------------------------------------------------------------------------
-     // Simplest multi-channel approximate luma possible (luma times 2, in 2 FMA/MAD).
-     float bL = bC.g + 0.5 * (bC.r + bC.b);
-     float cL = cC.g + 0.5 * (cC.r + cC.b);
-     float iL = iC.g + 0.5 * (iC.r + iC.b);
-     float jL = jC.g + 0.5 * (jC.r + jC.b);
-     float fL = fC.g + 0.5 * (fC.r + fC.b);
-     float eL = eC.g + 0.5 * (eC.r + eC.b);
-     float kL = kC.g + 0.5 * (kC.r + kC.b);
-     float lL = lC.g + 0.5 * (lC.r + lC.b);
-     float hL = hC.g + 0.5 * (hC.r + hC.b);
-     float gL = gC.g + 0.5 * (gC.r + gC.b);
-     float oL = oC.g + 0.5 * (oC.r + oC.b);
-     float nL = nC.g + 0.5 * (nC.r + nC.b);
-     //------------------------------------------------------------------------------------------------------------------------------
-     // Accumulate direction and length around f, g, j and k, weighted for bilinear interpolation.
-     vec2 inv = 1. - sub;
-     vec2 dir = vec2(0);
-     float len = 0.;
-     FsrEasuSetF(dir, len, inv.x * inv.y, bL, eL, fL, gL, jL);
-     FsrEasuSetF(dir, len, sub.x * inv.y, cL, fL, gL, hL, kL);
-     FsrEasuSetF(dir, len, inv.x * sub.y, fL, iL, jL, kL, nL);
-     FsrEasuSetF(dir, len, sub.x * sub.y, gL, jL, kL, lL, oL);
-     //------------------------------------------------------------------------------------------------------------------------------
-     // Normalize the direction; when it is close to zero fall back to (1, dir.y) unscaled.
-     float magSq = dir.x * dir.x + dir.y * dir.y;
-     float invMag = 1.0;
-     if (magSq < (1.0/32768.0)) {
-         dir.x = 1.0;
-     } else {
-         invMag = inversesqrt(magSq);
-     }
-     dir *= invMag;
-     // Transform from {0 to 2} to {0 to 1} range, and shape with square.
-     float halfLen = len * 0.5;
-     float edge = halfLen * halfLen;
-     // Stretch kernel {1.0 vert|horz, to sqrt(2.0) on diagonal}.
-     float stretch = dot(dir, dir) / (max(abs(dir.x), abs(dir.y)));
-     // Anisotropic length after rotation (FsrEasuTapF multiplies the rotated offset by this),
-     //  x := 1.0 lerp to 'stretch' on edges
-     //  y := 1.0 lerp to 0.5 on edges
-     vec2 len2 = vec2(1. + (stretch - 1.0) * edge, 1. - .5 * edge);
-     // Based on the amount of 'edge',
-     // the window shifts from +/-{sqrt(2.0) to slightly beyond 2.0}.
-     float lob = .5 - .29 * edge;
-     // Set distance^2 clipping point to the end of the adjustable window.
-     float clp = 1. / lob;
-     //------------------------------------------------------------------------------------------------------------------------------
-     // Accumulation mixed with min/max of 4 nearest.
-     //    b c
-     //  e f g h
-     //  i j k l
-     //    n o
-     vec3 min4 = min(min(fC, gC), min(jC, kC));
-     vec3 max4 = max(max(fC, gC), max(jC, kC));
-     // Accumulation; each offset is the tap position relative to 'f', minus the sub-pixel position.
-     vec3 aC = vec3(0);
-     float aW = 0.;
-     FsrEasuTapF(aC, aW, vec2( 0,-1) - sub, dir, len2, lob, clp, bC);
-     FsrEasuTapF(aC, aW, vec2( 1,-1) - sub, dir, len2, lob, clp, cC);
-     FsrEasuTapF(aC, aW, vec2(-1, 1) - sub, dir, len2, lob, clp, iC);
-     FsrEasuTapF(aC, aW, vec2( 0, 1) - sub, dir, len2, lob, clp, jC);
-     FsrEasuTapF(aC, aW, vec2( 0, 0) - sub, dir, len2, lob, clp, fC);
-     FsrEasuTapF(aC, aW, vec2(-1, 0) - sub, dir, len2, lob, clp, eC);
-     FsrEasuTapF(aC, aW, vec2( 1, 1) - sub, dir, len2, lob, clp, kC);
-     FsrEasuTapF(aC, aW, vec2( 2, 1) - sub, dir, len2, lob, clp, lC);
-     FsrEasuTapF(aC, aW, vec2( 2, 0) - sub, dir, len2, lob, clp, hC);
-     FsrEasuTapF(aC, aW, vec2( 1, 0) - sub, dir, len2, lob, clp, gC);
-     FsrEasuTapF(aC, aW, vec2( 1, 2) - sub, dir, len2, lob, clp, oC);
-     FsrEasuTapF(aC, aW, vec2( 0, 2) - sub, dir, len2, lob, clp, nC);
-     //------------------------------------------------------------------------------------------------------------------------------
-     // Normalize and dering: keep the result inside the range of the 4 nearest taps.
-     vec3 resolved = aC / aW;
-     pix = min(max4, max(min4, resolved));
- }
- /***** RCAS *****/
- #define FSR_RCAS_LIMIT (0.25-(1.0/16.0)) // = 0.1875. FsrRcasF() keeps its (negative) lobe no lower than -FSR_RCAS_LIMIT before scaling it by 'con'.
- #define FSR_RCAS_DENOISE 0 // When non-zero, FsrRcasF() additionally multiplies the lobe by its noise-detection term 'nz'.
- // Input callback prototypes that need to be implemented by calling shader. FsrRcasF() calls FsrRcasLoadF() with 'ip' plus whole-pixel offsets and reads only .rgb of the result.
- vec4 FsrRcasLoadF(vec2 p);
- //------------------------------------------------------------------------------------------------------------------------------
- // Builds the constant consumed by FsrRcasF().
- void FsrRcasCon(
-     out float con,
-     // The scale is {0.0 := maximum, to N>0, where N is the number of stops (halving) of the reduction of sharpness}.
-     float sharpness
- )
- {
-     // Transform from stops to a linear value: con = 2^(-sharpness).
-     float negStops = -sharpness;
-     con = exp2(negStops);
- }
- // RCAS filter: returns the sharpened color for one pixel.
- vec3 FsrRcasF(
-     vec2 ip, // Integer pixel position in output.
-     float con // Constant generated by FsrRcasCon().
- )
- {
-     // Algorithm uses minimal 3x3 pixel neighborhood.
-     //    b
-     //  d e f
-     //    h
-     vec3 b = FsrRcasLoadF(ip + vec2( 0,-1)).rgb;
-     vec3 d = FsrRcasLoadF(ip + vec2(-1, 0)).rgb;
-     vec3 e = FsrRcasLoadF(ip).rgb;
-     vec3 f = FsrRcasLoadF(ip + vec2( 1, 0)).rgb;
-     vec3 h = FsrRcasLoadF(ip + vec2( 0, 1)).rgb;
-     // Min and max of ring: all four neighbours b, d, f, h.
-     // ('d' must be included, otherwise the limiter below ignores the left neighbour
-     // even though the resolve at the end uses it.)
-     vec3 mn4 = min(min(b, d), min(f, h));
-     vec3 mx4 = max(max(b, d), max(f, h));
-     // Immediate constants for peak range.
-     vec2 peakC = vec2(1., -4.);
-     // Limiters (plain full-precision divisions here).
-     vec3 hitMin = mn4 / (4. * mx4);
-     vec3 hitMax = (peakC.x - mx4) / (4. * mn4 + peakC.y);
-     vec3 lobeRGB = max(-hitMin, hitMax);
-     // Largest channel lobe, kept within [-FSR_RCAS_LIMIT, 0], then scaled by the sharpness constant.
-     float lobe = max(
-         -FSR_RCAS_LIMIT,
-         min(max(lobeRGB.r, max(lobeRGB.g, lobeRGB.b)), 0.)
-     ) * con;
-     // Apply noise removal (only compiled in when FSR_RCAS_DENOISE is non-zero).
-     #if FSR_RCAS_DENOISE
-     // Luma times 2.
-     float bL = b.g + .5 * (b.b + b.r);
-     float dL = d.g + .5 * (d.b + d.r);
-     float eL = e.g + .5 * (e.b + e.r);
-     float fL = f.g + .5 * (f.b + f.r);
-     float hL = h.g + .5 * (h.b + h.r);
-     // Noise detection: how far the centre is from the neighbour average, relative to the local luma range.
-     float lumaRange =
-         max(max(bL, dL), max(eL, max(fL, hL)))
-         - min(min(bL, dL), min(eL, min(fL, hL)));
-     float nz = .25 * (bL + dL + fL + hL) - eL;
-     // A zero range (flat neighbourhood) would divide by zero; treat it as noise-free.
-     nz = (lumaRange > 0.) ? clamp(abs(nz) / lumaRange, 0., 1.) : 0.;
-     nz = 1. - .5 * nz;
-     lobe *= nz;
-     #endif
-     // Resolve: lobe-weighted ring plus the centre, renormalized by the total weight.
-     return (lobe * (b + d + h + f) + e) / (4. * lobe + 1.);
- }
|