fsr.chunk 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306
  1. /* EASU stage
  2. *
  3. * This takes a reduced resolution source, and scales it up while preserving detail.
  4. *
  5. * Updates:
  6. * stretch definition fixed. Thanks nehon for the bug report!
  7. */
  // Input callback prototype; the including shader must provide the definition.
  // NOTE(review): presumably returns the input image colour at normalized coordinate p - confirm against the caller.
  vec3 FsrEasuCF(vec2 p);

  /**** EASU ****/
  // Builds the four constant vectors that FsrEasuF() consumes.
  //   con0.xy : input viewport size divided by output size (output -> input scale)
  //   con0.zw : half of that scale minus 0.5 (first-pixel offset correction)
  //   con1    : (1, 1, 1, -1) divided by the input resource size
  //   con2    : (-1, 2, 1, 2) divided by the input resource size
  //   con3    : (0, 4, 0, 0) divided by the input resource size
  void FsrEasuCon(
      out vec4 con0,
      out vec4 con1,
      out vec4 con2,
      out vec4 con3,
      // This is the rendered image resolution being upscaled.
      vec2 inputViewportInPixels,
      // This is the resolution of the resource containing the input image (useful for dynamic resolution).
      vec2 inputSizeInPixels,
      // This is the display resolution which the input image gets upscaled to.
      vec2 outputSizeInPixels
  )
  {
      // Output integer position to a pixel position in the viewport.
      vec2 outToIn = inputViewportInPixels / outputSizeInPixels;
      vec2 firstPixelFix = .5 * inputViewportInPixels / outputSizeInPixels - .5;
      con0 = vec4(outToIn, firstPixelFix);

      // Every remaining constant is a fixed pixel offset scaled by the resource size.
      vec4 size4 = inputSizeInPixels.xyxy;

      // Viewport pixel position to normalized image space.
      // This is used to get the upper-left of the 'F' tap.
      con1 = vec4(1., 1., 1., -1.) / size4;

      // Centers of the 2x2 sample groups, first offset from upper-left of 'F'.
      //      +---+---+
      //      |   |   |
      //      +--(0)--+
      //      | b | c |
      //  +---F---+---+---+
      //  | e | f | g | h |
      //  +--(1)--+--(2)--+
      //  | i | j | k | l |
      //  +---+---+---+---+
      //      | n | o |
      //      +--(3)--+
      //      |   |   |
      //      +---+---+
      // The offsets below are measured from (0) instead of 'F'.
      con2 = vec4(-1., 2., 1., 2.) / size4;
      con3 = vec4(0., 4., 0., 0.) / size4;
  }
  // Filters one tap of the scaler: computes the kernel weight for the tap and
  // adds the weighted colour and the weight into the running accumulators.
  void FsrEasuTapF(
      inout vec3 aC,  // Accumulated color, with negative lobe.
      inout float aW, // Accumulated weight.
      vec2 off,       // Pixel offset from resolve position to tap.
      vec2 dir,       // Gradient direction.
      vec2 len,       // Length (per-axis anisotropic scale applied after rotation).
      float lob,      // Negative lobe strength.
      float clp,      // Clipping point (upper bound on distance^2).
      vec3 c          // Tap color.
  )
  {
      // Rotate the offset into the frame of 'dir', then apply the anisotropy.
      vec2 perp = vec2(-dir.y, dir.x);
      vec2 rotated = vec2(dot(off, dir), dot(off, perp)) * len;

      // Squared distance, limited to the window: at a corner,
      // 2 taps can easily be outside.
      float distSq = min(dot(rotated, rotated), clp);

      // Approximation of Lanczos2 without sin(), rcp(), or sqrt() to get x.
      //  (25/16 * (2/5 * x^2 - 1)^2 - (25/16 - 1)) * (1/4 * x^2 - 1)^2
      //  |_______________________________________|   |_______________|
      //                   base                             window
      // The general form of the 'base' is,
      //  (a*(b*x^2-1)^2-(a-1))
      // where 'a=1/(2*b-b^2)' and 'b' moves around the negative lobe.
      float baseTerm = .4 * distSq - 1.;
      float windowTerm = lob * distSq - 1.;
      baseTerm *= baseTerm;
      windowTerm *= windowTerm;
      baseTerm = 1.5625 * baseTerm - .5625;
      float weight = baseTerm * windowTerm;

      // Contribute to the weighted average.
      aC += c * weight;
      aW += weight;
  }
  88. //------------------------------------------------------------------------------------------------------------------------------
  // Accumulate direction and length for one of the four bilinear corners.
  //
  // The luma taps form a '+' around the center 'lC':
  //      lA
  //   lB lC lD
  //      lE
  // 'dir' receives the weighted central differences (lD - lB, lE - lA).
  // 'len' receives, per axis, a weighted term that goes from 0 on a gradient
  // reversal smoothly to 1 on a non-reversal (squared for shaping); the
  // horizontal and vertical terms are summed.
  void FsrEasuSetF(
      inout vec2 dir,  // Accumulated gradient direction.
      inout float len, // Accumulated edge length term.
      float w,         // Bilinear weight of this corner.
      float lA, float lB, float lC, float lD, float lE
  )
  {
      // Horizontal axis: magnitude comes from the larger one-sided difference around 'lC'.
      float lenX = max(abs(lD - lC), abs(lC - lB));
      float dirX = lD - lB;
      dir.x += dirX * w;
      // On a flat row both one-sided differences are zero, so dirX is zero as
      // well and the ratio would be 0/0. Division by zero is undefined in GLSL
      // and typically produces NaN, which would then contaminate every tap
      // weight downstream. A flat row carries no edge, so contribute 0.
      lenX = lenX > 0. ? clamp(abs(dirX) / lenX, 0., 1.) : 0.;
      lenX *= lenX;
      len += lenX * w;

      // Repeat for the vertical axis.
      float lenY = max(abs(lE - lC), abs(lC - lA));
      float dirY = lE - lA;
      dir.y += dirY * w;
      // Same flat-region guard as above.
      lenY = lenY > 0. ? clamp(abs(dirY) / lenY, 0., 1.) : 0.;
      lenY *= lenY;
      len += lenY * w;
  }
  117. //------------------------------------------------------------------------------------------------------------------------------
  // Simplest multi-channel approximate luma possible (luma times 2): g + 0.5 * (r + b).
  float FsrEasuLuma2F(vec3 c)
  {
      return c.g + 0.5 * (c.r + c.b);
  }

  // EASU resolve for one output pixel: fetches a 12-tap neighbourhood through
  // FsrEasuCF(), estimates the local edge direction and strength from luma,
  // filters the taps with a rotated, stretched kernel, and clamps the result
  // to the min/max of the 4 nearest taps.
  void FsrEasuF(
      out vec3 pix,
      vec2 ip,   // Integer pixel position in output.
      // Constants generated by FsrEasuCon().
      vec4 con0, // xy = output to input scale, zw = first pixel offset correction
      vec4 con1,
      vec4 con2,
      vec4 con3
  )
  {
      //--------------------------------------------------------------------------
      // Get position of 'f'.
      vec2 srcPos = ip * con0.xy + con0.zw; // Corresponding input pixel/subpixel.
      vec2 srcBase = floor(srcPos);         // Integer part: pixel 'f'.
      vec2 sub = srcPos - srcBase;          // Fractional part: subpixel position.

      //--------------------------------------------------------------------------
      // 12-tap kernel.
      //    b c
      //  e f g h
      //  i j k l
      //    n o
      // Anchors of the four 2x2 groups, all derived from the first one.
      vec2 q0 = srcBase * con1.xy + con1.zw;
      vec2 q1 = q0 + con2.xy;
      vec2 q2 = q0 + con2.zw;
      vec2 q3 = q0 + con3.xy;

      // textureGather is not available on WebGL2, so each member of a 2x2
      // group is fetched on its own, half a pixel away from the group anchor.
      vec4 hp = vec4(-.5, .5, -.5, .5) * con1.xxyy;
      vec2 toXW = hp.xw; // (-x, +y)
      vec2 toYW = hp.yw; // (+x, +y)
      vec2 toYZ = hp.yz; // (+x, -y)
      vec2 toXZ = hp.xz; // (-x, -y)

      vec3 bC = FsrEasuCF(q0 + toXW);
      vec3 cC = FsrEasuCF(q0 + toYW);
      vec3 iC = FsrEasuCF(q1 + toXW);
      vec3 jC = FsrEasuCF(q1 + toYW);
      vec3 fC = FsrEasuCF(q1 + toYZ);
      vec3 eC = FsrEasuCF(q1 + toXZ);
      vec3 kC = FsrEasuCF(q2 + toXW);
      vec3 lC = FsrEasuCF(q2 + toYW);
      vec3 hC = FsrEasuCF(q2 + toYZ);
      vec3 gC = FsrEasuCF(q2 + toXZ);
      vec3 oC = FsrEasuCF(q3 + toYZ);
      vec3 nC = FsrEasuCF(q3 + toXZ);

      float bL = FsrEasuLuma2F(bC);
      float cL = FsrEasuLuma2F(cC);
      float iL = FsrEasuLuma2F(iC);
      float jL = FsrEasuLuma2F(jC);
      float fL = FsrEasuLuma2F(fC);
      float eL = FsrEasuLuma2F(eC);
      float kL = FsrEasuLuma2F(kC);
      float lL = FsrEasuLuma2F(lC);
      float hL = FsrEasuLuma2F(hC);
      float gL = FsrEasuLuma2F(gC);
      float oL = FsrEasuLuma2F(oC);
      float nL = FsrEasuLuma2F(nC);

      //--------------------------------------------------------------------------
      // Accumulate direction and length with bilinear weights around f, g, j, k.
      vec2 dir = vec2(0);
      float len = 0.;
      float wx0 = 1. - sub.x;
      float wy0 = 1. - sub.y;
      FsrEasuSetF(dir, len, wx0 * wy0, bL, eL, fL, gL, jL);
      FsrEasuSetF(dir, len, sub.x * wy0, cL, fL, gL, hL, kL);
      FsrEasuSetF(dir, len, wx0 * sub.y, fL, iL, jL, kL, nL);
      FsrEasuSetF(dir, len, sub.x * sub.y, gL, jL, kL, lL, oL);

      //--------------------------------------------------------------------------
      // Normalize the direction, falling back to (1, dir.y) unscaled when it is close to zero.
      vec2 dirSq = dir * dir;
      float mag2 = dirSq.x + dirSq.y;
      bool nearZero = mag2 < (1.0 / 32768.0);
      float invMag = nearZero ? 1.0 : inversesqrt(mag2);
      if (nearZero)
      {
          dir.x = 1.0;
      }
      dir *= vec2(invMag);

      // Transform from {0 to 2} to {0 to 1} range, and shape with square.
      len *= 0.5;
      len *= len;

      // Stretch kernel {1.0 vert|horz, to sqrt(2.0) on diagonal}.
      float stretch = dot(dir, dir) / (max(abs(dir.x), abs(dir.y)));

      // Anisotropic length after rotation,
      //  x := 1.0 lerp to 'stretch' on edges
      //  y := 1.0 lerp to 2x on edges
      vec2 len2 = vec2(1. + (stretch - 1.0) * len, 1. - .5 * len);

      // Based on the amount of 'edge',
      // the window shifts from +/-{sqrt(2.0) to slightly beyond 2.0}.
      float lob = .5 - .29 * len;
      // Set distance^2 clipping point to the end of the adjustable window.
      float clp = 1. / lob;

      //--------------------------------------------------------------------------
      // Min/max of the 4 nearest taps (f, g, j, k), used for deringing.
      vec3 min4 = min(min(fC, gC), min(jC, kC));
      vec3 max4 = max(max(fC, gC), max(jC, kC));

      // Accumulation over all 12 taps; offsets are relative to 'f'.
      vec3 aC = vec3(0);
      float aW = 0.;
      FsrEasuTapF(aC, aW, vec2( 0., -1.) - sub, dir, len2, lob, clp, bC);
      FsrEasuTapF(aC, aW, vec2( 1., -1.) - sub, dir, len2, lob, clp, cC);
      FsrEasuTapF(aC, aW, vec2(-1.,  1.) - sub, dir, len2, lob, clp, iC);
      FsrEasuTapF(aC, aW, vec2( 0.,  1.) - sub, dir, len2, lob, clp, jC);
      FsrEasuTapF(aC, aW, vec2( 0.,  0.) - sub, dir, len2, lob, clp, fC);
      FsrEasuTapF(aC, aW, vec2(-1.,  0.) - sub, dir, len2, lob, clp, eC);
      FsrEasuTapF(aC, aW, vec2( 1.,  1.) - sub, dir, len2, lob, clp, kC);
      FsrEasuTapF(aC, aW, vec2( 2.,  1.) - sub, dir, len2, lob, clp, lC);
      FsrEasuTapF(aC, aW, vec2( 2.,  0.) - sub, dir, len2, lob, clp, hC);
      FsrEasuTapF(aC, aW, vec2( 1.,  0.) - sub, dir, len2, lob, clp, gC);
      FsrEasuTapF(aC, aW, vec2( 1.,  2.) - sub, dir, len2, lob, clp, oC);
      FsrEasuTapF(aC, aW, vec2( 0.,  2.) - sub, dir, len2, lob, clp, nC);

      //--------------------------------------------------------------------------
      // Normalize and dering.
      vec3 resolved = aC / aW;
      pix = min(max4, max(min4, resolved));
  }
  /***** RCAS *****/
  // Magnitude of the most negative sharpening lobe that FsrRcasF() allows.
  #define FSR_RCAS_LIMIT (0.25-(1.0/16.0))
  // When non-zero, FsrRcasF() scales the lobe by its noise estimate.
  #define FSR_RCAS_DENOISE 0

  // Input callback prototype that needs to be implemented by the calling shader.
  vec4 FsrRcasLoadF(vec2 p);

  //------------------------------------------------------------------------------
  // Converts a sharpness setting expressed in stops into the linear constant
  // passed to FsrRcasF().
  void FsrRcasCon(
      out float con,
      // The scale is {0.0 := maximum, to N>0, where N is the number of stops
      // (halving) of the reduction of sharpness}.
      float sharpness
  )
  {
      // Each stop halves the value: 2^(-sharpness).
      float stops = sharpness;
      con = exp2(-stops);
  }
  // RCAS sharpening for one output pixel. Reads a '+' shaped neighbourhood
  // through FsrRcasLoadF(), derives a negative lobe limited so the result
  // stays within the range implied by the ring taps, and returns the
  // sharpened colour.
  vec3 FsrRcasF(
      vec2 ip,  // Integer pixel position in output.
      float con // Constant generated by FsrRcasCon().
  )
  {
      // Algorithm uses minimal 3x3 pixel neighborhood.
      //    b
      //  d e f
      //    h
      vec2 sp = vec2(ip);
      vec3 b = FsrRcasLoadF(sp + vec2( 0,-1)).rgb;
      vec3 d = FsrRcasLoadF(sp + vec2(-1, 0)).rgb;
      vec3 e = FsrRcasLoadF(sp).rgb;
      vec3 f = FsrRcasLoadF(sp + vec2( 1, 0)).rgb;
      vec3 h = FsrRcasLoadF(sp + vec2( 0, 1)).rgb;

      // Luma times 2.
      float bL = b.g + .5 * (b.b + b.r);
      float dL = d.g + .5 * (d.b + d.r);
      float eL = e.g + .5 * (e.b + e.r);
      float fL = f.g + .5 * (f.b + f.r);
      float hL = h.g + .5 * (h.b + h.r);

      // Noise detection (only consumed when FSR_RCAS_DENOISE is enabled).
      float nz = .25 * (bL + dL + fL + hL) - eL;
      float lumaRange =
          max(max(bL, dL), max(eL, max(fL, hL)))
          - min(min(bL, dL), min(eL, min(fL, hL)));
      // A zero range means all five lumas are equal, so the numerator is zero
      // too; avoid the undefined 0/0 and report "no noise".
      nz = lumaRange > 0. ? clamp(abs(nz) / lumaRange, 0., 1.) : 0.;
      nz = 1. - .5 * nz;

      // Min and max of ring. All four ring taps (b, d, f, h) take part; leaving
      // 'd' out lets the limiter under-estimate the local range whenever the
      // left neighbour is the extreme value.
      vec3 mn4 = min(min(b, d), min(f, h));
      vec3 mx4 = max(max(b, d), max(f, h));

      // Immediate constants for peak range.
      vec2 peakC = vec2(1., -4.);

      // Limiters, these need to be high precision RCPs.
      vec3 hitMin = mn4 / (4. * mx4);
      vec3 hitMax = (peakC.x - mx4) / (4. * mn4 + peakC.y);
      vec3 lobeRGB = max(-hitMin, hitMax);
      float lobe = max(
          -FSR_RCAS_LIMIT,
          min(max(lobeRGB.r, max(lobeRGB.g, lobeRGB.b)), 0.)
      ) * con;

      // Apply noise removal.
  #if FSR_RCAS_DENOISE
      lobe *= nz;
  #endif

      // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes.
      return (lobe * (b + d + h + f) + e) / (4. * lobe + 1.);
  }