Developer Documentation
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
Fxaa3_11.h
1 /*============================================================================
2 
3 
4  NVIDIA FXAA 3.11 by TIMOTHY LOTTES
5 
6 
7 ------------------------------------------------------------------------------
8 COPYRIGHT (C) 2010, 2011 NVIDIA CORPORATION. ALL RIGHTS RESERVED.
9 ------------------------------------------------------------------------------
10 TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED
11 *AS IS* AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS
12 OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF
13 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA
14 OR ITS SUPPLIERS BE LIABLE FOR ANY SPECIAL, INCIDENTAL, INDIRECT, OR
15 CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT LIMITATION, DAMAGES FOR
16 LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION,
17 OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR INABILITY TO USE
18 THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
19 DAMAGES.
20 
21 ------------------------------------------------------------------------------
22  INTEGRATION CHECKLIST
23 ------------------------------------------------------------------------------
24 (1.)
25 In the shader source, setup defines for the desired configuration.
26 When providing multiple shaders (for different presets),
27 simply setup the defines differently in multiple files.
28 Example,
29 
30  #define FXAA_PC 1
31  #define FXAA_HLSL_5 1
32  #define FXAA_QUALITY__PRESET 12
33 
34 Or,
35 
36  #define FXAA_360 1
37 
38 Or,
39 
40  #define FXAA_PS3 1
41 
42 Etc.
43 
44 (2.)
45 Then include this file,
46 
47  #include "Fxaa3_11.h"
48 
49 (3.)
50 Then call the FXAA pixel shader from within your desired shader.
51 Look at the FXAA Quality FxaaPixelShader() for docs on inputs.
52 As of FXAA 3.11 all inputs for all shaders are the same
53 to enable easy porting between platforms.
54 
55  return FxaaPixelShader(...);
56 
57 (4.)
58 Ensure the pass prior to FXAA outputs RGBL (see next section).
59 Or use,
60 
61  #define FXAA_GREEN_AS_LUMA 1
62 
63 (5.)
64 Setup engine to provide the following constants
65 which are used in the FxaaPixelShader() inputs,
66 
67  FxaaFloat2 fxaaQualityRcpFrame,
68  FxaaFloat4 fxaaConsoleRcpFrameOpt,
69  FxaaFloat4 fxaaConsoleRcpFrameOpt2,
70  FxaaFloat4 fxaaConsole360RcpFrameOpt2,
71  FxaaFloat fxaaQualitySubpix,
72  FxaaFloat fxaaQualityEdgeThreshold,
73  FxaaFloat fxaaQualityEdgeThresholdMin,
74  FxaaFloat fxaaConsoleEdgeSharpness,
75  FxaaFloat fxaaConsoleEdgeThreshold,
76  FxaaFloat fxaaConsoleEdgeThresholdMin,
77  FxaaFloat4 fxaaConsole360ConstDir
78 
79 Look at the FXAA Quality FxaaPixelShader() for docs on inputs.
80 
81 (6.)
82 Have FXAA vertex shader run as a full screen triangle,
83 and output "pos" and "fxaaConsolePosPos"
84 such that inputs in the pixel shader provide,
85 
86  // {xy} = center of pixel
87  FxaaFloat2 pos,
88 
89  // {xy__} = upper left of pixel
90  // {__zw} = lower right of pixel
91  FxaaFloat4 fxaaConsolePosPos,
92 
93 (7.)
94 Ensure the texture sampler(s) used by FXAA are set to bilinear filtering.
95 
96 
97 ------------------------------------------------------------------------------
98  INTEGRATION - RGBL AND COLORSPACE
99 ------------------------------------------------------------------------------
100 FXAA3 requires RGBL as input unless the following is set,
101 
102  #define FXAA_GREEN_AS_LUMA 1
103 
104 In which case the engine uses green in place of luma,
105 and requires that the RGB input is in a non-linear colorspace.
106 
107 RGB should be LDR (low dynamic range).
108 Specifically do FXAA after tonemapping.
109 
110 RGB data as returned by a texture fetch can be non-linear,
111 or linear when FXAA_GREEN_AS_LUMA is not set.
112 Note an "sRGB format" texture counts as linear,
113 because the result of a texture fetch is linear data.
114 Regular "RGBA8" textures in the sRGB colorspace are non-linear.
115 
116 If FXAA_GREEN_AS_LUMA is not set,
117 luma must be stored in the alpha channel prior to running FXAA.
118 This luma should be in a perceptual space (could be gamma 2.0).
119 Example pass before FXAA where output is gamma 2.0 encoded,
120 
121  color.rgb = ToneMap(color.rgb); // linear color output
122  color.rgb = sqrt(color.rgb); // gamma 2.0 color output
123  return color;
124 
125 To use FXAA,
126 
127  color.rgb = ToneMap(color.rgb); // linear color output
128  color.rgb = sqrt(color.rgb); // gamma 2.0 color output
129  color.a = dot(color.rgb, FxaaFloat3(0.299, 0.587, 0.114)); // compute luma
130  return color;
131 
132 Another example where output is linear encoded,
133 say for instance writing to an sRGB formatted render target,
134 where the render target does the conversion back to sRGB after blending,
135 
136  color.rgb = ToneMap(color.rgb); // linear color output
137  return color;
138 
139 To use FXAA,
140 
141  color.rgb = ToneMap(color.rgb); // linear color output
142  color.a = sqrt(dot(color.rgb, FxaaFloat3(0.299, 0.587, 0.114))); // compute luma
143  return color;
144 
145 Getting luma correct is required for the algorithm to work correctly.
146 
147 
148 ------------------------------------------------------------------------------
149  BEING LINEARLY CORRECT?
150 ------------------------------------------------------------------------------
151 Applying FXAA to a framebuffer with linear RGB color will look worse.
152 This is very counterintuitive, but happens to be true in this case.
153 The reason is that dithering artifacts will be more visible
154 in a linear colorspace.
155 
156 
157 ------------------------------------------------------------------------------
158  COMPLEX INTEGRATION
159 ------------------------------------------------------------------------------
160 Q. What if the engine is blending into RGB before wanting to run FXAA?
161 
162 A. In the last opaque pass prior to FXAA,
163  have the pass write out luma into alpha.
164  Then blend into RGB only.
165  FXAA should be able to run ok
166  assuming the blending pass did not add any aliasing.
167  This should be the common case for particles and common blending passes.
168 
169 A. Or use FXAA_GREEN_AS_LUMA.
170 
171 ============================================================================*/
172 
173 /*============================================================================
174 
175  INTEGRATION KNOBS
176 
177 ============================================================================*/
178 //
179 // FXAA_PS3 and FXAA_360 choose the console algorithm (FXAA3 CONSOLE).
180 // FXAA_360_OPT is a prototype for the new optimized 360 version.
181 //
182 // 1 = Use API.
183 // 0 = Don't use API.
184 //
185 /*--------------------------------------------------------------------------*/
186 #ifndef FXAA_PS3
187  #define FXAA_PS3 0
188 #endif
189 /*--------------------------------------------------------------------------*/
190 #ifndef FXAA_360
191  #define FXAA_360 0
192 #endif
193 /*--------------------------------------------------------------------------*/
194 #ifndef FXAA_360_OPT
195  #define FXAA_360_OPT 0
196 #endif
197 /*==========================================================================*/
198 #ifndef FXAA_PC
199  //
200  // FXAA Quality
201  // The high quality PC algorithm.
202  //
203  #define FXAA_PC 0
204 #endif
205 /*--------------------------------------------------------------------------*/
206 #ifndef FXAA_PC_CONSOLE
207  //
208  // The console algorithm for PC is included
209  // for developers targeting really low spec machines.
210  // Likely better to just run FXAA_PC, and use a really low preset.
211  //
212  #define FXAA_PC_CONSOLE 0
213 #endif
214 /*--------------------------------------------------------------------------*/
215 #ifndef FXAA_GLSL_120
216  #define FXAA_GLSL_120 0
217 #endif
218 /*--------------------------------------------------------------------------*/
219 #ifndef FXAA_GLSL_130
220  #define FXAA_GLSL_130 0
221 #endif
222 /*--------------------------------------------------------------------------*/
223 #ifndef FXAA_HLSL_3
224  #define FXAA_HLSL_3 0
225 #endif
226 /*--------------------------------------------------------------------------*/
227 #ifndef FXAA_HLSL_4
228  #define FXAA_HLSL_4 0
229 #endif
230 /*--------------------------------------------------------------------------*/
231 #ifndef FXAA_HLSL_5
232  #define FXAA_HLSL_5 0
233 #endif
234 /*==========================================================================*/
235 #ifndef FXAA_GREEN_AS_LUMA
236  //
237  // For those using non-linear color,
238  // and either not able to get luma in alpha, or not wanting to,
239  // this enables FXAA to run using green as a proxy for luma.
240  // So with this enabled, no need to pack luma in alpha.
241  //
242  // This will turn off AA on anything which lacks some amount of green.
243  // Pure red and blue or combination of only R and B, will get no AA.
244  //
245  // Might want to lower the settings for both,
246  // fxaaConsoleEdgeThresholdMin
247  // fxaaQualityEdgeThresholdMin
248  // In order to ensure AA does not get turned off on colors
249  // which contain a minor amount of green.
250  //
251  // 1 = On.
252  // 0 = Off.
253  //
254  #define FXAA_GREEN_AS_LUMA 0
255 #endif
256 /*--------------------------------------------------------------------------*/
257 #ifndef FXAA_EARLY_EXIT
258  //
259  // Controls algorithm's early exit path.
260  // On PS3 turning this ON adds 2 cycles to the shader.
261  // On 360 turning this OFF adds 10ths of a millisecond to the shader.
262  // Turning this off on console will result in a more blurry image.
263  // So this defaults to on.
264  //
265  // 1 = On.
266  // 0 = Off.
267  //
268  #define FXAA_EARLY_EXIT 1
269 #endif
270 /*--------------------------------------------------------------------------*/
271 #ifndef FXAA_DISCARD
272  //
273  // Only valid for PC OpenGL currently.
274  // Probably will not work when FXAA_GREEN_AS_LUMA = 1.
275  //
276  // 1 = Use discard on pixels which don't need AA.
277  // For APIs which enable concurrent TEX+ROP from same surface.
278  // 0 = Return unchanged color on pixels which don't need AA.
279  //
280  #define FXAA_DISCARD 0
281 #endif
282 /*--------------------------------------------------------------------------*/
283 #ifndef FXAA_FAST_PIXEL_OFFSET
284  //
285  // Used for GLSL 120 only.
286  // If not set by the user, defaults to 1 when any of the extensions tested below is defined, else 0.
287  // 1 = GL API supports fast pixel offsets (FxaaTexOff uses texture2DLodOffset)
288  // 0 = do not use fast pixel offsets (FxaaTexOff adds the scaled offset to the coordinate)
289  //
290  #ifdef GL_EXT_gpu_shader4
291  #define FXAA_FAST_PIXEL_OFFSET 1
292  #endif
293  #ifdef GL_NV_gpu_shader5
294  #define FXAA_FAST_PIXEL_OFFSET 1
295  #endif
296  #ifdef GL_ARB_gpu_shader5
297  #define FXAA_FAST_PIXEL_OFFSET 1
298  #endif
299  #ifndef FXAA_FAST_PIXEL_OFFSET
300  #define FXAA_FAST_PIXEL_OFFSET 0
301  #endif
302 #endif
303 /*--------------------------------------------------------------------------*/
304 #ifndef FXAA_GATHER4_ALPHA
305  //
306  // 1 = API supports gather4 on alpha channel.
307  // 0 = API does not support gather4 on alpha channel.
308  // If not set by the user, defaults to 1 for FXAA_HLSL_5 or when GL_ARB_gpu_shader5 / GL_NV_gpu_shader5 is defined, else 0.
309  #if (FXAA_HLSL_5 == 1)
310  #define FXAA_GATHER4_ALPHA 1
311  #endif
312  #ifdef GL_ARB_gpu_shader5
313  #define FXAA_GATHER4_ALPHA 1
314  #endif
315  #ifdef GL_NV_gpu_shader5
316  #define FXAA_GATHER4_ALPHA 1
317  #endif
318  #ifndef FXAA_GATHER4_ALPHA
319  #define FXAA_GATHER4_ALPHA 0
320  #endif
321 #endif
322 
323 /*============================================================================
324  FXAA CONSOLE PS3 - TUNING KNOBS
325 ============================================================================*/
326 #ifndef FXAA_CONSOLE__PS3_EDGE_SHARPNESS
327  //
328  // Controls the sharpness of edges on PS3 only.
329  // Non-PS3 tuning is done with shader input.
330  //
331  // Due to the PS3 being ALU bound,
332  // there are only three safe values here: 2 and 4 and 8.
333  // These options use the shader's ability to do a free *|/ by 2|4|8.
334  //
335  // 8.0 is sharper
336  // 4.0 is softer
337  // 2.0 is really soft (good for vector graphics inputs)
338  //
339  #if 1
340  #define FXAA_CONSOLE__PS3_EDGE_SHARPNESS 8.0
341  #endif
342  #if 0
343  #define FXAA_CONSOLE__PS3_EDGE_SHARPNESS 4.0
344  #endif
345  #if 0
346  #define FXAA_CONSOLE__PS3_EDGE_SHARPNESS 2.0
347  #endif
348 #endif
349 /*--------------------------------------------------------------------------*/
350 #ifndef FXAA_CONSOLE__PS3_EDGE_THRESHOLD
351  //
352  // Only affects PS3.
353  // Non-PS3 tuning is done with shader input.
354  //
355  // The minimum amount of local contrast required to apply algorithm.
356  // The console setting has a different mapping than the quality setting.
357  //
358  // This only applies when FXAA_EARLY_EXIT is 1.
359  //
360  // Due to the PS3 being ALU bound,
361  // there are only two safe values here: 0.25 and 0.125.
362  // These options use the shader's ability to do a free *|/ by 2|4|8.
363  //
364  // 0.125 leaves less aliasing, but is softer
365  // 0.25 leaves more aliasing, and is sharper
366  //
367  #if 1
368  #define FXAA_CONSOLE__PS3_EDGE_THRESHOLD 0.125
369  #else
370  #define FXAA_CONSOLE__PS3_EDGE_THRESHOLD 0.25
371  #endif
372 #endif
373 
374 /*============================================================================
375  FXAA QUALITY - TUNING KNOBS
376 ------------------------------------------------------------------------------
377 NOTE the other tuning knobs are now in the shader function inputs!
378 ============================================================================*/
379 #ifndef FXAA_QUALITY__PRESET
380  //
381  // Choose the quality preset.
382  // This needs to be compiled into the shader as it affects code.
383  // Best option to include multiple presets is to
384  // in each shader define the preset, then include this file.
385  //
386  // OPTIONS
387  // -----------------------------------------------------------------------
388  // 10 to 15 - default medium dither (10=fastest, 15=highest quality)
389  // 20 to 29 - less dither, more expensive (20=fastest, 29=highest quality)
390  // 39 - no dither, very expensive
391  //
392  // NOTES
393  // -----------------------------------------------------------------------
394  // 12 = slightly faster than FXAA 3.9 and higher edge quality (default)
395  // 13 = about same speed as FXAA 3.9 and better than 12
396  // 23 = closest to FXAA 3.9 visually and performance wise
397  // _ = the lowest digit is directly related to performance
398  // _ = the highest digit is directly related to style
399  //
400  #define FXAA_QUALITY__PRESET 12
401 #endif
402 
403 
404 /*============================================================================
405 
406  FXAA QUALITY - PRESETS
407 
408 ============================================================================*/
409 
410 /*============================================================================
411  FXAA QUALITY - MEDIUM DITHER PRESETS
412 ============================================================================*/
413 #if (FXAA_QUALITY__PRESET == 10)
414  #define FXAA_QUALITY__PS 3
415  #define FXAA_QUALITY__P0 1.5
416  #define FXAA_QUALITY__P1 3.0
417  #define FXAA_QUALITY__P2 12.0
418 #endif
419 /*--------------------------------------------------------------------------*/
420 #if (FXAA_QUALITY__PRESET == 11)
421  #define FXAA_QUALITY__PS 4
422  #define FXAA_QUALITY__P0 1.0
423  #define FXAA_QUALITY__P1 1.5
424  #define FXAA_QUALITY__P2 3.0
425  #define FXAA_QUALITY__P3 12.0
426 #endif
427 /*--------------------------------------------------------------------------*/
428 #if (FXAA_QUALITY__PRESET == 12)
429  #define FXAA_QUALITY__PS 5
430  #define FXAA_QUALITY__P0 1.0
431  #define FXAA_QUALITY__P1 1.5
432  #define FXAA_QUALITY__P2 2.0
433  #define FXAA_QUALITY__P3 4.0
434  #define FXAA_QUALITY__P4 12.0
435 #endif
436 /*--------------------------------------------------------------------------*/
437 #if (FXAA_QUALITY__PRESET == 13)
438  #define FXAA_QUALITY__PS 6
439  #define FXAA_QUALITY__P0 1.0
440  #define FXAA_QUALITY__P1 1.5
441  #define FXAA_QUALITY__P2 2.0
442  #define FXAA_QUALITY__P3 2.0
443  #define FXAA_QUALITY__P4 4.0
444  #define FXAA_QUALITY__P5 12.0
445 #endif
446 /*--------------------------------------------------------------------------*/
447 #if (FXAA_QUALITY__PRESET == 14)
448  #define FXAA_QUALITY__PS 7
449  #define FXAA_QUALITY__P0 1.0
450  #define FXAA_QUALITY__P1 1.5
451  #define FXAA_QUALITY__P2 2.0
452  #define FXAA_QUALITY__P3 2.0
453  #define FXAA_QUALITY__P4 2.0
454  #define FXAA_QUALITY__P5 4.0
455  #define FXAA_QUALITY__P6 12.0
456 #endif
457 /*--------------------------------------------------------------------------*/
458 #if (FXAA_QUALITY__PRESET == 15)
459  #define FXAA_QUALITY__PS 8
460  #define FXAA_QUALITY__P0 1.0
461  #define FXAA_QUALITY__P1 1.5
462  #define FXAA_QUALITY__P2 2.0
463  #define FXAA_QUALITY__P3 2.0
464  #define FXAA_QUALITY__P4 2.0
465  #define FXAA_QUALITY__P5 2.0
466  #define FXAA_QUALITY__P6 4.0
467  #define FXAA_QUALITY__P7 12.0
468 #endif
469 
470 /*============================================================================
471  FXAA QUALITY - LOW DITHER PRESETS
472 ============================================================================*/
473 #if (FXAA_QUALITY__PRESET == 20)
474  #define FXAA_QUALITY__PS 3
475  #define FXAA_QUALITY__P0 1.5
476  #define FXAA_QUALITY__P1 2.0
477  #define FXAA_QUALITY__P2 8.0
478 #endif
479 /*--------------------------------------------------------------------------*/
480 #if (FXAA_QUALITY__PRESET == 21)
481  #define FXAA_QUALITY__PS 4
482  #define FXAA_QUALITY__P0 1.0
483  #define FXAA_QUALITY__P1 1.5
484  #define FXAA_QUALITY__P2 2.0
485  #define FXAA_QUALITY__P3 8.0
486 #endif
487 /*--------------------------------------------------------------------------*/
488 #if (FXAA_QUALITY__PRESET == 22)
489  #define FXAA_QUALITY__PS 5
490  #define FXAA_QUALITY__P0 1.0
491  #define FXAA_QUALITY__P1 1.5
492  #define FXAA_QUALITY__P2 2.0
493  #define FXAA_QUALITY__P3 2.0
494  #define FXAA_QUALITY__P4 8.0
495 #endif
496 /*--------------------------------------------------------------------------*/
497 #if (FXAA_QUALITY__PRESET == 23)
498  #define FXAA_QUALITY__PS 6
499  #define FXAA_QUALITY__P0 1.0
500  #define FXAA_QUALITY__P1 1.5
501  #define FXAA_QUALITY__P2 2.0
502  #define FXAA_QUALITY__P3 2.0
503  #define FXAA_QUALITY__P4 2.0
504  #define FXAA_QUALITY__P5 8.0
505 #endif
506 /*--------------------------------------------------------------------------*/
507 #if (FXAA_QUALITY__PRESET == 24)
508  #define FXAA_QUALITY__PS 7
509  #define FXAA_QUALITY__P0 1.0
510  #define FXAA_QUALITY__P1 1.5
511  #define FXAA_QUALITY__P2 2.0
512  #define FXAA_QUALITY__P3 2.0
513  #define FXAA_QUALITY__P4 2.0
514  #define FXAA_QUALITY__P5 3.0
515  #define FXAA_QUALITY__P6 8.0
516 #endif
517 /*--------------------------------------------------------------------------*/
518 #if (FXAA_QUALITY__PRESET == 25)
519  #define FXAA_QUALITY__PS 8
520  #define FXAA_QUALITY__P0 1.0
521  #define FXAA_QUALITY__P1 1.5
522  #define FXAA_QUALITY__P2 2.0
523  #define FXAA_QUALITY__P3 2.0
524  #define FXAA_QUALITY__P4 2.0
525  #define FXAA_QUALITY__P5 2.0
526  #define FXAA_QUALITY__P6 4.0
527  #define FXAA_QUALITY__P7 8.0
528 #endif
529 /*--------------------------------------------------------------------------*/
530 #if (FXAA_QUALITY__PRESET == 26)
531  #define FXAA_QUALITY__PS 9
532  #define FXAA_QUALITY__P0 1.0
533  #define FXAA_QUALITY__P1 1.5
534  #define FXAA_QUALITY__P2 2.0
535  #define FXAA_QUALITY__P3 2.0
536  #define FXAA_QUALITY__P4 2.0
537  #define FXAA_QUALITY__P5 2.0
538  #define FXAA_QUALITY__P6 2.0
539  #define FXAA_QUALITY__P7 4.0
540  #define FXAA_QUALITY__P8 8.0
541 #endif
542 /*--------------------------------------------------------------------------*/
543 #if (FXAA_QUALITY__PRESET == 27)
544  #define FXAA_QUALITY__PS 10
545  #define FXAA_QUALITY__P0 1.0
546  #define FXAA_QUALITY__P1 1.5
547  #define FXAA_QUALITY__P2 2.0
548  #define FXAA_QUALITY__P3 2.0
549  #define FXAA_QUALITY__P4 2.0
550  #define FXAA_QUALITY__P5 2.0
551  #define FXAA_QUALITY__P6 2.0
552  #define FXAA_QUALITY__P7 2.0
553  #define FXAA_QUALITY__P8 4.0
554  #define FXAA_QUALITY__P9 8.0
555 #endif
556 /*--------------------------------------------------------------------------*/
557 #if (FXAA_QUALITY__PRESET == 28)
558  #define FXAA_QUALITY__PS 11
559  #define FXAA_QUALITY__P0 1.0
560  #define FXAA_QUALITY__P1 1.5
561  #define FXAA_QUALITY__P2 2.0
562  #define FXAA_QUALITY__P3 2.0
563  #define FXAA_QUALITY__P4 2.0
564  #define FXAA_QUALITY__P5 2.0
565  #define FXAA_QUALITY__P6 2.0
566  #define FXAA_QUALITY__P7 2.0
567  #define FXAA_QUALITY__P8 2.0
568  #define FXAA_QUALITY__P9 4.0
569  #define FXAA_QUALITY__P10 8.0
570 #endif
571 /*--------------------------------------------------------------------------*/
572 #if (FXAA_QUALITY__PRESET == 29)
573  #define FXAA_QUALITY__PS 12
574  #define FXAA_QUALITY__P0 1.0
575  #define FXAA_QUALITY__P1 1.5
576  #define FXAA_QUALITY__P2 2.0
577  #define FXAA_QUALITY__P3 2.0
578  #define FXAA_QUALITY__P4 2.0
579  #define FXAA_QUALITY__P5 2.0
580  #define FXAA_QUALITY__P6 2.0
581  #define FXAA_QUALITY__P7 2.0
582  #define FXAA_QUALITY__P8 2.0
583  #define FXAA_QUALITY__P9 2.0
584  #define FXAA_QUALITY__P10 4.0
585  #define FXAA_QUALITY__P11 8.0
586 #endif
587 
588 /*============================================================================
589  FXAA QUALITY - EXTREME QUALITY
590 ============================================================================*/
591 #if (FXAA_QUALITY__PRESET == 39)
592  #define FXAA_QUALITY__PS 12
593  #define FXAA_QUALITY__P0 1.0
594  #define FXAA_QUALITY__P1 1.0
595  #define FXAA_QUALITY__P2 1.0
596  #define FXAA_QUALITY__P3 1.0
597  #define FXAA_QUALITY__P4 1.0
598  #define FXAA_QUALITY__P5 1.5
599  #define FXAA_QUALITY__P6 2.0
600  #define FXAA_QUALITY__P7 2.0
601  #define FXAA_QUALITY__P8 2.0
602  #define FXAA_QUALITY__P9 2.0
603  #define FXAA_QUALITY__P10 4.0
604  #define FXAA_QUALITY__P11 8.0
605 #endif
606 
607 
608 
609 /*============================================================================
610 
611  API PORTING
612 
613 ============================================================================*/
614 #if (FXAA_GLSL_120 == 1) || (FXAA_GLSL_130 == 1)
615  #define FxaaBool bool
616  #define FxaaDiscard discard
617  #define FxaaFloat float
618  #define FxaaFloat2 vec2
619  #define FxaaFloat3 vec3
620  #define FxaaFloat4 vec4
621  #define FxaaHalf float
622  #define FxaaHalf2 vec2
623  #define FxaaHalf3 vec3
624  #define FxaaHalf4 vec4
625  #define FxaaInt2 ivec2
626  #define FxaaSat(x) clamp(x, 0.0, 1.0)
627  #define FxaaTex sampler2D
628 #else
629  #define FxaaBool bool
630  #define FxaaDiscard clip(-1)
631  #define FxaaFloat float
632  #define FxaaFloat2 float2
633  #define FxaaFloat3 float3
634  #define FxaaFloat4 float4
635  #define FxaaHalf half
636  #define FxaaHalf2 half2
637  #define FxaaHalf3 half3
638  #define FxaaHalf4 half4
639  #define FxaaSat(x) saturate(x)
640 #endif
641 /*--------------------------------------------------------------------------*/
642 #if (FXAA_GLSL_120 == 1)
643  // Requires,
644  // #version 120
645  // And at least,
646  // #extension GL_EXT_gpu_shader4 : enable
647  // (or set FXAA_FAST_PIXEL_OFFSET 0 to work like DX9)
648  #define FxaaTexTop(t, p) texture2DLod(t, p, 0.0)
649  #if (FXAA_FAST_PIXEL_OFFSET == 1)
650  #define FxaaTexOff(t, p, o, r) texture2DLodOffset(t, p, 0.0, o)
651  #else
652  #define FxaaTexOff(t, p, o, r) texture2DLod(t, p + (o * r), 0.0)
653  #endif
654  #if (FXAA_GATHER4_ALPHA == 1)
655  // use #extension GL_ARB_gpu_shader5 : enable
656  #define FxaaTexAlpha4(t, p) textureGather(t, p, 3)
657  #define FxaaTexOffAlpha4(t, p, o) textureGatherOffset(t, p, o, 3)
658  #define FxaaTexGreen4(t, p) textureGather(t, p, 1)
659  #define FxaaTexOffGreen4(t, p, o) textureGatherOffset(t, p, o, 1)
660  #endif
661 #endif
662 /*--------------------------------------------------------------------------*/
663 #if (FXAA_GLSL_130 == 1)
664  // Requires "#version 130" or better
665  #define FxaaTexTop(t, p) textureLod(t, p, 0.0)
666  #define FxaaTexOff(t, p, o, r) textureLodOffset(t, p, 0.0, o)
667  #if (FXAA_GATHER4_ALPHA == 1)
668  // use #extension GL_ARB_gpu_shader5 : enable
669  #define FxaaTexAlpha4(t, p) textureGather(t, p, 3)
670  #define FxaaTexOffAlpha4(t, p, o) textureGatherOffset(t, p, o, 3)
671  #define FxaaTexGreen4(t, p) textureGather(t, p, 1)
672  #define FxaaTexOffGreen4(t, p, o) textureGatherOffset(t, p, o, 1)
673  #endif
674 #endif
675 /*--------------------------------------------------------------------------*/
676 #if (FXAA_HLSL_3 == 1) || (FXAA_360 == 1) || (FXAA_PS3 == 1)
677  #define FxaaInt2 float2
678  #define FxaaTex sampler2D
679  #define FxaaTexTop(t, p) tex2Dlod(t, float4(p, 0.0, 0.0))
680  #define FxaaTexOff(t, p, o, r) tex2Dlod(t, float4(p + (o * r), 0, 0))
681 #endif
682 /*--------------------------------------------------------------------------*/
683 #if (FXAA_HLSL_4 == 1)
684  #define FxaaInt2 int2
685  struct FxaaTex { SamplerState smpl; Texture2D tex; };
686  #define FxaaTexTop(t, p) t.tex.SampleLevel(t.smpl, p, 0.0)
687  #define FxaaTexOff(t, p, o, r) t.tex.SampleLevel(t.smpl, p, 0.0, o)
688 #endif
689 /*--------------------------------------------------------------------------*/
690 #if (FXAA_HLSL_5 == 1)
691  #define FxaaInt2 int2
692  struct FxaaTex { SamplerState smpl; Texture2D tex; };
693  #define FxaaTexTop(t, p) t.tex.SampleLevel(t.smpl, p, 0.0)
694  #define FxaaTexOff(t, p, o, r) t.tex.SampleLevel(t.smpl, p, 0.0, o)
695  #define FxaaTexAlpha4(t, p) t.tex.GatherAlpha(t.smpl, p)
696  #define FxaaTexOffAlpha4(t, p, o) t.tex.GatherAlpha(t.smpl, p, o)
697  #define FxaaTexGreen4(t, p) t.tex.GatherGreen(t.smpl, p)
698  #define FxaaTexOffGreen4(t, p, o) t.tex.GatherGreen(t.smpl, p, o)
699 #endif
700 
701 
702 /*============================================================================
703  GREEN AS LUMA OPTION SUPPORT FUNCTION
704 ============================================================================*/
705 #if (FXAA_GREEN_AS_LUMA == 0)
706  FxaaFloat FxaaLuma(FxaaFloat4 rgba) { return rgba.w; } // luma is read from the alpha (w) component
707 #else
708  FxaaFloat FxaaLuma(FxaaFloat4 rgba) { return rgba.y; } // green (y) component stands in for luma
709 #endif
710 
711 
712 
713 
714 /*============================================================================
715 
716  FXAA3 QUALITY - PC
717 
718 ============================================================================*/
719 #if (FXAA_PC == 1)
720 /*--------------------------------------------------------------------------*/
721 FxaaFloat4 FxaaPixelShader(
722  //
723  // Use noperspective interpolation here (turn off perspective interpolation).
724  // {xy} = center of pixel
725  FxaaFloat2 pos,
726  //
727  // Used only for FXAA Console, and not used on the 360 version.
728  // Use noperspective interpolation here (turn off perspective interpolation).
729  // {xy__} = upper left of pixel
730  // {__zw} = lower right of pixel
731  FxaaFloat4 fxaaConsolePosPos,
732  //
733  // Input color texture.
734  // {rgb_} = color in linear or perceptual color space
735  // if (FXAA_GREEN_AS_LUMA == 0)
736  // {___a} = luma in perceptual color space (not linear)
737  FxaaTex tex,
738  //
739  // Only used on the optimized 360 version of FXAA Console.
740  // For everything but 360, just use the same input here as for "tex".
741  // For 360, same texture, just alias with a 2nd sampler.
742  // This sampler needs to have an exponent bias of -1.
743  FxaaTex fxaaConsole360TexExpBiasNegOne,
744  //
745  // Only used on the optimized 360 version of FXAA Console.
746  // For everything but 360, just use the same input here as for "tex".
747  // For 360, same texture, just alias with a 3rd sampler.
748  // This sampler needs to have an exponent bias of -2.
749  FxaaTex fxaaConsole360TexExpBiasNegTwo,
750  //
751  // Only used on FXAA Quality.
752  // This must be from a constant/uniform.
753  // {x_} = 1.0/screenWidthInPixels
754  // {_y} = 1.0/screenHeightInPixels
755  FxaaFloat2 fxaaQualityRcpFrame,
756  //
757  // Only used on FXAA Console.
758  // This must be from a constant/uniform.
759  // This effects sub-pixel AA quality and inversely sharpness.
760  // Where N ranges between,
761  // N = 0.50 (default)
762  // N = 0.33 (sharper)
763  // {x___} = -N/screenWidthInPixels
764  // {_y__} = -N/screenHeightInPixels
765  // {__z_} = N/screenWidthInPixels
766  // {___w} = N/screenHeightInPixels
767  FxaaFloat4 fxaaConsoleRcpFrameOpt,
768  //
769  // Only used on FXAA Console.
770  // Not used on 360, but used on PS3 and PC.
771  // This must be from a constant/uniform.
772  // {x___} = -2.0/screenWidthInPixels
773  // {_y__} = -2.0/screenHeightInPixels
774  // {__z_} = 2.0/screenWidthInPixels
775  // {___w} = 2.0/screenHeightInPixels
776  FxaaFloat4 fxaaConsoleRcpFrameOpt2,
777  //
778  // Only used on FXAA Console.
779  // Only used on 360 in place of fxaaConsoleRcpFrameOpt2.
780  // This must be from a constant/uniform.
781  // {x___} = 8.0/screenWidthInPixels
782  // {_y__} = 8.0/screenHeightInPixels
783  // {__z_} = -4.0/screenWidthInPixels
784  // {___w} = -4.0/screenHeightInPixels
785  FxaaFloat4 fxaaConsole360RcpFrameOpt2,
786  //
787  // Only used on FXAA Quality.
788  // This used to be the FXAA_QUALITY__SUBPIX define.
789  // It is here now to allow easier tuning.
790  // Choose the amount of sub-pixel aliasing removal.
791  // This can effect sharpness.
792  // 1.00 - upper limit (softer)
793  // 0.75 - default amount of filtering
794  // 0.50 - lower limit (sharper, less sub-pixel aliasing removal)
795  // 0.25 - almost off
796  // 0.00 - completely off
797  FxaaFloat fxaaQualitySubpix,
798  //
799  // Only used on FXAA Quality.
800  // This used to be the FXAA_QUALITY__EDGE_THRESHOLD define.
801  // It is here now to allow easier tuning.
802  // The minimum amount of local contrast required to apply algorithm.
803  // 0.333 - too little (faster)
804  // 0.250 - low quality
805  // 0.166 - default
806  // 0.125 - high quality
807  // 0.063 - overkill (slower)
808  FxaaFloat fxaaQualityEdgeThreshold,
809  //
810  // Only used on FXAA Quality.
811  // This used to be the FXAA_QUALITY__EDGE_THRESHOLD_MIN define.
812  // It is here now to allow easier tuning.
813  // Trims the algorithm from processing darks.
814  // 0.0833 - upper limit (default, the start of visible unfiltered edges)
815  // 0.0625 - high quality (faster)
816  // 0.0312 - visible limit (slower)
817  // Special notes when using FXAA_GREEN_AS_LUMA,
818  // Likely want to set this to zero.
819  // As colors that are mostly not-green
820  // will appear very dark in the green channel!
821  // Tune by looking at mostly non-green content,
822  // then start at zero and increase until aliasing is a problem.
823  FxaaFloat fxaaQualityEdgeThresholdMin,
824  //
825  // Only used on FXAA Console.
826  // This used to be the FXAA_CONSOLE__EDGE_SHARPNESS define.
827  // It is here now to allow easier tuning.
828  // This does not effect PS3, as this needs to be compiled in.
829  // Use FXAA_CONSOLE__PS3_EDGE_SHARPNESS for PS3.
830  // Due to the PS3 being ALU bound,
831  // there are only three safe values here: 2 and 4 and 8.
832  // These options use the shaders ability to a free *|/ by 2|4|8.
833  // For all other platforms can be a non-power of two.
834  // 8.0 is sharper (default!!!)
835  // 4.0 is softer
836  // 2.0 is really soft (good only for vector graphics inputs)
837  FxaaFloat fxaaConsoleEdgeSharpness,
838  //
839  // Only used on FXAA Console.
840  // This used to be the FXAA_CONSOLE__EDGE_THRESHOLD define.
841  // It is here now to allow easier tuning.
842  // This does not effect PS3, as this needs to be compiled in.
843  // Use FXAA_CONSOLE__PS3_EDGE_THRESHOLD for PS3.
844  // Due to the PS3 being ALU bound,
845  // there are only two safe values here: 1/4 and 1/8.
846  // These options use the shaders ability to a free *|/ by 2|4|8.
847  // The console setting has a different mapping than the quality setting.
848  // Other platforms can use other values.
849  // 0.125 leaves less aliasing, but is softer (default!!!)
850  // 0.25 leaves more aliasing, and is sharper
851  FxaaFloat fxaaConsoleEdgeThreshold,
852  //
853  // Only used on FXAA Console.
854  // This used to be the FXAA_CONSOLE__EDGE_THRESHOLD_MIN define.
855  // It is here now to allow easier tuning.
856  // Trims the algorithm from processing darks.
857  // The console setting has a different mapping than the quality setting.
858  // This only applies when FXAA_EARLY_EXIT is 1.
859  // This does not apply to PS3,
860  // PS3 was simplified to avoid more shader instructions.
861  // 0.06 - faster but more aliasing in darks
862  // 0.05 - default
863  // 0.04 - slower and less aliasing in darks
864  // Special notes when using FXAA_GREEN_AS_LUMA,
865  // Likely want to set this to zero.
866  // As colors that are mostly not-green
867  // will appear very dark in the green channel!
868  // Tune by looking at mostly non-green content,
869  // then start at zero and increase until aliasing is a problem.
870  FxaaFloat fxaaConsoleEdgeThresholdMin,
871  //
872  // Extra constants for 360 FXAA Console only.
873  // Use zeros or anything else for other platforms.
874  // These must be in physical constant registers and NOT immedates.
875  // Immedates will result in compiler un-optimizing.
876  // {xyzw} = float4(1.0, -1.0, 0.25, -0.25)
877  FxaaFloat4 fxaaConsole360ConstDir
878 ) {
879 /*--------------------------------------------------------------------------*/
880  FxaaFloat2 posM;
881  posM.x = pos.x;
882  posM.y = pos.y;
883  #if (FXAA_GATHER4_ALPHA == 1)
884  #if (FXAA_DISCARD == 0)
885  FxaaFloat4 rgbyM = FxaaTexTop(tex, posM);
886  #if (FXAA_GREEN_AS_LUMA == 0)
887  #define lumaM rgbyM.w
888  #else
889  #define lumaM rgbyM.y
890  #endif
891  #endif
892  #if (FXAA_GREEN_AS_LUMA == 0)
893  FxaaFloat4 luma4A = FxaaTexAlpha4(tex, posM);
894  FxaaFloat4 luma4B = FxaaTexOffAlpha4(tex, posM, FxaaInt2(-1, -1));
895  #else
896  FxaaFloat4 luma4A = FxaaTexGreen4(tex, posM);
897  FxaaFloat4 luma4B = FxaaTexOffGreen4(tex, posM, FxaaInt2(-1, -1));
898  #endif
899  #if (FXAA_DISCARD == 1)
900  #define lumaM luma4A.w
901  #endif
902  #define lumaE luma4A.z
903  #define lumaS luma4A.x
904  #define lumaSE luma4A.y
905  #define lumaNW luma4B.w
906  #define lumaN luma4B.z
907  #define lumaW luma4B.x
908  #else
909  FxaaFloat4 rgbyM = FxaaTexTop(tex, posM);
910  #if (FXAA_GREEN_AS_LUMA == 0)
911  #define lumaM rgbyM.w
912  #else
913  #define lumaM rgbyM.y
914  #endif
915  FxaaFloat lumaS = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2( 0, 1), fxaaQualityRcpFrame.xy));
916  FxaaFloat lumaE = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2( 1, 0), fxaaQualityRcpFrame.xy));
917  FxaaFloat lumaN = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2( 0,-1), fxaaQualityRcpFrame.xy));
918  FxaaFloat lumaW = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2(-1, 0), fxaaQualityRcpFrame.xy));
919  #endif
920 /*--------------------------------------------------------------------------*/
921  FxaaFloat maxSM = max(lumaS, lumaM);
922  FxaaFloat minSM = min(lumaS, lumaM);
923  FxaaFloat maxESM = max(lumaE, maxSM);
924  FxaaFloat minESM = min(lumaE, minSM);
925  FxaaFloat maxWN = max(lumaN, lumaW);
926  FxaaFloat minWN = min(lumaN, lumaW);
927  FxaaFloat rangeMax = max(maxWN, maxESM);
928  FxaaFloat rangeMin = min(minWN, minESM);
929  FxaaFloat rangeMaxScaled = rangeMax * fxaaQualityEdgeThreshold;
930  FxaaFloat range = rangeMax - rangeMin;
931  FxaaFloat rangeMaxClamped = max(fxaaQualityEdgeThresholdMin, rangeMaxScaled);
932  FxaaBool earlyExit = range < rangeMaxClamped;
933 /*--------------------------------------------------------------------------*/
934  if(earlyExit)
935  #if (FXAA_DISCARD == 1)
936  FxaaDiscard;
937  #else
938  return rgbyM;
939  #endif
940 /*--------------------------------------------------------------------------*/
941  #if (FXAA_GATHER4_ALPHA == 0)
942  FxaaFloat lumaNW = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2(-1,-1), fxaaQualityRcpFrame.xy));
943  FxaaFloat lumaSE = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2( 1, 1), fxaaQualityRcpFrame.xy));
944  FxaaFloat lumaNE = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2( 1,-1), fxaaQualityRcpFrame.xy));
945  FxaaFloat lumaSW = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2(-1, 1), fxaaQualityRcpFrame.xy));
946  #else
947  FxaaFloat lumaNE = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2(1, -1), fxaaQualityRcpFrame.xy));
948  FxaaFloat lumaSW = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2(-1, 1), fxaaQualityRcpFrame.xy));
949  #endif
950 /*--------------------------------------------------------------------------*/
951  FxaaFloat lumaNS = lumaN + lumaS;
952  FxaaFloat lumaWE = lumaW + lumaE;
953  FxaaFloat subpixRcpRange = 1.0/range;
954  FxaaFloat subpixNSWE = lumaNS + lumaWE;
955  FxaaFloat edgeHorz1 = (-2.0 * lumaM) + lumaNS;
956  FxaaFloat edgeVert1 = (-2.0 * lumaM) + lumaWE;
957 /*--------------------------------------------------------------------------*/
958  FxaaFloat lumaNESE = lumaNE + lumaSE;
959  FxaaFloat lumaNWNE = lumaNW + lumaNE;
960  FxaaFloat edgeHorz2 = (-2.0 * lumaE) + lumaNESE;
961  FxaaFloat edgeVert2 = (-2.0 * lumaN) + lumaNWNE;
962 /*--------------------------------------------------------------------------*/
963  FxaaFloat lumaNWSW = lumaNW + lumaSW;
964  FxaaFloat lumaSWSE = lumaSW + lumaSE;
965  FxaaFloat edgeHorz4 = (abs(edgeHorz1) * 2.0) + abs(edgeHorz2);
966  FxaaFloat edgeVert4 = (abs(edgeVert1) * 2.0) + abs(edgeVert2);
967  FxaaFloat edgeHorz3 = (-2.0 * lumaW) + lumaNWSW;
968  FxaaFloat edgeVert3 = (-2.0 * lumaS) + lumaSWSE;
969  FxaaFloat edgeHorz = abs(edgeHorz3) + edgeHorz4;
970  FxaaFloat edgeVert = abs(edgeVert3) + edgeVert4;
971 /*--------------------------------------------------------------------------*/
972  FxaaFloat subpixNWSWNESE = lumaNWSW + lumaNESE;
973  FxaaFloat lengthSign = fxaaQualityRcpFrame.x;
974  FxaaBool horzSpan = edgeHorz >= edgeVert;
975  FxaaFloat subpixA = subpixNSWE * 2.0 + subpixNWSWNESE;
976 /*--------------------------------------------------------------------------*/
977  if(!horzSpan) lumaN = lumaW;
978  if(!horzSpan) lumaS = lumaE;
979  if(horzSpan) lengthSign = fxaaQualityRcpFrame.y;
980  FxaaFloat subpixB = (subpixA * (1.0/12.0)) - lumaM;
981 /*--------------------------------------------------------------------------*/
982  FxaaFloat gradientN = lumaN - lumaM;
983  FxaaFloat gradientS = lumaS - lumaM;
984  FxaaFloat lumaNN = lumaN + lumaM;
985  FxaaFloat lumaSS = lumaS + lumaM;
986  FxaaBool pairN = abs(gradientN) >= abs(gradientS);
987  FxaaFloat gradient = max(abs(gradientN), abs(gradientS));
988  if(pairN) lengthSign = -lengthSign;
989  FxaaFloat subpixC = FxaaSat(abs(subpixB) * subpixRcpRange);
990 /*--------------------------------------------------------------------------*/
991  FxaaFloat2 posB;
992  posB.x = posM.x;
993  posB.y = posM.y;
994  FxaaFloat2 offNP;
995  offNP.x = (!horzSpan) ? 0.0 : fxaaQualityRcpFrame.x;
996  offNP.y = ( horzSpan) ? 0.0 : fxaaQualityRcpFrame.y;
997  if(!horzSpan) posB.x += lengthSign * 0.5;
998  if( horzSpan) posB.y += lengthSign * 0.5;
999 /*--------------------------------------------------------------------------*/
1000  FxaaFloat2 posN;
1001  posN.x = posB.x - offNP.x * FXAA_QUALITY__P0;
1002  posN.y = posB.y - offNP.y * FXAA_QUALITY__P0;
1003  FxaaFloat2 posP;
1004  posP.x = posB.x + offNP.x * FXAA_QUALITY__P0;
1005  posP.y = posB.y + offNP.y * FXAA_QUALITY__P0;
1006  FxaaFloat subpixD = ((-2.0)*subpixC) + 3.0;
1007  FxaaFloat lumaEndN = FxaaLuma(FxaaTexTop(tex, posN));
1008  FxaaFloat subpixE = subpixC * subpixC;
1009  FxaaFloat lumaEndP = FxaaLuma(FxaaTexTop(tex, posP));
1010 /*--------------------------------------------------------------------------*/
1011  if(!pairN) lumaNN = lumaSS;
1012  FxaaFloat gradientScaled = gradient * 1.0/4.0;
1013  FxaaFloat lumaMM = lumaM - lumaNN * 0.5;
1014  FxaaFloat subpixF = subpixD * subpixE;
1015  FxaaBool lumaMLTZero = lumaMM < 0.0;
1016 /*--------------------------------------------------------------------------*/
1017  lumaEndN -= lumaNN * 0.5;
1018  lumaEndP -= lumaNN * 0.5;
1019  FxaaBool doneN = abs(lumaEndN) >= gradientScaled;
1020  FxaaBool doneP = abs(lumaEndP) >= gradientScaled;
1021  if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P1;
1022  if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P1;
1023  FxaaBool doneNP = (!doneN) || (!doneP);
1024  if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P1;
1025  if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P1;
1026 /*--------------------------------------------------------------------------*/
1027  if(doneNP) {
1028  if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));
1029  if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));
1030  if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
1031  if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
1032  doneN = abs(lumaEndN) >= gradientScaled;
1033  doneP = abs(lumaEndP) >= gradientScaled;
1034  if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P2;
1035  if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P2;
1036  doneNP = (!doneN) || (!doneP);
1037  if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P2;
1038  if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P2;
1039 /*--------------------------------------------------------------------------*/
1040  #if (FXAA_QUALITY__PS > 3)
1041  if(doneNP) {
1042  if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));
1043  if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));
1044  if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
1045  if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
1046  doneN = abs(lumaEndN) >= gradientScaled;
1047  doneP = abs(lumaEndP) >= gradientScaled;
1048  if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P3;
1049  if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P3;
1050  doneNP = (!doneN) || (!doneP);
1051  if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P3;
1052  if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P3;
1053 /*--------------------------------------------------------------------------*/
1054  #if (FXAA_QUALITY__PS > 4)
1055  if(doneNP) {
1056  if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));
1057  if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));
1058  if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
1059  if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
1060  doneN = abs(lumaEndN) >= gradientScaled;
1061  doneP = abs(lumaEndP) >= gradientScaled;
1062  if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P4;
1063  if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P4;
1064  doneNP = (!doneN) || (!doneP);
1065  if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P4;
1066  if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P4;
1067 /*--------------------------------------------------------------------------*/
1068  #if (FXAA_QUALITY__PS > 5)
1069  if(doneNP) {
1070  if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));
1071  if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));
1072  if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
1073  if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
1074  doneN = abs(lumaEndN) >= gradientScaled;
1075  doneP = abs(lumaEndP) >= gradientScaled;
1076  if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P5;
1077  if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P5;
1078  doneNP = (!doneN) || (!doneP);
1079  if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P5;
1080  if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P5;
1081 /*--------------------------------------------------------------------------*/
1082  #if (FXAA_QUALITY__PS > 6)
1083  if(doneNP) {
1084  if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));
1085  if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));
1086  if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
1087  if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
1088  doneN = abs(lumaEndN) >= gradientScaled;
1089  doneP = abs(lumaEndP) >= gradientScaled;
1090  if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P6;
1091  if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P6;
1092  doneNP = (!doneN) || (!doneP);
1093  if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P6;
1094  if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P6;
1095 /*--------------------------------------------------------------------------*/
1096  #if (FXAA_QUALITY__PS > 7)
1097  if(doneNP) {
1098  if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));
1099  if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));
1100  if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
1101  if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
1102  doneN = abs(lumaEndN) >= gradientScaled;
1103  doneP = abs(lumaEndP) >= gradientScaled;
1104  if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P7;
1105  if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P7;
1106  doneNP = (!doneN) || (!doneP);
1107  if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P7;
1108  if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P7;
1109 /*--------------------------------------------------------------------------*/
1110  #if (FXAA_QUALITY__PS > 8)
1111  if(doneNP) {
1112  if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));
1113  if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));
1114  if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
1115  if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
1116  doneN = abs(lumaEndN) >= gradientScaled;
1117  doneP = abs(lumaEndP) >= gradientScaled;
1118  if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P8;
1119  if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P8;
1120  doneNP = (!doneN) || (!doneP);
1121  if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P8;
1122  if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P8;
1123 /*--------------------------------------------------------------------------*/
1124  #if (FXAA_QUALITY__PS > 9)
1125  if(doneNP) {
1126  if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));
1127  if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));
1128  if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
1129  if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
1130  doneN = abs(lumaEndN) >= gradientScaled;
1131  doneP = abs(lumaEndP) >= gradientScaled;
1132  if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P9;
1133  if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P9;
1134  doneNP = (!doneN) || (!doneP);
1135  if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P9;
1136  if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P9;
1137 /*--------------------------------------------------------------------------*/
1138  #if (FXAA_QUALITY__PS > 10)
1139  if(doneNP) {
1140  if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));
1141  if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));
1142  if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
1143  if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
1144  doneN = abs(lumaEndN) >= gradientScaled;
1145  doneP = abs(lumaEndP) >= gradientScaled;
1146  if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P10;
1147  if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P10;
1148  doneNP = (!doneN) || (!doneP);
1149  if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P10;
1150  if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P10;
1151 /*--------------------------------------------------------------------------*/
1152  #if (FXAA_QUALITY__PS > 11)
1153  if(doneNP) {
1154  if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));
1155  if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));
1156  if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
1157  if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
1158  doneN = abs(lumaEndN) >= gradientScaled;
1159  doneP = abs(lumaEndP) >= gradientScaled;
1160  if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P11;
1161  if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P11;
1162  doneNP = (!doneN) || (!doneP);
1163  if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P11;
1164  if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P11;
1165 /*--------------------------------------------------------------------------*/
1166  #if (FXAA_QUALITY__PS > 12)
1167  if(doneNP) {
1168  if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));
1169  if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));
1170  if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
1171  if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
1172  doneN = abs(lumaEndN) >= gradientScaled;
1173  doneP = abs(lumaEndP) >= gradientScaled;
1174  if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P12;
1175  if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P12;
1176  doneNP = (!doneN) || (!doneP);
1177  if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P12;
1178  if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P12;
1179 /*--------------------------------------------------------------------------*/
1180  }
1181  #endif
1182 /*--------------------------------------------------------------------------*/
1183  }
1184  #endif
1185 /*--------------------------------------------------------------------------*/
1186  }
1187  #endif
1188 /*--------------------------------------------------------------------------*/
1189  }
1190  #endif
1191 /*--------------------------------------------------------------------------*/
1192  }
1193  #endif
1194 /*--------------------------------------------------------------------------*/
1195  }
1196  #endif
1197 /*--------------------------------------------------------------------------*/
1198  }
1199  #endif
1200 /*--------------------------------------------------------------------------*/
1201  }
1202  #endif
1203 /*--------------------------------------------------------------------------*/
1204  }
1205  #endif
1206 /*--------------------------------------------------------------------------*/
1207  }
1208  #endif
1209 /*--------------------------------------------------------------------------*/
1210  }
1211 /*--------------------------------------------------------------------------*/
1212  FxaaFloat dstN = posM.x - posN.x;
1213  FxaaFloat dstP = posP.x - posM.x;
1214  if(!horzSpan) dstN = posM.y - posN.y;
1215  if(!horzSpan) dstP = posP.y - posM.y;
1216 /*--------------------------------------------------------------------------*/
1217  FxaaBool goodSpanN = (lumaEndN < 0.0) != lumaMLTZero;
1218  FxaaFloat spanLength = (dstP + dstN);
1219  FxaaBool goodSpanP = (lumaEndP < 0.0) != lumaMLTZero;
1220  FxaaFloat spanLengthRcp = 1.0/spanLength;
1221 /*--------------------------------------------------------------------------*/
1222  FxaaBool directionN = dstN < dstP;
1223  FxaaFloat dst = min(dstN, dstP);
1224  FxaaBool goodSpan = directionN ? goodSpanN : goodSpanP;
1225  FxaaFloat subpixG = subpixF * subpixF;
1226  FxaaFloat pixelOffset = (dst * (-spanLengthRcp)) + 0.5;
1227  FxaaFloat subpixH = subpixG * fxaaQualitySubpix;
1228 /*--------------------------------------------------------------------------*/
1229  FxaaFloat pixelOffsetGood = goodSpan ? pixelOffset : 0.0;
1230  FxaaFloat pixelOffsetSubpix = max(pixelOffsetGood, subpixH);
1231  if(!horzSpan) posM.x += pixelOffsetSubpix * lengthSign;
1232  if( horzSpan) posM.y += pixelOffsetSubpix * lengthSign;
1233  #if (FXAA_DISCARD == 1)
1234  return FxaaTexTop(tex, posM);
1235  #else
1236  return FxaaFloat4(FxaaTexTop(tex, posM).xyz, lumaM);
1237  #endif
1238 }
1239 /*==========================================================================*/
1240 #endif
1241 
1242 
1243 
1244 
1245 /*============================================================================
1246 
1247  FXAA3 CONSOLE - PC VERSION
1248 
1249 ------------------------------------------------------------------------------
1250 Instead of using this on PC, I'd suggest just using FXAA Quality with
1251  #define FXAA_QUALITY__PRESET 10
1252 Or
1253  #define FXAA_QUALITY__PRESET 20
1254 Either is higher quality and almost as fast as this on modern PC GPUs.
1255 ============================================================================*/
#if (FXAA_PC_CONSOLE == 1)
/*--------------------------------------------------------------------------*/
// FXAA Console pixel shader, PC path.
//
// Samples the four diagonal neighbours given by fxaaConsolePosPos plus the
// centre texel at pos. When the local luma range is below the contrast
// threshold the centre texel is returned unchanged; otherwise the result is
// a blend of taps taken along the estimated edge direction (four taps, or
// two taps when the four-tap luma leaves the neighbourhood's luma range).
//
// Inputs read by this variant: pos, fxaaConsolePosPos, tex,
// fxaaConsoleRcpFrameOpt, fxaaConsoleRcpFrameOpt2, fxaaConsoleEdgeSharpness,
// fxaaConsoleEdgeThreshold and fxaaConsoleEdgeThresholdMin. The other
// parameters are not referenced in this body.
FxaaFloat4 FxaaPixelShader(
    // See FXAA Quality FxaaPixelShader() source for docs on Inputs!
    FxaaFloat2 pos,
    FxaaFloat4 fxaaConsolePosPos,
    FxaaTex tex,
    FxaaTex fxaaConsole360TexExpBiasNegOne,
    FxaaTex fxaaConsole360TexExpBiasNegTwo,
    FxaaFloat2 fxaaQualityRcpFrame,
    FxaaFloat4 fxaaConsoleRcpFrameOpt,
    FxaaFloat4 fxaaConsoleRcpFrameOpt2,
    FxaaFloat4 fxaaConsole360RcpFrameOpt2,
    FxaaFloat fxaaQualitySubpix,
    FxaaFloat fxaaQualityEdgeThreshold,
    FxaaFloat fxaaQualityEdgeThresholdMin,
    FxaaFloat fxaaConsoleEdgeSharpness,
    FxaaFloat fxaaConsoleEdgeThreshold,
    FxaaFloat fxaaConsoleEdgeThresholdMin,
    FxaaFloat4 fxaaConsole360ConstDir
) {
    // Diagonal neighbour lumas. The NE sample receives a tiny constant bias
    // (presumably so the edge direction below is never exactly zero before
    // normalize() -- TODO confirm).
    FxaaFloat cornerNw = FxaaLuma(FxaaTexTop(tex, fxaaConsolePosPos.xy));
    FxaaFloat cornerSw = FxaaLuma(FxaaTexTop(tex, fxaaConsolePosPos.xw));
    FxaaFloat cornerNe = FxaaLuma(FxaaTexTop(tex, fxaaConsolePosPos.zy));
    FxaaFloat cornerSe = FxaaLuma(FxaaTexTop(tex, fxaaConsolePosPos.zw));
    cornerNe += 1.0/384.0;

    // Centre texel; its luma is read from alpha, or from green when
    // FXAA_GREEN_AS_LUMA is enabled.
    FxaaFloat4 centerRgby = FxaaTexTop(tex, pos.xy);
    #if (FXAA_GREEN_AS_LUMA == 0)
        FxaaFloat centerLuma = centerRgby.w;
    #else
        FxaaFloat centerLuma = centerRgby.y;
    #endif

    // Luma range of the four corners.
    FxaaFloat cornerMax = max(max(cornerNe, cornerSe), max(cornerNw, cornerSw));
    FxaaFloat cornerMin = min(min(cornerNe, cornerSe), min(cornerNw, cornerSw));

    // Early exit: contrast (corners + centre) below the clamped threshold.
    FxaaFloat contrastLimit = max(fxaaConsoleEdgeThresholdMin, cornerMax * fxaaConsoleEdgeThreshold);
    FxaaFloat localContrast = max(cornerMax, centerLuma) - min(cornerMin, centerLuma);
    if(localContrast < contrastLimit) {
        return centerRgby;
    }

    // Edge direction from the two diagonal luma differences.
    FxaaFloat diagSwNe = cornerSw - cornerNe;
    FxaaFloat diagSeNw = cornerSe - cornerNw;
    FxaaFloat2 edge;
    edge.x = diagSwNe + diagSeNw;
    edge.y = diagSwNe - diagSeNw;

    // Near taps: one step either side along the unit direction.
    FxaaFloat2 unitDir = normalize(edge.xy);
    FxaaFloat2 nearStep = unitDir * fxaaConsoleRcpFrameOpt.zw;
    FxaaFloat4 nearNeg = FxaaTexTop(tex, pos.xy - nearStep);
    FxaaFloat4 nearPos = FxaaTexTop(tex, pos.xy + nearStep);

    // Far taps: direction stretched by the sharpness term, clamped to +/-2.
    FxaaFloat sharpScale = min(abs(unitDir.x), abs(unitDir.y)) * fxaaConsoleEdgeSharpness;
    FxaaFloat2 wideDir = clamp(unitDir.xy / sharpScale, -2.0, 2.0);
    FxaaFloat2 farStep = wideDir * fxaaConsoleRcpFrameOpt2.zw;
    FxaaFloat4 farNeg = FxaaTexTop(tex, pos.xy - farStep);
    FxaaFloat4 farPos = FxaaTexTop(tex, pos.xy + farStep);

    // nearSum is the un-normalised two-tap sum; blend4 is the four-tap mean.
    FxaaFloat4 nearSum = nearNeg + nearPos;
    FxaaFloat4 blend4 = ((farNeg + farPos) * 0.25) + (nearSum * 0.25);

    // Fall back to the two-tap mean when the four-tap luma is outside the
    // corner luma range.
    #if (FXAA_GREEN_AS_LUMA == 0)
        FxaaFloat blendLuma = blend4.w;
    #else
        FxaaFloat blendLuma = blend4.y;
    #endif
    FxaaBool useTwoTap = (blendLuma < cornerMin) || (blendLuma > cornerMax);
    if(useTwoTap) {
        blend4.xyz = nearSum.xyz * 0.5;
    }
    return blend4;
}
/*==========================================================================*/
#endif
1336 
1337 
1338 
1339 /*============================================================================
1340 
1341  FXAA3 CONSOLE - 360 PIXEL SHADER
1342 
1343 ------------------------------------------------------------------------------
1344 This optimized version thanks to suggestions from Andy Luedke.
1345 Should be fully tex bound in all cases.
1346 As of the FXAA 3.11 release, I have still not tested this code,
1347 however I fixed a bug which was in both FXAA 3.9 and FXAA 3.10.
1348 And note this is replacing the old unoptimized version.
1349 If it does not work, please let me know so I can fix it.
1350 ============================================================================*/
#if (FXAA_360 == 1)
/*--------------------------------------------------------------------------*/
// FXAA Console pixel shader, Xbox 360 path.
//
// Fetches the four diagonal neighbour lumas with tfetch2D, returns the
// unfiltered centre texel when local contrast is below the threshold, and
// otherwise returns a two-tap or four-tap blend along the estimated edge
// direction.
//
// Inputs read by this variant: pos, tex, fxaaConsole360TexExpBiasNegOne,
// fxaaConsole360TexExpBiasNegTwo, fxaaConsoleRcpFrameOpt,
// fxaaConsole360RcpFrameOpt2, fxaaConsoleEdgeSharpness,
// fxaaConsoleEdgeThreshold, fxaaConsoleEdgeThresholdMin and
// fxaaConsole360ConstDir. The other parameters are not referenced here;
// the parameter list matches the other FxaaPixelShader() variants.
[reduceTempRegUsage(4)]
float4 FxaaPixelShader(
    // See FXAA Quality FxaaPixelShader() source for docs on Inputs!
    FxaaFloat2 pos,
    FxaaFloat4 fxaaConsolePosPos,
    FxaaTex tex,
    FxaaTex fxaaConsole360TexExpBiasNegOne,
    FxaaTex fxaaConsole360TexExpBiasNegTwo,
    FxaaFloat2 fxaaQualityRcpFrame,
    FxaaFloat4 fxaaConsoleRcpFrameOpt,
    FxaaFloat4 fxaaConsoleRcpFrameOpt2,
    FxaaFloat4 fxaaConsole360RcpFrameOpt2,
    FxaaFloat fxaaQualitySubpix,
    FxaaFloat fxaaQualityEdgeThreshold,
    FxaaFloat fxaaQualityEdgeThresholdMin,
    FxaaFloat fxaaConsoleEdgeSharpness,
    FxaaFloat fxaaConsoleEdgeThreshold,
    FxaaFloat fxaaConsoleEdgeThresholdMin,
    FxaaFloat4 fxaaConsole360ConstDir
) {
/*--------------------------------------------------------------------------*/
    // Pack the NW, NE, SW, SE lumas into .x, .y, .z, .w. The luma channel is
    // alpha, or green when FXAA_GREEN_AS_LUMA is enabled.
    float4 lumaNwNeSwSe;
    #if (FXAA_GREEN_AS_LUMA == 0)
        asm {
            tfetch2D lumaNwNeSwSe.w___, tex, pos.xy, OffsetX = -0.5, OffsetY = -0.5, UseComputedLOD=false
            tfetch2D lumaNwNeSwSe._w__, tex, pos.xy, OffsetX = 0.5, OffsetY = -0.5, UseComputedLOD=false
            tfetch2D lumaNwNeSwSe.__w_, tex, pos.xy, OffsetX = -0.5, OffsetY = 0.5, UseComputedLOD=false
            tfetch2D lumaNwNeSwSe.___w, tex, pos.xy, OffsetX = 0.5, OffsetY = 0.5, UseComputedLOD=false
        };
    #else
        asm {
            tfetch2D lumaNwNeSwSe.y___, tex, pos.xy, OffsetX = -0.5, OffsetY = -0.5, UseComputedLOD=false
            tfetch2D lumaNwNeSwSe._y__, tex, pos.xy, OffsetX = 0.5, OffsetY = -0.5, UseComputedLOD=false
            tfetch2D lumaNwNeSwSe.__y_, tex, pos.xy, OffsetX = -0.5, OffsetY = 0.5, UseComputedLOD=false
            tfetch2D lumaNwNeSwSe.___y, tex, pos.xy, OffsetX = 0.5, OffsetY = 0.5, UseComputedLOD=false
        };
    #endif
/*--------------------------------------------------------------------------*/
    // Tiny constant bias on the NE luma (presumably keeps dir from being
    // exactly zero before normalize() -- TODO confirm).
    lumaNwNeSwSe.y += 1.0/384.0;
    float2 lumaMinTemp = min(lumaNwNeSwSe.xy, lumaNwNeSwSe.zw);
    float2 lumaMaxTemp = max(lumaNwNeSwSe.xy, lumaNwNeSwSe.zw);
    float lumaMin = min(lumaMinTemp.x, lumaMinTemp.y);
    float lumaMax = max(lumaMaxTemp.x, lumaMaxTemp.y);
/*--------------------------------------------------------------------------*/
    // Centre texel; early exit when corner + centre contrast is too low.
    float4 rgbyM = tex2Dlod(tex, float4(pos.xy, 0.0, 0.0));
    #if (FXAA_GREEN_AS_LUMA == 0)
        float lumaMinM = min(lumaMin, rgbyM.w);
        float lumaMaxM = max(lumaMax, rgbyM.w);
    #else
        float lumaMinM = min(lumaMin, rgbyM.y);
        float lumaMaxM = max(lumaMax, rgbyM.y);
    #endif
    if((lumaMaxM - lumaMinM) < max(fxaaConsoleEdgeThresholdMin, lumaMax * fxaaConsoleEdgeThreshold)) return rgbyM;
/*--------------------------------------------------------------------------*/
    // Edge direction. With fxaaConsole360ConstDir = (1, -1, 0.25, -0.25):
    //   dir.x = (SW + SE) - (NW + NE)
    //   dir.y = (NW + SW) - (NE + SE)
    float2 dir;
    dir.x = dot(lumaNwNeSwSe, fxaaConsole360ConstDir.yyxx);
    dir.y = dot(lumaNwNeSwSe, fxaaConsole360ConstDir.xyxy);
    dir = normalize(dir);
/*--------------------------------------------------------------------------*/
    // Near-tap offsets: .xy and .zw are scaled by the matching components of
    // fxaaConsoleRcpFrameOpt.
    float4 dir1 = dir.xyxy * fxaaConsoleRcpFrameOpt.xyzw;
/*--------------------------------------------------------------------------*/
    // Far-tap offsets: direction stretched by the sharpness term, remapped
    // into [0,1] by saturate(), then scaled and offset by
    // fxaaConsole360RcpFrameOpt2.
    float4 dir2;
    float dirAbsMinTimesC = min(abs(dir.x), abs(dir.y)) * fxaaConsoleEdgeSharpness;
    dir2 = saturate(fxaaConsole360ConstDir.zzww * dir.xyxy / dirAbsMinTimesC + 0.5);
    dir2 = dir2 * fxaaConsole360RcpFrameOpt2.xyxy + fxaaConsole360RcpFrameOpt2.zwzw;
/*--------------------------------------------------------------------------*/
    float4 rgbyN1 = tex2Dlod(fxaaConsole360TexExpBiasNegOne, float4(pos.xy + dir1.xy, 0.0, 0.0));
    float4 rgbyP1 = tex2Dlod(fxaaConsole360TexExpBiasNegOne, float4(pos.xy + dir1.zw, 0.0, 0.0));
    float4 rgbyN2 = tex2Dlod(fxaaConsole360TexExpBiasNegTwo, float4(pos.xy + dir2.xy, 0.0, 0.0));
    float4 rgbyP2 = tex2Dlod(fxaaConsole360TexExpBiasNegTwo, float4(pos.xy + dir2.zw, 0.0, 0.0));
/*--------------------------------------------------------------------------*/
    // NOTE(review): assumes the ExpBiasNegOne / ExpBiasNegTwo samplers return
    // texels pre-scaled by 1/2 and 1/4, making rgbyA a two-tap mean and
    // rgbyB a four-tap mean -- confirm against the sampler setup.
    // Both far taps must carry equal weight and the near-tap sum is halved;
    // scaling only rgbyP2 would skew the blend and over-brighten the result.
    float4 rgbyA = rgbyN1 + rgbyP1;
    float4 rgbyB = rgbyN2 + rgbyP2 + rgbyA * 0.5;
/*--------------------------------------------------------------------------*/
    // Range-check the four-tap luma using the same channel selection as the
    // centre texel above, so FXAA_GREEN_AS_LUMA builds compare green against
    // the green-derived lumaMin / lumaMax rather than alpha.
    #if (FXAA_GREEN_AS_LUMA == 0)
        float lumaB = rgbyB.w;
    #else
        float lumaB = rgbyB.y;
    #endif
    // Use the two-tap result when the four-tap luma is above lumaMax or not
    // above lumaMin; otherwise keep the four-tap result.
    float4 rgbyR = ((lumaB - lumaMax) > 0.0) ? rgbyA : rgbyB;
    rgbyR = ((lumaB - lumaMin) > 0.0) ? rgbyR : rgbyA;
    return rgbyR; }
/*==========================================================================*/
#endif
1432 
1433 
1434 
1435 /*============================================================================
1436 
1437  FXAA3 CONSOLE - OPTIMIZED PS3 PIXEL SHADER (NO EARLY EXIT)
1438 
1439 ==============================================================================
1440 The code below does not exactly match the assembly.
1441 I have a feeling that 12 cycles is possible, but was not able to get there.
1442 Might have to increase register count to get full performance.
1443 Note this shader does not use perspective interpolation.
1444 
1445 Use the following cgc options,
1446 
1447  --fenable-bx2 --fastmath --fastprecision --nofloatbindings
1448 
1449 ------------------------------------------------------------------------------
1450  NVSHADERPERF OUTPUT
1451 ------------------------------------------------------------------------------
1452 For reference and to aid in debug, output of NVShaderPerf should match this,
1453 
1454 Shader to schedule:
1455  0: texpkb h0.w(TRUE), v5.zyxx, #0
1456  2: addh h2.z(TRUE), h0.w, constant(0.001953, 0.000000, 0.000000, 0.000000).x
1457  4: texpkb h0.w(TRUE), v5.xwxx, #0
1458  6: addh h0.z(TRUE), -h2, h0.w
1459  7: texpkb h1.w(TRUE), v5, #0
1460  9: addh h0.x(TRUE), h0.z, -h1.w
1461  10: addh h3.w(TRUE), h0.z, h1
1462  11: texpkb h2.w(TRUE), v5.zwzz, #0
1463  13: addh h0.z(TRUE), h3.w, -h2.w
1464  14: addh h0.x(TRUE), h2.w, h0
1465  15: nrmh h1.xz(TRUE), h0_n
1466  16: minh_m8 h0.x(TRUE), |h1|, |h1.z|
1467  17: maxh h4.w(TRUE), h0, h1
1468  18: divx h2.xy(TRUE), h1_n.xzzw, h0_n
1469  19: movr r1.zw(TRUE), v4.xxxy
1470  20: madr r2.xz(TRUE), -h1, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).zzww, r1.zzww
1471  22: minh h5.w(TRUE), h0, h1
1472  23: texpkb h0(TRUE), r2.xzxx, #0
1473  25: madr r0.zw(TRUE), h1.xzxz, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w), r1
1474  27: maxh h4.x(TRUE), h2.z, h2.w
1475  28: texpkb h1(TRUE), r0.zwzz, #0
1476  30: addh_d2 h1(TRUE), h0, h1
1477  31: madr r0.xy(TRUE), -h2, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).xyxx, r1.zwzz
1478  33: texpkb h0(TRUE), r0, #0
1479  35: minh h4.z(TRUE), h2, h2.w
1480  36: fenct TRUE
1481  37: madr r1.xy(TRUE), h2, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).xyxx, r1.zwzz
1482  39: texpkb h2(TRUE), r1, #0
1483  41: addh_d2 h0(TRUE), h0, h2
1484  42: maxh h2.w(TRUE), h4, h4.x
1485  43: minh h2.x(TRUE), h5.w, h4.z
1486  44: addh_d2 h0(TRUE), h0, h1
1487  45: slth h2.x(TRUE), h0.w, h2
1488  46: sgth h2.w(TRUE), h0, h2
1489  47: movh h0(TRUE), h0
1490  48: addx.c0 rc(TRUE), h2, h2.w
1491  49: movh h0(c0.NE.x), h1
1492 
1493 IPU0 ------ Simplified schedule: --------
1494 Pass | Unit | uOp | PC: Op
1495 -----+--------+------+-------------------------
1496  1 | SCT0/1 | mov | 0: TXLr h0.w, g[TEX1].zyxx, const.xxxx, TEX0;
1497  | TEX | txl | 0: TXLr h0.w, g[TEX1].zyxx, const.xxxx, TEX0;
1498  | SCB1 | add | 2: ADDh h2.z, h0.--w-, const.--x-;
1499  | | |
1500  2 | SCT0/1 | mov | 4: TXLr h0.w, g[TEX1].xwxx, const.xxxx, TEX0;
1501  | TEX | txl | 4: TXLr h0.w, g[TEX1].xwxx, const.xxxx, TEX0;
1502  | SCB1 | add | 6: ADDh h0.z,-h2, h0.--w-;
1503  | | |
1504  3 | SCT0/1 | mov | 7: TXLr h1.w, g[TEX1], const.xxxx, TEX0;
1505  | TEX | txl | 7: TXLr h1.w, g[TEX1], const.xxxx, TEX0;
1506  | SCB0 | add | 9: ADDh h0.x, h0.z---,-h1.w---;
1507  | SCB1 | add | 10: ADDh h3.w, h0.---z, h1;
1508  | | |
1509  4 | SCT0/1 | mov | 11: TXLr h2.w, g[TEX1].zwzz, const.xxxx, TEX0;
1510  | TEX | txl | 11: TXLr h2.w, g[TEX1].zwzz, const.xxxx, TEX0;
1511  | SCB0 | add | 14: ADDh h0.x, h2.w---, h0;
1512  | SCB1 | add | 13: ADDh h0.z, h3.--w-,-h2.--w-;
1513  | | |
1514  5 | SCT1 | mov | 15: NRMh h1.xz, h0;
1515  | SRB | nrm | 15: NRMh h1.xz, h0;
1516  | SCB0 | min | 16: MINh*8 h0.x, |h1|, |h1.z---|;
1517  | SCB1 | max | 17: MAXh h4.w, h0, h1;
1518  | | |
1519  6 | SCT0 | div | 18: DIVx h2.xy, h1.xz--, h0;
1520  | SCT1 | mov | 19: MOVr r1.zw, g[TEX0].--xy;
1521  | SCB0 | mad | 20: MADr r2.xz,-h1, const.z-w-, r1.z-w-;
1522  | SCB1 | min | 22: MINh h5.w, h0, h1;
1523  | | |
1524  7 | SCT0/1 | mov | 23: TXLr h0, r2.xzxx, const.xxxx, TEX0;
1525  | TEX | txl | 23: TXLr h0, r2.xzxx, const.xxxx, TEX0;
1526  | SCB0 | max | 27: MAXh h4.x, h2.z---, h2.w---;
1527  | SCB1 | mad | 25: MADr r0.zw, h1.--xz, const, r1;
1528  | | |
1529  8 | SCT0/1 | mov | 28: TXLr h1, r0.zwzz, const.xxxx, TEX0;
1530  | TEX | txl | 28: TXLr h1, r0.zwzz, const.xxxx, TEX0;
1531  | SCB0/1 | add | 30: ADDh/2 h1, h0, h1;
1532  | | |
1533  9 | SCT0 | mad | 31: MADr r0.xy,-h2, const.xy--, r1.zw--;
1534  | SCT1 | mov | 33: TXLr h0, r0, const.zzzz, TEX0;
1535  | TEX | txl | 33: TXLr h0, r0, const.zzzz, TEX0;
1536  | SCB1 | min | 35: MINh h4.z, h2, h2.--w-;
1537  | | |
1538  10 | SCT0 | mad | 37: MADr r1.xy, h2, const.xy--, r1.zw--;
1539  | SCT1 | mov | 39: TXLr h2, r1, const.zzzz, TEX0;
1540  | TEX | txl | 39: TXLr h2, r1, const.zzzz, TEX0;
1541  | SCB0/1 | add | 41: ADDh/2 h0, h0, h2;
1542  | | |
1543  11 | SCT0 | min | 43: MINh h2.x, h5.w---, h4.z---;
1544  | SCT1 | max | 42: MAXh h2.w, h4, h4.---x;
1545  | SCB0/1 | add | 44: ADDh/2 h0, h0, h1;
1546  | | |
1547  12 | SCT0 | set | 45: SLTh h2.x, h0.w---, h2;
1548  | SCT1 | set | 46: SGTh h2.w, h0, h2;
1549  | SCB0/1 | mul | 47: MOVh h0, h0;
1550  | | |
1551  13 | SCT0 | mad | 48: ADDxc0_s rc, h2, h2.w---;
1552  | SCB0/1 | mul | 49: MOVh h0(NE0.xxxx), h1;
1553 
1554 Pass SCT TEX SCB
1555  1: 0% 100% 25%
1556  2: 0% 100% 25%
1557  3: 0% 100% 50%
1558  4: 0% 100% 50%
1559  5: 0% 0% 50%
1560  6: 100% 0% 75%
1561  7: 0% 100% 75%
1562  8: 0% 100% 100%
1563  9: 0% 100% 25%
1564  10: 0% 100% 100%
1565  11: 50% 0% 100%
1566  12: 50% 0% 100%
1567  13: 25% 0% 100%
1568 
1569 MEAN: 17% 61% 67%
1570 
1571 Pass SCT0 SCT1 TEX SCB0 SCB1
1572  1: 0% 0% 100% 0% 100%
1573  2: 0% 0% 100% 0% 100%
1574  3: 0% 0% 100% 100% 100%
1575  4: 0% 0% 100% 100% 100%
1576  5: 0% 0% 0% 100% 100%
1577  6: 100% 100% 0% 100% 100%
1578  7: 0% 0% 100% 100% 100%
1579  8: 0% 0% 100% 100% 100%
1580  9: 0% 0% 100% 0% 100%
1581  10: 0% 0% 100% 100% 100%
1582  11: 100% 100% 0% 100% 100%
1583  12: 100% 100% 0% 100% 100%
1584  13: 100% 0% 0% 100% 100%
1585 
1586 MEAN: 30% 23% 61% 76% 100%
1587 Fragment Performance Setup: Driver RSX Compiler, GPU RSX, Flags 0x5
1588 Results 13 cycles, 3 r regs, 923,076,923 pixels/s
1589 ============================================================================*/
#if (FXAA_PS3 == 1) && (FXAA_EARLY_EXIT == 0)
/*--------------------------------------------------------------------------*/
#pragma regcount 7
#pragma disablepc all
#pragma option O3
#pragma option OutColorPrec=fp16
#pragma texformat default RGBA8
/*==========================================================================*/
// FXAA3 console pixel shader, PS3 variant without early exit.
//
// Estimates an edge direction from four diagonal luma taps, then samples two
// pairs of color taps along that direction around `pos`:
//   rgby1 = average of the pair at pos -/+ dir1 * fxaaConsoleRcpFrameOpt.zw
//   rgby2 = average of the pair at pos -/+ dir2 * fxaaConsoleRcpFrameOpt2.zw,
//           then averaged again with rgby1 (all four taps)
// The 4-tap result is returned unless its luma falls outside the [min, max]
// luma of the four diagonal taps, in which case the 2-tap rgby1 is returned.
//
// Luma is read from the .w channel of each tap, or from .y when
// FXAA_GREEN_AS_LUMA is non-zero.
//
// Parameters read by this variant:
//   pos                     - center of the blur taps
//   fxaaConsolePosPos       - diagonal tap positions; swizzles .xy/.zy/.xw/.zw
//                             give the taps named Nw/Ne/Sw/Se below
//   tex                     - source texture, sampled with h4tex2Dlod
//   fxaaConsoleRcpFrameOpt  - .zw scales the normalized direction (dir1)
//   fxaaConsoleRcpFrameOpt2 - .zw scales the clamped direction (dir2)
// The remaining parameters are not referenced in this body; the parameter
// list is the same as the early-exit PS3 variant's. Edge sharpness comes from
// the FXAA_CONSOLE__PS3_EDGE_SHARPNESS macro, not fxaaConsoleEdgeSharpness.
//
// The statement order is deliberate: steps (1)..(13) appear to mirror the 13
// passes of the NVShaderPerf schedule in the comment above this block, so
// avoid reordering statements without re-checking the compiled schedule.
half4 FxaaPixelShader(
    // See FXAA Quality FxaaPixelShader() source for docs on Inputs!
    FxaaFloat2 pos,
    FxaaFloat4 fxaaConsolePosPos,
    FxaaTex tex,
    FxaaTex fxaaConsole360TexExpBiasNegOne,
    FxaaTex fxaaConsole360TexExpBiasNegTwo,
    FxaaFloat2 fxaaQualityRcpFrame,
    FxaaFloat4 fxaaConsoleRcpFrameOpt,
    FxaaFloat4 fxaaConsoleRcpFrameOpt2,
    FxaaFloat4 fxaaConsole360RcpFrameOpt2,
    FxaaFloat fxaaQualitySubpix,
    FxaaFloat fxaaQualityEdgeThreshold,
    FxaaFloat fxaaQualityEdgeThresholdMin,
    FxaaFloat fxaaConsoleEdgeSharpness,
    FxaaFloat fxaaConsoleEdgeThreshold,
    FxaaFloat fxaaConsoleEdgeThresholdMin,
    FxaaFloat4 fxaaConsole360ConstDir
) {
/*--------------------------------------------------------------------------*/
// (1) NE tap. Its luma gets a +1/512 bias and seeds both direction sums.
//     NOTE(review): the bias presumably keeps dir from being exactly zero on
//     flat regions before normalize() - confirm.
    half4 dir;
    half4 lumaNe = h4tex2Dlod(tex, half4(fxaaConsolePosPos.zy, 0, 0));
    #if (FXAA_GREEN_AS_LUMA == 0)
        lumaNe.w += half(1.0/512.0);
        dir.x = -lumaNe.w;
        dir.z = -lumaNe.w;
    #else
        lumaNe.y += half(1.0/512.0);
        dir.x = -lumaNe.y;
        dir.z = -lumaNe.y;
    #endif
/*--------------------------------------------------------------------------*/
// (2) SW tap: added to both sums.
    half4 lumaSw = h4tex2Dlod(tex, half4(fxaaConsolePosPos.xw, 0, 0));
    #if (FXAA_GREEN_AS_LUMA == 0)
        dir.x += lumaSw.w;
        dir.z += lumaSw.w;
    #else
        dir.x += lumaSw.y;
        dir.z += lumaSw.y;
    #endif
/*--------------------------------------------------------------------------*/
// (3) NW tap: subtracted from x, added to z.
    half4 lumaNw = h4tex2Dlod(tex, half4(fxaaConsolePosPos.xy, 0, 0));
    #if (FXAA_GREEN_AS_LUMA == 0)
        dir.x -= lumaNw.w;
        dir.z += lumaNw.w;
    #else
        dir.x -= lumaNw.y;
        dir.z += lumaNw.y;
    #endif
/*--------------------------------------------------------------------------*/
// (4) SE tap: added to x, subtracted from z. After this step
//     dir.x = -Ne + Sw - Nw + Se and dir.z = -Ne + Sw + Nw - Se.
    half4 lumaSe = h4tex2Dlod(tex, half4(fxaaConsolePosPos.zw, 0, 0));
    #if (FXAA_GREEN_AS_LUMA == 0)
        dir.x += lumaSe.w;
        dir.z -= lumaSe.w;
    #else
        dir.x += lumaSe.y;
        dir.z -= lumaSe.y;
    #endif
/*--------------------------------------------------------------------------*/
// (5) Normalize the direction and scale its smaller |component|.
    half4 dir1_pos;
    // Only x and z are accumulated above. y must be zeroed before the 3-wide
    // normalize, otherwise an undefined value leaks into the vector length
    // (the early-exit variant zeroes dir.y the same way).
    dir.y = 0.0;
    dir1_pos.xy = normalize(dir.xyz).xz;
    half dirAbsMinTimesC = min(abs(dir1_pos.x), abs(dir1_pos.y)) * half(FXAA_CONSOLE__PS3_EDGE_SHARPNESS);
/*--------------------------------------------------------------------------*/
// (6) Second direction: dir1 divided by the scaled minimum, clamped to
//     [-2, 2]. Both *_pos.zw carry the center position.
    half4 dir2_pos;
    dir2_pos.xy = clamp(dir1_pos.xy / dirAbsMinTimesC, half(-2.0), half(2.0));
    dir1_pos.zw = pos.xy;
    dir2_pos.zw = pos.xy;
    half4 temp1N;
    temp1N.xy = dir1_pos.zw - dir1_pos.xy * fxaaConsoleRcpFrameOpt.zw;
/*--------------------------------------------------------------------------*/
// (7) Negative tap along dir1; rgby1.xy temporarily holds the positive tap
//     coordinates.
    temp1N = h4tex2Dlod(tex, half4(temp1N.xy, 0.0, 0.0));
    half4 rgby1;
    rgby1.xy = dir1_pos.zw + dir1_pos.xy * fxaaConsoleRcpFrameOpt.zw;
/*--------------------------------------------------------------------------*/
// (8) Positive tap along dir1; rgby1 becomes the 2-tap average.
    rgby1 = h4tex2Dlod(tex, half4(rgby1.xy, 0.0, 0.0));
    rgby1 = (temp1N + rgby1) * 0.5;
/*--------------------------------------------------------------------------*/
// (9) Negative tap along dir2.
    half4 temp2N;
    temp2N.xy = dir2_pos.zw - dir2_pos.xy * fxaaConsoleRcpFrameOpt2.zw;
    temp2N = h4tex2Dlod(tex, half4(temp2N.xy, 0.0, 0.0));
/*--------------------------------------------------------------------------*/
// (10) Positive tap along dir2; rgby2 becomes the average of the dir2 pair.
    half4 rgby2;
    rgby2.xy = dir2_pos.zw + dir2_pos.xy * fxaaConsoleRcpFrameOpt2.zw;
    rgby2 = h4tex2Dlod(tex, half4(rgby2.xy, 0.0, 0.0));
    rgby2 = (temp2N + rgby2) * 0.5;
/*--------------------------------------------------------------------------*/
// (11) Luma range of the four diagonal taps, then the 4-tap average.
    // compiler moves these scalar ops up to other cycles
    #if (FXAA_GREEN_AS_LUMA == 0)
        half lumaMin = min(min(lumaNw.w, lumaSw.w), min(lumaNe.w, lumaSe.w));
        half lumaMax = max(max(lumaNw.w, lumaSw.w), max(lumaNe.w, lumaSe.w));
    #else
        half lumaMin = min(min(lumaNw.y, lumaSw.y), min(lumaNe.y, lumaSe.y));
        half lumaMax = max(max(lumaNw.y, lumaSw.y), max(lumaNe.y, lumaSe.y));
    #endif
    rgby2 = (rgby2 + rgby1) * 0.5;
/*--------------------------------------------------------------------------*/
// (12) Does the 4-tap luma fall outside the diagonal taps' range?
    #if (FXAA_GREEN_AS_LUMA == 0)
        bool twoTapLt = rgby2.w < lumaMin;
        bool twoTapGt = rgby2.w > lumaMax;
    #else
        bool twoTapLt = rgby2.y < lumaMin;
        bool twoTapGt = rgby2.y > lumaMax;
    #endif
/*--------------------------------------------------------------------------*/
// (13) If so, fall back to the 2-tap result.
    if(twoTapLt || twoTapGt) rgby2 = rgby1;
/*--------------------------------------------------------------------------*/
    return rgby2; }
/*==========================================================================*/
#endif
1720 
1721 
1722 
1723 /*============================================================================
1724 
1725  FXAA3 CONSOLE - OPTIMIZED PS3 PIXEL SHADER (WITH EARLY EXIT)
1726 
1727 ==============================================================================
1728 The code mostly matches the assembly.
1729 I have a feeling that 14 cycles is possible, but I was not able to get there.
1730 Might have to increase register count to get full performance.
1731 Note this shader does not use perspective interpolation.
1732 
1733 Use the following cgc options,
1734 
1735  --fenable-bx2 --fastmath --fastprecision --nofloatbindings
1736 
1737 Use of FXAA_GREEN_AS_LUMA currently adds a cycle (16 clks).
1738 Will look at fixing this for FXAA 3.12.
1739 ------------------------------------------------------------------------------
1740  NVSHADERPERF OUTPUT
1741 ------------------------------------------------------------------------------
1742 For reference and to aid in debug, output of NVShaderPerf should match this,
1743 
1744 Shader to schedule:
1745  0: texpkb h0.w(TRUE), v5.zyxx, #0
1746  2: addh h2.y(TRUE), h0.w, constant(0.001953, 0.000000, 0.000000, 0.000000).x
1747  4: texpkb h1.w(TRUE), v5.xwxx, #0
1748  6: addh h0.x(TRUE), h1.w, -h2.y
1749  7: texpkb h2.w(TRUE), v5.zwzz, #0
1750  9: minh h4.w(TRUE), h2.y, h2
1751  10: maxh h5.x(TRUE), h2.y, h2.w
1752  11: texpkb h0.w(TRUE), v5, #0
1753  13: addh h3.w(TRUE), -h0, h0.x
1754  14: addh h0.x(TRUE), h0.w, h0
1755  15: addh h0.z(TRUE), -h2.w, h0.x
1756  16: addh h0.x(TRUE), h2.w, h3.w
1757  17: minh h5.y(TRUE), h0.w, h1.w
1758  18: nrmh h2.xz(TRUE), h0_n
1759  19: minh_m8 h2.w(TRUE), |h2.x|, |h2.z|
1760  20: divx h4.xy(TRUE), h2_n.xzzw, h2_n.w
1761  21: movr r1.zw(TRUE), v4.xxxy
1762  22: maxh h2.w(TRUE), h0, h1
1763  23: fenct TRUE
1764  24: madr r0.xy(TRUE), -h2.xzzw, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).zwzz, r1.zwzz
1765  26: texpkb h0(TRUE), r0, #0
1766  28: maxh h5.x(TRUE), h2.w, h5
1767  29: minh h5.w(TRUE), h5.y, h4
1768  30: madr r1.xy(TRUE), h2.xzzw, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).zwzz, r1.zwzz
1769  32: texpkb h2(TRUE), r1, #0
1770  34: addh_d2 h2(TRUE), h0, h2
1771  35: texpkb h1(TRUE), v4, #0
1772  37: maxh h5.y(TRUE), h5.x, h1.w
1773  38: minh h4.w(TRUE), h1, h5
1774  39: madr r0.xy(TRUE), -h4, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).xyxx, r1.zwzz
1775  41: texpkb h0(TRUE), r0, #0
1776  43: addh_m8 h5.z(TRUE), h5.y, -h4.w
1777  44: madr r2.xy(TRUE), h4, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).xyxx, r1.zwzz
1778  46: texpkb h3(TRUE), r2, #0
1779  48: addh_d2 h0(TRUE), h0, h3
1780  49: addh_d2 h3(TRUE), h0, h2
1781  50: movh h0(TRUE), h3
1782  51: slth h3.x(TRUE), h3.w, h5.w
1783  52: sgth h3.w(TRUE), h3, h5.x
1784  53: addx.c0 rc(TRUE), h3.x, h3
1785  54: slth.c0 rc(TRUE), h5.z, h5
1786  55: movh h0(c0.NE.w), h2
1787  56: movh h0(c0.NE.x), h1
1788 
1789 IPU0 ------ Simplified schedule: --------
1790 Pass | Unit | uOp | PC: Op
1791 -----+--------+------+-------------------------
1792  1 | SCT0/1 | mov | 0: TXLr h0.w, g[TEX1].zyxx, const.xxxx, TEX0;
1793  | TEX | txl | 0: TXLr h0.w, g[TEX1].zyxx, const.xxxx, TEX0;
1794  | SCB0 | add | 2: ADDh h2.y, h0.-w--, const.-x--;
1795  | | |
1796  2 | SCT0/1 | mov | 4: TXLr h1.w, g[TEX1].xwxx, const.xxxx, TEX0;
1797  | TEX | txl | 4: TXLr h1.w, g[TEX1].xwxx, const.xxxx, TEX0;
1798  | SCB0 | add | 6: ADDh h0.x, h1.w---,-h2.y---;
1799  | | |
1800  3 | SCT0/1 | mov | 7: TXLr h2.w, g[TEX1].zwzz, const.xxxx, TEX0;
1801  | TEX | txl | 7: TXLr h2.w, g[TEX1].zwzz, const.xxxx, TEX0;
1802  | SCB0 | max | 10: MAXh h5.x, h2.y---, h2.w---;
1803  | SCB1 | min | 9: MINh h4.w, h2.---y, h2;
1804  | | |
1805  4 | SCT0/1 | mov | 11: TXLr h0.w, g[TEX1], const.xxxx, TEX0;
1806  | TEX | txl | 11: TXLr h0.w, g[TEX1], const.xxxx, TEX0;
1807  | SCB0 | add | 14: ADDh h0.x, h0.w---, h0;
1808  | SCB1 | add | 13: ADDh h3.w,-h0, h0.---x;
1809  | | |
1810  5 | SCT0 | mad | 16: ADDh h0.x, h2.w---, h3.w---;
1811  | SCT1 | mad | 15: ADDh h0.z,-h2.--w-, h0.--x-;
1812  | SCB0 | min | 17: MINh h5.y, h0.-w--, h1.-w--;
1813  | | |
1814  6 | SCT1 | mov | 18: NRMh h2.xz, h0;
1815  | SRB | nrm | 18: NRMh h2.xz, h0;
1816  | SCB1 | min | 19: MINh*8 h2.w, |h2.---x|, |h2.---z|;
1817  | | |
1818  7 | SCT0 | div | 20: DIVx h4.xy, h2.xz--, h2.ww--;
1819  | SCT1 | mov | 21: MOVr r1.zw, g[TEX0].--xy;
1820  | SCB1 | max | 22: MAXh h2.w, h0, h1;
1821  | | |
1822  8 | SCT0 | mad | 24: MADr r0.xy,-h2.xz--, const.zw--, r1.zw--;
1823  | SCT1 | mov | 26: TXLr h0, r0, const.xxxx, TEX0;
1824  | TEX | txl | 26: TXLr h0, r0, const.xxxx, TEX0;
1825  | SCB0 | max | 28: MAXh h5.x, h2.w---, h5;
1826  | SCB1 | min | 29: MINh h5.w, h5.---y, h4;
1827  | | |
1828  9 | SCT0 | mad | 30: MADr r1.xy, h2.xz--, const.zw--, r1.zw--;
1829  | SCT1 | mov | 32: TXLr h2, r1, const.xxxx, TEX0;
1830  | TEX | txl | 32: TXLr h2, r1, const.xxxx, TEX0;
1831  | SCB0/1 | add | 34: ADDh/2 h2, h0, h2;
1832  | | |
1833  10 | SCT0/1 | mov | 35: TXLr h1, g[TEX0], const.xxxx, TEX0;
1834  | TEX | txl | 35: TXLr h1, g[TEX0], const.xxxx, TEX0;
1835  | SCB0 | max | 37: MAXh h5.y, h5.-x--, h1.-w--;
1836  | SCB1 | min | 38: MINh h4.w, h1, h5;
1837  | | |
1838  11 | SCT0 | mad | 39: MADr r0.xy,-h4, const.xy--, r1.zw--;
1839  | SCT1 | mov | 41: TXLr h0, r0, const.zzzz, TEX0;
1840  | TEX | txl | 41: TXLr h0, r0, const.zzzz, TEX0;
1841  | SCB0 | mad | 44: MADr r2.xy, h4, const.xy--, r1.zw--;
1842  | SCB1 | add | 43: ADDh*8 h5.z, h5.--y-,-h4.--w-;
1843  | | |
1844  12 | SCT0/1 | mov | 46: TXLr h3, r2, const.xxxx, TEX0;
1845  | TEX | txl | 46: TXLr h3, r2, const.xxxx, TEX0;
1846  | SCB0/1 | add | 48: ADDh/2 h0, h0, h3;
1847  | | |
1848  13 | SCT0/1 | mad | 49: ADDh/2 h3, h0, h2;
1849  | SCB0/1 | mul | 50: MOVh h0, h3;
1850  | | |
1851  14 | SCT0 | set | 51: SLTh h3.x, h3.w---, h5.w---;
1852  | SCT1 | set | 52: SGTh h3.w, h3, h5.---x;
1853  | SCB0 | set | 54: SLThc0 rc, h5.z---, h5;
1854  | SCB1 | add | 53: ADDxc0_s rc, h3.---x, h3;
1855  | | |
1856  15 | SCT0/1 | mul | 55: MOVh h0(NE0.wwww), h2;
1857  | SCB0/1 | mul | 56: MOVh h0(NE0.xxxx), h1;
1858 
1859 Pass SCT TEX SCB
1860  1: 0% 100% 25%
1861  2: 0% 100% 25%
1862  3: 0% 100% 50%
1863  4: 0% 100% 50%
1864  5: 50% 0% 25%
1865  6: 0% 0% 25%
1866  7: 100% 0% 25%
1867  8: 0% 100% 50%
1868  9: 0% 100% 100%
1869  10: 0% 100% 50%
1870  11: 0% 100% 75%
1871  12: 0% 100% 100%
1872  13: 100% 0% 100%
1873  14: 50% 0% 50%
1874  15: 100% 0% 100%
1875 
1876 MEAN: 26% 60% 56%
1877 
1878 Pass SCT0 SCT1 TEX SCB0 SCB1
1879  1: 0% 0% 100% 100% 0%
1880  2: 0% 0% 100% 100% 0%
1881  3: 0% 0% 100% 100% 100%
1882  4: 0% 0% 100% 100% 100%
1883  5: 100% 100% 0% 100% 0%
1884  6: 0% 0% 0% 0% 100%
1885  7: 100% 100% 0% 0% 100%
1886  8: 0% 0% 100% 100% 100%
1887  9: 0% 0% 100% 100% 100%
1888  10: 0% 0% 100% 100% 100%
1889  11: 0% 0% 100% 100% 100%
1890  12: 0% 0% 100% 100% 100%
1891  13: 100% 100% 0% 100% 100%
1892  14: 100% 100% 0% 100% 100%
1893  15: 100% 100% 0% 100% 100%
1894 
1895 MEAN: 33% 33% 60% 86% 80%
1896 Fragment Performance Setup: Driver RSX Compiler, GPU RSX, Flags 0x5
1897 Results 15 cycles, 3 r regs, 800,000,000 pixels/s
1898 ============================================================================*/
1899 #if (FXAA_PS3 == 1) && (FXAA_EARLY_EXIT == 1)
1900 /*--------------------------------------------------------------------------*/
1901 #pragma regcount 7
1902 #pragma disablepc all
1903 #pragma option O2
1904 #pragma option OutColorPrec=fp16
1905 #pragma texformat default RGBA8
1906 /*==========================================================================*/
1907 half4 FxaaPixelShader(
1908  // See FXAA Quality FxaaPixelShader() source for docs on Inputs!
      // PS3 console variant with early exit. Returns one of:
      //   rgbyM - the unfiltered center texel, when earlyExit is set in (14),
      //   rgby1 - the 2-tap average along dir1, when the 4-tap luma falls
      //           outside the corner luma range [lumaMin, lumaMax],
      //   rgby2 - the 4-tap average otherwise.
      // Luma is read from .w of each tap, or .y when FXAA_GREEN_AS_LUMA != 0.
      // Only pos, fxaaConsolePosPos, tex, fxaaConsoleRcpFrameOpt.zw and
      // fxaaConsoleRcpFrameOpt2.zw are read below; the other parameters are
      // not referenced in this body. Sharpness and threshold come from the
      // FXAA_CONSOLE__PS3_EDGE_SHARPNESS / FXAA_CONSOLE__PS3_EDGE_THRESHOLD
      // macros rather than from the fxaaConsoleEdge* parameters.
      // Statement order is deliberate (the header above says the code mostly
      // matches the scheduled assembly); avoid reordering.
1909  FxaaFloat2 pos,
1910  FxaaFloat4 fxaaConsolePosPos,
1911  FxaaTex tex,
1912  FxaaTex fxaaConsole360TexExpBiasNegOne,
1913  FxaaTex fxaaConsole360TexExpBiasNegTwo,
1914  FxaaFloat2 fxaaQualityRcpFrame,
1915  FxaaFloat4 fxaaConsoleRcpFrameOpt,
1916  FxaaFloat4 fxaaConsoleRcpFrameOpt2,
1917  FxaaFloat4 fxaaConsole360RcpFrameOpt2,
1918  FxaaFloat fxaaQualitySubpix,
1919  FxaaFloat fxaaQualityEdgeThreshold,
1920  FxaaFloat fxaaQualityEdgeThresholdMin,
1921  FxaaFloat fxaaConsoleEdgeSharpness,
1922  FxaaFloat fxaaConsoleEdgeThreshold,
1923  FxaaFloat fxaaConsoleEdgeThresholdMin,
1924  FxaaFloat4 fxaaConsole360ConstDir
1925 ) {
1926 /*--------------------------------------------------------------------------*/
1927 // (1) NE tap (fxaaConsolePosPos.zy); lumaNe carries a +1/512 bias.
      //     NOTE(review): the bias presumably keeps dir from being exactly
      //     zero before normalize() - confirm.
1928  half4 rgbyNe = h4tex2Dlod(tex, half4(fxaaConsolePosPos.zy, 0, 0));
1929  #if (FXAA_GREEN_AS_LUMA == 0)
1930  half lumaNe = rgbyNe.w + half(1.0/512.0);
1931  #else
1932  half lumaNe = rgbyNe.y + half(1.0/512.0);
1933  #endif
1934 /*--------------------------------------------------------------------------*/
1935 // (2) SW tap (.xw); start the direction sums with SW - NE.
1936  half4 lumaSw = h4tex2Dlod(tex, half4(fxaaConsolePosPos.xw, 0, 0));
1937  #if (FXAA_GREEN_AS_LUMA == 0)
1938  half lumaSwNegNe = lumaSw.w - lumaNe;
1939  #else
1940  half lumaSwNegNe = lumaSw.y - lumaNe;
1941  #endif
1942 /*--------------------------------------------------------------------------*/
1943 // (3) NW tap (.xy); begin the corner luma max/min with NW and SW.
1944  half4 lumaNw = h4tex2Dlod(tex, half4(fxaaConsolePosPos.xy, 0, 0));
1945  #if (FXAA_GREEN_AS_LUMA == 0)
1946  half lumaMaxNwSw = max(lumaNw.w, lumaSw.w);
1947  half lumaMinNwSw = min(lumaNw.w, lumaSw.w);
1948  #else
1949  half lumaMaxNwSw = max(lumaNw.y, lumaSw.y);
1950  half lumaMinNwSw = min(lumaNw.y, lumaSw.y);
1951  #endif
1952 /*--------------------------------------------------------------------------*/
1953 // (4) SE tap (.zw) is fetched; dirZ/dirX fold +NW / -NW into SW - NE.
1954  half4 lumaSe = h4tex2Dlod(tex, half4(fxaaConsolePosPos.zw, 0, 0));
1955  #if (FXAA_GREEN_AS_LUMA == 0)
1956  half dirZ = lumaNw.w + lumaSwNegNe;
1957  half dirX = -lumaNw.w + lumaSwNegNe;
1958  #else
1959  half dirZ = lumaNw.y + lumaSwNegNe;
1960  half dirX = -lumaNw.y + lumaSwNegNe;
1961  #endif
1962 /*--------------------------------------------------------------------------*/
1963 // (5) Finish dir: x = SE - NW + SW - NE, z = -SE + NW + SW - NE, y = 0.
1964  half3 dir;
1965  dir.y = 0.0; // y is never accumulated; zero it so normalize() only sees x and z
1966  #if (FXAA_GREEN_AS_LUMA == 0)
1967  dir.x = lumaSe.w + dirX;
1968  dir.z = -lumaSe.w + dirZ;
1969  half lumaMinNeSe = min(lumaNe, lumaSe.w);
1970  #else
1971  dir.x = lumaSe.y + dirX;
1972  dir.z = -lumaSe.y + dirZ;
1973  half lumaMinNeSe = min(lumaNe, lumaSe.y);
1974  #endif
1975 /*--------------------------------------------------------------------------*/
1976 // (6) Unit direction in dir1_pos.xy; scale its smaller |component|.
      //     Despite the "Times8" name, the factor is the
      //     FXAA_CONSOLE__PS3_EDGE_SHARPNESS macro.
1977  half4 dir1_pos;
1978  dir1_pos.xy = normalize(dir).xz;
1979  half dirAbsMinTimes8 = min(abs(dir1_pos.x), abs(dir1_pos.y)) * half(FXAA_CONSOLE__PS3_EDGE_SHARPNESS);
1980 /*--------------------------------------------------------------------------*/
1981 // (7) Second direction: dir1 / scaled minimum, clamped to [-2, 2].
      //     Both *_pos.zw carry the center position.
1982  half4 dir2_pos;
1983  dir2_pos.xy = clamp(dir1_pos.xy / dirAbsMinTimes8, half(-2.0), half(2.0));
1984  dir1_pos.zw = pos.xy;
1985  dir2_pos.zw = pos.xy;
1986  #if (FXAA_GREEN_AS_LUMA == 0)
1987  half lumaMaxNeSe = max(lumaNe, lumaSe.w);
1988  #else
1989  half lumaMaxNeSe = max(lumaNe, lumaSe.y);
1990  #endif
1991 /*--------------------------------------------------------------------------*/
1992 // (8) Negative tap along dir1; finish the corner luma max/min.
1993  half4 temp1N;
1994  temp1N.xy = dir1_pos.zw - dir1_pos.xy * fxaaConsoleRcpFrameOpt.zw;
1995  temp1N = h4tex2Dlod(tex, half4(temp1N.xy, 0.0, 0.0));
1996  half lumaMax = max(lumaMaxNwSw, lumaMaxNeSe);
1997  half lumaMin = min(lumaMinNwSw, lumaMinNeSe);
1998 /*--------------------------------------------------------------------------*/
1999 // (9) Positive tap along dir1; rgby1 = average of the dir1 tap pair.
2000  half4 rgby1;
2001  rgby1.xy = dir1_pos.zw + dir1_pos.xy * fxaaConsoleRcpFrameOpt.zw; // xy temporarily holds the tap coordinates
2002  rgby1 = h4tex2Dlod(tex, half4(rgby1.xy, 0.0, 0.0));
2003  rgby1 = (temp1N + rgby1) * 0.5;
2004 /*--------------------------------------------------------------------------*/
2005 // (10) Center texel; extend the luma max/min to include it.
2006  half4 rgbyM = h4tex2Dlod(tex, half4(pos.xy, 0.0, 0.0));
2007  #if (FXAA_GREEN_AS_LUMA == 0)
2008  half lumaMaxM = max(lumaMax, rgbyM.w);
2009  half lumaMinM = min(lumaMin, rgbyM.w);
2010  #else
2011  half lumaMaxM = max(lumaMax, rgbyM.y);
2012  half lumaMinM = min(lumaMin, rgbyM.y);
2013  #endif
2014 /*--------------------------------------------------------------------------*/
2015 // (11) Negative tap along dir2; set up the positive tap coordinates;
      //      divide the center-inclusive luma range by the threshold macro.
2016  half4 temp2N;
2017  temp2N.xy = dir2_pos.zw - dir2_pos.xy * fxaaConsoleRcpFrameOpt2.zw;
2018  temp2N = h4tex2Dlod(tex, half4(temp2N.xy, 0.0, 0.0));
2019  half4 rgby2;
2020  rgby2.xy = dir2_pos.zw + dir2_pos.xy * fxaaConsoleRcpFrameOpt2.zw;
2021  half lumaRangeM = (lumaMaxM - lumaMinM) / FXAA_CONSOLE__PS3_EDGE_THRESHOLD;
2022 /*--------------------------------------------------------------------------*/
2023 // (12) Positive tap along dir2; average the dir2 tap pair.
2024  rgby2 = h4tex2Dlod(tex, half4(rgby2.xy, 0.0, 0.0));
2025  rgby2 = (temp2N + rgby2) * 0.5;
2026 /*--------------------------------------------------------------------------*/
2027 // (13) rgby2 = average of the dir2 pair and rgby1 (all four taps).
2028  rgby2 = (rgby2 + rgby1) * 0.5;
2029 /*--------------------------------------------------------------------------*/
2030 // (14) Selection flags. twoTap: 4-tap luma lies outside the corner range.
      //      earlyExit compares the scaled range against lumaMax (the corner
      //      max), not lumaMaxM.
2031  #if (FXAA_GREEN_AS_LUMA == 0)
2032  bool twoTapLt = rgby2.w < lumaMin;
2033  bool twoTapGt = rgby2.w > lumaMax;
2034  #else
2035  bool twoTapLt = rgby2.y < lumaMin;
2036  bool twoTapGt = rgby2.y > lumaMax;
2037  #endif
2038  bool earlyExit = lumaRangeM < lumaMax;
2039  bool twoTap = twoTapLt || twoTapGt;
2040 /*--------------------------------------------------------------------------*/
2041 // (15) Pick the result; earlyExit is applied last, so it overrides twoTap.
2042  if(twoTap) rgby2 = rgby1;
2043  if(earlyExit) rgby2 = rgbyM;
2044 /*--------------------------------------------------------------------------*/
2045  return rgby2; }
2046 /*==========================================================================*/
2047 #endif
Scalar dot(const VectorT< Scalar, N > &_v1, const VectorT< Scalar, N > &_v2)
Definition: MeshNode2T.cc:263