So I was looking for some ways to improve shadow quality without decreasing performance too much... and I came up with some interesting stuff. This is my journey to cool shadows:

Mentioned FPS are for that camera view.

Tested with Nvidia 540m, i7, Ubuntu 14.04.

These are PSSM shadows, but it works with other shadows too (haven't tested too much)

**No filtering** at all:

FPS: 73

=======================

Original pbs template shadows with

**2x2 PCF**:

FPS: 72

=======================

**PCF 5x5:**

Code: Select all

```
// 5x5 percentage-closer filter: averages 25 binary depth comparisons taken on
// a texel grid centred on UVCoords.
vec3 UVCoords = psPosLN.xyz / psPosLN.w;
float shadowFactor = 0.0;
float bias = 0.0000000001;
for (int y = 0; y < 5; y++) {
    for (int x = 0; x < 5; x++) {
        // Whole-texel offsets -2..+2 keep the kernel symmetric around UVCoords;
        // a -2.5..+1.5 range would shift the whole filter by half a texel.
        vec2 offsets = vec2( float(x - 2) * invShadowMapSize.x, float(y - 2) * invShadowMapSize.y );
        if ( psPosLN.z + bias <= texture( shadowMap, UVCoords.xy + offsets ).r ) {
            shadowFactor += 0.04; // each of the 25 taps contributes 1/25
        }
    }
}
return shadowFactor;
```

FPS: 67

=======================

**PCF 5x5 with pseudo-random offset:**

Code: Select all

```
// Hash-style pseudo-random value: the fractional part of a scaled sine of the
// seed projected onto a fixed vector.
float random(in vec3 seed)
{
    float projected = dot(seed, vec3(53.1215, 21.1352, 9.1322));
    float scaled = sin(projected) * 43758.5453;
    return fract(scaled);
}
------------------------------
// 5x5 PCF in which every tap is nudged by a pseudo-random amount of up to one
// texel. The jitter is seeded from the tap's own (un-jittered) offset.
vec3 UVCoords = psPosLN.xyz / psPosLN.w;
float shadowFactor = 0.0;
float bias = 0.0000000001;
for (int row = 0; row < 5; row++) {
    for (int col = 0; col < 5; col++) {
        vec2 tap = vec2( (col-2) * invShadowMapSize.x, (row-2) * invShadowMapSize.y );
        vec2 jitter = vec2( random(tap.xyy) * invShadowMapSize.x, random(tap.yxy) * invShadowMapSize.y );
        tap = tap + jitter;
        bool lit = psPosLN.z+bias <= texture( shadowMap, UVCoords.xy + tap ).r;
        shadowFactor += lit ? 0.04 : 0.0; // 1/25 per lit tap
    }
}
return shadowFactor;
```

FPS: 48

=======================

**PCF 5x5 with pseudo-random poissonDisk:**

Code: Select all

```
// Pseudo-random hash of a 3-component seed: dot the seed with a constant
// vector, take the sine, scale it up and keep only the fractional part.
float random(in vec3 seed)
{
    float phase = dot(seed, vec3(53.1215, 21.1352, 9.1322));
    float amplified = sin(phase) * 43758.5453;
    return fract(amplified);
}
------------------------------
// Four-entry Poisson-disk table used to displace the PCF taps.
vec2 poissonDisk[4] = vec2[](
    vec2( -0.94201624, -0.39906216 ),
    vec2( 0.94558609, -0.76890725 ),
    vec2( -0.094184101, -0.92938870 ),
    vec2( 0.34495938, 0.29387760 )
);
vec3 UVCoords = psPosLN.xyz / psPosLN.w;
float shadowFactor = 0.0;
float bias = 0.0000000001;
// The disk entry is selected from gl_FragCoord only, so it is the same for
// every tap of this fragment: pick it once instead of calling random() 25 times.
// The trailing %4 keeps the index in range even if random() returns exactly 1.0.
int index = int( 4*random( gl_FragCoord.xyz / gl_FragCoord.w ) )%4;
vec2 diskOffset = poissonDisk[index]*0.001;
for (int y = 0; y < 5; y++) {
    for (int x = 0; x < 5; x++) {
        vec2 offsets = vec2( (x-2) * invShadowMapSize.x, (y-2) * invShadowMapSize.y );
        offsets = offsets + diskOffset;
        if ( psPosLN.z+bias <= texture( shadowMap, UVCoords.xy + offsets ).r ) {
            shadowFactor += 0.04; // 1/25 per lit tap
        }
    }
}
return shadowFactor;
```

FPS: 40

=======================

Then I read that Unity5 uses a 5x5 PCF, and it looks very good/soft, so I guess that what I was lacking is some linear interpolation.

https://unity3d.com/sites/default/files ... hics-8.jpg

I couldn't find how to enable linear filtering on the shadow texture, but here I found a way:

http://codeflow.org/entries/2013/feb/15 ... w-mapping/

this is the result with

**just the interpolation**:

FPS: 72

this is the code from that site

Code: Select all

```
// Binary shadow test for a single lookup: returns 1.0 when the depth stored at
// uv is greater than or equal to `compare`, otherwise 0.0.
float texture2DCompare(sampler2D depths, vec2 uv, float compare){
    // texture() replaces texture2D(), which is deprecated since GLSL 1.30 and
    // not available in core profiles.
    float depth = texture(depths, uv).r;
    return step(compare, depth);
}
// Hand-written bilinear filtering of shadow comparisons: runs the binary depth
// test at four neighbouring lookups and blends the 0/1 results by the
// fractional position f, first along y and then along x.
float texture2DShadowLerp(sampler2D depths, vec2 size, vec2 uv, float compare){
    vec2 texelSize = vec2(1.0)/size;
    vec2 texelPos = uv*size+0.5;
    vec2 f = fract(texelPos);
    vec2 centroidUV = floor(texelPos)/size;
    float left = mix(
        texture2DCompare(depths, centroidUV+texelSize*vec2(0.0, 0.0), compare),
        texture2DCompare(depths, centroidUV+texelSize*vec2(0.0, 1.0), compare),
        f.y);
    float right = mix(
        texture2DCompare(depths, centroidUV+texelSize*vec2(1.0, 0.0), compare),
        texture2DCompare(depths, centroidUV+texelSize*vec2(1.0, 1.0), compare),
        f.y);
    return mix(left, right, f.x);
}
-----------------------------------------
// Call site: the `size` argument is the reciprocal of invShadowMapSize.
vec2 shadowMapDims = vec2(1.0) / invShadowMapSize;
vec2 shadowUV = psPosLN.xy / psPosLN.w;
return texture2DShadowLerp(shadowMap, shadowMapDims, shadowUV, psPosLN.z);
```

=======================

**PCF 5x5 + linear interpolation**. Now I think we are talking serious!:

Code: Select all

```
// 5x5 PCF in which each of the 25 taps is itself a bilinearly interpolated
// shadow comparison; the sum is scaled by 0.04 (1/25) at the end.
vec3 UVCoords = psPosLN.xyz / psPosLN.w;
vec2 shadowMapDims = 1/invShadowMapSize;
float litSum = 0.0;
for (int row = -2; row <= 2; row++) {
    for (int col = -2; col <= 2; col++) {
        vec2 tapOffset = vec2( col * invShadowMapSize.x, row * invShadowMapSize.y );
        litSum += texture2DShadowLerp(shadowMap, shadowMapDims, UVCoords.xy + tapOffset, psPosLN.z);
    }
}
return litSum*0.04;
```

FPS: 36

=======================

So I think maybe with linear interpolation, PCF 3x3 would be enough.

**PCF 3x3 + linear interpolation:**

Code: Select all

```
// 3x3 PCF built from nine bilinearly interpolated shadow comparisons; the sum
// is scaled by 0.11111111 (about 1/9) at the end.
vec3 UVCoords = psPosLN.xyz / psPosLN.w;
vec2 shadowMapDims = 1/invShadowMapSize;
float litSum = 0.0;
for (int row = -1; row <= 1; row++) {
    for (int col = -1; col <= 1; col++) {
        vec2 tapOffset = vec2( col * invShadowMapSize.x, row * invShadowMapSize.y );
        litSum += texture2DShadowLerp(shadowMap, shadowMapDims, UVCoords.xy + tapOffset, psPosLN.z);
    }
}
return litSum*0.11111111;
```

FPS: 56

=======================

Then I found this post:

http://www.ogre3d.org/forums/viewtopic.php?f=1&t=78834

That uses the same linear interpolation, and then gives a 3x3 version with fewer lookups.

**PCF 3x3 + linear interpolation + optimization:**

Code: Select all

```
// 3x3 PCF with bilinear weighting that runs each of the nine depth comparisons
// only once, then reuses them for the four overlapping 2x2 quads.
vec2 uv = psPosLN.xy / psPosLN.w;
vec2 texelSize = invShadowMapSize;
vec2 size = 1 / invShadowMapSize;
vec2 texelPos = uv * size + 0.5;
vec2 centroidUV = floor(texelPos) / size;
vec2 f = fract(texelPos);
const int X = 3;
vec2 topLeft = centroidUV - texelSize * 1.5;
// Gather the 3x3 grid of comparison results up front so that no lookup is
// repeated. Storage is column-major: index = col * X + row.
float taps[9];
for (int col = 0; col < X; col++) {
    for (int row = 0; row < X; row++) {
        taps[col * X + row] = texture2DCompare(shadowMap, topLeft + vec2(col, row) * texelSize, psPosLN.z);
    }
}
// Box-average each of the four overlapping 2x2 quads of the grid.
float quad0 = (taps[0] + taps[1] + taps[3] + taps[4]) / 4.0;
float quad1 = (taps[1] + taps[2] + taps[4] + taps[5]) / 4.0;
float quad2 = (taps[3] + taps[4] + taps[6] + taps[7]) / 4.0;
float quad3 = (taps[4] + taps[5] + taps[7] + taps[8]) / 4.0;
// Blend along y inside each column pair, then along x between the two.
float lowX = mix(quad0, quad1, f.y);
float highX = mix(quad2, quad3, f.y);
return mix(lowX, highX, f.x);
```

FPS: 69

=======================

Conclusions:

My vote is for something like "PCF 3x3 + linear interpolation + optimization", but I am going to try to make it look like "PCF 3x3 + linear interpolation", which would be almost enough for my needs.

I believe PCF can be enabled in hardware, but it's beyond my skills.

I believe that linear interpolation, or bilinear filtering can be enabled for the texture so when you do a simple PCF it would look good instantly, but I don't know how to enable it.

And the only thing that I think is a MUST is STABLE PSSM shadows, like this:

http://www.garagegames.com/community/blogs/view/21284

or this one:

http://www.ogre3d.org/forums/viewtopic.php?f=11&t=71142

but again, that is beyond my skills.

Hopefully some of these techniques could be in Ogre 2.1 by default, with maybe the ability to activate some filtering like:

Code: Select all

`shadow_map 0 2048 2048 PF_FLOAT32_R light 0 split 0 pcf 3 linearFiltering on`