Thanks for your response. I'm trying to debug everything, but it's really hard to understand what is going on.
Code: Select all
//#include "SyntaxHighlightingMisc.h"
// ---- Type and intrinsic aliases ----
// These macros let the shader body below use one set of names that is
// rewritten to HLSL types and intrinsics by the preprocessor.
#define ushort uint
#define ogre_float4x3 float4x3
//Short used for read operations. It's an int in GLSL & HLSL. An ushort in Metal
#define rshort2 int2
#define rint int
//Short used for write operations. It's an int in GLSL. An ushort in HLSL & Metal
#define wshort2 uint2
#define wshort3 uint3
#define toFloat3x3( x ) ((float3x3)(x))
// The three arguments are passed to float3x3() and the result is transposed.
#define buildFloat3x3( row0, row1, row2 ) transpose( float3x3( row0, row1, row2 ) )
#define min3( a, b, c ) min( a, min( b, c ) )
#define max3( a, b, c ) max( a, max( b, c ) )
// INLINE, NO_INTERPOLATION_SUFFIX, PARAMS_ARG_DECL and PARAMS_ARG expand to nothing here.
#define INLINE
#define NO_INTERPOLATION_PREFIX nointerpolation
#define NO_INTERPOLATION_SUFFIX
#define finalDrawId input.drawId
#define PARAMS_ARG_DECL
#define PARAMS_ARG
#define floatBitsToUint(x) asuint(x)
#define uintBitsToFloat(x) asfloat(x)
#define floatBitsToInt(x) asint(x)
#define fract frac
// NOTE: arguments are not parenthesised inside the expansion.
#define lessThan( a, b ) (a < b)
// ---- Vertex shader input shorthands: each expands to a field of the `input` parameter ----
#define inVs_vertexId input.vertexId
#define inVs_vertex input.vertex
#define inVs_blendWeights input.blendWeights
#define inVs_blendIndices input.blendIndices
#define inVs_qtangent input.qtangent
#define inVs_drawId input.drawId
#define inVs_uv0 input.uv0
#define inVs_uv1 input.uv1
// ---- Output shorthands: each expands to a field of `outVs` / `outPs` ----
#define outVs_Position outVs.gl_Position
#define outVs_viewportIndex outVs.gl_ViewportIndex
#define outVs_clipDistance0 outVs.gl_ClipDistance0
#define gl_SampleMaskIn0 gl_SampleMask
#define interpolateAtSample( interp, subsample ) EvaluateAttributeAtSample( interp, subsample )
#define findLSB firstbitlow
#define outPs_colour0 outPs.colour0
// ---- Texture sampling / loading wrappers: each expands to a method call on `tex` ----
#define OGRE_Sample( tex, sampler, uv ) tex.Sample( sampler, uv )
#define OGRE_SampleLevel( tex, sampler, uv, lod ) tex.SampleLevel( sampler, uv, lod )
#define OGRE_SampleArray2D( tex, sampler, uv, arrayIdx ) tex.Sample( sampler, float3( uv, arrayIdx ) )
#define OGRE_SampleArray2DLevel( tex, sampler, uv, arrayIdx, lod ) tex.SampleLevel( sampler, float3( uv, arrayIdx ), lod )
#define OGRE_SampleArrayCubeLevel( tex, sampler, uv, arrayIdx, lod ) tex.SampleLevel( sampler, float4( uv, arrayIdx ), lod )
#define OGRE_SampleGrad( tex, sampler, uv, ddx, ddy ) tex.SampleGrad( sampler, uv, ddx, ddy )
#define OGRE_SampleArray2DGrad( tex, sampler, uv, arrayIdx, ddx, ddy ) tex.SampleGrad( sampler, float3( uv, arrayIdx ), ddx, ddy )
#define OGRE_ddx( val ) ddx( val )
#define OGRE_ddy( val ) ddy( val )
#define OGRE_Load2D( tex, iuv, lod ) tex.Load( int3( iuv, lod ) )
#define OGRE_Load2DMS( tex, iuv, subsample ) tex.Load( iuv, subsample )
#define OGRE_Load3D( tex, iuv, lod ) tex.Load( int4( iuv, lod ) )
// ---- Buffer access wrappers ----
#define bufferFetch( buffer, idx ) buffer.Load( idx )
#define bufferFetch1( buffer, idx ) buffer.Load( idx ).x
#define structuredBufferFetch( buffer, idx ) buffer[idx]
#define OGRE_Texture3D_float4 Texture3D
// The sampler argument helpers begin with a comma, so they are appended to an existing argument list.
#define OGRE_SAMPLER_ARG_DECL( samplerName ) , SamplerState samplerName
#define OGRE_SAMPLER_ARG( samplerName ) , samplerName
// ---- Declaration helpers: bindingPoint is token-pasted into the register / TEXCOORD semantic ----
#define CONST_BUFFER( bufferName, bindingPoint ) cbuffer bufferName : register(b##bindingPoint)
// STRUCT_BEGIN opens the cbuffer and a struct named _<structName>; STRUCT_END declares the
// struct instance and closes the cbuffer.
#define CONST_BUFFER_STRUCT_BEGIN( structName, bindingPoint ) cbuffer structName : register(b##bindingPoint) { struct _##structName
#define CONST_BUFFER_STRUCT_END( variableName ) variableName; }
#define FLAT_INTERPOLANT( decl, bindingPoint ) nointerpolation decl : TEXCOORD##bindingPoint
#define INTERPOLANT( decl, bindingPoint ) decl : TEXCOORD##bindingPoint
#define OGRE_OUT_REF( declType, variableName ) out declType variableName
#define OGRE_INOUT_REF( declType, variableName ) inout declType variableName
#define OGRE_ARRAY_START( type ) {
#define OGRE_ARRAY_END }
// worldViewMat is an alias for the pass buffer's view matrix.
#define worldViewMat passBuf.view
// Reads four consecutive float4 elements from matrixBuf, starting at element
// pixelIdx * 4, builds a float4x4 from them and returns its transpose.
float4x4 UNPACK_MAT4( Buffer<float4> matrixBuf, uint pixelIdx )
{
	// Index of the first of the four elements that make up this matrix.
	const uint firstElement = pixelIdx << 2u;

	const float4x4 asLoaded = float4x4( matrixBuf.Load( int(firstElement) ),
	                                    matrixBuf.Load( int(firstElement + 1u) ),
	                                    matrixBuf.Load( int(firstElement + 2u) ),
	                                    matrixBuf.Load( int(firstElement + 3u) ) );
	return transpose( asLoaded );
}
// Reads three consecutive float4 elements from matrixBuf, starting at element
// pixelIdx * 4, builds a float3x4 from them and returns its transpose (a float4x3).
// The fourth element of each group of four is not read.
float4x3 UNPACK_MAT4x3( Buffer<float4> matrixBuf, uint pixelIdx )
{
	// Index of the first element of this matrix; the stride is still four elements.
	const uint firstElement = pixelIdx << 2u;

	const float3x4 asLoaded = float3x4( matrixBuf.Load( int(firstElement) ),
	                                    matrixBuf.Load( int(firstElement + 1u) ),
	                                    matrixBuf.Load( int(firstElement + 2u) ) );
	return transpose( asLoaded );
}
// START UNIFORM DECLARATION
// Shadow receiver parameters. Declared here but not referenced anywhere else
// in the visible shader source.
struct ShadowReceiverData
{
float4x4 texViewProj;
float2 shadowDepthRange;
float2 padding;
float4 invShadowMapSize;
};
// Per-light data; PassBuffer holds an array of these as `lights`.
// lights[0].diffuse.w is read through the numNonCasterDirectionalLights macro.
struct Light
{
float3 position;
uint lightMask;
float4 diffuse; //.w contains numNonCasterDirectionalLights
float3 specular;
};
// Named accessors for values packed into components of the first element of
// each light array (see the per-field comments on Light, AreaLight and
// AreaLtcLight). The macros only expand where they are used, so the arrays
// they name need to exist only in shaders that actually reference them.
#define numNonCasterDirectionalLights lights[0].diffuse.w
#define areaLightDiffuseMipmapStart areaApproxLights[0].diffuse.w
#define areaLightNumMipmapsSpecFactor areaApproxLights[0].specular.w
#define numAreaApproxLights areaApproxLights[0].doubleSided.y
#define numAreaApproxLightsWithMask areaApproxLights[0].doubleSided.z
#define numAreaLtcLights areaLtcLights[0].points[0].w
// Area light data. The areaApproxLights[...] macros above index an array of
// this struct; that array is not declared in the visible shader source.
struct AreaLight
{
float3 position;
uint lightMask;
float4 diffuse; //[0].w contains diffuse mipmap start
float4 specular; //[0].w contains mipmap scale
float4 attenuation; //.w contains texture array idx
//Custom 2D Shape:
// direction.xyz direction
// direction.w invHalfRectSize.x
// tangent.xyz tangent
// tangent.w invHalfRectSize.y
float4 direction;
float4 tangent;
float4 doubleSided; //.y contains numAreaApproxLights
//.z contains numAreaApproxLightsWithMask
};
// LTC area light data. The numAreaLtcLights macro reads areaLtcLights[0].points[0].w;
// the areaLtcLights array itself is not declared in the visible shader source.
struct AreaLtcLight
{
float3 position;
uint lightMask;
float4 diffuse; //.w contains attenuation range
float4 specular; //.w contains doubleSided
float4 points[4]; //.w contains numAreaLtcLights
//points[1].w, points[2].w, points[3].w contain obbFadeFactorLtc.xyz
};
//Uniforms that change per pass
// Via the CONST_BUFFER_STRUCT_BEGIN/END macros this expands to
// `cbuffer PassBuffer : register(b0)` containing a single instance, named
// passBuf, of struct _PassBuffer. main() below reads passBuf.viewProj and
// passBuf.view (the latter through the worldViewMat macro).
CONST_BUFFER_STRUCT_BEGIN( PassBuffer, 0 )
{
//Vertex shader (common to both receiver and casters)
float4x4 viewProj;
//Vertex shader
float4x4 view;
//-------------------------------------------------------------------------
//Pixel shader
float3x3 invViewMatCubemap;
float padding; //Compatibility with GLSL
float4 pccVctMinDistance_invPccVctInvDistance_rightEyePixelStartX_unused;
float4 ambientUpperHemi;
float4 ambientLowerHemi;
float4 ambientHemisphereDir;
Light lights[2];
//Forward3D
//f3dData.x = minDistance;
//f3dData.y = invMaxDistance;
//f3dData.z = f3dNumSlicesSub1;
//f3dData.w = uint cellsPerTableOnGrid0 (floatBitsToUint);
//Clustered Forward:
//f3dData.x = minDistance;
//f3dData.y = invExponentK;
//f3dData.z = f3dNumSlicesSub1;
//f3dData.w = renderWindow->getHeight();
float4 f3dData;
float4 fwdScreenToGrid;
// Each macro below expands to one component of the packed float4 member
// pccVctMinDistance_invPccVctInvDistance_rightEyePixelStartX_unused declared above.
#define pccVctMinDistance pccVctMinDistance_invPccVctInvDistance_rightEyePixelStartX_unused.x
#define invPccVctInvDistance pccVctMinDistance_invPccVctInvDistance_rightEyePixelStartX_unused.y
#define rightEyePixelStartX pccVctMinDistance_invPccVctInvDistance_rightEyePixelStartX_unused.z
}
CONST_BUFFER_STRUCT_END( passBuf );
//Uniforms that change per Item/Entity
// Via the CONST_BUFFER macro this expands to `cbuffer InstanceBuffer : register(b2)`.
// main() below indexes worldMaterialIdx by the draw id and uses `.x >> 9u` as
// the start index into worldMatBuf.
CONST_BUFFER( InstanceBuffer, 2 )
{
//.x =
//The lower 9 bits contain the material's start index.
//The higher 23 bits contain the world matrix start index.
//
//.y =
//shadowConstantBias. Send the bias directly to avoid an
//unnecessary indirection during the shadow mapping pass.
//Must be loaded with uintBitsToFloat
//
//.z =
//lightMask. Ogre must have been compiled with OGRE_NO_FINE_LIGHT_MASK_GRANULARITY
uint4 worldMaterialIdx[2];
};
// END UNIFORM DECLARATION
// Vertex shader input layout; main() receives it as the `input` parameter, and
// the inVs_* macros expand to fields of that parameter.
// NOTE(review): the inVs_vertexId and inVs_qtangent macros name fields that this
// struct does not declare; they are not used in the visible shader body.
struct VS_INPUT
{
float4 vertex : POSITION;
float3 normal : NORMAL;
float3 tangent : TANGENT;
uint4 blendIndices : BLENDINDICES;
float4 blendWeights : BLENDWEIGHT;
float2 uv0 : TEXCOORD0;
float4 uv1 : TEXCOORD1;
uint drawId : DRAWID;
};
// Vertex shader output; main() returns this struct.
// INTERPOLANT( decl, N ) expands to `decl : TEXCOORDN`; FLAT_INTERPOLANT adds a
// `nointerpolation` prefix, and `ushort` is #defined to `uint`, so the first
// member becomes `nointerpolation uint drawId : TEXCOORD0`.
struct PS_INPUT
{
FLAT_INTERPOLANT( ushort drawId, 0 );
INTERPOLANT( float3 pos, 1 );
INTERPOLANT( float3 normal, 2 );
INTERPOLANT( float3 tangent, 3 );
INTERPOLANT( float2 uv0, 4 );
INTERPOLANT( float4 uv1, 5 );
float4 gl_Position: SV_Position;
};
// START UNIFORM D3D DECLARATION
// float4 buffer bound to register t0. main() fetches three consecutive
// elements per bone from it and uses them as the rows of that bone's matrix.
Buffer<float4> worldMatBuf : register(t0);
// END UNIFORM D3D DECLARATION
// Vertex shader entry point.
// Skins the vertex position, normal and tangent with two bones (blend
// indices/weights [0] and [1]), transforms the results with the pass view
// matrix, writes the clip-space position using passBuf.viewProj, and passes
// both UV sets and the draw id through unchanged.
PS_INPUT main( VS_INPUT input )
{
	PS_INPUT outVs;

	const float3 objNormal = input.normal;
	const float3 objTangent = input.tangent;

	// The bits above the low 9 of worldMaterialIdx[].x give the start of this
	// draw's matrices inside worldMatBuf.
	const uint matStart = worldMaterialIdx[inVs_drawId].x >> 9u;

	// ---- Bone 0 ----
	// A bone occupies three float4 rows, so the row offset is index * 3. It is
	// written as shift + add: a 32-bit int multiply is 4 cycles on GCN! (and
	// mul24 is not exposed to GLSL...)
	uint rowOffset = (inVs_blendIndices[0] << 1u) + inVs_blendIndices[0];
	float4 boneRow0 = bufferFetch( worldMatBuf, int(matStart + rowOffset + 0u) );
	float4 boneRow1 = bufferFetch( worldMatBuf, int(matStart + rowOffset + 1u) );
	float4 boneRow2 = bufferFetch( worldMatBuf, int(matStart + rowOffset + 2u) );

	float4 worldPos;
	worldPos.xyz = float3( dot( boneRow0, inVs_vertex ),
	                       dot( boneRow1, inVs_vertex ),
	                       dot( boneRow2, inVs_vertex ) ) * inVs_blendWeights[0];

	float3 worldNorm = float3( dot( boneRow0.xyz, objNormal ),
	                           dot( boneRow1.xyz, objNormal ),
	                           dot( boneRow2.xyz, objNormal ) ) * inVs_blendWeights[0];

	float3 worldTang = float3( dot( boneRow0.xyz, objTangent ),
	                           dot( boneRow1.xyz, objTangent ),
	                           dot( boneRow2.xyz, objTangent ) ) * inVs_blendWeights[0];

	// ---- Bone 1 ----
	//!NeedsMoreThan1BonePerVertex
	rowOffset = (inVs_blendIndices[1] << 1u) + inVs_blendIndices[1];
	boneRow0 = bufferFetch( worldMatBuf, int(matStart + rowOffset + 0u) );
	boneRow1 = bufferFetch( worldMatBuf, int(matStart + rowOffset + 1u) );
	boneRow2 = bufferFetch( worldMatBuf, int(matStart + rowOffset + 2u) );

	worldPos.xyz += float3( dot( boneRow0, inVs_vertex ),
	                        dot( boneRow1, inVs_vertex ),
	                        dot( boneRow2, inVs_vertex ) ) * inVs_blendWeights[1];

	worldNorm += float3( dot( boneRow0.xyz, objNormal ),
	                     dot( boneRow1.xyz, objNormal ),
	                     dot( boneRow2.xyz, objNormal ) ) * inVs_blendWeights[1];

	worldTang += float3( dot( boneRow0.xyz, objTangent ),
	                     dot( boneRow1.xyz, objTangent ),
	                     dot( boneRow2.xyz, objTangent ) ) * inVs_blendWeights[1];

	worldPos.w = 1.0;

	//Lighting is in view space
	outVs.pos = mul( worldPos, worldViewMat ).xyz;
	outVs.normal = mul( worldNorm, toFloat3x3( worldViewMat ) );
	outVs.tangent = mul( worldTang, toFloat3x3( worldViewMat ) );

	outVs_Position = mul( worldPos, passBuf.viewProj );

	/// hlms_uv_count will be 0 on shadow caster passes w/out alpha test
	outVs.uv0 = inVs_uv0;
	outVs.uv1 = inVs_uv1;

	outVs.drawId = inVs_drawId;

	return outVs;
}
_vs_5_0_
The lastReturnedValue is null, so the shader will be compiled. Then, in OgreD3D11HLSLProgram.cpp, in the function:
"getMicrocodeFromCache();" is always called, because the entry already exists in the hash map, and "compileMicrocode();" is never called, so the microcode is never added to the cache and isCacheDirty is never set to "true". But why?
Is the hash perhaps calculated incorrectly, or does it accidentally collide with the hash of another shader?