傳統的蒙皮骨骼動畫混合方法易於理解,但是在SM 2.0的256常量限制下,骨骼數保守計算最多50根骨頭,因此對美術的工作流程以及模型渲染方法造成了很大的障礙。
float4x4 matBoneArray[40]; // Bone matrix array indexed by the vertex bone indices; uploading it is the transfer bottleneck
// Vertex shader: skinning with a matrix palette.
// Sums the four matrices from matBoneArray selected by In.BoneIndices, each
// scaled by the matching component of In.BoneWeights, transforms the vertex
// position by that blended matrix and then by matViewProj, and passes the
// texture coordinate through unchanged.
VS_OUTPUT vs_main( SkinnedVS_INPUT In )
{
    // Weighted sum of the four influencing bone matrices (x, y, z, w order).
    float4x4 blendedBone = 0;
    blendedBone += In.BoneWeights.x * matBoneArray[In.BoneIndices.x];
    blendedBone += In.BoneWeights.y * matBoneArray[In.BoneIndices.y];
    blendedBone += In.BoneWeights.z * matBoneArray[In.BoneIndices.z];
    blendedBone += In.BoneWeights.w * matBoneArray[In.BoneIndices.w];

    // Row-vector convention: position on the left of mul().
    float4 skinnedPos = mul( In.Position, blendedBone );

    VS_OUTPUT Out = (VS_OUTPUT)0;
    Out.Position = mul( skinnedPos, matViewProj );
    Out.TexCoord = In.TexCoord;
    return Out;
}
matBoneArray這個數組是由骨骼的LocalRot和LocalTranslation通過以下函數build出來的:
/// @brief Overwrites this matrix with a transform built from a quaternion and a translation.
///
/// The upper-left 3x3 is filled from the quaternion components, the fourth
/// row receives the translation, and the fourth column is set to (0, 0, 0, 1).
///
/// @param translation Offset written to m[3][0..2].
/// @param rotation    Quaternion (x, y, z, w); it is used as-is and is not
///                    normalized here (presumably expected to be unit length).
/// @return Reference to this matrix.
Matrix4& Matrix4::FromTranslationRotation( const Vector3& translation, const Quaternion& rotation )
{
    const Quaternion& q = rotation;

    // Doubled pairwise products of the quaternion components.
    float twoXX = q.x * q.x * 2.0f;
    float twoYY = q.y * q.y * 2.0f;
    float twoZZ = q.z * q.z * 2.0f;
    float twoXY = q.x * q.y * 2.0f;
    float twoZW = q.z * q.w * 2.0f;
    float twoXZ = q.x * q.z * 2.0f;
    float twoYW = q.y * q.w * 2.0f;
    float twoYZ = q.y * q.z * 2.0f;
    float twoXW = q.x * q.w * 2.0f;

    // Row 0
    m[0][0] = 1.0f - twoYY - twoZZ;
    m[0][1] = twoXY + twoZW;
    m[0][2] = twoXZ - twoYW;
    m[0][3] = 0.0f;

    // Row 1
    m[1][0] = twoXY - twoZW;
    m[1][1] = 1.0f - twoXX - twoZZ;
    m[1][2] = twoYZ + twoXW;
    m[1][3] = 0.0f;

    // Row 2
    m[2][0] = twoXZ + twoYW;
    m[2][1] = twoYZ - twoXW;
    m[2][2] = 1.0f - twoXX - twoYY;
    m[2][3] = 0.0f;

    // Row 3: translation
    m[3][0] = translation.x;
    m[3][1] = translation.y;
    m[3][2] = translation.z;
    m[3][3] = 1.0f;

    return *this;
}
從這裡你可以發現,本來每根骨頭只需要2個float4傳遞變換信息,現在卻需要4個float4,也就是一個矩陣來傳遞,矩陣中還有很多不使用的變量也被傳輸到GPU中,這裡就是優化的點。
重新調整後的Shader代碼:
// Builds a 4x4 transform from a translation and a rotation.
//   translation : only .xyz is used; it becomes the fourth row.
//   rot         : treated as a quaternion (x, y, z, w); used as-is, not
//                 normalized here.
// The upper-left 3x3 is derived from the quaternion; the fourth column is
// (0, 0, 0, 1).
float4x4 BuildFromTransRot( float4 translation, float4 rot )
{
    // Doubled pairwise products of the quaternion components.
    float twoXX = rot.x * rot.x * 2.0f;
    float twoYY = rot.y * rot.y * 2.0f;
    float twoZZ = rot.z * rot.z * 2.0f;
    float twoXY = rot.x * rot.y * 2.0f;
    float twoZW = rot.z * rot.w * 2.0f;
    float twoXZ = rot.x * rot.z * 2.0f;
    float twoYW = rot.y * rot.w * 2.0f;
    float twoYZ = rot.y * rot.z * 2.0f;
    float twoXW = rot.x * rot.w * 2.0f;

    float4 row0 = float4( 1.0f - twoYY - twoZZ, twoXY + twoZW, twoXZ - twoYW, 0.0f );
    float4 row1 = float4( twoXY - twoZW, 1.0f - twoXX - twoZZ, twoYZ + twoXW, 0.0f );
    float4 row2 = float4( twoXZ + twoYW, twoYZ - twoXW, 1.0f - twoXX - twoYY, 0.0f );
    float4 row3 = float4( translation.x, translation.y, translation.z, 1.0f );

    // The matrix constructor fills elements in row-major order.
    return float4x4( row0, row1, row2, row3 );
}
// Rebuilds the 4x4 transform for the bone at `index` from the
// vecBoneLocalTrans and vecBoneLocalRot arrays.
float4x4 GetBoneElement( float index )
{
    float4 boneTranslation = vecBoneLocalTrans[index];
    float4 boneRotation    = vecBoneLocalRot[index];
    return BuildFromTransRot( boneTranslation, boneRotation );
}
// Vertex shader: skinning with bone matrices rebuilt on the GPU.
// For each of the four bone indices, GetBoneElement() reconstructs the bone
// matrix; the matrices are summed, each scaled by the matching component of
// In.BoneWeights. The vertex position is transformed by the blended matrix
// and then by matViewProj; the texture coordinate is passed through.
VS_OUTPUT vs_main( SkinnedVS_INPUT In )
{
    // Weighted sum of the four reconstructed bone matrices (x, y, z, w order).
    float4x4 blendedBone = 0;
    blendedBone += In.BoneWeights.x * GetBoneElement( In.BoneIndices.x );
    blendedBone += In.BoneWeights.y * GetBoneElement( In.BoneIndices.y );
    blendedBone += In.BoneWeights.z * GetBoneElement( In.BoneIndices.z );
    blendedBone += In.BoneWeights.w * GetBoneElement( In.BoneIndices.w );

    // Row-vector convention: position on the left of mul().
    float4 skinnedPos = mul( In.Position, blendedBone );

    VS_OUTPUT Out = (VS_OUTPUT)0;
    Out.Position = mul( skinnedPos, matViewProj );
    Out.TexCoord = In.TexCoord;
    return Out;
}
我們將骨頭的local旋轉及偏移傳遞至GPU,然後在GPU內重組,雖然對GPU計算性能有部分損耗,但是骨骼數量就能保守提高到100個。