diff --git a/Code/DQ skinning/DualQuaternionSkinner.cs b/Code/DQ skinning/DualQuaternionSkinner.cs
index 2fabfb1..e63c5b8 100644
--- a/Code/DQ skinning/DualQuaternionSkinner.cs
+++ b/Code/DQ skinning/DualQuaternionSkinner.cs
@@ -10,7 +10,8 @@
/// SkinnedMeshRenderer is required to extract some information about the mesh during Start() and is destroyed immediately after.
///
[RequireComponent(typeof(MeshFilter))]
-public class DualQuaternionSkinner : MonoBehaviour {
+public class DualQuaternionSkinner : MonoBehaviour
+{
struct DualQuaternion
{
@@ -31,11 +32,9 @@ struct BoneWeightInfo
public float weight3;
}
- DualQuaternion[] poseDualQuaternions;
-
- const int numthreads = 1024; // must be same in compute shader code
+ const int numthreads = 1024; // must be same in compute shader code
const int textureWidth = 1024; // no need to adjust compute shaders
-
+
public ComputeShader shaderComputeBoneDQ;
public ComputeShader shaderDQBlend;
public ComputeShader shaderApplyMorph;
@@ -46,7 +45,10 @@ struct BoneWeightInfo
///
public bool started { get; private set; } = false;
- ComputeBuffer bufPoseDq;
+ DualQuaternion[] poseDualQuaternions;
+ Matrix4x4[] poseMatrices;
+
+ ComputeBuffer bufPoseMatrices;
ComputeBuffer bufSkinnedDq;
ComputeBuffer bufOriginalVertices;
ComputeBuffer bufOriginalNormals;
@@ -232,7 +234,7 @@ void SetMesh(Mesh mesh)
{
// could use float3 instead of float4 but NVidia says structures not aligned to 128 bits are slow
// https://developer.nvidia.com/content/understanding-structured-buffer-performance
- this.arrBufMorphDeltaTangents[i] = new ComputeBuffer(this.mf.mesh.vertexCount, sizeof(float) * 4);
+ this.arrBufMorphDeltaTangents[i] = new ComputeBuffer(this.mf.mesh.vertexCount, sizeof(float) * 4);
for (int k = 0; k < this.mf.mesh.vertexCount; k++)
tempVec4[k] = deltaTangents[k];
this.arrBufMorphDeltaTangents[i].SetData(tempVec4);
@@ -254,6 +256,7 @@ void SetMesh(Mesh mesh)
this.shaderDQBlend.SetInt("textureWidth", textureWidth);
this.poseDualQuaternions = new DualQuaternion[this.mf.mesh.bindposes.Length];
+ this.poseMatrices = new Matrix4x4[this.mf.mesh.bindposes.Length];
// initiate textures and buffers
@@ -281,8 +284,8 @@ void SetMesh(Mesh mesh)
this.rtSkinnedData_3.Create();
this.shaderDQBlend.SetTexture(this.kernelHandleComputeBoneDQ, "skinned_data_3", this.rtSkinnedData_3);
- this.bufPoseDq = new ComputeBuffer(this.mf.mesh.bindposes.Length, sizeof(float) * 8);
- this.shaderComputeBoneDQ.SetBuffer(this.kernelHandleComputeBoneDQ, "pose_dual_quaternions", this.bufPoseDq);
+ this.bufPoseMatrices = new ComputeBuffer(this.mf.mesh.bindposes.Length, sizeof(float) * 16);
+ this.shaderComputeBoneDQ.SetBuffer(this.kernelHandleComputeBoneDQ, "pose_matrices", this.bufPoseMatrices);
this.bufSkinnedDq = new ComputeBuffer(this.mf.mesh.bindposes.Length, sizeof(float) * 8);
this.shaderComputeBoneDQ.SetBuffer(this.kernelHandleComputeBoneDQ, "skinned_dual_quaternions", this.bufSkinnedDq);
@@ -442,7 +445,7 @@ void ReleaseBuffers()
this.bufOriginalNormals?.Release();
this.bufOriginalVertices?.Release();
this.bufOriginalTangents?.Release();
- this.bufPoseDq?.Release();
+ this.bufPoseMatrices?.Release();
this.bufSkinnedDq?.Release();
this.bufMorphedNormals?.Release();
this.bufMorphedVertices?.Release();
@@ -464,13 +467,13 @@ void ReleaseBuffers()
void OnDestroy()
{
- this.ReleaseBuffers();
+ this.ReleaseBuffers(); // release the compute buffers owned by this component (see ReleaseBuffers) when it is destroyed
}
// Use this for initialization
void Start()
{
- this.shaderComputeBoneDQ = (ComputeShader)Instantiate(this.shaderComputeBoneDQ); // bug workaround
+ this.shaderComputeBoneDQ = (ComputeShader)Instantiate(this.shaderComputeBoneDQ); // bug workaround
this.shaderDQBlend = (ComputeShader)Instantiate(this.shaderDQBlend); // bug workaround
this.shaderApplyMorph = (ComputeShader)Instantiate(this.shaderApplyMorph); // bug workaround
@@ -502,35 +505,16 @@ void Start()
}
// Update is called once per frame
- void Update () {
+ void Update()
+ {
this.ApplyAllMorphs();
- this.mf.mesh.MarkDynamic (); // once or every frame? idk.
- // at least it does not affect performance
-
- this.shaderComputeBoneDQ.SetVector(
- "parent_rotation_quaternion",
- Quaternion.Inverse(this.transform.parent.rotation).ToVector4()
- );
-
- this.shaderComputeBoneDQ.SetVector (
- "parent_translation_quaternion",
- new Vector4(
- - this.transform.parent.position.x,
- - this.transform.parent.position.y,
- - this.transform.parent.position.z,
- 1
- )
- );
+ this.mf.mesh.MarkDynamic(); // once or every frame? idk.
+ // at least it does not affect performance
- this.shaderComputeBoneDQ.SetVector(
- "parent_scale",
- new Vector4(
- this.transform.parent.lossyScale.x,
- this.transform.parent.lossyScale.y,
- this.transform.parent.lossyScale.z,
- 1
- )
+ this.shaderComputeBoneDQ.SetMatrix(
+ "self_matrix",
+ this.transform.worldToLocalMatrix
);
for (int i = 0; i < this.bones.Length; i++)
@@ -541,17 +525,24 @@ void Update () {
// could use float3 instead of float4 for position but NVidia says structures not aligned to 128 bits are slow
// https://developer.nvidia.com/content/understanding-structured-buffer-performance
- this.poseDualQuaternions[i].position = new Vector4(pos.x, pos.y, pos.z, 1);
- }
+ this.poseDualQuaternions[i].position = new Vector4(
+ pos.x,
+ pos.y,
+ pos.z,
+ 0
+ );
- this.bufPoseDq.SetData(this.poseDualQuaternions);
+ this.poseMatrices[i] = this.bones[i].localToWorldMatrix;
+ }
+
+ this.bufPoseMatrices.SetData(this.poseMatrices);
// Calculate blended quaternions
int numThreadGroups = this.bones.Length / numthreads;
if (this.bones.Length % numthreads != 0)
{
- numThreadGroups ++;
+ numThreadGroups++;
}
this.shaderComputeBoneDQ.Dispatch(this.kernelHandleDQBlend, numThreadGroups, 1, 1);
diff --git a/Code/DQ skinning/Shaders/Compute/ComputeBoneDQ.compute b/Code/DQ skinning/Shaders/Compute/ComputeBoneDQ.compute
index 796f97a..ec71a34 100644
--- a/Code/DQ skinning/Shaders/Compute/ComputeBoneDQ.compute
+++ b/Code/DQ skinning/Shaders/Compute/ComputeBoneDQ.compute
@@ -3,10 +3,6 @@
#include "DQ.cginc"
-float4 parent_rotation_quaternion;
-float4 parent_translation_quaternion;
-float4 parent_scale;
-
struct boneWeight
{
int boneIndex0;
@@ -20,34 +16,26 @@ struct boneWeight
float boneWeight3;
};
-RWStructuredBuffer pose_dual_quaternions;
+RWStructuredBuffer<float4x4> pose_matrices; // one 4x4 matrix per bone; filled by DualQuaternionSkinner.Update() from bones[i].localToWorldMatrix
+float4x4 self_matrix; // set by DualQuaternionSkinner.Update() to the skinner transform's worldToLocalMatrix
+
RWStructuredBuffer bind_dual_quaternions;
RWStructuredBuffer skinned_dual_quaternions;
[numthreads(1024,1,1)]
void CSMain (uint3 id : SV_DispatchThreadID)
{
- struct dual_quaternion dq_parent;
- struct dual_quaternion dq_pose;
struct dual_quaternion dq_bind;
+ dq_bind.rotation_quaternion = bind_dual_quaternions.Load(id.x).rotation_quaternion;
+ dq_bind.translation_quaternion = bind_dual_quaternions.Load(id.x).translation_quaternion;
+ dq_bind.translation_quaternion = QuaternionMultiply(dq_bind.translation_quaternion, dq_bind.rotation_quaternion) * 0.5;
- dq_parent .rotation_quaternion = parent_rotation_quaternion;
- dq_pose .rotation_quaternion = pose_dual_quaternions.Load(id.x).rotation_quaternion;
- dq_bind .rotation_quaternion = bind_dual_quaternions.Load(id.x).rotation_quaternion;
-
- dq_parent .translation_quaternion = parent_translation_quaternion;
- dq_pose .translation_quaternion = pose_dual_quaternions.Load(id.x).translation_quaternion;
- dq_bind .translation_quaternion = bind_dual_quaternions.Load(id.x).translation_quaternion;
+ float4x4 pose_matrix = transpose(pose_matrices.Load(id.x));
+ pose_matrix = mul(self_matrix, pose_matrix);
- dq_parent .translation_quaternion = QuaternionMultiply(dq_parent .rotation_quaternion, dq_parent .translation_quaternion ) * 0.5;
- dq_pose .translation_quaternion = QuaternionMultiply(dq_pose .translation_quaternion, dq_pose .rotation_quaternion ) * 0.5;
- dq_bind .translation_quaternion = QuaternionMultiply(dq_bind .translation_quaternion, dq_bind .rotation_quaternion ) * 0.5;
-
- struct dual_quaternion dq_skinned = DualQuaternionMultiply(dq_parent, dq_pose);
-
- dq_skinned.translation_quaternion /= parent_scale;
+ struct dual_quaternion dq_pose = DualQuaternionFromMatrix4x4(pose_matrix);
- dq_skinned = DualQuaternionMultiply(dq_skinned, dq_bind);
+ struct dual_quaternion dq_skinned = DualQuaternionMultiply(dq_pose, dq_bind);
skinned_dual_quaternions[id.x].rotation_quaternion = dq_skinned.rotation_quaternion;
skinned_dual_quaternions[id.x].translation_quaternion = dq_skinned.translation_quaternion;
diff --git a/Code/DQ skinning/Shaders/Compute/DQ.cginc b/Code/DQ skinning/Shaders/Compute/DQ.cginc
index 9868013..7518592 100644
--- a/Code/DQ skinning/Shaders/Compute/DQ.cginc
+++ b/Code/DQ skinning/Shaders/Compute/DQ.cginc
@@ -40,4 +40,28 @@ struct dual_quaternion DualQuaternionShortestPath(struct dual_quaternion dq1, st
dq1.rotation_quaternion = isBadPath ? -dq1.rotation_quaternion : dq1.rotation_quaternion;
dq1.translation_quaternion = isBadPath ? -dq1.translation_quaternion : dq1.translation_quaternion;
return dq1;
+}
+
+float4 QuaternionApplyRotation(float4 v, float4 rotQ) // returns rotQ * v * QuaternionInvert(rotQ)
+{
+    float4 inverseRotQ = QuaternionInvert(rotQ);
+    return QuaternionMultiply(QuaternionMultiply(rotQ, v), inverseRotQ);
+}
+
+// Converts a rigid transform matrix to a dual quaternion: rotation from the upper-left 3x3, translation from m[0..2][3].
+// Assumes the 3x3 part is a pure rotation (no scale or shear) and m[3][3] = 1.0; the returned rotation has w >= 0.
+struct dual_quaternion DualQuaternionFromMatrix4x4(float4x4 m)
+{
+    struct dual_quaternion dq;
+    // For a unit quaternion (x, y, z, w): d.k + 1 == 4 * k * k. Pivoting on the largest d avoids dividing by a near-zero w (rotations close to 180 degrees).
+    float4 d = float4(m[0][0] - m[1][1] - m[2][2], m[1][1] - m[0][0] - m[2][2], m[2][2] - m[0][0] - m[1][1], m[0][0] + m[1][1] + m[2][2]);
+    float4 q = float4(m[2][1] - m[1][2], m[0][2] - m[2][0], m[1][0] - m[0][1], d.w + 1.0); // 4w * (x, y, z, w)
+    if (d.x > d.w && d.x >= d.y && d.x >= d.z) q = float4(d.x + 1.0, m[0][1] + m[1][0], m[0][2] + m[2][0], m[2][1] - m[1][2]); // 4x * (x, y, z, w)
+    else if (d.y > d.w && d.y >= d.z) q = float4(m[0][1] + m[1][0], d.y + 1.0, m[1][2] + m[2][1], m[0][2] - m[2][0]); // 4y * (x, y, z, w)
+    else if (d.z > d.w) q = float4(m[0][2] + m[2][0], m[1][2] + m[2][1], d.z + 1.0, m[1][0] - m[0][1]); // 4z * (x, y, z, w)
+    dq.rotation_quaternion = normalize(q.w < 0.0 ? -q : q); // rescale to unit length; q and -q are the same rotation
+
+    dq.translation_quaternion = float4(m[0][3], m[1][3], m[2][3], 0);
+    dq.translation_quaternion = QuaternionMultiply(dq.translation_quaternion, dq.rotation_quaternion) * 0.5;
+    return dq;
}
\ No newline at end of file