From 9d7806cb43f475b079e59c02d891175ce85bb1a5 Mon Sep 17 00:00:00 2001
From: D-AIRY <admin@ds-servers.com>
Date: Sat, 28 Nov 2020 14:02:45 +0300
Subject: [PATCH] Reduced GPU-side vertex size

---
 source/anim/AnimatedModelProvider.cpp     | 16 +++++++++
 source/anim/AnimatedModelShared.cpp       | 40 +++++++++++++++++++----
 source/anim/AnimatedModelShared.h         |  2 +-
 source/anim/DynamicModelProvider.cpp      | 14 +++++++-
 source/anim/DynamicModelShared.cpp        | 28 +++++++++++++---
 source/anim/DynamicModelShared.h          |  2 +-
 source/xcommon/resource/IXResourceModel.h | 20 ++++++++++++
 7 files changed, 107 insertions(+), 15 deletions(-)

diff --git a/source/anim/AnimatedModelProvider.cpp b/source/anim/AnimatedModelProvider.cpp
index 014167dcd..a3728936d 100644
--- a/source/anim/AnimatedModelProvider.cpp
+++ b/source/anim/AnimatedModelProvider.cpp
@@ -30,6 +30,8 @@ void CAnimatedModelProvider::setDevice(IGXDevice *pDevice)
 {
 	m_pRenderContext = pDevice;
 
+#if 0
+	// 76 bytes (last element: FLOAT4 at offset 60)
 	GXVertexElement layoutDynamicEx[] =
 	{
 		{0, 0, GXDECLTYPE_FLOAT3, GXDECLUSAGE_POSITION},
@@ -41,6 +43,20 @@ void CAnimatedModelProvider::setDevice(IGXDevice *pDevice)
 		{0, 60, GXDECLTYPE_FLOAT4, GXDECLUSAGE_BLENDWEIGHT},
 		GX_DECL_END()
 	};
+#endif
+
+	// 52 bytes
+	GXVertexElement layoutDynamicEx[] =
+	{
+		{0, 0, GXDECLTYPE_FLOAT3, GXDECLUSAGE_POSITION},
+		{0, 12, GXDECLTYPE_FLOAT2, GXDECLUSAGE_TEXCOORD},
+		{0, 20, GXDECLTYPE_SHORT4N, GXDECLUSAGE_NORMAL},
+		{0, 28, GXDECLTYPE_SHORT4N, GXDECLUSAGE_TANGENT},
+		{0, 36, GXDECLTYPE_SHORT4N, GXDECLUSAGE_BINORMAL},
+		{0, 44, GXDECLTYPE_UBYTE4, GXDECLUSAGE_BLENDINDICES},
+		{0, 48, GXDECLTYPE_UBYTE4N, GXDECLUSAGE_BLENDWEIGHT},
+		GX_DECL_END()
+	};
 
 	m_pVertexDeclaration = getDevice()->createVertexDeclaration(layoutDynamicEx);
 	
diff --git a/source/anim/AnimatedModelShared.cpp b/source/anim/AnimatedModelShared.cpp
index 2fdf6207a..44edd18cd 100644
--- a/source/anim/AnimatedModelShared.cpp
+++ b/source/anim/AnimatedModelShared.cpp
@@ -459,11 +459,11 @@ bool CAnimatedModelShared::init(UINT uResourceCount, IXResourceModelAnimated **p
 		}
 
 		UINT **ppIndices = new UINT*[m_uLodCount];
-		XResourceModelAnimatedVertex **ppVertices = new XResourceModelAnimatedVertex*[m_uLodCount];
+		XResourceModelAnimatedVertexGPU **ppVertices = new XResourceModelAnimatedVertexGPU*[m_uLodCount];
 		for(UINT i = 0; i < m_uLodCount; ++i)
 		{
 			ppIndices[i] = new UINT[aLodIndexCount[i]];
-			ppVertices[i] = new XResourceModelAnimatedVertex[aLodVertexCount[i]];
+			ppVertices[i] = new XResourceModelAnimatedVertexGPU[aLodVertexCount[i]];
 		}
 
 
@@ -494,9 +494,35 @@ bool CAnimatedModelShared::init(UINT uResourceCount, IXResourceModelAnimated **p
 						{
 							// append subset to lod (group by materialId)
 
-							//memcpy(&ppIndices[uLod][subset.uStartIndex], pSubset->pIndices, sizeof(UINT) * pSubset->iIndexCount);
-							memcpy(&ppVertices[uLod][subset.uStartVertex], pSubset->pVertices, sizeof(XResourceModelAnimatedVertex) * pSubset->iVertexCount);
+							//memcpy(&ppVertices[uLod][subset.uStartVertex], pSubset->pVertices, sizeof(XResourceModelAnimatedVertex) * pSubset->iVertexCount);
+							for(UINT k = 0; k < pSubset->iVertexCount; ++k)
+							{
+#define TO_SHORT(v) ((short)((v) * 32767.0f))
+								auto &dst = (ppVertices[uLod] + subset.uStartVertex)[k];
+								auto &src = pSubset->pVertices[k];
+								dst.vPos = src.vPos;
+								dst.vTex = src.vTex;
+								dst.vNorm[0] = TO_SHORT(src.vNorm.x);
+								dst.vNorm[1] = TO_SHORT(src.vNorm.y);
+								dst.vNorm[2] = TO_SHORT(src.vNorm.z);
+								dst.vTangent[0] = TO_SHORT(src.vTangent.x);
+								dst.vTangent[1] = TO_SHORT(src.vTangent.y);
+								dst.vTangent[2] = TO_SHORT(src.vTangent.z);
+								dst.vBinorm[0] = TO_SHORT(src.vBinorm.x);
+								dst.vBinorm[1] = TO_SHORT(src.vBinorm.y);
+								dst.vBinorm[2] = TO_SHORT(src.vBinorm.z);
+								for(UINT k = 0; k < 4; ++k)
+								{
+									dst.u8BoneIndices[k] = src.u8BoneIndices[k];
+								}
+								dst.vBoneWeights[0] = (byte)(src.vBoneWeights.x * 255.0f);
+								dst.vBoneWeights[1] = (byte)(src.vBoneWeights.y * 255.0f);
+								dst.vBoneWeights[2] = (byte)(src.vBoneWeights.z * 255.0f);
+								dst.vBoneWeights[3] = (byte)(src.vBoneWeights.w * 255.0f);
+#undef TO_SHORT
+							}
 
+							//memcpy(&ppIndices[uLod][subset.uStartIndex], pSubset->pIndices, sizeof(UINT) * pSubset->iIndexCount);
 							UINT uIndexDelta = aLodVertexCount[uLod];
 							for(UINT j = 0; j < pSubset->iIndexCount; ++j)
 							{
@@ -506,7 +532,7 @@ bool CAnimatedModelShared::init(UINT uResourceCount, IXResourceModelAnimated **p
 							//@TODO: optimize that!!!
 							for(UINT j = 0; j < pSubset->iVertexCount; ++j)
 							{
-								XResourceModelAnimatedVertex &vtx = ppVertices[uLod][subset.uStartVertex + j];
+								XResourceModelAnimatedVertexGPU &vtx = ppVertices[uLod][subset.uStartVertex + j];
 								for(UINT k = 0; k < 4; ++k)
 								{
 									vtx.u8BoneIndices[k] = getBoneId(aSubsets[i].pResource->getBoneName(vtx.u8BoneIndices[k]));
@@ -546,7 +572,7 @@ bool CAnimatedModelShared::init(UINT uResourceCount, IXResourceModelAnimated **p
 				if(m_pProvider->getCore()->isOnMainThread())
 				{
 					m_ppIndexBuffer[i] = m_pDevice->createIndexBuffer(sizeof(UINT) * aLodIndexCount[i], GXBUFFER_USAGE_STATIC, GXIT_UINT32, ppIndices[i]);
-					IGXVertexBuffer *pVertexBuffer = m_pDevice->createVertexBuffer(sizeof(XResourceModelAnimatedVertex) * aLodVertexCount[i], GXBUFFER_USAGE_STATIC, ppVertices[i]);
+					IGXVertexBuffer *pVertexBuffer = m_pDevice->createVertexBuffer(sizeof(XResourceModelAnimatedVertexGPU) * aLodVertexCount[i], GXBUFFER_USAGE_STATIC, ppVertices[i]);
 					m_ppRenderBuffer[i] = m_pDevice->createRenderBuffer(1, &pVertexBuffer, m_pProvider->getVertexDeclaration());
 					mem_release(pVertexBuffer);
 				}
@@ -680,7 +706,7 @@ void CAnimatedModelShared::initGPUresources()
 	for(UINT i = 0; i < m_uLodCount; ++i)
 	{
 		m_ppIndexBuffer[i] = m_pDevice->createIndexBuffer(sizeof(UINT) * m_puTempTotalIndices[i], GXBUFFER_USAGE_STATIC, GXIT_UINT32, m_ppTempIndices[i]);
-		IGXVertexBuffer *pVertexBuffer = m_pDevice->createVertexBuffer(sizeof(XResourceModelAnimatedVertex) * m_puTempTotalVertices[i], GXBUFFER_USAGE_STATIC, m_ppTempVertices[i]);
+		IGXVertexBuffer *pVertexBuffer = m_pDevice->createVertexBuffer(sizeof(XResourceModelAnimatedVertexGPU) * m_puTempTotalVertices[i], GXBUFFER_USAGE_STATIC, m_ppTempVertices[i]);
 		m_ppRenderBuffer[i] = m_pDevice->createRenderBuffer(1, &pVertexBuffer, m_pProvider->getVertexDeclaration());
 		mem_release(pVertexBuffer);
 
diff --git a/source/anim/AnimatedModelShared.h b/source/anim/AnimatedModelShared.h
index f8e84fee2..9c67a1404 100644
--- a/source/anim/AnimatedModelShared.h
+++ b/source/anim/AnimatedModelShared.h
@@ -72,7 +72,7 @@ protected:
 	IGXIndexBuffer **m_ppIndexBuffer = NULL;
 
 	UINT **m_ppTempIndices = NULL;
-	XResourceModelAnimatedVertex **m_ppTempVertices = NULL;
+	XResourceModelAnimatedVertexGPU **m_ppTempVertices = NULL;
 	UINT *m_puTempTotalIndices = 0;
 	UINT *m_puTempTotalVertices = 0;
 
diff --git a/source/anim/DynamicModelProvider.cpp b/source/anim/DynamicModelProvider.cpp
index 2d3fe536b..98c6715f2 100644
--- a/source/anim/DynamicModelProvider.cpp
+++ b/source/anim/DynamicModelProvider.cpp
@@ -209,7 +209,8 @@ void CDynamicModelProvider::setDevice(IGXDevice *pDevice)
 {
 	m_pRenderContext = pDevice;
 
-
+#if 0
+	// 56 bytes
 	GXVertexElement layoutStaticEx[] =
 	{
 		{0, 0, GXDECLTYPE_FLOAT3, GXDECLUSAGE_POSITION},
@@ -219,6 +220,17 @@ void CDynamicModelProvider::setDevice(IGXDevice *pDevice)
 		{0, 44, GXDECLTYPE_FLOAT3, GXDECLUSAGE_BINORMAL},
 		GX_DECL_END()
 	};
+#endif
+	// 44 bytes (12 position + 8 texcoord + 3 x 8 SHORT4N) / 28?
+	GXVertexElement layoutStaticEx[] =
+	{
+		{0, 0, GXDECLTYPE_FLOAT3, GXDECLUSAGE_POSITION},
+		{0, 12, GXDECLTYPE_FLOAT2, GXDECLUSAGE_TEXCOORD},
+		{0, 20, GXDECLTYPE_SHORT4N, GXDECLUSAGE_NORMAL},
+		{0, 28, GXDECLTYPE_SHORT4N, GXDECLUSAGE_TANGENT},
+		{0, 36, GXDECLTYPE_SHORT4N, GXDECLUSAGE_BINORMAL},
+		GX_DECL_END()
+	};
 
 	m_pVertexDeclaration = pDevice->createVertexDeclaration(layoutStaticEx);
 
diff --git a/source/anim/DynamicModelShared.cpp b/source/anim/DynamicModelShared.cpp
index 0216f5d03..f4444d798 100644
--- a/source/anim/DynamicModelShared.cpp
+++ b/source/anim/DynamicModelShared.cpp
@@ -138,7 +138,7 @@ bool CDynamicModelShared::init(IXResourceModelStatic *pResource)
 			if(!m_pProvider->getCore()->isOnMainThread())
 			{
 				m_ppTempIndices = new UINT*[uLodCount];
-				m_ppTempVertices = new XResourceModelStaticVertex*[uLodCount];
+				m_ppTempVertices = new XResourceModelStaticVertexGPU*[uLodCount];
 				m_puTempTotalIndices = new UINT[uLodCount];
 				m_puTempTotalVertices = new UINT[uLodCount];
 			}
@@ -157,7 +157,7 @@ bool CDynamicModelShared::init(IXResourceModelStatic *pResource)
 					uTotalVertices += pSubset->iVertexCount;
 				}
 
-				XResourceModelStaticVertex *pVertices = new XResourceModelStaticVertex[uTotalVertices];
+				XResourceModelStaticVertexGPU *pVertices = new XResourceModelStaticVertexGPU[uTotalVertices];
 				UINT *pIndices = new UINT[uTotalIndices];
 
 				subset_t subset;
@@ -174,7 +174,25 @@ bool CDynamicModelShared::init(IXResourceModelStatic *pResource)
 							subset.uIndexCount = pSubset->iIndexCount;
 							subset.uVertexCount = pSubset->iVertexCount;
 
-							memcpy(pVertices + subset.uStartVertex, pSubset->pVertices, sizeof(XResourceModelStaticVertex) * pSubset->iVertexCount);
+							//memcpy(pVertices + subset.uStartVertex, pSubset->pVertices, sizeof(XResourceModelStaticVertex) * pSubset->iVertexCount);
+							for(UINT k = 0; k < pSubset->iVertexCount; ++k)
+							{
+#define TO_SHORT(v) ((short)((v) * 32767.0f))
+								auto &dst = (pVertices + subset.uStartVertex)[k];
+								auto &src = pSubset->pVertices[k];
+								dst.vPos = src.vPos;
+								dst.vTex = src.vTex;
+								dst.vNorm[0] = TO_SHORT(src.vNorm.x);
+								dst.vNorm[1] = TO_SHORT(src.vNorm.y);
+								dst.vNorm[2] = TO_SHORT(src.vNorm.z);
+								dst.vTangent[0] = TO_SHORT(src.vTangent.x);
+								dst.vTangent[1] = TO_SHORT(src.vTangent.y);
+								dst.vTangent[2] = TO_SHORT(src.vTangent.z);
+								dst.vBinorm[0] = TO_SHORT(src.vBinorm.x);
+								dst.vBinorm[1] = TO_SHORT(src.vBinorm.y);
+								dst.vBinorm[2] = TO_SHORT(src.vBinorm.z);
+#undef TO_SHORT
+							}
 							memcpy(pIndices + subset.uStartIndex, pSubset->pIndices, sizeof(UINT) * pSubset->iIndexCount);
 
 							m_aLods[i][uMaterial] = subset;
@@ -204,7 +222,7 @@ bool CDynamicModelShared::init(IXResourceModelStatic *pResource)
 				if(m_pProvider->getCore()->isOnMainThread())
 				{
 					m_ppIndexBuffer[i] = m_pDevice->createIndexBuffer(sizeof(UINT) * uTotalIndices, GXBUFFER_USAGE_STATIC, GXIT_UINT32, pIndices);
-					IGXVertexBuffer *pVertexBuffer = m_pDevice->createVertexBuffer(sizeof(XResourceModelStaticVertex) * uTotalVertices, GXBUFFER_USAGE_STATIC, pVertices);
+					IGXVertexBuffer *pVertexBuffer = m_pDevice->createVertexBuffer(sizeof(XResourceModelStaticVertexGPU) * uTotalVertices, GXBUFFER_USAGE_STATIC, pVertices);
 					m_ppRenderBuffer[i] = m_pDevice->createRenderBuffer(1, &pVertexBuffer, m_pProvider->getVertexDeclaration());
 					mem_release(pVertexBuffer);
 
@@ -358,7 +376,7 @@ void CDynamicModelShared::initGPUresources()
 	for(UINT i = 0, l = m_aLods.size(); i < l; ++i)
 	{
 		m_ppIndexBuffer[i] = m_pDevice->createIndexBuffer(sizeof(UINT) * m_puTempTotalIndices[i], GXBUFFER_USAGE_STATIC, GXIT_UINT32, m_ppTempIndices[i]);
-		IGXVertexBuffer *pVertexBuffer = m_pDevice->createVertexBuffer(sizeof(XResourceModelStaticVertex) * m_puTempTotalVertices[i], GXBUFFER_USAGE_STATIC, m_ppTempVertices[i]);
+		IGXVertexBuffer *pVertexBuffer = m_pDevice->createVertexBuffer(sizeof(XResourceModelStaticVertexGPU) * m_puTempTotalVertices[i], GXBUFFER_USAGE_STATIC, m_ppTempVertices[i]);
 		m_ppRenderBuffer[i] = m_pDevice->createRenderBuffer(1, &pVertexBuffer, m_pProvider->getVertexDeclaration());
 		mem_release(pVertexBuffer);
 
diff --git a/source/anim/DynamicModelShared.h b/source/anim/DynamicModelShared.h
index 0fba2c613..92ee37785 100644
--- a/source/anim/DynamicModelShared.h
+++ b/source/anim/DynamicModelShared.h
@@ -63,7 +63,7 @@ protected:
 	IGXIndexBuffer **m_ppIndexBuffer = NULL;
 
 	UINT **m_ppTempIndices = NULL;
-	XResourceModelStaticVertex **m_ppTempVertices = NULL;
+	XResourceModelStaticVertexGPU **m_ppTempVertices = NULL;
 	UINT *m_puTempTotalIndices = 0;
 	UINT *m_puTempTotalVertices = 0;
 
diff --git a/source/xcommon/resource/IXResourceModel.h b/source/xcommon/resource/IXResourceModel.h
index 0ed5a3925..b8f83caa9 100644
--- a/source/xcommon/resource/IXResourceModel.h
+++ b/source/xcommon/resource/IXResourceModel.h
@@ -98,6 +98,15 @@ struct XResourceModelStaticVertex
 	float3_t vBinorm;   /*!< Бинормаль */
 };
 
+struct XResourceModelStaticVertexGPU // packed counterpart of XResourceModelStaticVertex used for GPU vertex buffers
+{
+	float3_t vPos;			/*!< Position (kept as full floats) */
+	float2_t vTex;      /*!< Texture coordinates (kept as full floats) */
+	short vNorm[4];     /*!< Normal, four signed 16-bit components (declared as SHORT4N in the static layout) */
+	short vTangent[4];  /*!< Tangent, four signed 16-bit components (declared as SHORT4N in the static layout) */
+	short vBinorm[4];   /*!< Binormal, four signed 16-bit components (declared as SHORT4N in the static layout) */
+};
+
 struct XResourceModelStaticSubset
 {
 	uint32_t iMaterialID = 0; //!< Идентификатор материала
@@ -118,6 +127,17 @@ struct XResourceModelAnimatedVertex
 	float4_t vBoneWeights; /*!< Веса костей */
 };
 
+struct XResourceModelAnimatedVertexGPU // packed counterpart of XResourceModelAnimatedVertex used for GPU vertex buffers
+{
+	float3_t vPos;      /*!< Position (kept as full floats) */
+	float2_t vTex;      /*!< Texture coordinates (kept as full floats) */
+	short vNorm[4];     /*!< Normal, four signed 16-bit components (declared as SHORT4N in the animated layout) */
+	short vTangent[4];  /*!< Tangent, four signed 16-bit components (declared as SHORT4N in the animated layout) */
+	short vBinorm[4];   /*!< Binormal, four signed 16-bit components (declared as SHORT4N in the animated layout) */
+	byte u8BoneIndices[4];  /*!< Bone indices (declared as UBYTE4 in the animated layout) */
+	byte vBoneWeights[4]; /*!< Bone weights, each stored as float weight * 255 in one byte (declared as UBYTE4N) */
+};
+
 struct XResourceModelAnimatedSubset
 {
 	uint32_t iMaterialID = 0; //!< Идентификатор материала
-- 
GitLab