forked from metin-server/m2dev-client-src
new mesh deformer using SSE2 instructions
This commit is contained in:
153
src/EterGrnLib/Deform.cpp
Normal file
153
src/EterGrnLib/Deform.cpp
Normal file
@@ -0,0 +1,153 @@
|
|||||||
|
#include "Deform.h"
|
||||||
|
#include <xmmintrin.h>
|
||||||
|
#include <emmintrin.h>
|
||||||
|
|
||||||
|
/**
 * Skins a run of vertices using direct bone indices.
 *
 * Each source vertex is read as a granny_pwnt3432_vertex and each destination
 * vertex is written as a granny_pnt332_vertex. For every influence with a
 * non-zero weight, the bone matrix Transforms[BoneIndices[i]] is read as 16
 * contiguous floats (four rows of four) and applied in row-vector form:
 *
 *     position' = x*row0 + y*row1 + z*row2 + row3   (w is implicitly 1)
 *     normal'   = x*row0 + y*row1 + z*row2          (w is implicitly 0)
 *
 * The per-bone results are blended with weight/255. The blended normal is
 * not renormalized. UV is copied through unchanged.
 *
 * Only single-precision SSE intrinsics are used; matrix rows are fetched with
 * unaligned loads, so Transforms needs no particular alignment.
 *
 * @param Count         Number of vertices to process; values <= 0 do nothing.
 * @param SourceInit    First source vertex.
 * @param DestInit      First destination vertex.
 * @param Transforms    Bone matrices, indexed directly by the vertex bone index.
 * @param CopySize      Ignored by this implementation (UV is copied explicitly).
 * @param SourceStride  Byte distance between consecutive source vertices.
 * @param DestStride    Byte distance between consecutive destination vertices.
 *
 * NOTE(review): the function name mentions "PNGBT33332" but the destination is
 * written as granny_pnt332_vertex (position, normal, UV only) - confirm the
 * name is just historical.
 */
void DeformPWNT3432toGrannyPNGBT33332D(granny_int32x Count, void const* SourceInit, void* DestInit,
                                       granny_matrix_4x4 const* Transforms,
                                       granny_int32x CopySize, granny_int32x SourceStride, granny_int32x DestStride)
{
    (void)CopySize; // kept for signature compatibility only

    const float inv255 = 1.0f / 255.0f;

    const granny_uint8* srcBytes = (const granny_uint8*)SourceInit;
    granny_uint8* dstBytes = (granny_uint8*)DestInit;

    // Counted loop so that a negative Count is a no-op instead of a runaway
    // `while (Count--)`.
    for (granny_int32x v = 0; v < Count; ++v, srcBytes += SourceStride, dstBytes += DestStride) {
        const granny_pwnt3432_vertex* src = (const granny_pwnt3432_vertex*)srcBytes;
        granny_pnt332_vertex* dst = (granny_pnt332_vertex*)dstBytes;

        // Broadcast each input component across all four lanes so it can be
        // multiplied against a whole matrix row at once.
        const __m128 px = _mm_set1_ps(src->Position[0]);
        const __m128 py = _mm_set1_ps(src->Position[1]);
        const __m128 pz = _mm_set1_ps(src->Position[2]);

        const __m128 nx = _mm_set1_ps(src->Normal[0]);
        const __m128 ny = _mm_set1_ps(src->Normal[1]);
        const __m128 nz = _mm_set1_ps(src->Normal[2]);

        __m128 P = _mm_setzero_ps();
        __m128 N = _mm_setzero_ps();

        for (int i = 0; i < 4; ++i) {
            const float wS = (float)src->BoneWeights[i] * inv255;
            if (wS <= 0.0f) {
                continue; // influence unused
            }

            const float* m = (const float*)(&Transforms[src->BoneIndices[i]]);

            const __m128 r0 = _mm_loadu_ps(m + 0);
            const __m128 r1 = _mm_loadu_ps(m + 4);
            const __m128 r2 = _mm_loadu_ps(m + 8);
            const __m128 r3 = _mm_loadu_ps(m + 12);

            const __m128 w = _mm_set1_ps(wS);

            // Position: w == 1, so the fourth row is added as-is.
            __m128 p = _mm_add_ps(_mm_mul_ps(r0, px), _mm_mul_ps(r1, py));
            p = _mm_add_ps(p, _mm_mul_ps(r2, pz));
            p = _mm_add_ps(p, r3);
            P = _mm_add_ps(P, _mm_mul_ps(p, w));

            // Normal: w == 0, so the fourth row does not contribute.
            __m128 n = _mm_add_ps(_mm_mul_ps(r0, nx), _mm_mul_ps(r1, ny));
            n = _mm_add_ps(n, _mm_mul_ps(r2, nz));
            N = _mm_add_ps(N, _mm_mul_ps(n, w));
        }

        // Spill to scratch arrays: the destination has only three floats per
        // attribute, so a direct 4-float store would overwrite the next field.
        float pOut[4], nOut[4];
        _mm_storeu_ps(pOut, P);
        _mm_storeu_ps(nOut, N);

        dst->Position[0] = pOut[0];
        dst->Position[1] = pOut[1];
        dst->Position[2] = pOut[2];

        dst->Normal[0] = nOut[0];
        dst->Normal[1] = nOut[1];
        dst->Normal[2] = nOut[2];

        dst->UV[0] = src->UV[0];
        dst->UV[1] = src->UV[1];
    }
}
|
||||||
|
|
||||||
|
/**
 * Skins a run of vertices using indirect bone indices.
 *
 * Identical to the direct variant except that each vertex bone index is first
 * remapped through TransformTable: the matrix used for influence i is
 * Transforms[TransformTable[BoneIndices[i]]].
 *
 * Each source vertex is read as a granny_pwnt3432_vertex and each destination
 * vertex is written as a granny_pnt332_vertex. Matrices are read as 16
 * contiguous floats (four rows of four) and applied in row-vector form:
 *
 *     position' = x*row0 + y*row1 + z*row2 + row3   (w is implicitly 1)
 *     normal'   = x*row0 + y*row1 + z*row2          (w is implicitly 0)
 *
 * The per-bone results are blended with weight/255. The blended normal is
 * not renormalized. UV is copied through unchanged.
 *
 * @param Count           Number of vertices to process; values <= 0 do nothing.
 * @param SourceInit      First source vertex.
 * @param DestInit        First destination vertex.
 * @param TransformTable  Remaps a vertex bone index to an index into Transforms.
 *                        Must not be null.
 * @param Transforms      Bone matrices.
 * @param CopySize        Ignored by this implementation (UV is copied explicitly).
 * @param SourceStride    Byte distance between consecutive source vertices.
 * @param DestStride      Byte distance between consecutive destination vertices.
 */
void DeformPWNT3432toGrannyPNGBT33332I(granny_int32x Count, void const* SourceInit, void* DestInit,
                                       granny_int32x const* TransformTable, granny_matrix_4x4 const* Transforms,
                                       granny_int32x CopySize, granny_int32x SourceStride, granny_int32x DestStride)
{
    (void)CopySize; // kept for signature compatibility only

    const float inv255 = 1.0f / 255.0f;

    const granny_uint8* srcBytes = (const granny_uint8*)SourceInit;
    granny_uint8* dstBytes = (granny_uint8*)DestInit;

    // Counted loop so that a negative Count is a no-op instead of a runaway
    // `while (Count--)`.
    for (granny_int32x v = 0; v < Count; ++v, srcBytes += SourceStride, dstBytes += DestStride) {
        const granny_pwnt3432_vertex* src = (const granny_pwnt3432_vertex*)srcBytes;
        granny_pnt332_vertex* dst = (granny_pnt332_vertex*)dstBytes;

        // Broadcast each input component across all four lanes so it can be
        // multiplied against a whole matrix row at once.
        const __m128 px = _mm_set1_ps(src->Position[0]);
        const __m128 py = _mm_set1_ps(src->Position[1]);
        const __m128 pz = _mm_set1_ps(src->Position[2]);

        const __m128 nx = _mm_set1_ps(src->Normal[0]);
        const __m128 ny = _mm_set1_ps(src->Normal[1]);
        const __m128 nz = _mm_set1_ps(src->Normal[2]);

        __m128 P = _mm_setzero_ps();
        __m128 N = _mm_setzero_ps();

        for (int i = 0; i < 4; ++i) {
            const float wS = (float)src->BoneWeights[i] * inv255;
            if (wS <= 0.0f) {
                continue; // influence unused
            }

            // Resolve the bone only for influences that are actually used, so
            // an index stored in an unused slot never touches TransformTable.
            const int bi = TransformTable[src->BoneIndices[i]];
            const float* m = (const float*)(&Transforms[bi]);

            const __m128 r0 = _mm_loadu_ps(m + 0);
            const __m128 r1 = _mm_loadu_ps(m + 4);
            const __m128 r2 = _mm_loadu_ps(m + 8);
            const __m128 r3 = _mm_loadu_ps(m + 12);

            const __m128 w = _mm_set1_ps(wS);

            // Position: w == 1, so the fourth row is added as-is.
            __m128 p = _mm_add_ps(_mm_mul_ps(r0, px), _mm_mul_ps(r1, py));
            p = _mm_add_ps(p, _mm_mul_ps(r2, pz));
            p = _mm_add_ps(p, r3);
            P = _mm_add_ps(P, _mm_mul_ps(p, w));

            // Normal: w == 0, so the fourth row does not contribute.
            __m128 n = _mm_add_ps(_mm_mul_ps(r0, nx), _mm_mul_ps(r1, ny));
            n = _mm_add_ps(n, _mm_mul_ps(r2, nz));
            N = _mm_add_ps(N, _mm_mul_ps(n, w));
        }

        // Spill to scratch arrays: the destination has only three floats per
        // attribute, so a direct 4-float store would overwrite the next field.
        float pOut[4], nOut[4];
        _mm_storeu_ps(pOut, P);
        _mm_storeu_ps(nOut, N);

        dst->Position[0] = pOut[0];
        dst->Position[1] = pOut[1];
        dst->Position[2] = pOut[2];

        dst->Normal[0] = nOut[0];
        dst->Normal[1] = nOut[1];
        dst->Normal[2] = nOut[2];

        dst->UV[0] = src->UV[0];
        dst->UV[1] = src->UV[1];
    }
}
|
||||||
|
|
||||||
|
/**
 * Public entry point of the SSE skinning path.
 *
 * Forwards to the indirect variant when a bone remap table is supplied and to
 * the direct variant when TransformTable is null. All other arguments are
 * passed through untouched.
 */
void DeformPWNT3432toGrannyPNGBT33332(granny_int32x Count, void const* SourceInit, void* DestInit,
                                      granny_int32x const* TransformTable, granny_matrix_4x4 const* Transforms,
                                      granny_int32x CopySize, granny_int32x SourceStride, granny_int32x DestStride)
{
    // No remap table: vertex bone indices address Transforms directly.
    if (!TransformTable) {
        DeformPWNT3432toGrannyPNGBT33332D(Count, SourceInit, DestInit, Transforms, CopySize, SourceStride, DestStride);
        return;
    }

    DeformPWNT3432toGrannyPNGBT33332I(Count, SourceInit, DestInit, TransformTable, Transforms, CopySize, SourceStride, DestStride);
}
|
||||||
6
src/EterGrnLib/Deform.h
Normal file
6
src/EterGrnLib/Deform.h
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <granny.h>
|
||||||
|
|
||||||
|
/**
 * SSE replacement for the Granny vertex deformer: skins vertices read as
 * granny_pwnt3432_vertex into vertices written as granny_pnt332_vertex.
 *
 * @param Count           Number of vertices to process.
 * @param SourceInit      First source vertex.
 * @param DestInit        First destination vertex.
 * @param TransformTable  Optional remap from vertex bone index to an index
 *                        into Transforms; when null, vertex bone indices
 *                        address Transforms directly.
 * @param Transforms      Bone matrices.
 * @param CopySize        Accepted but not used by the implementation.
 * @param SourceStride    Byte distance between consecutive source vertices.
 * @param DestStride      Byte distance between consecutive destination vertices.
 */
void DeformPWNT3432toGrannyPNGBT33332(
    granny_int32x Count,
    void const* SourceInit,
    void* DestInit,
    granny_int32x const* TransformTable,
    granny_matrix_4x4 const* Transforms,
    granny_int32x CopySize,
    granny_int32x SourceStride,
    granny_int32x DestStride);
|
||||||
@@ -2,6 +2,7 @@
|
|||||||
#include "Mesh.h"
|
#include "Mesh.h"
|
||||||
#include "Model.h"
|
#include "Model.h"
|
||||||
#include "Material.h"
|
#include "Material.h"
|
||||||
|
#include "Deform.h"
|
||||||
|
|
||||||
granny_data_type_definition GrannyPNT3322VertexType[5] =
|
granny_data_type_definition GrannyPNT3322VertexType[5] =
|
||||||
{
|
{
|
||||||
@@ -42,30 +43,45 @@ void CGrannyMesh::NEW_LoadVertices(void * dstBaseVertices)
|
|||||||
GrannyCopyMeshVertices(pgrnMesh, m_pgrnMeshType, dstVertices);
|
GrannyCopyMeshVertices(pgrnMesh, m_pgrnMeshType, dstVertices);
|
||||||
}
|
}
|
||||||
|
|
||||||
void CGrannyMesh::DeformPNTVertices(void * dstBaseVertices, D3DXMATRIX * boneMatrices, granny_mesh_binding* pgrnMeshBinding) const
|
void CGrannyMesh::DeformPNTVertices(void* dstBaseVertices, D3DXMATRIX* boneMatrices, granny_mesh_binding* pgrnMeshBinding) const
|
||||||
{
|
{
|
||||||
assert(dstBaseVertices != NULL);
|
assert(dstBaseVertices != NULL);
|
||||||
assert(boneMatrices != NULL);
|
assert(boneMatrices != NULL);
|
||||||
assert(m_pgrnMeshDeformer != NULL);
|
assert(m_pgrnMeshDeformer != NULL);
|
||||||
|
|
||||||
const granny_mesh * pgrnMesh = GetGrannyMeshPointer();
|
const granny_mesh* pgrnMesh = GetGrannyMeshPointer();
|
||||||
|
|
||||||
TPNTVertex * srcVertices = (TPNTVertex *) GrannyGetMeshVertices(pgrnMesh);
|
TPNTVertex* srcVertices = (TPNTVertex*)GrannyGetMeshVertices(pgrnMesh);
|
||||||
TPNTVertex * dstVertices = ((TPNTVertex *) dstBaseVertices) + m_vtxBasePos;
|
TPNTVertex* dstVertices = ((TPNTVertex*)dstBaseVertices) + m_vtxBasePos;
|
||||||
|
|
||||||
int vtxCount = GrannyGetMeshVertexCount(pgrnMesh);
|
int vtxCount = GrannyGetMeshVertexCount(pgrnMesh);
|
||||||
|
|
||||||
// WORK
|
// WORK
|
||||||
granny_int32x * boneIndices = (granny_int32x*)GrannyGetMeshBindingToBoneIndices(pgrnMeshBinding);
|
granny_int32x* boneIndices = (granny_int32x*)GrannyGetMeshBindingToBoneIndices(pgrnMeshBinding);
|
||||||
// END_OF_WORK
|
// END_OF_WORK
|
||||||
|
|
||||||
|
extern bool CPU_HAS_SSE2;
|
||||||
|
if (CPU_HAS_SSE2) {
|
||||||
|
DeformPWNT3432toGrannyPNGBT33332(
|
||||||
|
vtxCount,
|
||||||
|
srcVertices,
|
||||||
|
dstVertices,
|
||||||
|
boneIndices,
|
||||||
|
(granny_matrix_4x4 const*)boneMatrices,
|
||||||
|
sizeof(granny_pwnt3432_vertex),
|
||||||
|
sizeof(granny_pwnt3432_vertex),
|
||||||
|
sizeof(granny_pnt332_vertex)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
else {
|
||||||
GrannyDeformVertices(
|
GrannyDeformVertices(
|
||||||
m_pgrnMeshDeformer,
|
m_pgrnMeshDeformer,
|
||||||
boneIndices,
|
boneIndices,
|
||||||
(float *)boneMatrices,
|
(float*)boneMatrices,
|
||||||
vtxCount,
|
vtxCount,
|
||||||
srcVertices,
|
srcVertices,
|
||||||
dstVertices);
|
dstVertices);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool CGrannyMesh::CanDeformPNTVertices() const
|
bool CGrannyMesh::CanDeformPNTVertices() const
|
||||||
|
|||||||
@@ -3,6 +3,7 @@
|
|||||||
#include "../eterBase/Stl.h"
|
#include "../eterBase/Stl.h"
|
||||||
#include "../eterBase/Debug.h"
|
#include "../eterBase/Debug.h"
|
||||||
|
|
||||||
|
bool CPU_HAS_SSE2 = false;
|
||||||
bool GRAPHICS_CAPS_CAN_NOT_DRAW_LINE = false;
|
bool GRAPHICS_CAPS_CAN_NOT_DRAW_LINE = false;
|
||||||
bool GRAPHICS_CAPS_CAN_NOT_DRAW_SHADOW = false;
|
bool GRAPHICS_CAPS_CAN_NOT_DRAW_SHADOW = false;
|
||||||
bool GRAPHICS_CAPS_HALF_SIZE_IMAGE = false;
|
bool GRAPHICS_CAPS_HALF_SIZE_IMAGE = false;
|
||||||
@@ -561,6 +562,12 @@ RETRY:
|
|||||||
ms_isLowTextureMemory = true;
|
ms_isLowTextureMemory = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// CPU check
|
||||||
|
int cpuInfo[4] = { 0 };
|
||||||
|
__cpuid(cpuInfo, 1);
|
||||||
|
|
||||||
|
CPU_HAS_SSE2 = cpuInfo[3] & (1 << 26);
|
||||||
|
|
||||||
return (iRet);
|
return (iRet);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user