52 #include "GPUCacheOptimizer.hh"
65 GPUCacheOptimizer::GPUCacheOptimizer(
unsigned int NumTris,
unsigned int NumVerts,
unsigned int IndexSize,
const void* pIndices) :
68 m_IndexSize(IndexSize),
70 m_NumTransformations(0)
75 GPUCacheOptimizer::~GPUCacheOptimizer(
void)
82 unsigned int GPUCacheOptimizer::GetIndex(
unsigned int i)
const
84 assert(i < m_NumTris * 3);
86 return GetIndex(i, m_IndexSize, m_pIndices);
89 unsigned int GPUCacheOptimizer::GetIndex(
unsigned int i,
unsigned int IndexSize,
const void* pIB)
93 case 4:
return ((
const unsigned int*)pIB)[i];
break;
94 case 2:
return ((
const unsigned short*)pIB)[i];
break;
95 case 1:
return ((
const unsigned char*)pIB)[i];
break;
97 assert(i == 1 || i == 2 || i == 4);
102 void GPUCacheOptimizer::SetIndex(
unsigned int i,
unsigned int val,
unsigned int IndexSize,
void* pIB)
106 case 4: ((
unsigned int*)pIB)[i] = val;
break;
107 case 2: ((
unsigned short*)pIB)[i] = val;
break;
108 case 1: ((
unsigned char*)pIB)[i] = val;
break;
110 assert(i == 1 || i == 2 || i == 4);
118 assert(DstIndexSize == 1 ||DstIndexSize == 2 || DstIndexSize == 4);
122 char* pSrc = (
char*)m_pIndices;
127 pSrc =
new char[m_IndexSize * m_NumTris * 3];
128 memcpy(pSrc, m_pIndices, m_IndexSize * m_NumTris * 3);
133 for (
unsigned int i = 0; i < m_NumTris; ++i)
135 for (
int k = 0; k < 3; ++k)
137 unsigned int TriVertex = GetIndex(
m_pTriMap[i] * 3 + k, m_IndexSize, pSrc);
140 SetIndex(i * 3 + k, TriVertex, DstIndexSize, pDst);
144 if (bTmpCopy)
delete [] pSrc;
150 const unsigned int* pVertMap,
151 unsigned int IndexSize,
void* pInOutIndices,
152 unsigned int VertexStride,
void* pInOutVertices)
154 if (pVertMap && pInOutIndices && pInOutVertices && VertexStride)
157 char* pTmpBuf =
new char[VertexStride * NumVerts];
158 memcpy(pTmpBuf, pInOutVertices, VertexStride * NumVerts);
160 char* pVertexOut = (
char*)pInOutVertices;
164 for (
unsigned int i = 0; i < NumVerts; ++i)
168 if (pVertMap[i] < NumVerts)
169 memcpy(pVertexOut + pVertMap[i] * VertexStride,
170 pTmpBuf + i * VertexStride, VertexStride);
175 for (
unsigned int i = 0; i < NumTris * 3; ++i)
179 unsigned int v = GetIndex(i, IndexSize, pInOutIndices);
180 SetIndex(i, pVertMap[v], IndexSize, pInOutIndices);
190 unsigned int IndexSize,
const void* pIndices,
191 unsigned int* pVertMap)
195 unsigned int uCounter = 0;
197 memset(pVertMap, 0xFF, NumVerts *
sizeof(
unsigned int));
199 for (
unsigned int i = 0; i < NumTris * 3; ++i)
203 if (IndexSize == 2) vertex = ((
const unsigned short*)pIndices)[i];
204 else vertex = ((
const unsigned int*)pIndices)[i];
206 if (pVertMap[vertex] == 0xFFFFFFFF)
207 pVertMap[vertex] = uCounter++;
215 if (m_NumTransformations)
return m_NumTransformations;
217 unsigned int NumIndices = 3 * m_NumTris;
218 if (!NumIndices)
return 0;
220 unsigned int* Cache =
new unsigned int[VertexCacheSize];
221 unsigned int NumSlotsInUse = 0;
222 unsigned int LastSlot = 0;
223 m_NumTransformations = 0;
225 for (
unsigned int i = 0; i < m_NumTris; ++i)
230 for (
int k = 0; k < 3; ++k)
232 unsigned int Idx = GetIndex(t * 3 + k);
235 for (
unsigned int k = 0; k < NumSlotsInUse && !bInCache; ++k)
237 if (Cache[k] == Idx) bInCache = 1;
242 ++m_NumTransformations;
243 if (NumSlotsInUse < VertexCacheSize)
245 Cache[NumSlotsInUse++] = Idx;
250 if (LastSlot == VertexCacheSize) LastSlot = 0;
251 Cache[LastSlot++] = Idx;
260 return m_NumTransformations;
268 return float(NumT) / float(m_NumTris);
276 return float(NumT) / float(m_NumVerts);
287 float fNewScore = -1.0f;
290 if (iCachePos < 0) fNewScore = 0.0f;
296 const float LastTriScore = 0.75f;
297 fNewScore = LastTriScore;
301 const float CacheDecayPower = 1.5f;
305 const float Scaler = 1.0f / (MaxSizeVertexCache - 3);
306 fNewScore = 1.0f - ( iCachePos - 3 ) * Scaler;
307 fNewScore = powf( fNewScore, CacheDecayPower);
314 const float ValenceBoostScale = 2.0f;
315 const float ValenceBoostPower = 0.5f;
317 float ValenceBoost = powf(
float(
iNumTrisLeft), -
float(ValenceBoostPower));
318 fNewScore += ValenceBoostScale * ValenceBoost;
324 void GPUCacheOptimizer::Opt_Vertex::RemoveTriFromList(
unsigned int tri)
326 for (
int k = 0; k < iNumTrisLeft; ++k)
331 pTris[k] = pTris[iNumTrisLeft-1];
347 if (NumVerts < 3 || !NumTris)
return;
353 memset(pVerts, 0,
sizeof(
Opt_Vertex) * NumVerts);
356 for (
unsigned int i = 0; i < NumTris; ++i)
362 for (
unsigned int k = 0; k < 3; ++k)
365 const unsigned int idx = GetIndex(i * 3 + k);
366 pThisTri->
v[k] = idx;
374 const unsigned int vertexTriAdjBufSize = NumTris * 3;
375 unsigned int vertexTriAdjBufOffset = 0;
376 unsigned int* vertexTriAdjBuf =
new unsigned int[vertexTriAdjBufSize];
379 for (
unsigned int i = 0; i < NumTris; ++i)
382 for (
int k = 0; k < 3; ++k)
386 pV->pTris = vertexTriAdjBuf + vertexTriAdjBufOffset;
410 int iTimeStamp = CacheSize + 1;
413 unsigned int numTrisAdded = 0;
415 std::vector<unsigned int> N;
428 Opt_Tris* pT = pTris + pV->pTris[m];
433 m_pTriMap[numTrisAdded++] = pV->pTris[m];
435 for (
int k = 0; k < 3; ++k)
438 const unsigned int v = pT->
v[k];
440 DeadEndVertexStack.push(v);
449 if (iTimeStamp - adjV->iCachePos > (
int)CacheSize)
450 adjV->iCachePos = iTimeStamp++;
461 for (
unsigned int k = 0; k < N.size(); ++k)
472 if (iTimeStamp - pV->iCachePos + 2 * pV->
iNumTrisLeft <= (
int)CacheSize)
473 m = iTimeStamp - pV->iCachePos;
486 while (DeadEndVertexStack.
length() && (n == -1))
490 const unsigned int d = DeadEndVertexStack.pop();
492 if (pVerts[d].iNumTrisLeft > 0)
496 while (i+1 < NumVerts && (n == -1))
499 if (pVerts[i].iNumTrisLeft > 0)
512 delete [] vertexTriAdjBuf;
520 GPUCacheEfficiencyTester::GPUCacheEfficiencyTester(
unsigned int NumTris,
unsigned int NumVerts,
521 unsigned int IndexSize,
const void* pIndices)
524 for (
unsigned int i = 0; i < NumTris; ++i)
m_pTriMap[i] = i;
void WriteIndexBuffer(unsigned int DstIndexSize, void *pDst)
Applies the remapping on the initial pIndices constructor's param and stores the result in the given ...
unsigned int v[3]
vertices of this triangle
int iNumTrisLeft
tris left to add to final result
float ComputeATVR(unsigned int VertexCacheSize=16)
Measures the efficiency use of the vertex cache. ATVR: Average Transform to Vertex Ratio similar to A...
Namespace providing different geometric functions concerning angles.
GPUCacheOptimizerTipsify(unsigned int CacheSize, unsigned int NumTris, unsigned int NumVerts, unsigned int IndexSize, const void *pIndices)
The actual computation happens here in this constructor.
unsigned int ComputeNumberOfVertexTransformations(unsigned int VertexCacheSize=16)
Returns the total number of vertex transforms performed with a certain VertexCache.
static void RemapVertices(unsigned int NumTris, unsigned int NumVerts, const unsigned int *pVertMap, unsigned int IndexSize, void *pInOutIndices, unsigned int VertexStride, void *pInOutVertices)
Applies the remap table of OptimizeVertices to a vertex and index buffer.
Simple and fast fixed size stack used in tipsify implementation.
unsigned int length() const
current stack length
float ComputeACMR(unsigned int VertexCacheSize=16)
Measures the efficiency use of the vertex cache. ACMR: Average Cache Miss Ratio.
int iNumTrisTotal
tris using this vertex
void FindScore(unsigned int MaxSizeVertexCache)
forsyth's score function
static void OptimizeVertices(unsigned int NumTris, unsigned int NumVerts, unsigned int IndexSize, const void *pIndices, unsigned int *pVertMap)
Reorders vertex buffer to minimize memory address jumps.