uint32 size indices and vertexBaseIndex to Forsyth (#1304)

- Forsyth::OptimizeFaces() now has 2 overloads, 1 for uint16_t and one
for uint32_t sized indices. Internally each overload now calls down to
the templated OptimizeFacesImpl() providing the respective index type.

- The new 'vertexBaseIndex' is the starting vertex index
subtracted from each index in indexList to allow safe operation on
multiple objects in a single index buffer.

- Also replaced the interal int typedefs with common types provided by
stdint.h
This commit is contained in:
Kyle Rocha 2017-12-30 20:46:21 -08:00 committed by Branimir Karadžić
parent de9d9671fa
commit 1f3846224b
3 changed files with 85 additions and 70 deletions

View File

@ -22,10 +22,6 @@
namespace Forsyth
{
typedef unsigned int uint;
typedef unsigned short uint16;
typedef unsigned char byte;
//-----------------------------------------------------------------------------
// OptimizeFaces
//-----------------------------------------------------------------------------
@ -36,13 +32,27 @@ namespace Forsyth
// the number of indices in the list
// vertexCount
// the largest index value in indexList
// vertexBaseIndex
// starting vertex index subtracted from each index in indexList to
// allow safe operation on multiple objects in a single index buffer
// newIndexList
// a pointer to a preallocated buffer the same size as indexList to
// hold the optimized index list
// lruCacheSize
// the size of the simulated post-transform cache (max:64)
//-----------------------------------------------------------------------------
void OptimizeFaces(const uint16* indexList, uint indexCount, uint vertexCount, uint16* newIndexList, uint16 lruCacheSize);
template<class IndexT>
void OptimizeFacesImpl(const IndexT* indexList, uint32_t indexCount, uint32_t vertexCount, IndexT vertexBaseIndex, IndexT* newIndexList, uint16_t lruCacheSize);
void OptimizeFaces(const uint16_t* indexList, uint32_t indexCount, uint32_t vertexCount, uint16_t vertexBaseIndex, uint16_t* newIndexList, uint16_t lruCacheSize)
{
OptimizeFacesImpl<uint16_t>( indexList, indexCount, vertexCount, vertexBaseIndex, newIndexList, lruCacheSize );
}
void OptimizeFaces(const uint32_t* indexList, uint32_t indexCount, uint32_t vertexCount, uint32_t vertexBaseIndex, uint32_t* newIndexList, uint16_t lruCacheSize)
{
OptimizeFacesImpl<uint32_t>( indexList, indexCount, vertexCount, vertexBaseIndex, newIndexList, lruCacheSize );
}
namespace
{
@ -82,7 +92,7 @@ namespace Forsyth
return score;
}
float ComputeVertexValenceScore(uint numActiveFaces)
float ComputeVertexValenceScore(uint32_t numActiveFaces)
{
const float FindVertexScore_ValenceBoostScale = 2.0f;
const float FindVertexScore_ValenceBoostPower = 0.5f;
@ -100,7 +110,7 @@ namespace Forsyth
const int kMaxVertexCacheSize = 64;
const uint kMaxPrecomputedVertexValenceScores = 64;
const uint32_t kMaxPrecomputedVertexValenceScores = 64;
float s_vertexCacheScores[kMaxVertexCacheSize+1][kMaxVertexCacheSize];
float s_vertexValenceScores[kMaxPrecomputedVertexValenceScores];
@ -114,7 +124,7 @@ namespace Forsyth
}
}
for (uint valence=0; valence<kMaxPrecomputedVertexValenceScores; ++valence)
for (uint32_t valence=0; valence<kMaxPrecomputedVertexValenceScores; ++valence)
{
s_vertexValenceScores[valence] = ComputeVertexValenceScore(valence);
}
@ -133,7 +143,7 @@ namespace Forsyth
// return s_vertexValenceScores[numActiveTris];
// }
float FindVertexScore(uint numActiveFaces, uint cachePosition, uint vertexCacheSize)
float FindVertexScore(uint32_t numActiveFaces, uint32_t cachePosition, uint32_t vertexCacheSize)
{
assert(s_vertexScoresComputed); (void)s_vertexScoresComputed;
@ -163,37 +173,38 @@ namespace Forsyth
struct OptimizeVertexData
{
float score;
uint activeFaceListStart;
uint activeFaceListSize;
uint16 cachePos0;
uint16 cachePos1;
float score;
uint32_t activeFaceListStart;
uint32_t activeFaceListSize;
uint16_t cachePos0;
uint16_t cachePos1;
OptimizeVertexData() : score(0.f), activeFaceListStart(0), activeFaceListSize(0), cachePos0(0), cachePos1(0) { }
};
}
void OptimizeFaces(const uint16* indexList, uint indexCount, uint vertexCount, uint16* newIndexList, uint16 lruCacheSize)
template<class IndexT>
void OptimizeFacesImpl(const IndexT* indexList, uint32_t indexCount, uint32_t vertexCount, IndexT vertexBaseIndex, IndexT* newIndexList, uint16_t lruCacheSize)
{
std::vector<OptimizeVertexData> vertexDataList;
std::vector< OptimizeVertexData > vertexDataList;
vertexDataList.resize(vertexCount);
// compute face count per vertex
for (uint i=0; i<indexCount; ++i)
for (uint32_t i=0; i<indexCount; ++i)
{
uint16 index = indexList[i];
assert(index < vertexCount);
IndexT index = indexList[i] - vertexBaseIndex;
assert((index >= 0) && (index < vertexCount));
OptimizeVertexData& vertexData = vertexDataList[index];
vertexData.activeFaceListSize++;
}
std::vector<uint> activeFaceList;
std::vector<uint32_t> activeFaceList;
const uint16 kEvictedCacheIndex = std::numeric_limits<uint16>::max();
const IndexT kEvictedCacheIndex = std::numeric_limits<uint16_t>::max();
{
// allocate face list per vertex
uint curActiveFaceListPos = 0;
for (uint i=0; i<vertexCount; ++i)
uint32_t curActiveFaceListPos = 0;
for (uint32_t i=0; i<vertexCount; ++i)
{
OptimizeVertexData& vertexData = vertexDataList[i];
vertexData.cachePos0 = kEvictedCacheIndex;
@ -207,45 +218,45 @@ namespace Forsyth
}
// fill out face list per vertex
for (uint i=0; i<indexCount; i+=3)
for (uint32_t i=0; i<indexCount; i+=3)
{
for (uint j=0; j<3; ++j)
for (uint32_t j=0; j<3; ++j)
{
uint16 index = indexList[i+j];
IndexT index = indexList[i+j] - vertexBaseIndex;
OptimizeVertexData& vertexData = vertexDataList[index];
activeFaceList[vertexData.activeFaceListStart + vertexData.activeFaceListSize] = i;
vertexData.activeFaceListSize++;
}
}
std::vector<byte> processedFaceList;
std::vector<uint8_t> processedFaceList;
processedFaceList.resize(indexCount);
uint16 vertexCacheBuffer[(kMaxVertexCacheSize+3)*2];
uint16* cache0 = vertexCacheBuffer;
uint16* cache1 = vertexCacheBuffer+(kMaxVertexCacheSize+3);
uint16 entriesInCache0 = 0;
IndexT vertexCacheBuffer[(kMaxVertexCacheSize+3)*2];
IndexT* cache0 = vertexCacheBuffer;
IndexT* cache1 = vertexCacheBuffer+(kMaxVertexCacheSize+3);
IndexT entriesInCache0 = 0;
uint bestFace = 0;
uint32_t bestFace = 0;
float bestScore = -1.f;
const float maxValenceScore = FindVertexScore(1, kEvictedCacheIndex, lruCacheSize) * 3.f;
for (uint i = 0; i < indexCount; i += 3)
for (uint32_t i = 0; i < indexCount; i += 3)
{
if (bestScore < 0.f)
{
// no verts in the cache are used by any unprocessed faces so
// search all unprocessed faces for a new starting point
for (uint j = 0; j < indexCount; j += 3)
for (uint32_t j = 0; j < indexCount; j += 3)
{
if (processedFaceList[j] == 0)
{
uint face = j;
uint32_t face = j;
float faceScore = 0.f;
for (uint k=0; k<3; ++k)
for (uint32_t k=0; k<3; ++k)
{
uint16 index = indexList[face+k];
IndexT index = indexList[face+k] - vertexBaseIndex;
OptimizeVertexData& vertexData = vertexDataList[index];
assert(vertexData.activeFaceListSize > 0);
assert(vertexData.cachePos0 >= lruCacheSize);
@ -269,13 +280,13 @@ namespace Forsyth
}
processedFaceList[bestFace] = 1;
uint16 entriesInCache1 = 0;
uint16_t entriesInCache1 = 0;
// add bestFace to LRU cache and to newIndexList
for (uint v = 0; v < 3; ++v)
for (uint32_t v = 0; v < 3; ++v)
{
uint16 index = indexList[bestFace+v];
newIndexList[i+v] = index;
IndexT index = indexList[bestFace+v] - vertexBaseIndex;
newIndexList[i+v] = index + vertexBaseIndex;
OptimizeVertexData& vertexData = vertexDataList[index];
@ -292,9 +303,9 @@ namespace Forsyth
}
assert(vertexData.activeFaceListSize > 0);
uint* begin = &activeFaceList[vertexData.activeFaceListStart];
uint* end = &(activeFaceList[vertexData.activeFaceListStart + vertexData.activeFaceListSize - 1]) + 1;
uint* it = std::find(begin, end, bestFace);
uint32_t* begin = &activeFaceList[vertexData.activeFaceListStart];
uint32_t* end = &(activeFaceList[vertexData.activeFaceListStart + vertexData.activeFaceListSize - 1]) + 1;
uint32_t* it = std::find(begin, end, bestFace);
assert(it != end);
std::swap(*it, *(end-1));
--vertexData.activeFaceListSize;
@ -303,9 +314,9 @@ namespace Forsyth
}
// move the rest of the old verts in the cache down and compute their new scores
for (uint c0 = 0; c0 < entriesInCache0; ++c0)
for (uint32_t c0 = 0; c0 < entriesInCache0; ++c0)
{
uint16 index = cache0[c0];
IndexT index = cache0[c0];
OptimizeVertexData& vertexData = vertexDataList[index];
if (vertexData.cachePos1 >= entriesInCache1)
@ -318,19 +329,19 @@ namespace Forsyth
// find the best scoring triangle in the current cache (including up to 3 that were just evicted)
bestScore = -1.f;
for (uint c1 = 0; c1 < entriesInCache1; ++c1)
for (uint32_t c1 = 0; c1 < entriesInCache1; ++c1)
{
uint16 index = cache1[c1];
IndexT index = cache1[c1];
OptimizeVertexData& vertexData = vertexDataList[index];
vertexData.cachePos0 = vertexData.cachePos1;
vertexData.cachePos1 = kEvictedCacheIndex;
for (uint j=0; j<vertexData.activeFaceListSize; ++j)
for (uint32_t j=0; j<vertexData.activeFaceListSize; ++j)
{
uint face = activeFaceList[vertexData.activeFaceListStart+j];
uint32_t face = activeFaceList[vertexData.activeFaceListStart+j];
float faceScore = 0.f;
for (uint v=0; v<3; v++)
for (uint32_t v=0; v<3; v++)
{
uint16 faceIndex = indexList[face+v];
IndexT faceIndex = indexList[face+v] - vertexBaseIndex;
OptimizeVertexData& faceVertexData = vertexDataList[faceIndex];
faceScore += faceVertexData.score;
}

View File

@ -21,23 +21,27 @@
namespace Forsyth
{
//-----------------------------------------------------------------------------
// OptimizeFaces
//-----------------------------------------------------------------------------
// Parameters:
// indexList
// input index list
// indexCount
// the number of indices in the list
// vertexCount
// the largest index value in indexList
// newIndexList
// a pointer to a preallocated buffer the same size as indexList to
// hold the optimized index list
// lruCacheSize
// the size of the simulated post-transform cache (max:64)
//-----------------------------------------------------------------------------
void OptimizeFaces(const uint16_t* indexList, uint32_t indexCount, uint32_t vertexCount, uint16_t* newIndexList, uint16_t lruCacheSize);
//-----------------------------------------------------------------------------
// OptimizeFaces
//-----------------------------------------------------------------------------
// Parameters:
// indexList
// input index list
// indexCount
// the number of indices in the list
// vertexCount
// the largest index value in indexList
// vertexBaseIndex
// starting vertex index subtracted from each index in indexList to
// allow safe operation on multiple objects in a single index buffer
// newIndexList
// a pointer to a preallocated buffer the same size as indexList to
// hold the optimized index list
// lruCacheSize
// the size of the simulated post-transform cache (max:64)
//-----------------------------------------------------------------------------
void OptimizeFaces(const uint16_t* indexList, uint32_t indexCount, uint32_t vertexCount, uint16_t vertexBaseIndex, uint16_t* newIndexList, uint16_t lruCacheSize);
void OptimizeFaces(const uint32_t* indexList, uint32_t indexCount, uint32_t vertexCount, uint32_t vertexBaseIndex, uint32_t* newIndexList, uint16_t lruCacheSize);
} // namespace Forsyth

View File

@ -126,7 +126,7 @@ long int fsize(FILE* _file)
void triangleReorder(uint16_t* _indices, uint32_t _numIndices, uint32_t _numVertices, uint16_t _cacheSize)
{
uint16_t* newIndexList = new uint16_t[_numIndices];
Forsyth::OptimizeFaces(_indices, _numIndices, _numVertices, newIndexList, _cacheSize);
Forsyth::OptimizeFaces(_indices, _numIndices, _numVertices, 0, newIndexList, _cacheSize);
bx::memCopy(_indices, newIndexList, _numIndices*2);
delete [] newIndexList;
}