Performance optimizations:

1. Triangle rasterization: replaced loops over all the grid cells of the AABB with looping over the area of the triangle itself (i.e. bounds are set per row).
2. Region expansion: instead of sweeping through all the cells on every expandRegions() call to find those at the right level, sweep once every few levels and divide the cells among several stacks. Cells left over from one level's stack are appended to the next level's stack.
This commit is contained in:
axelrodR 2014-01-08 12:02:00 +02:00
parent b3d27bdb46
commit 8e4633c64b
2 changed files with 128 additions and 32 deletions

View File

@ -95,7 +95,7 @@ static void addSpan(rcHeightfield& hf, const int x, const int y,
s->area = area;
s->next = 0;
// Empty cell, add he first span.
// Empty cell, add the first span.
if (!hf.spans[idx])
{
hf.spans[idx] = s;
@ -222,14 +222,10 @@ static void rasterizeTri(const float* v0, const float* v1, const float* v2,
if (!overlapBounds(bmin, bmax, tmin, tmax))
return;
// Calculate the footpring of the triangle on the grid.
int x0 = (int)((tmin[0] - bmin[0])*ics);
// Calculate the footprint of the triangle on the grid's y-axis
int y0 = (int)((tmin[2] - bmin[2])*ics);
int x1 = (int)((tmax[0] - bmin[0])*ics);
int y1 = (int)((tmax[2] - bmin[2])*ics);
x0 = rcClamp(x0, 0, w-1);
y0 = rcClamp(y0, 0, h-1);
x1 = rcClamp(x1, 0, w-1);
y1 = rcClamp(y1, 0, h-1);
// Clip the triangle into all grid cells it touches.
@ -248,6 +244,17 @@ static void rasterizeTri(const float* v0, const float* v1, const float* v2,
nvrow = clipPoly(out, nvrow, inrow, 0, -1, cz+cs);
if (nvrow < 3) continue;
float minX = inrow[0], maxX = inrow[0];
for (int i=1; i<nvrow; ++i)
{
if (minX > inrow[i*3]) minX = inrow[i*3];
if (maxX < inrow[i*3]) maxX = inrow[i*3];
}
int x0 = (int)((minX - bmin[0])*ics);
int x1 = (int)((maxX - bmin[0])*ics);
x0 = rcClamp(x0, 0, w-1);
x1 = rcClamp(x1, 0, w-1);
for (int x = x0; x <= x1; ++x)
{
// Clip polygon to column.

View File

@ -286,7 +286,10 @@ static bool floodRegion(int x, int y, int i,
if (nr & RC_BORDER_REG) // Do not take borders into account.
continue;
if (nr != 0 && nr != r)
{
ar = nr;
break;
}
const rcCompactSpan& as = chf.spans[ai];
@ -300,7 +303,10 @@ static bool floodRegion(int x, int y, int i,
continue;
unsigned short nr2 = srcReg[ai2];
if (nr2 != 0 && nr2 != r)
{
ar = nr2;
break;
}
}
}
}
@ -340,30 +346,44 @@ static unsigned short* expandRegions(int maxIter, unsigned short level,
rcCompactHeightfield& chf,
unsigned short* srcReg, unsigned short* srcDist,
unsigned short* dstReg, unsigned short* dstDist,
rcIntArray& stack)
rcIntArray& stack,
bool fillStack)
{
const int w = chf.width;
const int h = chf.height;
// Find cells revealed by the raised level.
stack.resize(0);
for (int y = 0; y < h; ++y)
if (fillStack)
{
for (int x = 0; x < w; ++x)
// Find cells revealed by the raised level.
stack.resize(0);
for (int y = 0; y < h; ++y)
{
const rcCompactCell& c = chf.cells[x+y*w];
for (int i = (int)c.index, ni = (int)(c.index+c.count); i < ni; ++i)
for (int x = 0; x < w; ++x)
{
if (chf.dist[i] >= level && srcReg[i] == 0 && chf.areas[i] != RC_NULL_AREA)
const rcCompactCell& c = chf.cells[x+y*w];
for (int i = (int)c.index, ni = (int)(c.index+c.count); i < ni; ++i)
{
stack.push(x);
stack.push(y);
stack.push(i);
if (chf.dist[i] >= level && srcReg[i] == 0 && chf.areas[i] != RC_NULL_AREA)
{
stack.push(x);
stack.push(y);
stack.push(i);
}
}
}
}
}
else // use cells in the input stack
{
// mark all cells which already have a region
for (int j=0; j<stack.size(); j+=3)
{
int i = stack[j+2];
if (srcReg[i] != 0)
stack[j+2] = -1;
}
}
int iter = 0;
while (stack.size() > 0)
{
@ -434,6 +454,61 @@ static unsigned short* expandRegions(int maxIter, unsigned short level,
}
static void sortCellsByLevel(unsigned short startLevel,
rcCompactHeightfield& chf,
unsigned short* srcReg,
unsigned int nbStacks, rcIntArray* stacks,
unsigned short loglevelsPerStack) // the levels per stack (2 in our case) as a bit shift
{
const int w = chf.width;
const int h = chf.height;
startLevel = startLevel >> loglevelsPerStack;
for (unsigned int j=0; j<nbStacks; ++j)
stacks[j].resize(0);
// put all cells in the level range into the appropriate stacks
for (int y = 0; y < h; ++y)
{
for (int x = 0; x < w; ++x)
{
const rcCompactCell& c = chf.cells[x+y*w];
for (int i = (int)c.index, ni = (int)(c.index+c.count); i < ni; ++i)
{
if (chf.areas[i] == RC_NULL_AREA || srcReg[i] != 0)
continue;
int level = chf.dist[i] >> loglevelsPerStack;
int sId = startLevel - level;
if (sId >= (int)nbStacks)
continue;
if (sId < 0)
sId = 0;
stacks[sId].push(x);
stacks[sId].push(y);
stacks[sId].push(i);
}
}
}
}
// Copies to dstStack every entry of srcStack that still needs processing.
//
// Entries are (x, y, span index) triplets. An entry is copied only when its
// span index is non-negative and the span has no region id in srcReg; all
// other entries are left out. srcStack itself is not modified.
static void appendStacks(rcIntArray& srcStack, rcIntArray& dstStack,
                         unsigned short* srcReg)
{
    for (int base = 0; base < srcStack.size(); base += 3)
    {
        const int spanIdx = srcStack[base + 2];

        // A negative index is checked first so srcReg is never read with it.
        const bool pending = (spanIdx >= 0) && (srcReg[spanIdx] == 0);
        if (pending)
        {
            dstStack.push(srcStack[base]);
            dstStack.push(srcStack[base + 1]);
            dstStack.push(srcStack[base + 2]);
        }
    }
}
struct rcRegion
{
inline rcRegion(unsigned short i) :
@ -1236,7 +1311,13 @@ bool rcBuildRegions(rcContext* ctx, rcCompactHeightfield& chf,
}
ctx->startTimer(RC_TIMER_BUILD_REGIONS_WATERSHED);
const int LOG_NB_STACKS = 3;
const int NB_STACKS = 1 << LOG_NB_STACKS;
rcIntArray lvlStacks[NB_STACKS];
for (int i=0; i<NB_STACKS; ++i)
lvlStacks[i].resize(1024);
rcIntArray stack(1024);
rcIntArray visited(1024);
@ -1271,14 +1352,25 @@ bool rcBuildRegions(rcContext* ctx, rcCompactHeightfield& chf,
chf.borderSize = borderSize;
}
int sId = -1;
while (level > 0)
{
level = level >= 2 ? level-2 : 0;
sId = (sId+1) & (NB_STACKS-1);
// ctx->startTimer(RC_TIMER_DIVIDE_TO_LEVELS);
if (sId == 0)
sortCellsByLevel(level, chf, srcReg, NB_STACKS, lvlStacks, 1);
else
appendStacks(lvlStacks[sId-1], lvlStacks[sId], srcReg); // copy left overs from last level
// ctx->stopTimer(RC_TIMER_DIVIDE_TO_LEVELS);
ctx->startTimer(RC_TIMER_BUILD_REGIONS_EXPAND);
// Expand current regions until no empty connected cells found.
if (expandRegions(expandIters, level, chf, srcReg, srcDist, dstReg, dstDist, stack) != srcReg)
if (expandRegions(expandIters, level, chf, srcReg, srcDist, dstReg, dstDist, lvlStacks[sId], false) != srcReg)
{
rcSwap(srcReg, dstReg);
rcSwap(srcDist, dstDist);
@ -1289,18 +1381,15 @@ bool rcBuildRegions(rcContext* ctx, rcCompactHeightfield& chf,
ctx->startTimer(RC_TIMER_BUILD_REGIONS_FLOOD);
// Mark new regions with IDs.
for (int y = 0; y < h; ++y)
for (int j=0; j<lvlStacks[sId].size(); j+=3)
{
for (int x = 0; x < w; ++x)
int x = lvlStacks[sId][j];
int y = lvlStacks[sId][j+1];
int i = lvlStacks[sId][j+2];
if (i >= 0 && srcReg[i] == 0)
{
const rcCompactCell& c = chf.cells[x+y*w];
for (int i = (int)c.index, ni = (int)(c.index+c.count); i < ni; ++i)
{
if (chf.dist[i] < level || srcReg[i] != 0 || chf.areas[i] == RC_NULL_AREA)
continue;
if (floodRegion(x, y, i, level, regionId, chf, srcReg, srcDist, stack))
regionId++;
}
if (floodRegion(x, y, i, level, regionId, chf, srcReg, srcDist, stack))
regionId++;
}
}
@ -1308,7 +1397,7 @@ bool rcBuildRegions(rcContext* ctx, rcCompactHeightfield& chf,
}
// Expand current regions until no empty connected cells found.
if (expandRegions(expandIters*8, 0, chf, srcReg, srcDist, dstReg, dstDist, stack) != srcReg)
if (expandRegions(expandIters*8, 0, chf, srcReg, srcDist, dstReg, dstDist, stack, true) != srcReg)
{
rcSwap(srcReg, dstReg);
rcSwap(srcDist, dstDist);