From 8e4633c64b8150b5dcb00289add05d3c8a48ccd2 Mon Sep 17 00:00:00 2001 From: axelrodR Date: Wed, 8 Jan 2014 12:02:00 +0200 Subject: [PATCH 1/3] Performance optimizations: 1. Triangle rasterization: replaced loops over all the grid cells of the AABB with looping over the area of the triangle itself (i.e. bounds are set per row). 2. Region expansion: instead of sweeping through all the cells at each expandRegion() and finding the right level - sweep once per few levels and divide among several stacks. Left over are appended. --- Recast/Source/RecastRasterization.cpp | 19 ++-- Recast/Source/RecastRegion.cpp | 141 +++++++++++++++++++++----- 2 files changed, 128 insertions(+), 32 deletions(-) diff --git a/Recast/Source/RecastRasterization.cpp b/Recast/Source/RecastRasterization.cpp index d2bb7c9..1e7750d 100644 --- a/Recast/Source/RecastRasterization.cpp +++ b/Recast/Source/RecastRasterization.cpp @@ -95,7 +95,7 @@ static void addSpan(rcHeightfield& hf, const int x, const int y, s->area = area; s->next = 0; - // Empty cell, add he first span. + // Empty cell, add the first span. if (!hf.spans[idx]) { hf.spans[idx] = s; @@ -222,14 +222,10 @@ static void rasterizeTri(const float* v0, const float* v1, const float* v2, if (!overlapBounds(bmin, bmax, tmin, tmax)) return; - // Calculate the footpring of the triangle on the grid. - int x0 = (int)((tmin[0] - bmin[0])*ics); + // Calculate the footprint of the triangle on the grid's y-axis int y0 = (int)((tmin[2] - bmin[2])*ics); - int x1 = (int)((tmax[0] - bmin[0])*ics); int y1 = (int)((tmax[2] - bmin[2])*ics); - x0 = rcClamp(x0, 0, w-1); y0 = rcClamp(y0, 0, h-1); - x1 = rcClamp(x1, 0, w-1); y1 = rcClamp(y1, 0, h-1); // Clip the triangle into all grid cells it touches. 
@@ -248,6 +244,17 @@ static void rasterizeTri(const float* v0, const float* v1, const float* v2, nvrow = clipPoly(out, nvrow, inrow, 0, -1, cz+cs); if (nvrow < 3) continue; + float minX = inrow[0], maxX = inrow[0]; + for (int i=1; i inrow[i*3]) minX = inrow[i*3]; + if (maxX < inrow[i*3]) maxX = inrow[i*3]; + } + int x0 = (int)((minX - bmin[0])*ics); + int x1 = (int)((maxX - bmin[0])*ics); + x0 = rcClamp(x0, 0, w-1); + x1 = rcClamp(x1, 0, w-1); + for (int x = x0; x <= x1; ++x) { // Clip polygon to column. diff --git a/Recast/Source/RecastRegion.cpp b/Recast/Source/RecastRegion.cpp index 76e631c..589fac2 100644 --- a/Recast/Source/RecastRegion.cpp +++ b/Recast/Source/RecastRegion.cpp @@ -286,7 +286,10 @@ static bool floodRegion(int x, int y, int i, if (nr & RC_BORDER_REG) // Do not take borders into account. continue; if (nr != 0 && nr != r) + { ar = nr; + break; + } const rcCompactSpan& as = chf.spans[ai]; @@ -300,7 +303,10 @@ static bool floodRegion(int x, int y, int i, continue; unsigned short nr2 = srcReg[ai2]; if (nr2 != 0 && nr2 != r) + { ar = nr2; + break; + } } } } @@ -340,30 +346,44 @@ static unsigned short* expandRegions(int maxIter, unsigned short level, rcCompactHeightfield& chf, unsigned short* srcReg, unsigned short* srcDist, unsigned short* dstReg, unsigned short* dstDist, - rcIntArray& stack) + rcIntArray& stack, + bool fillStack) { const int w = chf.width; const int h = chf.height; - // Find cells revealed by the raised level. - stack.resize(0); - for (int y = 0; y < h; ++y) + if (fillStack) { - for (int x = 0; x < w; ++x) + // Find cells revealed by the raised level. 
+ stack.resize(0); + for (int y = 0; y < h; ++y) { - const rcCompactCell& c = chf.cells[x+y*w]; - for (int i = (int)c.index, ni = (int)(c.index+c.count); i < ni; ++i) + for (int x = 0; x < w; ++x) { - if (chf.dist[i] >= level && srcReg[i] == 0 && chf.areas[i] != RC_NULL_AREA) + const rcCompactCell& c = chf.cells[x+y*w]; + for (int i = (int)c.index, ni = (int)(c.index+c.count); i < ni; ++i) { - stack.push(x); - stack.push(y); - stack.push(i); + if (chf.dist[i] >= level && srcReg[i] == 0 && chf.areas[i] != RC_NULL_AREA) + { + stack.push(x); + stack.push(y); + stack.push(i); + } } } } } - + else // use cells in the input stack + { + // mark all cells which already have a region + for (int j=0; j 0) { @@ -434,6 +454,61 @@ static unsigned short* expandRegions(int maxIter, unsigned short level, } + +static void sortCellsByLevel(unsigned short startLevel, + rcCompactHeightfield& chf, + unsigned short* srcReg, + unsigned int nbStacks, rcIntArray* stacks, + unsigned short loglevelsPerStack) // the levels per stack (2 in our case) as a bit shift +{ + const int w = chf.width; + const int h = chf.height; + startLevel = startLevel >> loglevelsPerStack; + + for (unsigned int j=0; j> loglevelsPerStack; + int sId = startLevel - level; + if (sId >= (int)nbStacks) + continue; + if (sId < 0) + sId = 0; + + stacks[sId].push(x); + stacks[sId].push(y); + stacks[sId].push(i); + } + } + } +} + + +static void appendStacks(rcIntArray& srcStack, rcIntArray& dstStack, + unsigned short* srcReg) +{ + for (int j=0; jstartTimer(RC_TIMER_BUILD_REGIONS_WATERSHED); - + + const int LOG_NB_STACKS = 3; + const int NB_STACKS = 1 << LOG_NB_STACKS; + rcIntArray lvlStacks[NB_STACKS]; + for (int i=0; i 0) { level = level >= 2 ? 
level-2 : 0; - + sId = (sId+1) & (NB_STACKS-1); + +// ctx->startTimer(RC_TIMER_DIVIDE_TO_LEVELS); + + if (sId == 0) + sortCellsByLevel(level, chf, srcReg, NB_STACKS, lvlStacks, 1); + else + appendStacks(lvlStacks[sId-1], lvlStacks[sId], srcReg); // copy left overs from last level + +// ctx->stopTimer(RC_TIMER_DIVIDE_TO_LEVELS); + ctx->startTimer(RC_TIMER_BUILD_REGIONS_EXPAND); // Expand current regions until no empty connected cells found. - if (expandRegions(expandIters, level, chf, srcReg, srcDist, dstReg, dstDist, stack) != srcReg) + if (expandRegions(expandIters, level, chf, srcReg, srcDist, dstReg, dstDist, lvlStacks[sId], false) != srcReg) { rcSwap(srcReg, dstReg); rcSwap(srcDist, dstDist); @@ -1289,18 +1381,15 @@ bool rcBuildRegions(rcContext* ctx, rcCompactHeightfield& chf, ctx->startTimer(RC_TIMER_BUILD_REGIONS_FLOOD); // Mark new regions with IDs. - for (int y = 0; y < h; ++y) + for (int j=0; j= 0 && srcReg[i] == 0) { - const rcCompactCell& c = chf.cells[x+y*w]; - for (int i = (int)c.index, ni = (int)(c.index+c.count); i < ni; ++i) - { - if (chf.dist[i] < level || srcReg[i] != 0 || chf.areas[i] == RC_NULL_AREA) - continue; - if (floodRegion(x, y, i, level, regionId, chf, srcReg, srcDist, stack)) - regionId++; - } + if (floodRegion(x, y, i, level, regionId, chf, srcReg, srcDist, stack)) + regionId++; } } @@ -1308,7 +1397,7 @@ bool rcBuildRegions(rcContext* ctx, rcCompactHeightfield& chf, } // Expand current regions until no empty connected cells found. - if (expandRegions(expandIters*8, 0, chf, srcReg, srcDist, dstReg, dstDist, stack) != srcReg) + if (expandRegions(expandIters*8, 0, chf, srcReg, srcDist, dstReg, dstDist, stack, true) != srcReg) { rcSwap(srcReg, dstReg); rcSwap(srcDist, dstDist); From e063ba6f5adfff896906df3caf98b486492d952b Mon Sep 17 00:00:00 2001 From: axelrodR Date: Wed, 15 Jan 2014 14:14:08 +0200 Subject: [PATCH 2/3] Rasterization optimization: replaced clipping for each individual cell. 
the clipping line/plane between two adjacent cells is common so data from former cell can be reused if we keep track of the "remaining polygon". --- Recast/Source/RecastRasterization.cpp | 79 +++++++++++++++++---------- 1 file changed, 49 insertions(+), 30 deletions(-) diff --git a/Recast/Source/RecastRasterization.cpp b/Recast/Source/RecastRasterization.cpp index 1e7750d..f082438 100644 --- a/Recast/Source/RecastRasterization.cpp +++ b/Recast/Source/RecastRasterization.cpp @@ -169,36 +169,53 @@ void rcAddSpan(rcContext* /*ctx*/, rcHeightfield& hf, const int x, const int y, addSpan(hf, x,y, smin, smax, area, flagMergeThr); } -static int clipPoly(const float* in, int n, float* out, float pnx, float pnz, float pd) +// divides a convex polygons into two convex polygons on both sides of a line +static void dividePoly(const float* in, int nbIn, + float* out1, int* nb1, + float* out2, int* nb2, + float x, int axis) { float d[12]; - for (int i = 0; i < n; ++i) - d[i] = pnx*in[i*3+0] + pnz*in[i*3+2] + pd; - - int m = 0; - for (int i = 0, j = n-1; i < n; j=i, ++i) + for (int i = 0; i < nbIn; ++i) + d[i] = x - in[i*3+axis]; + + int m = 0, n = 0; + for (int i = 0, j = nbIn-1; i < nbIn; j=i, ++i) { bool ina = d[j] >= 0; bool inb = d[i] >= 0; if (ina != inb) { float s = d[j] / (d[j] - d[i]); - out[m*3+0] = in[j*3+0] + (in[i*3+0] - in[j*3+0])*s; - out[m*3+1] = in[j*3+1] + (in[i*3+1] - in[j*3+1])*s; - out[m*3+2] = in[j*3+2] + (in[i*3+2] - in[j*3+2])*s; + out1[m*3+0] = in[j*3+0] + (in[i*3+0] - in[j*3+0])*s; + out1[m*3+1] = in[j*3+1] + (in[i*3+1] - in[j*3+1])*s; + out1[m*3+2] = in[j*3+2] + (in[i*3+2] - in[j*3+2])*s; + rcVcopy(out2 + n*3, out1 + m*3); m++; + n++; } if (inb) { - out[m*3+0] = in[i*3+0]; - out[m*3+1] = in[i*3+1]; - out[m*3+2] = in[i*3+2]; + out1[m*3+0] = in[i*3+0]; + out1[m*3+1] = in[i*3+1]; + out1[m*3+2] = in[i*3+2]; m++; + if (d[0] != 0) // not on the line + continue; } + + // i-th point is on the other half plane or on the line + out2[n*3+0] = in[i*3+0]; + 
out2[n*3+1] = in[i*3+1]; + out2[n*3+2] = in[i*3+2]; + n++; } - return m; + + *nb1 = m; + *nb2 = n; } + static void rasterizeTri(const float* v0, const float* v1, const float* v2, const unsigned char area, rcHeightfield& hf, const float* bmin, const float* bmax, @@ -229,21 +246,23 @@ static void rasterizeTri(const float* v0, const float* v1, const float* v2, y1 = rcClamp(y1, 0, h-1); // Clip the triangle into all grid cells it touches. - float in[7*3], out[7*3], inrow[7*3]; + float buf[7*3*4]; + float *in = buf, *inrow = buf+7*3, *p1 = inrow+7*3, *p2 = p1+7*3; + + rcVcopy(&in[0], v0); + rcVcopy(&in[1*3], v1); + rcVcopy(&in[2*3], v2); + int nvrow, nvIn = 3; for (int y = y0; y <= y1; ++y) { - // Clip polygon to row. - rcVcopy(&in[0], v0); - rcVcopy(&in[1*3], v1); - rcVcopy(&in[2*3], v2); - int nvrow = 3; + // Clip polygon to row. Store the remaining polygon as well const float cz = bmin[2] + y*cs; - nvrow = clipPoly(in, nvrow, out, 0, 1, -cz); - if (nvrow < 3) continue; - nvrow = clipPoly(out, nvrow, inrow, 0, -1, cz+cs); + dividePoly(in, nvIn, inrow, &nvrow, p1, &nvIn, cz+cs, 2); + rcSwap(in, p1); if (nvrow < 3) continue; + // find the horizontal bounds in the row float minX = inrow[0], maxX = inrow[0]; for (int i=1; i Date: Sun, 19 Jan 2014 14:02:41 +0200 Subject: [PATCH 3/3] Optimization of the mesh detail construction: replaced the first of the 2 flood-fill algorithms (the one used to find the span corresponding to the center of the polygon) with a search of span at the center with the region matching the polygon. 
--- Recast/Source/RecastMeshDetail.cpp | 132 ++++++++--------------------- 1 file changed, 35 insertions(+), 97 deletions(-) diff --git a/Recast/Source/RecastMeshDetail.cpp b/Recast/Source/RecastMeshDetail.cpp index 77438fd..3b94663 100644 --- a/Recast/Source/RecastMeshDetail.cpp +++ b/Recast/Source/RecastMeshDetail.cpp @@ -744,60 +744,20 @@ static bool buildPolyDetail(rcContext* ctx, const float* in, const int nin, static void getHeightData(const rcCompactHeightfield& chf, const unsigned short* poly, const int npoly, const unsigned short* verts, const int bs, - rcHeightPatch& hp, rcIntArray& stack) + rcHeightPatch& hp, rcIntArray& stack, + int region) { - // Floodfill the heightfield to get 2D height data, - // starting at vertex locations as seeds. - // Note: Reads to the compact heightfield are offset by border size (bs) // since border size offset is already removed from the polymesh vertices. - - memset(hp.data, 0, sizeof(unsigned short)*hp.width*hp.height); - + stack.resize(0); - + static const int offset[9*2] = { - 0,0, -1,-1, 0,-1, 1,-1, 1,0, 1,1, 0,1, -1,1, -1,0, + 0,0, -1,0, 0,1, 1,0, 0,-1, -1,-1, -1,1, 1,1, 1,-1 }; - - // Use poly vertices as seed points for the flood fill. 
- for (int j = 0; j < npoly; ++j) - { - int cx = 0, cz = 0, ci =-1; - int dmin = RC_UNSET_HEIGHT; - for (int k = 0; k < 9; ++k) - { - const int ax = (int)verts[poly[j]*3+0] + offset[k*2+0]; - const int ay = (int)verts[poly[j]*3+1]; - const int az = (int)verts[poly[j]*3+2] + offset[k*2+1]; - if (ax < hp.xmin || ax >= hp.xmin+hp.width || - az < hp.ymin || az >= hp.ymin+hp.height) - continue; - - const rcCompactCell& c = chf.cells[(ax+bs)+(az+bs)*chf.width]; - for (int i = (int)c.index, ni = (int)(c.index+c.count); i < ni; ++i) - { - const rcCompactSpan& s = chf.spans[i]; - int d = rcAbs(ay - (int)s.y); - if (d < dmin) - { - cx = ax; - cz = az; - ci = i; - dmin = d; - } - } - } - if (ci != -1) - { - stack.push(cx); - stack.push(cz); - stack.push(ci); - } - } - - // Find center of the polygon using flood fill. + + // find the center of the polygon int pcx = 0, pcz = 0; for (int j = 0; j < npoly; ++j) { @@ -806,58 +766,37 @@ static void getHeightData(const rcCompactHeightfield& chf, } pcx /= npoly; pcz /= npoly; - - for (int i = 0; i < stack.size(); i += 3) - { - int cx = stack[i+0]; - int cy = stack[i+1]; - int idx = cx-hp.xmin+(cy-hp.ymin)*hp.width; - hp.data[idx] = 1; - } - - while (stack.size() > 0) - { - int ci = stack.pop(); - int cy = stack.pop(); - int cx = stack.pop(); - - // Check if close to center of the polygon. 
- if (rcAbs(cx-pcx) <= 1 && rcAbs(cy-pcz) <= 1) - { - stack.resize(0); - stack.push(cx); - stack.push(cy); - stack.push(ci); - break; - } - - const rcCompactSpan& cs = chf.spans[ci]; - - for (int dir = 0; dir < 4; ++dir) - { - if (rcGetCon(cs, dir) == RC_NOT_CONNECTED) continue; - - const int ax = cx + rcGetDirOffsetX(dir); - const int ay = cy + rcGetDirOffsetY(dir); - - if (ax < hp.xmin || ax >= (hp.xmin+hp.width) || - ay < hp.ymin || ay >= (hp.ymin+hp.height)) - continue; - - if (hp.data[ax-hp.xmin+(ay-hp.ymin)*hp.width] != 0) - continue; - - const int ai = (int)chf.cells[(ax+bs)+(ay+bs)*chf.width].index + rcGetCon(cs, dir); - int idx = ax-hp.xmin+(ay-hp.ymin)*hp.width; - hp.data[idx] = 1; - - stack.push(ax); - stack.push(ay); - stack.push(ai); + // find a span with the right region around this point + // No need to check for connectivity because the region ensures it + for (int dir = 0; dir < 9; ++dir) + { + int ax = pcx + offset[dir*2+0]; + int az = pcz + offset[dir*2+1]; + + if (ax < hp.xmin || ax >= hp.xmin+hp.width || + az < hp.ymin || az >= hp.ymin+hp.height) + continue; + + const rcCompactCell& c = chf.cells[(ax+bs)+(az+bs)*chf.width]; + for (int i = (int)c.index, ni = (int)(c.index+c.count); i < ni; ++i) + { + const rcCompactSpan& s = chf.spans[i]; + if (s.reg == region) + { + stack.push(ax); + stack.push(az); + stack.push(i); + break; + } } + if (stack.size() > 0) + break; } + // Floodfill the heightfield to get 2D height data, + // starting at center location found above as seed. + memset(hp.data, 0xff, sizeof(unsigned short)*hp.width*hp.height); // Mark start locations. 
@@ -914,7 +853,6 @@ static void getHeightData(const rcCompactHeightfield& chf, stack.push(ai); } } - } static unsigned char getEdgeFlags(const float* va, const float* vb, @@ -1072,7 +1010,7 @@ bool rcBuildPolyMeshDetail(rcContext* ctx, const rcPolyMesh& mesh, const rcCompa hp.ymin = bounds[i*4+2]; hp.width = bounds[i*4+1]-bounds[i*4+0]; hp.height = bounds[i*4+3]-bounds[i*4+2]; - getHeightData(chf, p, npoly, mesh.verts, borderSize, hp, stack); + getHeightData(chf, p, npoly, mesh.verts, borderSize, hp, stack, mesh.regs[i]); // Build detail mesh. int nverts = 0;