yade-dev team mailing list archive
-
yade-dev team
-
Mailing list archive
-
Message #11261
[Branch ~yade-pkg/yade/git-trunk] Rev 4164: improve falling back to 1-thread in parrallel collider (fix https://bugs.launchpad.net/yade/+bug/...
------------------------------------------------------------
revno: 4164
committer: Bruno Chareyre <bruno.chareyre@xxxxxxxxxxx>
timestamp: Mon 2014-09-15 12:27:11 +0200
message:
improve falling back to 1-thread in parrallel collider (fix https://bugs.launchpad.net/yade/+bug/1368591)
modified:
pkg/common/InsertionSortCollider.cpp
--
lp:yade
https://code.launchpad.net/~yade-pkg/yade/git-trunk
Your team Yade developers is subscribed to branch lp:yade.
To unsubscribe from this branch go to https://code.launchpad.net/~yade-pkg/yade/git-trunk/+edit-subscription
=== modified file 'pkg/common/InsertionSortCollider.cpp'
--- pkg/common/InsertionSortCollider.cpp 2014-09-15 10:27:11 +0000
+++ pkg/common/InsertionSortCollider.cpp 2014-09-15 10:27:11 +0000
@@ -58,7 +58,7 @@
void InsertionSortCollider::insertionSortParallel(VecBounds& v, InteractionContainer* interactions, Scene*, bool doCollide){
#ifdef YADE_OPENMP
assert(!periodic);
- assert(v.size==(long)v.vec.size());
+ assert(v.size==(long)v.vec.size());
if (ompThreads<=1) return insertionSort(v,interactions, scene, doCollide);
Real chunksVerlet = 4*verletDist;//is 2* the theoretical requirement?
@@ -73,16 +73,16 @@
bool changeChunks=false;
for(unsigned n=1; n<nChunks;n++) if (chunksVerlet>(v[chunks[n]].coord-v[chunks[n-1]].coord)) changeChunks=true;
if (!changeChunks) break;
- nChunks--; chunkSize = unsigned(v.size/nChunks)+1; chunks.clear();
+ nChunks--; chunkSize = unsigned(v.size/nChunks)+1; chunks.clear();
for(unsigned n=0; n<nChunks;n++) chunks.push_back(n*chunkSize); chunks.push_back(v.size);
}
static unsigned warnOnce=0;
- if (nChunks<unsigned(ompThreads) && !warnOnce++) LOG_WARN("Parallel insertion: only "<<nChunks <<" thread(s) used. The number of bodies is probably too small for allowing more threads. The contact detection should succeed but not all available threads are used.");
+ if (nChunks<unsigned(ompThreads) && !warnOnce++) LOG_WARN("Parallel insertion: only "<<nChunks <<" thread(s) used. The number of bodies is probably too small for allowing more threads, or the geometry is flat. The contact detection should succeed but not all available threads are used.");
///Define per-thread containers bufferizing the actual insertion of new interactions, since inserting is not thread-safe
std::vector<std::vector<std::pair<Body::id_t,Body::id_t> > > newInteractions;
newInteractions.resize(ompThreads,std::vector<std::pair<Body::id_t,Body::id_t> >());
- for (int kk=0; kk<ompThreads; kk++) newInteractions[kk].reserve(1000);
+ for (int kk=0; kk<ompThreads; kk++) newInteractions[kk].reserve(long(chunkSize*0.3));
/// First sort, independant in each chunk
#pragma omp parallel for schedule(dynamic,1) num_threads(ompThreads>0 ? min(ompThreads,omp_get_max_threads()) : omp_get_max_threads())
@@ -103,21 +103,22 @@
v[j+1]=viInit;
}
}
-
///In the second sort, the chunks are connected consistently.
///If sorting requires to move a bound past half-chunk, the algorithm is not thread safe,
- /// if it happens we roll-back and run the 1-thread sort + send warning
+ /// if it happens we run the 1-thread sort at the end
bool parallelFailed=false;
#pragma omp parallel for schedule(dynamic,1) num_threads(ompThreads>0 ? min(ompThreads,omp_get_max_threads()) : omp_get_max_threads())
for (unsigned k=1; k<nChunks;k++) {
-
int threadNum = omp_get_thread_num();
long i=chunks[k];
- for(; v[i]<v[i-1]; i++){
+ long halfChunkStart = long(i-chunkSize*0.5);
+ long halfChunkEnd = long(i+chunkSize*0.5);
+ for(; i<halfChunkEnd; i++){
+ if (!(v[i]<v[i-1])) break; //contiguous chunks now connected consistently
const Bounds viInit=v[i]; long j=i-1; /* cache hasBB; otherwise 1% overall performance hit */ const bool viInitBB=viInit.flags.hasBB;
const bool isMin=viInit.flags.isMin;
- while(j>=chunks[k-1] && viInit<v[j]){
+ while(j>=halfChunkStart && viInit<v[j]){
v[j+1]=v[j];
if(isMin && !v[j].flags.isMin && doCollide && viInitBB && v[j].flags.hasBB && (viInit.id!=v[j].id)) {
const Body::id_t& id1 = v[j].id; const Body::id_t& id2 = viInit.id;
@@ -127,25 +128,16 @@
j--;
}
v[j+1]=viInit;
- if (j<=long(chunks[k]-chunkSize*0.5)) {
- LOG_WARN("parallel sort not guaranteed to succeed; in chunk "<<k<<" of "<<nChunks<< ", bound descending past half-chunk. Parallel colliding will be completed in single thread. Consider turning collider.ompThreads=1, it may be more efficient in this case.");
- parallelFailed=true;}
+ if (j<halfChunkStart) parallelFailed=true;
}
- if (i>=long(chunks[k]+chunkSize*0.5)) {
- LOG_WARN("parallel sort not guaranteed to succeed; in chunk "<<k+1<<" of "<<nChunks<< ", bound advancing past half-chunk. Parallel colliding will be completed in single thread. Consider turning collider.ompThreads=1, it may be more efficient in this case.");
- parallelFailed=true;}
+ if (i>=halfChunkEnd) parallelFailed=true;
}
- /// Check again, just to be sure...
- for (unsigned k=1; k<nChunks;k++) if (v[chunks[k]]<v[chunks[k]-1]) {
- LOG_WARN("Parallel colliding will be completed in single thread. Consider turning collider.ompThreads=1, it may be more efficient in this case.");
- parallelFailed=true;}
-
- /// Now insert interactions sequentially
+ /// Now insert interactions sequentially
for (int n=0;n<ompThreads;n++)
for (size_t k=0, kend=newInteractions[n].size();k<kend;k++)
/*if (!interactions->found(newInteractions[n][k].first,newInteractions[n][k].second))*/ //Not needed, already checked above
interactions->insert(shared_ptr<Interaction>(new Interaction(newInteractions[n][k].first,newInteractions[n][k].second)));
-
+ /// If some bounds traversed more than a half-chunk, we complete colliding with the sequential sort
if (parallelFailed) return insertionSort(v,interactions, scene, doCollide);
#endif
}