yade-dev team mailing list archive

Thread
Date

[Branch ~yade-pkg/yade/git-trunk] Rev 3922: Merge branch 'master' of github.com:yade/trunk

To: Yade developers <yade-dev@xxxxxxxxxxxxxxxxxxx>
From: noreply@xxxxxxxxxxxxx
Date: Wed, 16 Apr 2014 21:13:25 -0000
Reply-to: noreply@xxxxxxxxxxxxx
Sender: bounces@xxxxxxxxxxxxx

Merge authors:
  Anton Gladky (gladky-anton)
  Bruno Chareyre (bruno-chareyre)
  Bruno Chareyre (bruno-chareyre)
  jduriez (jduriez)
------------------------------------------------------------
revno: 3922 [merge]
committer: Luc Sibille <luc.sibille@xxxxxxxxxxxxxxx>
timestamp: Wed 2014-04-16 19:01:30 +0200
message:
  Merge branch 'master' of github.com:yade/trunk
modified:
  core/InteractionContainer.hpp
  doc/references.bib
  doc/sphinx/references.bib
  lib/base/openmp-accu.hpp
  lib/triangulation/FlowBoundingSphere.ipp
  pkg/common/Dispatching.cpp
  pkg/common/Dispatching.hpp
  pkg/common/InsertionSortCollider.cpp
  pkg/common/InsertionSortCollider.hpp
  pkg/common/InteractionLoop.cpp
  pkg/dem/Law2_ScGeom_CapillaryPhys_Capillarity.hpp
  pkg/pfv/FlowEngine.hpp
  py/_utils.cpp
  py/wrapper/yadeWrapper.cpp


--
lp:yade
https://code.launchpad.net/~yade-pkg/yade/git-trunk

Your team Yade developers is subscribed to branch lp:yade.
To unsubscribe from this branch go to https://code.launchpad.net/~yade-pkg/yade/git-trunk/+edit-subscription

=== modified file 'core/InteractionContainer.hpp'
--- core/InteractionContainer.hpp	2014-02-03 11:21:42 +0000
+++ core/InteractionContainer.hpp	2014-02-22 04:56:33 +0000
@@ -37,10 +37,6 @@
 
 Future (?):
 
-* The shared_ptr<Interaction> might be duplicated in body id2 as well. That would allow to retrieve
-  in a straigthforward manner all interactions with given body id, for instance. Performance implications
-  are not clear.
-
 * the linear vector might be removed; in favor of linear traversal of bodies by their subdomains,
   then traversing the map in each body. If the previous point would come to realization, half of the
   interactions would have to be skipped explicitly in such a case.
@@ -112,14 +108,31 @@
 		*/
 		template<class T> size_t conditionalyEraseNonReal(const T& t, Scene* rb){
 			// beware iterators here, since erase is invalidating them. We need to iterate carefully, and keep in mind that erasing one interaction is moving the last one to the current position.
+			// For the parallel flavor we build the list to be erased in parallel, then it is erased sequentially. Still significant speedup since checking bounds is the most expensive part.
+		#ifndef YADE_OPENMP
 			size_t initSize=currSize;
 		 	for (size_t linPos=0; linPos<currSize;){
 				const shared_ptr<Interaction>& i=linIntrs[linPos];
 				if(!i->isReal() && t.shouldBeErased(i->getId1(),i->getId2(),rb)) erase(i->getId1(),i->getId2(),linPos);
 				else linPos++;}
 			return initSize-currSize;
+		#else
+			unsigned nThreads= omp_get_max_threads();
+			assert(nThreads>0);
+			std::vector<std::vector<Vector3i > >toErase;
+			toErase.resize(nThreads,std::vector<Vector3i >());
+			for (unsigned kk=0;  kk<nThreads; kk++) toErase[kk].reserve(1000);//A smarter value than 1000?			
+			size_t initSize=currSize;
+			#pragma omp parallel for schedule(guided,100) num_threads(nThreads)
+			for (size_t linPos=0; linPos<currSize;linPos++){
+				const shared_ptr<Interaction>& i=linIntrs[linPos];
+				if(!i->isReal() && t.shouldBeErased(i->getId1(),i->getId2(),rb)) toErase[omp_get_thread_num()].push_back(Vector3i(i->getId1(),i->getId2(),linPos)) ;
+				}
+			for (unsigned int kk=0;  kk<nThreads; kk++) for (size_t ii(0), jj(toErase[kk].size()); ii<jj;ii++) erase(toErase[kk][ii][0],toErase[kk][ii][1],toErase[kk][ii][2]);
+			return initSize-currSize;
+		#endif
 		}
-
+		
 	// we must call Scene's ctor (and from Scene::postLoad), since we depend on the existing BodyContainer at that point.
 	void postLoad__calledFromScene(const shared_ptr<BodyContainer>&);
 	void preLoad(InteractionContainer&);

=== modified file 'doc/references.bib'
--- doc/references.bib	2014-04-09 09:11:49 +0000
+++ doc/references.bib	2014-04-15 13:55:10 +0000
@@ -631,6 +631,17 @@
 	author = "Diego Mas Ivars and Matthew E. Pierce and Caroline Darcel and Juan Reyes-Montes and David O. Potyondy and R. Paul Young and Peter A. Cundall"
 }
 
+@article{Potyondy2004,
+title = "A bonded-particle model for rock ",
+journal = "International Journal of Rock Mechanics and Mining Sciences ",
+volume = "41",
+number = "8",
+pages = "1329 - 1364",
+year = "2004",
+doi = "10.1016/j.ijrmms.2004.09.011",
+author = "D.O. Potyondy and P.A. Cundall"
+}
+
 @book{Radjai2011,
 	title={Discrete-Element Modeling of Granular Materials},
 	author={Radjai, F. and Dubois, F.},

=== modified file 'doc/sphinx/references.bib'
--- doc/sphinx/references.bib	2013-10-04 12:35:58 +0000
+++ doc/sphinx/references.bib	2014-04-15 13:55:10 +0000
@@ -116,6 +116,16 @@
   publisher = {American Physical Society}
 }
 
+@article{Potyondy2004,
+title = "A bonded-particle model for rock ",
+journal = "International Journal of Rock Mechanics and Mining Sciences ",
+volume = "41",
+number = "8",
+pages = "1329 - 1364",
+year = "2004",
+doi = "10.1016/j.ijrmms.2004.09.011",
+author = "D.O. Potyondy and P.A. Cundall"
+}
 
 @article{Jung1997,
 	author={Derek Jung and  Kamal K. Gupta},

=== modified file 'lib/base/openmp-accu.hpp'
--- lib/base/openmp-accu.hpp	2014-04-14 14:44:08 +0000
+++ lib/base/openmp-accu.hpp	2014-04-15 14:45:52 +0000
@@ -113,6 +113,57 @@
 	// only useful for debugging
 	std::vector<T> getPerThreadData() const { std::vector<T> ret; for(int i=0; i<nThreads; i++) ret.push_back(*(T*)(data+i*eSize)); return ret; }
 };
+
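+/* Minimal OpenMP-aware stand-in for std::vector: push_back() appends to a
+ * sub-vector selected by omp_get_thread_num(). Only the subset of the
+ * std::vector interface required by Yade is provided. */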
+template<typename T>
+class OpenMPVector{
+  std::vector<std::vector<T> > vals;
+  size_t sizeV;
+  public:
+    OpenMPVector() {sizeV = omp_get_max_threads(); vals.resize(sizeV);};
+    void push_back (const T& val) {vals[omp_get_thread_num()].push_back(val);};
+    size_t size() const {
+      size_t sumSize = 0;
+      for (size_t i=0; i<sizeV; i++) {
+        sumSize += vals[i].size();
+      }
+      return sumSize;
+    }
+    
+    size_t size(size_t t) const {
+      if (t >= sizeV) {
+        std::cerr<< ("Index is out of range.")<<std::endl; exit (EXIT_FAILURE);
+      } else {
+        return vals[t].size();
+      }
+    }
+    
+    size_t sizeT() {
+      return sizeV;
+    }
+    
+    T operator[](size_t ix) const {
+      if (ix >= size()) {
+        std::cerr<< ("Index is out of range.")<<std::endl; exit (EXIT_FAILURE);
+      } else {
+        size_t t = 0;
+        while (ix >= vals[t].size()) {
+          ix-=vals[t].size();
+          t+=1;
+        }
+        return vals[t][ix];
+      }
+    }
+    
+    void clear() {
+      for (size_t i=0; i<sizeV; i++) {
+        vals[i].clear();
+      }
+    }
+    
+};
 #else 
 template<typename T>
 class OpenMPArrayAccumulator{
@@ -145,6 +196,8 @@
 	// debugging only
 	std::vector<T> getPerThreadData() const { std::vector<T> ret; ret.push_back(data); return ret; }
 };
+
+template<typename T> using OpenMPVector=std::vector<T>; // without OpenMP a plain std::vector suffices
 #endif
 
 // boost serialization

=== modified file 'lib/triangulation/FlowBoundingSphere.ipp'
--- lib/triangulation/FlowBoundingSphere.ipp	2014-04-07 09:33:19 +0000
+++ lib/triangulation/FlowBoundingSphere.ipp	2014-04-15 16:39:53 +0000
@@ -609,8 +609,8 @@
 	vector<double> constrictions;
 	for (FiniteFacetsIterator f_it=Tri.finite_facets_begin(); f_it != Tri.finite_facets_end();f_it++){
 		//in the periodic case, we skip facets with lowest id out of the base period
-		if ( ((f_it->first->info().index < f_it->first->neighbor(f_it->second)->info().index) && f_it->first->info().isGhost)
-		||  ((f_it->first->info().index > f_it->first->neighbor(f_it->second)->info().index) && f_it->first->neighbor(f_it->second)->info().isGhost)
+		if ( ((f_it->first->info().index <= f_it->first->neighbor(f_it->second)->info().index) && f_it->first->info().isGhost)
+		||  ((f_it->first->info().index >= f_it->first->neighbor(f_it->second)->info().index) && f_it->first->neighbor(f_it->second)->info().isGhost)
 		|| f_it->first->info().index == 0 || f_it->first->neighbor(f_it->second)->info().index == 0) continue;
 		constrictions.push_back(computeEffectiveRadius(f_it->first, f_it->second));
 	}
@@ -624,8 +624,8 @@
 	vector<Constriction> constrictions;
 	for (FiniteFacetsIterator f_it=Tri.finite_facets_begin(); f_it != Tri.finite_facets_end();f_it++){
 		//in the periodic case, we skip facets with lowest id out of the base period
- 		 if ( ((f_it->first->info().index < f_it->first->neighbor(f_it->second)->info().index) && f_it->first->info().isGhost)
-		||  ((f_it->first->info().index > f_it->first->neighbor(f_it->second)->info().index) && f_it->first->neighbor(f_it->second)->info().isGhost)
+ 		 if ( ((f_it->first->info().index <= f_it->first->neighbor(f_it->second)->info().index) && f_it->first->info().isGhost)
+		||  ((f_it->first->info().index >= f_it->first->neighbor(f_it->second)->info().index) && f_it->first->neighbor(f_it->second)->info().isGhost)
 		|| f_it->first->info().index == 0 || f_it->first->neighbor(f_it->second)->info().index == 0) continue;
 		vector<double> rn;
 		const CVector& normal = f_it->first->info().facetSurfaces[f_it->second];

=== modified file 'pkg/common/Dispatching.cpp'
--- pkg/common/Dispatching.cpp	2014-02-03 11:21:42 +0000
+++ pkg/common/Dispatching.cpp	2014-04-16 10:30:23 +0000
@@ -17,12 +17,21 @@
 	updateScenePtr();
 	shared_ptr<BodyContainer>& bodies = scene->bodies;
 	const long numBodies=(long)bodies->size();
-	//#pragma omp parallel for
+	#pragma omp parallel for num_threads(ompThreads>0 ? min(ompThreads,omp_get_max_threads()) : omp_get_max_threads())
 	for(int id=0; id<numBodies; id++){
 		if(!bodies->exists(id)) continue; // don't delete this check  - Janek
 		const shared_ptr<Body>& b=(*bodies)[id];
+		processBody(b);
+	}
+// 	With -j4, this update takes more time than the dispatching itself, and it is quite useless: commented out
+// 	scene->updateBound();
+}
+
+void BoundDispatcher::processBody(const shared_ptr<Body>& b)
+{
+// 	const shared_ptr<Body>& b=(*bodies)[id];
 		shared_ptr<Shape>& shape=b->shape;
-		if(!shape || !b->isBounded()) continue;
+		if(!b->isBounded() || !shape) return;
 		if(b->bound) {
 			Real& sweepLength = b->bound->sweepLength;
 			if (targetInterv>=0) {
@@ -36,12 +45,12 @@
 			} else sweepLength=sweepDist;
 		} 
 		#ifdef BV_FUNCTOR_CACHE
-		if(!shape->boundFunctor){ shape->boundFunctor=this->getFunctor1D(shape); if(!shape->boundFunctor) continue; }
+		if(!shape->boundFunctor){ shape->boundFunctor=this->getFunctor1D(shape); if(!shape->boundFunctor) return; }
 		shape->boundFunctor->go(shape,b->bound,b->state->se3,b.get());
 		#else
 		operator()(shape,b->bound,b->state->se3,b.get());
 		#endif
-		if(!b->bound) continue; // the functor did not create new bound
+		if(!b->bound) return; // the functor did not create new bound
 		b->bound->refPos=b->state->pos;
 		b->bound->lastUpdateIter=scene->iter;
 		const Real& sweepLength = b->bound->sweepLength;
@@ -51,8 +60,6 @@
 			aabb->max+=Vector3r(sweepLength,sweepLength,sweepLength);
 		}
 	}
-	scene->updateBound();
-}
 
 
 /********************************************************************

=== modified file 'pkg/common/Dispatching.hpp'
--- pkg/common/Dispatching.hpp	2012-01-23 14:43:54 +0000
+++ pkg/common/Dispatching.hpp	2014-02-22 04:56:33 +0000
@@ -81,6 +81,7 @@
 	public:
 		virtual void action();
 		virtual bool isActivated(){ return activated; }
+		void processBody(const shared_ptr<Body>&);
 	DECLARE_LOGGER;
 	YADE_DISPATCHER1D_FUNCTOR_DOC_ATTRS_CTOR_PY(BoundDispatcher,BoundFunctor,/*optional doc*/,
 		/*additional attrs*/

=== modified file 'pkg/common/InsertionSortCollider.cpp'
--- pkg/common/InsertionSortCollider.cpp	2014-02-02 20:49:51 +0000
+++ pkg/common/InsertionSortCollider.cpp	2014-02-24 14:34:20 +0000
@@ -11,6 +11,9 @@
 #include<algorithm>
 #include<vector>
 #include<boost/static_assert.hpp>
+#ifdef YADE_OPENMP
+  #include<omp.h>
+#endif
 
 using namespace std;
 
@@ -53,6 +56,92 @@
 	}
 }
 
+
+//Parallel version, only for non-periodic case at the moment (feel free to implement for the periodic case...)
+void InsertionSortCollider::insertionSortParallel(VecBounds& v, InteractionContainer* interactions, Scene*, bool doCollide){
+#ifdef YADE_OPENMP
+	assert(!periodic);	
+	assert(v.size==(long)v.vec.size());	
+	if (ompThreads<=1) return insertionSort(v,interactions, scene, doCollide);
+	
+	Real chunksVerlet = 4*verletDist;//is 2* the theoretical requirement?
+	if (chunksVerlet<=0) {LOG_ERROR("Parallel insertion sort needs verletDist>0");}
+	
+	///chunks defines subsets of the bounds lists, we make sure they are not too small wrt. verlet dist.
+	std::vector<unsigned> chunks;
+	unsigned nChunks = ompThreads;
+	unsigned chunkSize = unsigned(v.size/nChunks)+1;
+	for(unsigned n=0; n<nChunks;n++) chunks.push_back(n*chunkSize); chunks.push_back(v.size);
+	while (nChunks>1){
+		bool changeChunks=false;
+		for(unsigned n=1; n<nChunks;n++) if (chunksVerlet>(v[chunks[n]].coord-v[chunks[n-1]].coord)) changeChunks=true;
+		if (!changeChunks) break;
+		nChunks--; chunkSize = unsigned(v.size/nChunks)+1; chunks.clear();		
+		for(unsigned n=0; n<nChunks;n++) chunks.push_back(n*chunkSize); chunks.push_back(v.size);
+	}
+	static unsigned warnOnce=0;
+	if (nChunks<unsigned(ompThreads) && !warnOnce++) LOG_WARN("Parallel insertion: only "<<nChunks <<" thread(s) used. The number of bodies is probably too small for allowing more threads. The contact detection should succeed but not all available threads are used.");
+
+	///Define per-thread containers bufferizing the actual insertion of new interactions, since inserting is not thread-safe
+	std::vector<std::vector<std::pair<Body::id_t,Body::id_t> > > newInteractions;
+	newInteractions.resize(ompThreads,std::vector<std::pair<Body::id_t,Body::id_t> >());
+	for (int kk=0;  kk<ompThreads; kk++) newInteractions[kk].reserve(100);
+	
+	/// First sort, independent in each chunk
+	#pragma omp parallel for schedule(dynamic,1) num_threads(ompThreads>0 ? min(ompThreads,omp_get_max_threads()) : omp_get_max_threads())
+	for (unsigned k=0; k<nChunks;k++) {
+		int threadNum = omp_get_thread_num();
+		for(long i=chunks[k]+1; i<chunks[k+1]; i++){
+			const Bounds viInit=v[i]; long j=i-1; const bool viInitBB=viInit.flags.hasBB;
+			const bool isMin=viInit.flags.isMin; 
+			while(j>=chunks[k] && v[j]>viInit){
+				v[j+1]=v[j];
+				if(isMin && !v[j].flags.isMin && doCollide && viInitBB && v[j].flags.hasBB && (viInit.id!=v[j].id)) {
+					const Body::id_t& id1 = v[j].id; const Body::id_t& id2 = viInit.id; 
+					if (spatialOverlap(id1,id2) && Collider::mayCollide(Body::byId(id1,scene).get(),Body::byId(id2,scene).get()) && !interactions->found(id1,id2))
+						newInteractions[threadNum].push_back(std::pair<Body::id_t,Body::id_t>(v[j].id,viInit.id));
+				}
+				j--;
+			}
+			v[j+1]=viInit;
+		}
+	}
+	
+	///In the second sort, the chunks are connected consistently.
+	///If sorting requires to move a bound past half-chunk, the algorithm is not thread safe, if it happens we error out.
+	///Better than computing with messed up interactions
+	#pragma omp parallel for schedule(dynamic,1) num_threads(ompThreads>0 ? min(ompThreads,omp_get_max_threads()) : omp_get_max_threads())
+	for (unsigned k=1; k<nChunks;k++) {
+		
+		int threadNum = omp_get_thread_num();
+		long i=chunks[k];
+		for(; v[i]<v[i-1]; i++){
+			const Bounds viInit=v[i]; long j=i-1; /* cache hasBB; otherwise 1% overall performance hit */ const bool viInitBB=viInit.flags.hasBB;
+			const bool isMin=viInit.flags.isMin; 
+
+			while(j>=chunks[k-1] && viInit<v[j]){
+				v[j+1]=v[j];
+				if(isMin && !v[j].flags.isMin && doCollide && viInitBB && v[j].flags.hasBB && (viInit.id!=v[j].id)) {
+					const Body::id_t& id1 = v[j].id; const Body::id_t& id2 = viInit.id;
+					//FIXME: do we need the check with found(id1,id2) here? It is checked again below...
+					if (spatialOverlap(id1,id2) && Collider::mayCollide(Body::byId(id1,scene).get(),Body::byId(id2,scene).get()) && !interactions->found(id1,id2))
+						newInteractions[threadNum].push_back(std::pair<Body::id_t,Body::id_t>(v[j].id,viInit.id));}
+				j--;
+			}
+			v[j+1]=viInit;
+			if (j<=long(chunks[k]-chunkSize*0.5)) LOG_ERROR("parallel sort not guaranteed to succeed; in chunk "<<k<<" of "<<nChunks<< ", bound descending past half-chunk. Consider turning ompThreads=1 for thread safety.");
+		}
+		if (i>=long(chunks[k]+chunkSize*0.5)) LOG_ERROR("parallel sort not guaranteed to succeed; in chunk "<<k+1<<" of "<<nChunks<< ", bound advancing past half-chunk. Consider turning ompThreads=1 for thread safety.")
+	}
+	/// Check again, just to be sure...
+	for (unsigned k=1; k<nChunks;k++) if (v[chunks[k]]<v[chunks[k]-1]) LOG_ERROR("parallel sort failed, consider turning ompThreads=1");
+
+	/// Now insert interactions sequentially	
+	for (int n=0;n<ompThreads;n++) for (size_t k=0, kend=newInteractions[n].size();k<kend;k++) if (!interactions->found(newInteractions[n][k].first,newInteractions[n][k].second)) interactions->insert(shared_ptr<Interaction>(new Interaction(newInteractions[n][k].first,newInteractions[n][k].second)));
+#endif
+}
+
+
 vector<Body::id_t> InsertionSortCollider::probeBoundingVolume(const Bound& bv){
 	if(periodic){ throw invalid_argument("InsertionSortCollider::probeBoundingVolume: handling periodic boundary not implemented."); }
 	vector<Body::id_t> ret;
@@ -101,7 +190,9 @@
 	long nBodies=(long)scene->bodies->size();
 	InteractionContainer* interactions=scene->interactions.get();
 	scene->interactions->iterColliderLastRun=-1;
-
+	#ifdef YADE_OPENMP
+	if (ompThreads<=0) ompThreads = omp_get_max_threads();
+	#endif
 	// periodicity changed, force reinit
 	if(scene->isPeriodic != periodic){
 		for(int i=0; i<3; i++) BB[i].vec.clear();
@@ -153,6 +244,11 @@
 			// if no spheres, disable stride
 			verletDist=isinf(minR) ? 0 : abs(verletDist)*minR;
 		}
+		// if interactions are dirty, force reinitialization
+		if(scene->interactions->dirty){
+			doInitSort=true;
+			scene->interactions->dirty=false;
+		}
 		
 		// update bounds via boundDispatcher
 		boundDispatcher->scene=scene;
@@ -161,12 +257,8 @@
 		boundDispatcher->targetInterv=targetInterv;
 		boundDispatcher->updatingDispFactor=updatingDispFactor;
 		boundDispatcher->action();
+		ISC_CHECKPOINT("boundDispatcher");
 
-		// if interactions are dirty, force reinitialization
-		if(scene->interactions->dirty){
-			doInitSort=true;
-			scene->interactions->dirty=false;
-		}
 		
 		// STRIDE
 		if(verletDist>0){
@@ -176,67 +268,61 @@
 				if(!newton){ throw runtime_error("InsertionSortCollider.verletDist>0, but unable to locate NewtonIntegrator within O.engines."); }
 			}
 		}
-	ISC_CHECKPOINT("init");
-
 		// STRIDE
 			// get us ready for strides, if they were deactivated
-			if(!strideActive && verletDist>0 && newton->maxVelocitySq>=0){ // maxVelocitySq is a really computed value
-				strideActive=true;
-			}
+			if(!strideActive && verletDist>0 && newton->maxVelocitySq>=0) strideActive=true;
 			if(strideActive){
 				assert(verletDist>0);
 				assert(strideActive); assert(newton->maxVelocitySq>=0);
-					newton->updatingDispFactor=updatingDispFactor;
-			} else { /* !strideActive */
-				boundDispatcher->sweepDist=0;
-			}
+				newton->updatingDispFactor=updatingDispFactor;
+			} else boundDispatcher->sweepDist=0;
 
 	ISC_CHECKPOINT("bound");
 
-	// copy bounds along given axis into our arrays
-		for(long i=0; i<2*nBodies; i++){
-			for(int j=0; j<3; j++){
+	// copy bounds along given axis into our arrays 
+	#pragma omp parallel for schedule(guided) num_threads(ompThreads>0 ? min(ompThreads,omp_get_max_threads()) : omp_get_max_threads())
+	for(long i=0; i<2*nBodies; i++){
+// 		const long cacheIter = scene->iter;
+		for(int j=0; j<3; j++){
 				VecBounds& BBj=BB[j];
-				const Body::id_t id=BBj[i].id;
+				Bounds& BBji = BBj[i];
+				const Body::id_t id=BBji.id;
 				const shared_ptr<Body>& b=Body::byId(id,scene);
 				if(b){
 					const shared_ptr<Bound>& bv=b->bound;
 					// coordinate is min/max if has bounding volume, otherwise both are the position. Add periodic shift so that we are inside the cell
 					// watch out for the parentheses around ?: within ?: (there was unwanted conversion of the Reals to bools!)
-					
-					BBj[i].coord=((BBj[i].flags.hasBB=((bool)bv)) ? (BBj[i].flags.isMin ? bv->min[j] : bv->max[j]) : (b->state->pos[j])) - (periodic ? BBj.cellDim*BBj[i].period : 0.);
-					
+					BBji.coord=((BBji.flags.hasBB=((bool)bv)) ? (BBji.flags.isMin ? bv->min[j] : bv->max[j]) : (b->state->pos[j])) - (periodic ? BBj.cellDim*BBji.period : 0.);
+					// if initializing periodic, shift coords & record the period into BBj[i].period
+					if(doInitSort && periodic) BBji.coord=cellWrap(BBji.coord,0,BBj.cellDim,BBji.period);
+					// for each body, copy its minima and maxima, for quick checks of overlaps later
+					//bounds have been all updated when j==0, we can safely copy them here when j==1
+					if (BBji.flags.isMin && j==1 &&bv) {
+						 memcpy(&minima[3*id],&bv->min,3*sizeof(Real)); memcpy(&maxima[3*id],&bv->max,3*sizeof(Real)); 
+					}					
 				} else { BBj[i].flags.hasBB=false; /* for vanished body, keep the coordinate as-is, to minimize inversions. */ }
-				// if initializing periodic, shift coords & record the period into BBj[i].period
-				if(doInitSort && periodic) {
-					BBj[i].coord=cellWrap(BBj[i].coord,0,BBj.cellDim,BBj[i].period);
-				}
-			}	
+			}
 		}
-	// for each body, copy its minima and maxima, for quick checks of overlaps later
-	BOOST_STATIC_ASSERT(sizeof(Vector3r)==3*sizeof(Real));
-	for(Body::id_t id=0; id<nBodies; id++){
-		const shared_ptr<Body>& b=Body::byId(id,scene);
-		if(b){
-			const shared_ptr<Bound>& bv=b->bound;
-			if(bv) { memcpy(&minima[3*id],&bv->min,3*sizeof(Real)); memcpy(&maxima[3*id],&bv->max,3*sizeof(Real)); } // ⇐ faster than 6 assignments 
-			else{ const Vector3r& pos=b->state->pos; memcpy(&minima[3*id],&pos,3*sizeof(Real)); memcpy(&maxima[3*id],&pos,3*sizeof(Real)); }
-		} else { memset(&minima[3*id],0,3*sizeof(Real)); memset(&maxima[3*id],0,3*sizeof(Real)); }
-	}
 
 	ISC_CHECKPOINT("copy");
 
-	// process interactions that the constitutive law asked to be erased
-// 	interactions->erasePending(*this,scene);
+	// remove interactions which have disconnected bounds and are not real (will run parallel if YADE_OPENMP)
 	interactions->conditionalyEraseNonReal(*this,scene);
-	
+
 	ISC_CHECKPOINT("erase");
 
 	// sort
 		// the regular case
 		if(!doInitSort && !sortThenCollide){
-			/* each inversion in insertionSort calls handleBoundInversion, which in turns may add/remove interaction */
-			if(!periodic) for(int i=0; i<3; i++) insertionSort(BB[i],interactions,scene); 
+			/* each inversion found by insertionSort may add an interaction */
+			//1000 bodies is heuristic minimum above which parallel sort is called
+			if(!periodic) for(int i=0; i<3; i++) {
+			#ifdef YADE_OPENMP
+				if (ompThreads<=1 || nBodies<1000) insertionSort(BB[i],interactions,scene);
+				else insertionSortParallel(BB[i],interactions,scene);} 
+			#else
+				insertionSort(BB[i],interactions,scene);} 
+			#endif
 			else for(int i=0; i<3; i++) insertionSortPeri(BB[i],interactions,scene);
 		}
 		// create initial interactions (much slower)
@@ -244,6 +330,7 @@
 			if(doInitSort){
 				// the initial sort is in independent in 3 dimensions, may be run in parallel; it seems that there is no time gain running in parallel, though
 				// important to reset loInx for periodic simulation (!!)
+// 				#pragma omp parallel for schedule(dynamic,1) num_threads(min(ompThreads,3))
 				for(int i=0; i<3; i++) { BB[i].loIdx=0; std::sort(BB[i].vec.begin(),BB[i].vec.end()); }
 				numReinit++;
 			} else { // sortThenCollide
@@ -255,6 +342,12 @@
 			VecBounds& V=BB[sortAxis];
 			// go through potential aabb collisions, create interactions as necessary
 			if(!periodic){
+			#ifdef YADE_OPENMP
+				std::vector<std::vector<std::pair<Body::id_t,Body::id_t> > > newInts;
+				newInts.resize(ompThreads,std::vector<std::pair<Body::id_t,Body::id_t> >());
+				for (int kk=0;  kk<ompThreads; kk++) newInts[kk].reserve(unsigned(10*nBodies/ompThreads));
+				#pragma omp parallel for schedule(guided,200) num_threads(ompThreads)
+			#endif
 				for(long i=0; i<2*nBodies; i++){
 					// start from the lower bound (i.e. skipping upper bounds)
 					// skip bodies without bbox, because they don't collide
@@ -265,11 +358,23 @@
 						const Body::id_t& jid=V[j].id;
 						// take 2 of the same condition (only handle collision [min_i..max_i]+min_j, not [min_i..max_i]+min_i (symmetric)
 						if(!(V[j].flags.isMin && V[j].flags.hasBB)) continue;
-						/* abuse the same function here; since it does spatial overlap check first, it is OK to use it */
-						handleBoundInversion(iid,jid,interactions,scene);
-						assert(j<2*nBodies-1);
+						if (spatialOverlap(iid,jid) && Collider::mayCollide(Body::byId(iid,scene).get(),Body::byId(jid,scene).get()) ){
+						#ifdef YADE_OPENMP
+							unsigned int threadNum = omp_get_thread_num();
+							newInts[threadNum].push_back(std::pair<Body::id_t,Body::id_t>(iid,jid));
+						#else
+							if (!interactions->found(iid,jid))
+							interactions->insert(shared_ptr<Interaction>(new Interaction(iid,jid)));
+						#endif
+						}
 					}
 				}
+				//go through newly created candidates sequentially, duplicates coming from different threads may exist so we check existence with found()
+				#ifdef YADE_OPENMP
+				for (int n=0;n<ompThreads;n++) for (size_t k=0, kend=newInts[n].size();k<kend;k++)
+					if (!interactions->found(newInts[n][k].first,newInts[n][k].second))
+						interactions->insert(shared_ptr<Interaction>(new Interaction(newInts[n][k].first,newInts[n][k].second)));
+				#endif
 			} else { // periodic case: see comments above
 				for(long i=0; i<2*nBodies; i++){
 					if(!(V[i].flags.isMin && V[i].flags.hasBB)) continue;
@@ -354,8 +459,6 @@
 // called by the insertion sort if 2 bodies swapped their bounds
 void InsertionSortCollider::handleBoundInversionPeri(Body::id_t id1, Body::id_t id2, InteractionContainer* interactions, Scene*){
 	assert(periodic);
-	
-	///fast
 	Vector3i periods;
 	bool overlap=spatialOverlapPeri(id1,id2,scene,periods);
 	if (overlap && Collider::mayCollide(Body::byId(id1,scene).get(),Body::byId(id2,scene).get()) && !interactions->found(id1,id2)){
@@ -363,45 +466,6 @@
 		newI->cellDist=periods;
 		interactions->insert(newI);
 	}
-	
-	///Slow
-	// do bboxes overlap in all 3 dimensions?
-// 	Vector3i periods;
-// 	bool overlap=spatialOverlapPeri(id1,id2,scene,periods);
-// 	// existing interaction?
-// 	const shared_ptr<Interaction>& I=interactions->find(id1,id2);
-// 	bool hasInter=(bool)I;
-// 	#ifdef PISC_DEBUG
-// 		if(watchIds(id1,id2)) LOG_DEBUG("Inversion #"<<id1<<"+#"<<id2<<", overlap=="<<overlap<<", hasInter=="<<hasInter);
-// 	#endif
-// 	// interaction doesn't exist and shouldn't, or it exists and should
-// 	if(likely(!overlap && !hasInter)) return;
-// 	if(overlap && hasInter){  return; }
-// 	// create interaction if not yet existing
-// 	if(overlap && !hasInter){ // second condition only for readability
-// 		#ifdef PISC_DEBUG
-// 			if(watchIds(id1,id2)) LOG_DEBUG("Attemtping collision of #"<<id1<<"+#"<<id2);
-// 		#endif
-// 		if(!Collider::mayCollide(Body::byId(id1,scene).get(),Body::byId(id2,scene).get())) return;
-// 		// LOG_TRACE("Creating new interaction #"<<id1<<"+#"<<id2);
-// 		shared_ptr<Interaction> newI=shared_ptr<Interaction>(new Interaction(id1,id2));
-// 		newI->cellDist=periods;
-// 		#ifdef PISC_DEBUG
-// 			if(watchIds(id1,id2)) LOG_DEBUG("Created intr #"<<id1<<"+#"<<id2<<", periods="<<periods);
-// 		#endif
-// 		interactions->insert(newI);
-// 		return;
-// 	}
-// 	if(!overlap && hasInter){
-// 		if(!I->isReal()) {
-// 			interactions->erase(id1,id2);
-// 			#ifdef PISC_DEBUG
-// 				if(watchIds(id1,id2)) LOG_DEBUG("Erased intr #"<<id1<<"+#"<<id2);
-// 			#endif
-// 		}
-// 		return;
-// 	}
-// 	assert(false); // unreachable
 }
 
 /* Performance hint

=== modified file 'pkg/common/InsertionSortCollider.hpp'
--- pkg/common/InsertionSortCollider.hpp	2014-02-20 16:49:26 +0000
+++ pkg/common/InsertionSortCollider.hpp	2014-04-16 10:30:23 +0000
@@ -8,11 +8,6 @@
 
 /*! Periodic collider notes.
 
-Use
-===
-* scripts/test/periodic-simple.py
-* In the future, triaxial compression working by growing/shrinking the cell should be implemented.
-
 Architecture
 ============
 Values from bounding boxes are added information about period in which they are.
@@ -50,7 +45,8 @@
 
 Requirements
 ============
-* No body can have Aabb larger than about .499*cellSize. Exception is thrown if that is false.
+* By default, no body can have Aabb larger than about .499*cellSize. Exception is thrown if that is false.
+	Large bodies are accepted if allowBiggerThanPeriod (experimental)
 * Constitutive law must not get body positions from Body::state directly.
 	If it does, it uses Interaction::cellDist to compute periodic position.
 * No body can get further away than MAXINT periods. It will do horrible things if there is overflow. Not checked at the moment.
@@ -67,7 +63,7 @@
 
 
 // #define this macro to enable timing within this engine
-//#define ISC_TIMING
+// #define ISC_TIMING
 
 // #define to turn on some tracing information for the periodic part
 // all code under this can be probably removed at some point, when the collider will have been tested thoroughly
@@ -157,6 +153,7 @@
   	    http://en.wikipedia.org/wiki/Insertion_sort has the algorithm and other details
 	*/
 	void insertionSort(VecBounds& v,InteractionContainer*,Scene*,bool doCollide=true);
+	void insertionSortParallel(VecBounds& v,InteractionContainer*,Scene*,bool doCollide=true);
 	void handleBoundInversion(Body::id_t,Body::id_t,InteractionContainer*,Scene*);
 // 	bool spatialOverlap(Body::id_t,Body::id_t) const;
 

=== modified file 'pkg/common/InteractionLoop.cpp'
--- pkg/common/InteractionLoop.cpp	2014-02-03 11:21:42 +0000
+++ pkg/common/InteractionLoop.cpp	2014-04-16 10:30:23 +0000
@@ -21,15 +21,6 @@
 
 
 void InteractionLoop::action(){
-// 	if(eraseIntsInLoop && scene->interactions->conditionalyEraseNonReal(scene)>0 && !alreadyWarnedNoCollider){
-// 		LOG_WARN("Interactions pending erase found (erased), no collider being used?");
-// 		alreadyWarnedNoCollider=true;
-// 	}
-	/*
-	if(scene->interactions->dirty){
-		throw std::logic_error("InteractionContainer::dirty is true; the collider should re-initialize in such case and clear the dirty flag.");
-	}
-	*/
 	// update Scene* of the dispatchers
 	geomDispatcher->scene=physDispatcher->scene=lawDispatcher->scene=scene;
 	// ask dispatchers to update Scene* of their functors
@@ -60,11 +51,9 @@
 	// (only for some kinds of colliders; see comment for InteractionContainer::iterColliderLastRun)
 	bool removeUnseenIntrs=(scene->interactions->iterColliderLastRun>=0 && scene->interactions->iterColliderLastRun==scene->iter);
 
-
-
 	#ifdef YADE_OPENMP
 	const long size=scene->interactions->size();
-	#pragma omp parallel for schedule(guided) num_threads(ompThreads>0 ? ompThreads : omp_get_max_threads())
+	#pragma omp parallel for schedule(guided) num_threads(ompThreads>0 ? min(ompThreads,omp_get_max_threads()) : omp_get_max_threads())
 	for(long i=0; i<size; i++){
 		const shared_ptr<Interaction>& I=(*scene->interactions)[i];
 	#else
@@ -154,15 +143,4 @@
 			if(callbackPtrs[i]!=NULL) (*(callbackPtrs[i]))(callbacks[i].get(),I.get());
 		}
 	}
-	
-	// process eraseAfterLoop
-	#ifdef YADE_OPENMP
-		FOREACH(list<idPair>& l, eraseAfterLoopIds){
-			FOREACH(idPair p,l) scene->interactions->erase(p.first,p.second);
-			l.clear();
-		}
-	#else
-		FOREACH(idPair p, eraseAfterLoopIds) scene->interactions->erase(p.first,p.second);
-		eraseAfterLoopIds.clear();
-	#endif
 }

=== modified file 'pkg/dem/Law2_ScGeom_CapillaryPhys_Capillarity.hpp'
--- pkg/dem/Law2_ScGeom_CapillaryPhys_Capillarity.hpp	2014-03-21 18:45:24 +0000
+++ pkg/dem/Law2_ScGeom_CapillaryPhys_Capillarity.hpp	2014-04-16 09:32:47 +0000
@@ -93,7 +93,7 @@
 		void action();
 		void postLoad(Law2_ScGeom_CapillaryPhys_Capillarity&);
 		
-	YADE_CLASS_BASE_DOC_ATTRS_DEPREC_INIT_CTOR_PY(Law2_ScGeom_CapillaryPhys_Capillarity,GlobalEngine,"This law allows one to take into account capillary forces/effects between spheres coming from the presence of interparticular liquid bridges (menisci).\n\nThe control parameter is the capillary pressure (or suction) Uc = ugas - Uliquid. Liquid bridges properties (volume V, extent over interacting grains delta1 and delta2) are computed as a result of the defined capillary pressure and of the interacting geometry (spheres radii and interparticular distance).\n\nReferences: in english [Scholtes2009b]_; more detailed, but in french [Scholtes2009d]_.\n\nThe law needs ascii files M(r=i) with i=R1/R2 to work (see https://yade-dem.org/index.php/CapillaryTriaxialTest). These ASCII files contain a set of results from the resolution of the Laplace-Young equation for different configurations of the interacting geometry.\n\nIn order to allow capillary forces between distant spheres, it is necessary to enlarge the bounding boxes using :yref:`Bo1_Sphere_Aabb::aabbEnlargeFactor` and make the Ig2 define define distant interactions via :yref:`interactionDetectionFactor<Ig2_Sphere_Sphere_ScGeom::interactionDetectionFactor>`. It is also necessary to disable interactions removal by the constitutive law (:yref:`Law2<Law2_ScGeom_FrictPhys_CundallStrack::neverErase>=True`). The only combinations of laws supported are currently capillary law + :yref:`Law2_ScGeom_FrictPhys_CundallStrack` and capillary law + :yref:`Law2_ScGeom_MindlinPhys_Mindlin` (and the other variants of Hertz-Mindlin).\n\nSee CapillaryPhys-example.py for an example script.",
+	YADE_CLASS_BASE_DOC_ATTRS_DEPREC_INIT_CTOR_PY(Law2_ScGeom_CapillaryPhys_Capillarity,GlobalEngine,"This law allows one to take into account capillary forces/effects between spheres coming from the presence of interparticular liquid bridges (menisci).\n\nThe control parameter is the capillary pressure (or suction) Uc = ugas - Uliquid. Liquid bridges properties (volume V, extent over interacting grains delta1 and delta2) are computed as a result of the defined capillary pressure and of the interacting geometry (spheres radii and interparticular distance).\n\nReferences: in english [Scholtes2009b]_; more detailed, but in french [Scholtes2009d]_.\n\nThe law needs ascii files M(r=i) with i=R1/R2 to work (see https://yade-dem.org/wiki/CapillaryTriaxialTest). These ASCII files contain a set of results from the resolution of the Laplace-Young equation for different configurations of the interacting geometry.\n\nIn order to allow capillary forces between distant spheres, it is necessary to enlarge the bounding boxes using :yref:`Bo1_Sphere_Aabb::aabbEnlargeFactor` and make the Ig2 define define distant interactions via :yref:`interactionDetectionFactor<Ig2_Sphere_Sphere_ScGeom::interactionDetectionFactor>`. It is also necessary to disable interactions removal by the constitutive law (:yref:`Law2<Law2_ScGeom_FrictPhys_CundallStrack::neverErase>=True`). The only combinations of laws supported are currently capillary law + :yref:`Law2_ScGeom_FrictPhys_CundallStrack` and capillary law + :yref:`Law2_ScGeom_MindlinPhys_Mindlin` (and the other variants of Hertz-Mindlin).\n\nSee CapillaryPhys-example.py for an example script.",
 	((Real,capillaryPressure,0.,,"Value of the capillary pressure Uc defines as Uc=Ugas-Uliquid"))
 	((bool,fusionDetection,false,,"If true potential menisci overlaps are checked"))
 	((bool,binaryFusion,true,,"If true, capillary forces are set to zero as soon as, at least, 1 overlap (menisci fusion) is detected"))

=== modified file 'pkg/pfv/FlowEngine.hpp'
--- pkg/pfv/FlowEngine.hpp	2014-04-07 09:33:19 +0000
+++ pkg/pfv/FlowEngine.hpp	2014-04-15 16:39:53 +0000
@@ -385,11 +385,11 @@
 	FlowCellInfo (void)
 	{
 		modulePermeability.resize(4, 0);
-		cellForce.resize(4);
-		facetSurfaces.resize(4);
-		facetFluidSurfacesRatio.resize(4);
-		facetSphereCrossSections.resize(4);
-		unitForceVectors.resize(4);
+		cellForce.resize(4,CGAL::NULL_VECTOR);
+		facetSurfaces.resize(4,CGAL::NULL_VECTOR);
+		facetFluidSurfacesRatio.resize(4,0);
+		facetSphereCrossSections.resize(4,CGAL::NULL_VECTOR);
+		unitForceVectors.resize(4,CGAL::NULL_VECTOR);
 		for (int k=0; k<4;k++) for (int l=0; l<3;l++) solidSurfaces[k][l]=0;
 		rayHydr.resize(4, 0);
 		invSumK=index=volumeSign=s=volumeVariation=pression=invVoidV=fict=0;

=== modified file 'py/_utils.cpp'
--- py/_utils.cpp	2014-04-02 15:33:41 +0000
+++ py/_utils.cpp	2014-04-15 16:49:07 +0000
@@ -536,7 +536,7 @@
 	//py::def("stressTensorOfPeriodicCell",Shop__stressTensorOfPeriodicCell,(py::args("smallStrains")=false),"Compute overall (macroscopic) stress of periodic cell using equation published in [Kuhl2001]_:\n\n.. math:: \\vec{\\sigma}=\\frac{1}{V}\\sum_cl^c[\\vec{N}^cf_N^c+\\vec{T}^{cT}\\cdot\\vec{f}^c_T],\n\nwhere $V$ is volume of the cell, $l^c$ length of interaction $c$, $f^c_N$ normal force and $\\vec{f}^c_T$ shear force. Sumed are values over all interactions $c$. $\\vec{N}^c$ and $\\vec{T}^{cT}$ are projection tensors (see the original publication for more details):\n\n.. math:: \\vec{N}=\\vec{n}\\otimes\\vec{n}\\rightarrow N_{ij}=n_in_j\n\n.. math:: \\vec{T}^T=\\vec{I}_{sym}\\cdot\\vec{n}-\\vec{n}\\otimes\\vec{n}\\otimes\\vec{n}\\rightarrow T^T_{ijk}=\\frac{1}{2}(\\delta_{ik}\\delta_{jl}+\\delta_{il}\\delta_{jk})n_l-n_in_jn_k\n\n.. math:: \\vec{T}^T\\cdot\\vec{f}_T\\equiv T^T_{ijk}f_k=(\\delta_{ik}n_j/2+\\delta_{jk}n_i/2-n_in_jn_k)f_k=n_jf_i/2+n_if_j/2-n_in_jn_kf_k,\n\nwhere $n$ is unit vector oriented along the interaction (:yref:`normal<GenericSpheresContact::normal>`) and $\\delta$ is Kronecker's delta. As $\\vec{n}$ and $\\vec{f}_T$ are perpendicular (therfore $n_if_i=0$) we can write\n\n.. math:: \\sigma_{ij}=\\frac{1}{V}\\sum l[n_in_jf_N+n_jf^T_i/2+n_if^T_j/2]\n\n:param bool smallStrains: if false (large strains), real values of volume and interaction lengths are computed. If true, only :yref:`refLength<Dem3DofGeom::refLength>` of interactions and initial volume are computed (can save some time).\n\n:return: macroscopic stress tensor as Matrix3");
 	py::def("normalShearStressTensors",Shop__normalShearStressTensors,(py::args("compressionPositive")=false,py::args("splitNormalTensor")=false,py::args("thresholdForce")=NaN),"Compute overall stress tensor of the periodic cell decomposed in 2 parts, one contributed by normal forces, the other by shear forces. The formulation can be found in [Thornton2000]_, eq. (3):\n\n.. math:: \\tens{\\sigma}_{ij}=\\frac{2}{V}\\sum R N \\vec{n}_i \\vec{n}_j+\\frac{2}{V}\\sum R T \\vec{n}_i\\vec{t}_j\n\nwhere $V$ is the cell volume, $R$ is \"contact radius\" (in our implementation, current distance between particle centroids), $\\vec{n}$ is the normal vector, $\\vec{t}$ is a vector perpendicular to $\\vec{n}$, $N$ and $T$ are norms of normal and shear forces.\n\n:param bool splitNormalTensor: if true the function returns normal stress tensor split into two parts according to the two subnetworks of strong an weak forces.\n\n:param Real thresholdForce: threshold value according to which the normal stress tensor can be split (e.g. a zero value would make distinction between tensile and compressive forces).");
 	py::def("fabricTensor",Shop__fabricTensor,(py::args("splitTensor")=false,py::args("revertSign")=false,py::args("thresholdForce")=NaN),"Compute the fabric tensor of the periodic cell. The original paper can be found in [Satake1982]_.\n\n:param bool splitTensor: split the fabric tensor into two parts related to the strong and weak contact forces respectively.\n\n:param bool revertSign: it must be set to true if the contact law's convention takes compressive forces as positive.\n\n:param Real thresholdForce: if the fabric tensor is split into two parts, a threshold value can be specified otherwise the mean contact force is considered by default. It is worth to note that this value has a sign and the user needs to set it according to the convention adopted for the contact law. To note that this value could be set to zero if one wanted to make distinction between compressive and tensile forces.");
-	py::def("bodyStressTensors",Shop__getStressLWForEachBody,"Compute and return a table with per-particle stress tensors. Each tensor represents the average stress in one particle, obtained from the contour integral of applied load as detailed below. This definition is considering each sphere as a continuum. It can be considered exact in the context of spheres at static equilibrium, interacting at contact points with negligible volume changes of the solid phase (this last assumption is not restricting possible deformations and volume changes at the packing scale).\n\nProof:\n\nFirst, we remark the identity:  $\\sigma_{ij}=\\delta_{ik}\\sigma_{kj}=x_{i,k}\\sigma_{kj}=(x_{i}\\sigma_{kj})_{,k}-x_{i}\\sigma_{kj,k}$.\n\nAt equilibrium, the divergence of stress is null: $\\sigma_{kj,k}=\\vec{0}$. Consequently, after divergence theorem: $\\frac{1}{V}\\int_V \\sigma_{ij}dV = \\frac{1}{V}\\int_V (x_{i}\\sigma_{kj})_{,k}dV = \\frac{1}{V}\\int_{\\partial V}x_i\\sigma_{kj}n_kdS = \\frac{1}{V}\\sum_bx_i^bf_j^b$.\n\nThe last equality is implicitely based on the representation of external loads as Dirac distributions whose zeros are the so-called *contact points*: 0-sized surfaces on which the *contact forces* are applied, located at $x_i$ in the deformed configuration.\n\nA weighted average of per-body stresses will give the average stress inside the solid phase. There is a simple relation between the stress inside the solid phase and the stress in an equivalent continuum in the absence of fluid pressure. For porosity $n$, the relation reads: $\\sigma_{ij}^{equ.}=(1-n)\\sigma_{ij}^{solid}$.");
+	py::def("bodyStressTensors",Shop__getStressLWForEachBody,"Compute and return a table with per-particle stress tensors. Each tensor represents the average stress in one particle, obtained from the contour integral of applied load as detailed below. This definition is considering each sphere as a continuum. It can be considered exact in the context of spheres at static equilibrium, interacting at contact points with negligible volume changes of the solid phase (this last assumption is not restricting possible deformations and volume changes at the packing scale).\n\nProof: \n\nFirst, we remark the identity:  $\\sigma_{ij}=\\delta_{ik}\\sigma_{kj}=x_{i,k}\\sigma_{kj}=(x_{i}\\sigma_{kj})_{,k}-x_{i}\\sigma_{kj,k}$.\n\nAt equilibrium, the divergence of stress is null: $\\sigma_{kj,k}=\\vec{0}$. Consequently, after divergence theorem: $\\frac{1}{V}\\int_V \\sigma_{ij}dV = \\frac{1}{V}\\int_V (x_{i}\\sigma_{kj})_{,k}dV = \\frac{1}{V}\\int_{\\partial V}x_i\\sigma_{kj}n_kdS = \\frac{1}{V}\\sum_bx_i^bf_j^b$.\n\nThe last equality is implicitely based on the representation of external loads as Dirac distributions whose zeros are the so-called *contact points*: 0-sized surfaces on which the *contact forces* are applied, located at $x_i$ in the deformed configuration.\n\nA weighted average of per-body stresses will give the average stress inside the solid phase. There is a simple relation between the stress inside the solid phase and the stress in an equivalent continuum in the absence of fluid pressure. For porosity $n$, the relation reads: $\\sigma_{ij}^{equ.}=(1-n)\\sigma_{ij}^{solid}$.\n\nThis last relation may not be very useful if porosity is not homogeneous. If it happens, one can define the equivalent bulk stress a the particles scale by assigning a volume to each particle. This volume can be obtained from :yref:`TesselationWrapper` (see e.g. [Catalano2014a]_)");
 	py::def("getStress",Shop::getStress,(py::args("volume")=0),"Compute and return Love-Weber stress tensor:\n\n $\\sigma_{ij}=\\frac{1}{V}\\sum_b f_i^b l_j^b$, where the sum is over all interactions, with $f$ the contact force and $l$ the branch vector (joining centers of the bodies). Stress is negativ for repulsive contact forces, i.e. compression. $V$ can be passed to the function. If it is not, it will be equal to one in non-periodic cases, or equal to the volume of the cell in periodic cases.");
 	py::def("getCapillaryStress",Shop::getCapillaryStress,(py::args("volume")=0),"Compute and return Love-Weber capillary stress tensor:\n\n $\\sigma^{cap}_{ij}=\\frac{1}{V}\\sum_b l_i^b f^{cap,b}_j$, where the sum is over all interactions, with $l$ the branch vector (joining centers of the bodies) and $f^{cap}$ is the capillary force. $V$ can be passed to the function. If it is not, it will be equal to one in non-periodic cases, or equal to the volume of the cell in periodic cases. Only the CapillaryPhys interaction type is supported presently.");
 	py::def("getBodyIdsContacts",Shop__getBodyIdsContacts,(py::args("bodyID")=0),"Get a list of body-ids, which contacts the given body.");

=== modified file 'py/wrapper/yadeWrapper.cpp'
--- py/wrapper/yadeWrapper.cpp	2014-03-31 16:18:51 +0000
+++ py/wrapper/yadeWrapper.cpp	2014-04-15 14:45:52 +0000
@@ -315,8 +315,8 @@
 			}
 			
 			//adapt position- and radii-informations and replace spheres from bpListTmp by clumps:
+			#ifdef YADE_OPENMP
 			omp_lock_t locker;
-			#ifdef YADE_OPENMP
 			omp_init_lock(&locker);//since bodies are created and deleted in following sections, it is neccessary to lock critical parts of the code (avoid seg fault)
 			#pragma omp parallel for schedule(dynamic) shared(locker)
 			for(int i=0; i<numReplaceTmp; i++) {
@@ -370,7 +370,9 @@
 					idsTmp[jj] = newSphere->id;
 				}
 				//cout << "thread " << omp_get_thread_num() << " unsets locker" << endl;
+			  #ifdef YADE_OPENMP
 				omp_unset_lock(&locker);//end of critical section
+        #endif
 				Body::id_t newClumpId = clump(idsTmp, discretization);
 				ret.append(python::make_tuple(newClumpId,idsTmp));
 				erase(b->id);