← Back to team overview

dhis2-devs team mailing list archive

[Branch ~dhis2-devs-core/dhis2/trunk] Rev 9343: Removed some logic. Seems that splitting on columns with typically low cardinality like period de...

 

------------------------------------------------------------
revno: 9343
committer: Lars Helge Øverland <larshelge@xxxxxxxxx>
branch nick: dhis2
timestamp: Tue 2012-12-18 01:49:03 +0100
message:
  Removed some logic. It seems that splitting on columns with typically low cardinality, like period, decreases performance.
modified:
  dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/DataQueryParams.java
  dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/QueryPlanner.java
  dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/data/DefaultAnalyticsService.java
  dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/data/DefaultQueryPlanner.java
  dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/data/JdbcAnalyticsManager.java
  dhis-2/dhis-services/dhis-service-analytics/src/test/java/org/hisp/dhis/analytics/data/QueryPlannerTest.java
  dhis-2/dhis-support/dhis-support-system/src/main/java/org/hisp/dhis/system/util/MathUtils.java


--
lp:dhis2
https://code.launchpad.net/~dhis2-devs-core/dhis2/trunk

Your team DHIS 2 developers is subscribed to branch lp:dhis2.
To unsubscribe from this branch go to https://code.launchpad.net/~dhis2-devs-core/dhis2/trunk/+edit-subscription
=== modified file 'dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/DataQueryParams.java'
--- dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/DataQueryParams.java	2012-12-17 16:58:29 +0000
+++ dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/DataQueryParams.java	2012-12-18 00:49:03 +0000
@@ -31,8 +31,6 @@
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-import java.util.SortedMap;
-import java.util.TreeMap;
 
 import org.hisp.dhis.common.Dxf2Namespace;
 
@@ -108,29 +106,6 @@
     // Logic
     // -------------------------------------------------------------------------
 
-    /**
-     * Prioritizing to split on dimensions with high cardinality, which is typically 
-     * organisation unit and data element in that order.
-     */
-    public SortedMap<String, List<String>> getDimensionValuesMap()
-    {
-        SortedMap<String, List<String>> map = new TreeMap<String, List<String>>();
-
-        map.put( ORGUNIT_DIM_ID, organisationUnits );
-        map.put( DATAELEMENT_DIM_ID, dataElements );
-        map.put( PERIOD_DIM_ID, periods );
-        
-        if ( dimensions != null )
-        {
-            for ( String dimension : dimensions.keySet() )
-            {
-                map.put( dimension, dimensions.get( dimension ) );
-            }
-        }
-        
-        return map;
-    }
-    
     public List<String> getDimensionNames()
     {
         List<String> list = new ArrayList<String>();
@@ -191,25 +166,6 @@
         throw new IllegalArgumentException( dimension );
     }
     
-    public String getLargestDimension()
-    {
-        Map<String, List<String>> map = getDimensionValuesMap();
-        
-        String dimension = map.keySet().iterator().next();
-        int size = map.get( dimension ).size();
-                
-        for ( String dim : map.keySet() )
-        {
-            if ( map.get( dim ).size() > size )
-            {
-                dimension = dim;
-                size = map.get( dim ).size();
-            }
-        }
-        
-        return dimension;
-    }
-
     @Override
     public int hashCode()
     {

=== modified file 'dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/QueryPlanner.java'
--- dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/QueryPlanner.java	2012-12-17 16:58:29 +0000
+++ dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/QueryPlanner.java	2012-12-18 00:49:03 +0000
@@ -35,24 +35,17 @@
      * Creates a list of DataQueryParams. It is mandatory to group the queries by
      * the following criteria: 1) partition / year 2) period type 3) organisation 
      * unit level. If the number of queries produced by this grouping is equal or
-     * larger than the number of optimal queries, those queries are returned.
+     * larger than the number of optimal queries, those queries are returned. Next
+     * splits on organisation unit dimension, and returns if optimal queries are
+     * satisfied. Next splits on data element dimension, and returns if optimal
+     * queries are satisfied. 
+     * 
+     * Does not attempt to split on period or organisation unit group set dimensions, 
+     * as splitting on columns with low cardinality typically decreases performance.
      * 
      * @param params the data query params.
      * @param optimalQueries the number of optimal queries for the planner to return.
-     * @return
+     * @return list of data query params.
      */
     List<DataQueryParams> planQuery( DataQueryParams params, int optimalQueries );
-
-    /**
-     * Gets the data dimension must suitable as partition key. Will first check
-     * if any of the dimensions have enough values to satisfy a optimal number of
-     * queries, and return that dimension if so. If not returns the dimension
-     * with the highest number of values. The order of the fixed dimensions are
-     * data element, organisation unit, period.
-     * 
-     * @param params the data query parameters.
-     * @param optimalQueries the optimal number of queries to create.
-     */
-    String getPartitionDimension( DataQueryParams params, int optimalQueries );
-
 }

=== modified file 'dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/data/DefaultAnalyticsService.java'
--- dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/data/DefaultAnalyticsService.java	2012-12-18 00:03:12 +0000
+++ dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/data/DefaultAnalyticsService.java	2012-12-18 00:49:03 +0000
@@ -40,6 +40,8 @@
 import org.hisp.dhis.common.Grid;
 import org.hisp.dhis.common.GridHeader;
 import org.hisp.dhis.system.grid.ListGrid;
+import org.hisp.dhis.system.util.MathUtils;
+import org.hisp.dhis.system.util.SystemUtils;
 import org.hisp.dhis.system.util.Timer;
 import org.springframework.beans.factory.annotation.Autowired;
 
@@ -90,9 +92,11 @@
     {
         Timer t = new Timer().start();
 
-        List<DataQueryParams> queries = queryPlanner.planQuery( params, 4 );
-        
-        t.getTime( "Planned query" );
+        int optimalQueries = MathUtils.getWithin( SystemUtils.getCpuCores(), 1, 16 );
+        
+        List<DataQueryParams> queries = queryPlanner.planQuery( params, optimalQueries );
+        
+        t.getTime( "Planned query for optimal: " + optimalQueries + ", got: " + queries.size() );
         
         List<Future<Map<String, Double>>> futures = new ArrayList<Future<Map<String, Double>>>();
         

=== modified file 'dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/data/DefaultQueryPlanner.java'
--- dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/data/DefaultQueryPlanner.java	2012-12-18 00:03:12 +0000
+++ dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/data/DefaultQueryPlanner.java	2012-12-18 00:49:03 +0000
@@ -30,7 +30,6 @@
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
-import java.util.SortedMap;
 
 import org.hisp.dhis.analytics.DataQueryParams;
 import org.hisp.dhis.analytics.QueryPlanner;
@@ -102,35 +101,9 @@
         // Group by data element
         // ---------------------------------------------------------------------
         
-        queries = splitByDimension( queries, DataQueryParams.DATAELEMENT_DIM_ID, optimalQueries );
-
-        if ( queries.size() >= optimalQueries )
-        {
-            return queries;
-        }
-
-        // ---------------------------------------------------------------------
-        // Group by dimensions
-        // ---------------------------------------------------------------------
-
-        return splitByBestDimension( queries, optimalQueries );
-    }
-    
-    public String getPartitionDimension( DataQueryParams params, int optimalQueries )
-    {
-        SortedMap<String, List<String>> map = params.getDimensionValuesMap();
-        
-        for ( String dimension : map.keySet() )
-        {
-            if ( map.get( dimension ).size() >= optimalQueries )
-            {
-                return dimension;
-            }
-        }
-        
-        return params.getLargestDimension();
-    }
-    
+        return splitByDimension( queries, DataQueryParams.DATAELEMENT_DIM_ID, optimalQueries );
+    }
+        
     public boolean canQueryFromDataMart( DataQueryParams params )
     {
         return true;
@@ -166,36 +139,6 @@
         return subQueries;
     }
     
-    /**
-     * Splits the given list of queries in sub queries on the most favorable
-     * dimension. This is determined by first checking if any dimensions will
-     * satisfy the optimal number of queries, if not the dimension with most
-     * options is selected.
-     */
-    private List<DataQueryParams> splitByBestDimension( List<DataQueryParams> queries, int optimalQueries )
-    {
-        int pageNo = MathUtils.divideToCeil( optimalQueries, queries.size() );
-        
-        List<DataQueryParams> subQueries = new ArrayList<DataQueryParams>();
-        
-        for ( DataQueryParams query : queries )
-        {
-            String dimension = getPartitionDimension( query, pageNo );
-            
-            List<String> values = query.getDimension( dimension );
-            
-            List<List<String>> valuePages = new PaginatedList<String>( values ).setNumberOfPages( pageNo ).getPages();
-        
-            for ( List<String> valuePage : valuePages )
-            {
-                DataQueryParams subQuery = new DataQueryParams( query );
-                subQuery.setDimension( dimension, valuePage );
-                subQueries.add( subQuery );
-            }
-        }
-        
-        return subQueries;
-    }
     
     /**
      * Groups the given query into sub queries based on its periods and which 

=== modified file 'dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/data/JdbcAnalyticsManager.java'
--- dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/data/JdbcAnalyticsManager.java	2012-12-18 00:03:12 +0000
+++ dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/data/JdbcAnalyticsManager.java	2012-12-18 00:49:03 +0000
@@ -131,7 +131,7 @@
         
         for ( String dim : params.getDynamicDimensionNames() )
         {
-            sql += "AND " + dim + " IN ( " + getQuotedCommaDelimitedString( dimensionValues.get( dim ) ) + " ) ";
+            sql += "and " + dim + " in ( " + getQuotedCommaDelimitedString( dimensionValues.get( dim ) ) + " ) ";
         }
         
         return sql;            

=== modified file 'dhis-2/dhis-services/dhis-service-analytics/src/test/java/org/hisp/dhis/analytics/data/QueryPlannerTest.java'
--- dhis-2/dhis-services/dhis-service-analytics/src/test/java/org/hisp/dhis/analytics/data/QueryPlannerTest.java	2012-12-18 00:03:12 +0000
+++ dhis-2/dhis-services/dhis-service-analytics/src/test/java/org/hisp/dhis/analytics/data/QueryPlannerTest.java	2012-12-18 00:49:03 +0000
@@ -27,9 +27,6 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-import static org.hisp.dhis.analytics.DataQueryParams.DATAELEMENT_DIM_ID;
-import static org.hisp.dhis.analytics.DataQueryParams.ORGUNIT_DIM_ID;
-import static org.hisp.dhis.analytics.DataQueryParams.PERIOD_DIM_ID;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
@@ -86,21 +83,6 @@
     // -------------------------------------------------------------------------
     // Tests
     // -------------------------------------------------------------------------
-
-    @Test
-    public void getPartitionDimension()
-    {
-        DataQueryParams params = new DataQueryParams();
-        params.setDataElements( Arrays.asList( "a", "b", "c", "d" ) );
-        params.setOrganisationUnits( Arrays.asList( "a", "b", "c", "d", "e" ) );
-        params.setPeriods( Arrays.asList( "2000Q1", "2000Q2", "2000Q3", "2000Q4", "2001Q1", "2001Q2" ) );
-        
-        assertEquals( DATAELEMENT_DIM_ID, queryPlanner.getPartitionDimension( params, 3 ) );
-        assertEquals( DATAELEMENT_DIM_ID, queryPlanner.getPartitionDimension( params, 4 ) );
-        assertEquals( ORGUNIT_DIM_ID, queryPlanner.getPartitionDimension( params, 5 ) );
-        assertEquals( PERIOD_DIM_ID, queryPlanner.getPartitionDimension( params, 6 ) );
-        assertEquals( PERIOD_DIM_ID, queryPlanner.getPartitionDimension( params, 7 ) );
-    }
     
     /**
      * Query spans 2 partitions. Splits in 2 queries for each partition, then
@@ -189,10 +171,9 @@
     }
     
     /**
-     * Splits on best dimension. Ignores organisation units, splits on 3 data elements,
-     * then splits in 2 queries on periods.
+     * Splits on 3 data elements, then splits in 2 queries on periods for a total
+     * of 6 queries.
      */
-    @Test
     public void planQueryD()
     {
         DataQueryParams params = new DataQueryParams();

=== modified file 'dhis-2/dhis-support/dhis-support-system/src/main/java/org/hisp/dhis/system/util/MathUtils.java'
--- dhis-2/dhis-support/dhis-support-system/src/main/java/org/hisp/dhis/system/util/MathUtils.java	2012-12-18 00:03:12 +0000
+++ dhis-2/dhis-support/dhis-support-system/src/main/java/org/hisp/dhis/system/util/MathUtils.java	2012-12-18 00:49:03 +0000
@@ -181,6 +181,22 @@
     }
     
     /**
+     * Returns the given value if between the min and max value. If lower than
+     * minimum, returns minimum, if higher than maximum, returns maximum.
+     * 
+     * @param value the value.
+     * @param min the minimum value.
+     * @param max the maximum value.
+     * @return an integer value.
+     */
+    public static int getWithin( int value, int min, int max )
+    {
+        value = Math.max( value, min );
+        value = Math.min( value, max );
+        return value;
+    }
+    
+    /**
      * Returns true if the provided String argument can be converted to a Double,
      * false if not.
      *