dhis2-devs team mailing list archive
-
dhis2-devs team
-
Mailing list archive
-
Message #20462
[Branch ~dhis2-devs-core/dhis2/trunk] Rev 9343: Removed some logic. Seems that splitting on columns with typically low cardinality like period de...
------------------------------------------------------------
revno: 9343
committer: Lars Helge Øverland <larshelge@xxxxxxxxx>
branch nick: dhis2
timestamp: Tue 2012-12-18 01:49:03 +0100
message:
Removed some logic. Seems that splitting on columns with typically low cardinality like period decreases performance.
modified:
dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/DataQueryParams.java
dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/QueryPlanner.java
dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/data/DefaultAnalyticsService.java
dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/data/DefaultQueryPlanner.java
dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/data/JdbcAnalyticsManager.java
dhis-2/dhis-services/dhis-service-analytics/src/test/java/org/hisp/dhis/analytics/data/QueryPlannerTest.java
dhis-2/dhis-support/dhis-support-system/src/main/java/org/hisp/dhis/system/util/MathUtils.java
--
lp:dhis2
https://code.launchpad.net/~dhis2-devs-core/dhis2/trunk
Your team DHIS 2 developers is subscribed to branch lp:dhis2.
To unsubscribe from this branch go to https://code.launchpad.net/~dhis2-devs-core/dhis2/trunk/+edit-subscription
=== modified file 'dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/DataQueryParams.java'
--- dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/DataQueryParams.java 2012-12-17 16:58:29 +0000
+++ dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/DataQueryParams.java 2012-12-18 00:49:03 +0000
@@ -31,8 +31,6 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import java.util.SortedMap;
-import java.util.TreeMap;
import org.hisp.dhis.common.Dxf2Namespace;
@@ -108,29 +106,6 @@
// Logic
// -------------------------------------------------------------------------
- /**
- * Prioritizing to split on dimensions with high cardinality, which is typically
- * organisation unit and data element in that order.
- */
- public SortedMap<String, List<String>> getDimensionValuesMap()
- {
- SortedMap<String, List<String>> map = new TreeMap<String, List<String>>();
-
- map.put( ORGUNIT_DIM_ID, organisationUnits );
- map.put( DATAELEMENT_DIM_ID, dataElements );
- map.put( PERIOD_DIM_ID, periods );
-
- if ( dimensions != null )
- {
- for ( String dimension : dimensions.keySet() )
- {
- map.put( dimension, dimensions.get( dimension ) );
- }
- }
-
- return map;
- }
-
public List<String> getDimensionNames()
{
List<String> list = new ArrayList<String>();
@@ -191,25 +166,6 @@
throw new IllegalArgumentException( dimension );
}
- public String getLargestDimension()
- {
- Map<String, List<String>> map = getDimensionValuesMap();
-
- String dimension = map.keySet().iterator().next();
- int size = map.get( dimension ).size();
-
- for ( String dim : map.keySet() )
- {
- if ( map.get( dim ).size() > size )
- {
- dimension = dim;
- size = map.get( dim ).size();
- }
- }
-
- return dimension;
- }
-
@Override
public int hashCode()
{
=== modified file 'dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/QueryPlanner.java'
--- dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/QueryPlanner.java 2012-12-17 16:58:29 +0000
+++ dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/QueryPlanner.java 2012-12-18 00:49:03 +0000
@@ -35,24 +35,17 @@
* Creates a list of DataQueryParams. It is mandatory to group the queries by
* the following criteria: 1) partition / year 2) period type 3) organisation
* unit level. If the number of queries produced by this grouping is equal or
- * larger than the number of optimal queries, those queries are returned.
+ * larger than the number of optimal queries, those queries are returned. Next
+ * splits on organisation unit dimension, and returns if optimal queries are
+ * satisfied. Next splits on data element dimension, and returns if optimal
+ * queries are satisfied.
+ *
+ * Does not attempt to split on period or organisation unit group set dimensions,
+ * as splitting on columns with low cardinality typically decreases performance.
*
* @param params the data query params.
* @param optimalQueries the number of optimal queries for the planner to return.
- * @return
+ * @return list of data query params.
*/
List<DataQueryParams> planQuery( DataQueryParams params, int optimalQueries );
-
- /**
- * Gets the data dimension must suitable as partition key. Will first check
- * if any of the dimensions have enough values to satisfy a optimal number of
- * queries, and return that dimension if so. If not returns the dimension
- * with the highest number of values. The order of the fixed dimensions are
- * data element, organisation unit, period.
- *
- * @param params the data query parameters.
- * @param optimalQueries the optimal number of queries to create.
- */
- String getPartitionDimension( DataQueryParams params, int optimalQueries );
-
}
=== modified file 'dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/data/DefaultAnalyticsService.java'
--- dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/data/DefaultAnalyticsService.java 2012-12-18 00:03:12 +0000
+++ dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/data/DefaultAnalyticsService.java 2012-12-18 00:49:03 +0000
@@ -40,6 +40,8 @@
import org.hisp.dhis.common.Grid;
import org.hisp.dhis.common.GridHeader;
import org.hisp.dhis.system.grid.ListGrid;
+import org.hisp.dhis.system.util.MathUtils;
+import org.hisp.dhis.system.util.SystemUtils;
import org.hisp.dhis.system.util.Timer;
import org.springframework.beans.factory.annotation.Autowired;
@@ -90,9 +92,11 @@
{
Timer t = new Timer().start();
- List<DataQueryParams> queries = queryPlanner.planQuery( params, 4 );
-
- t.getTime( "Planned query" );
+ int optimalQueries = MathUtils.getWithin( SystemUtils.getCpuCores(), 1, 16 );
+
+ List<DataQueryParams> queries = queryPlanner.planQuery( params, optimalQueries );
+
+ t.getTime( "Planned query for optimal: " + optimalQueries + ", got: " + queries.size() );
List<Future<Map<String, Double>>> futures = new ArrayList<Future<Map<String, Double>>>();
=== modified file 'dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/data/DefaultQueryPlanner.java'
--- dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/data/DefaultQueryPlanner.java 2012-12-18 00:03:12 +0000
+++ dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/data/DefaultQueryPlanner.java 2012-12-18 00:49:03 +0000
@@ -30,7 +30,6 @@
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
-import java.util.SortedMap;
import org.hisp.dhis.analytics.DataQueryParams;
import org.hisp.dhis.analytics.QueryPlanner;
@@ -102,35 +101,9 @@
// Group by data element
// ---------------------------------------------------------------------
- queries = splitByDimension( queries, DataQueryParams.DATAELEMENT_DIM_ID, optimalQueries );
-
- if ( queries.size() >= optimalQueries )
- {
- return queries;
- }
-
- // ---------------------------------------------------------------------
- // Group by dimensions
- // ---------------------------------------------------------------------
-
- return splitByBestDimension( queries, optimalQueries );
- }
-
- public String getPartitionDimension( DataQueryParams params, int optimalQueries )
- {
- SortedMap<String, List<String>> map = params.getDimensionValuesMap();
-
- for ( String dimension : map.keySet() )
- {
- if ( map.get( dimension ).size() >= optimalQueries )
- {
- return dimension;
- }
- }
-
- return params.getLargestDimension();
- }
-
+ return splitByDimension( queries, DataQueryParams.DATAELEMENT_DIM_ID, optimalQueries );
+ }
+
public boolean canQueryFromDataMart( DataQueryParams params )
{
return true;
@@ -166,36 +139,6 @@
return subQueries;
}
- /**
- * Splits the given list of queries in sub queries on the most favorable
- * dimension. This is determined by first checking if any dimensions will
- * satisfy the optimal number of queries, if not the dimension with most
- * options is selected.
- */
- private List<DataQueryParams> splitByBestDimension( List<DataQueryParams> queries, int optimalQueries )
- {
- int pageNo = MathUtils.divideToCeil( optimalQueries, queries.size() );
-
- List<DataQueryParams> subQueries = new ArrayList<DataQueryParams>();
-
- for ( DataQueryParams query : queries )
- {
- String dimension = getPartitionDimension( query, pageNo );
-
- List<String> values = query.getDimension( dimension );
-
- List<List<String>> valuePages = new PaginatedList<String>( values ).setNumberOfPages( pageNo ).getPages();
-
- for ( List<String> valuePage : valuePages )
- {
- DataQueryParams subQuery = new DataQueryParams( query );
- subQuery.setDimension( dimension, valuePage );
- subQueries.add( subQuery );
- }
- }
-
- return subQueries;
- }
/**
* Groups the given query into sub queries based on its periods and which
=== modified file 'dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/data/JdbcAnalyticsManager.java'
--- dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/data/JdbcAnalyticsManager.java 2012-12-18 00:03:12 +0000
+++ dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/data/JdbcAnalyticsManager.java 2012-12-18 00:49:03 +0000
@@ -131,7 +131,7 @@
for ( String dim : params.getDynamicDimensionNames() )
{
- sql += "AND " + dim + " IN ( " + getQuotedCommaDelimitedString( dimensionValues.get( dim ) ) + " ) ";
+ sql += "and " + dim + " in ( " + getQuotedCommaDelimitedString( dimensionValues.get( dim ) ) + " ) ";
}
return sql;
=== modified file 'dhis-2/dhis-services/dhis-service-analytics/src/test/java/org/hisp/dhis/analytics/data/QueryPlannerTest.java'
--- dhis-2/dhis-services/dhis-service-analytics/src/test/java/org/hisp/dhis/analytics/data/QueryPlannerTest.java 2012-12-18 00:03:12 +0000
+++ dhis-2/dhis-services/dhis-service-analytics/src/test/java/org/hisp/dhis/analytics/data/QueryPlannerTest.java 2012-12-18 00:49:03 +0000
@@ -27,9 +27,6 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-import static org.hisp.dhis.analytics.DataQueryParams.DATAELEMENT_DIM_ID;
-import static org.hisp.dhis.analytics.DataQueryParams.ORGUNIT_DIM_ID;
-import static org.hisp.dhis.analytics.DataQueryParams.PERIOD_DIM_ID;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
@@ -86,21 +83,6 @@
// -------------------------------------------------------------------------
// Tests
// -------------------------------------------------------------------------
-
- @Test
- public void getPartitionDimension()
- {
- DataQueryParams params = new DataQueryParams();
- params.setDataElements( Arrays.asList( "a", "b", "c", "d" ) );
- params.setOrganisationUnits( Arrays.asList( "a", "b", "c", "d", "e" ) );
- params.setPeriods( Arrays.asList( "2000Q1", "2000Q2", "2000Q3", "2000Q4", "2001Q1", "2001Q2" ) );
-
- assertEquals( DATAELEMENT_DIM_ID, queryPlanner.getPartitionDimension( params, 3 ) );
- assertEquals( DATAELEMENT_DIM_ID, queryPlanner.getPartitionDimension( params, 4 ) );
- assertEquals( ORGUNIT_DIM_ID, queryPlanner.getPartitionDimension( params, 5 ) );
- assertEquals( PERIOD_DIM_ID, queryPlanner.getPartitionDimension( params, 6 ) );
- assertEquals( PERIOD_DIM_ID, queryPlanner.getPartitionDimension( params, 7 ) );
- }
/**
* Query spans 2 partitions. Splits in 2 queries for each partition, then
@@ -189,10 +171,9 @@
}
/**
- * Splits on best dimension. Ignores organisation units, splits on 3 data elements,
- * then splits in 2 queries on periods.
+ * Splits on 3 data elements, then splits in 2 queries on periods for a total
+ * of 6 queries.
*/
- @Test
public void planQueryD()
{
DataQueryParams params = new DataQueryParams();
=== modified file 'dhis-2/dhis-support/dhis-support-system/src/main/java/org/hisp/dhis/system/util/MathUtils.java'
--- dhis-2/dhis-support/dhis-support-system/src/main/java/org/hisp/dhis/system/util/MathUtils.java 2012-12-18 00:03:12 +0000
+++ dhis-2/dhis-support/dhis-support-system/src/main/java/org/hisp/dhis/system/util/MathUtils.java 2012-12-18 00:49:03 +0000
@@ -181,6 +181,22 @@
}
/**
+ * Returns the given value if between the min and max value. If lower than
+ * minimum, returns minimum, if higher than maximum, returns maximum.
+ *
+ * @param value the value.
+ * @param min the minimum value.
+ * @param max the maximum value.
+ * @return an integer value.
+ */
+ public static int getWithin( int value, int min, int max )
+ {
+ value = Math.max( value, min );
+ value = Math.min( value, max );
+ return value;
+ }
+
+ /**
* Returns true if the provided String argument can be converted to a Double,
* false if not.
*