dhis2-devs team mailing list archive
-
dhis2-devs team
-
Mailing list archive
-
Message #38196
[Branch ~dhis2-devs-core/dhis2/trunk] Rev 19500: Analytics. Groups query by days in aggregation period for average aggregation queries. Fixes prob...
------------------------------------------------------------
revno: 19500
committer: Lars Helge Overland <larshelge@xxxxxxxxx>
branch nick: dhis2
timestamp: Tue 2015-06-23 14:28:45 +0200
message:
Analytics. Groups query by days in aggregation period for average aggregation queries. Fixes problem with wrong factor for monthly periods
modified:
dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/DataQueryGroups.java
dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/DataQueryParams.java
dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/data/DefaultQueryPlanner.java
dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/data/JdbcAnalyticsManager.java
dhis-2/dhis-services/dhis-service-analytics/src/test/java/org/hisp/dhis/analytics/data/QueryPlannerTest.java
--
lp:dhis2
https://code.launchpad.net/~dhis2-devs-core/dhis2/trunk
Your team DHIS 2 developers is subscribed to branch lp:dhis2.
To unsubscribe from this branch go to https://code.launchpad.net/~dhis2-devs-core/dhis2/trunk/+edit-subscription
=== modified file 'dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/DataQueryGroups.java'
--- dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/DataQueryGroups.java 2015-01-17 07:41:26 +0000
+++ dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/DataQueryGroups.java 2015-06-23 12:28:45 +0000
@@ -70,8 +70,8 @@
* in sequence due to the typical indicator query, where few data elements
* have the average aggregation operator and many have the sum. Performance
* will increase if optimal number of queries can be run in parallel for the
- * queries which take most time, which is in this case are the ones with
- * sum aggregation type.
+ * queries which take most time, which in this case are the ones with sum
+ * aggregation type.
*/
public List<List<DataQueryParams>> getSequentialQueries()
{
=== modified file 'dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/DataQueryParams.java'
--- dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/DataQueryParams.java 2015-06-19 13:27:23 +0000
+++ dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/DataQueryParams.java 2015-06-23 12:28:45 +0000
@@ -943,6 +943,22 @@
}
/**
+ * Returns the number of days in the first dimension period in this query.
+ * If no dimension periods exist, the frequency order of the period type of
+ * the query is returned. If no period type exists, -1 is returned.
+ * @return
+ */
+ public int getDaysInFirstPeriod()
+ {
+ List<NameableObject> periods = getPeriods();
+
+ Period period = !periods.isEmpty() ? (Period) periods.get( 0 ) : null;
+
+ return period != null ? period.getDaysInPeriod() : periodType != null ?
+ PeriodType.getPeriodTypeByName( periodType ).getFrequencyOrder() : -1;
+ }
+
+ /**
* Indicates whether this params defines an identifier scheme different from
* UID.
*/
=== modified file 'dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/data/DefaultQueryPlanner.java'
--- dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/data/DefaultQueryPlanner.java 2015-06-22 22:31:16 +0000
+++ dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/data/DefaultQueryPlanner.java 2015-06-23 12:28:45 +0000
@@ -70,6 +70,7 @@
import org.hisp.dhis.dataelement.DataElementGroup;
import org.hisp.dhis.organisationunit.OrganisationUnit;
import org.hisp.dhis.organisationunit.OrganisationUnitService;
+import org.hisp.dhis.period.Period;
import org.hisp.dhis.period.PeriodType;
import org.hisp.dhis.setting.SystemSettingManager;
import org.hisp.dhis.system.util.MathUtils;
@@ -265,17 +266,22 @@
List<DataQueryParams> groupedByAggregationType = groupByAggregationType( byDataType );
for ( DataQueryParams byAggregationType : groupedByAggregationType )
- {
- List<DataQueryParams> groupedByDataPeriodType = groupByDataPeriodType( byAggregationType );
+ {
+ List<DataQueryParams> groupedByDaysInPeriod = groupByDaysInPeriod( byAggregationType );
- for ( DataQueryParams byDataPeriodType : groupedByDataPeriodType )
+ for ( DataQueryParams byDaysInPeriod : groupedByDaysInPeriod )
{
- byDataPeriodType.setPartitions( byPartition.getPartitions() );
- byDataPeriodType.setPeriodType( byPeriodType.getPeriodType() );
- byDataPeriodType.setAggregationType( byAggregationType.getAggregationType() );
+ List<DataQueryParams> groupedByDataPeriodType = groupByDataPeriodType( byDaysInPeriod );
- queries.add( byDataPeriodType );
- }
+ for ( DataQueryParams byDataPeriodType : groupedByDataPeriodType )
+ {
+ byDataPeriodType.setPartitions( byPartition.getPartitions() );
+ byDataPeriodType.setPeriodType( byPeriodType.getPeriodType() );
+ byDataPeriodType.setAggregationType( byAggregationType.getAggregationType() );
+
+ queries.add( byDataPeriodType );
+ }
+ }
}
}
}
@@ -361,7 +367,7 @@
if ( subQueries.size() > queryGroups.getAllQueries().size() )
{
- log.debug( "Split on " + dimension + ": " + ( subQueries.size() / queryGroups.getAllQueries().size() ) );
+ log.debug( "Split on dimension " + dimension + ": " + ( subQueries.size() / queryGroups.getAllQueries().size() ) );
}
return new DataQueryGroups( subQueries );
@@ -636,6 +642,42 @@
}
/**
+ * Groups the given query into sub queries based on the number of days in the
+ * aggregation period. This only applies if the aggregation type is
+ * AVERAGE_SUM_INT and the query has at least one period as dimension option.
+ * This is necessary since the number of days in the aggregation period is
+ * part of the expression for aggregating the value.
+ */
+ private List<DataQueryParams> groupByDaysInPeriod( DataQueryParams params )
+ {
+ List<DataQueryParams> queries = new ArrayList<>();
+
+ if ( params.getPeriods().isEmpty() || !params.isAggregationType( AggregationType.AVERAGE_SUM_INT ) )
+ {
+ queries.add( params.instance() );
+ return queries;
+ }
+
+ ListMap<Integer, NameableObject> daysPeriodMap = getDaysPeriodMap( params.getPeriods() );
+
+ DimensionalObject periodDim = params.getDimension( PERIOD_DIM_ID );
+
+ for ( Integer days : daysPeriodMap.keySet() )
+ {
+ DataQueryParams query = params.instance();
+ query.setDimensionOptions( periodDim.getDimension(), periodDim.getDimensionType(), periodDim.getDimensionName(), daysPeriodMap.get( days ) );
+ queries.add( query );
+ }
+
+ if ( queries.size() > 1 )
+ {
+ log.debug( "Split on days in period: " + queries.size() );
+ }
+
+ return queries;
+ }
+
+ /**
* Groups the given query in sub queries based on the period type of its
* data elements. Sets the data period type on each query. This only applies
* if the aggregation type of the query involves disaggregation.
@@ -697,7 +739,7 @@
/**
* Creates a mapping between data type and data element for the given data elements.
*/
- private ListMap<DataType, NameableObject> getDataTypeDataElementMap( Collection<NameableObject> dataElements )
+ private ListMap<DataType, NameableObject> getDataTypeDataElementMap( Collection<NameableObject> dataElements )
{
ListMap<DataType, NameableObject> map = new ListMap<>();
@@ -732,6 +774,26 @@
return map;
}
+
+ /**
+ * Creates a mapping between the number of days in the period interval and period
+ * for the given periods.
+ */
+ private ListMap<Integer, NameableObject> getDaysPeriodMap( Collection<NameableObject> periods )
+ {
+ ListMap<Integer, NameableObject> map = new ListMap<>();
+
+ for ( NameableObject period : periods )
+ {
+ Period pe = (Period) period;
+
+ int days = pe.getDaysInPeriod();
+
+ map.putValue( days, pe );
+ }
+
+ return map;
+ }
/**
* Puts the given element into the map according to the value type, aggregation
=== modified file 'dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/data/JdbcAnalyticsManager.java'
--- dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/data/JdbcAnalyticsManager.java 2015-06-15 13:44:20 +0000
+++ dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/data/JdbcAnalyticsManager.java 2015-06-23 12:28:45 +0000
@@ -230,9 +230,7 @@
if ( params.isAggregationType( AVERAGE_SUM_INT ) )
{
- int days = PeriodType.getPeriodTypeByName( params.getPeriodType() ).getFrequencyOrder();
-
- sql += "sum(daysxvalue) / " + days;
+ sql += "sum(daysxvalue) / " + params.getDaysInFirstPeriod();
}
else if ( params.isAggregationType( AVERAGE_INT ) || params.isAggregationType( AVERAGE_INT_DISAGGREGATION ) )
{
=== modified file 'dhis-2/dhis-services/dhis-service-analytics/src/test/java/org/hisp/dhis/analytics/data/QueryPlannerTest.java'
--- dhis-2/dhis-services/dhis-service-analytics/src/test/java/org/hisp/dhis/analytics/data/QueryPlannerTest.java 2015-04-10 11:18:20 +0000
+++ dhis-2/dhis-services/dhis-service-analytics/src/test/java/org/hisp/dhis/analytics/data/QueryPlannerTest.java 2015-06-23 12:28:45 +0000
@@ -118,6 +118,8 @@
private DataElement deD;
private DataElement deE;
private DataElement deF;
+ private DataElement deG;
+ private DataElement deH;
private DataSet dsA;
private DataSet dsB;
@@ -153,12 +155,17 @@
deD = createDataElement( 'D', VALUE_TYPE_INT, AGGREGATION_OPERATOR_AVERAGE_SUM );
deE = createDataElement( 'E', VALUE_TYPE_STRING, AGGREGATION_OPERATOR_NONE );
deF = createDataElement( 'F', VALUE_TYPE_STRING, AGGREGATION_OPERATOR_NONE );
+ deG = createDataElement( 'G', VALUE_TYPE_INT, AGGREGATION_OPERATOR_SUM );
+ deH = createDataElement( 'H', VALUE_TYPE_INT, AGGREGATION_OPERATOR_SUM );
dataElementService.addDataElement( deA );
dataElementService.addDataElement( deB );
dataElementService.addDataElement( deC );
dataElementService.addDataElement( deD );
dataElementService.addDataElement( deE );
+ dataElementService.addDataElement( deF );
+ dataElementService.addDataElement( deG );
+ dataElementService.addDataElement( deH );
dsA = createDataSet( 'A', pt );
dsB = createDataSet( 'B', pt );
@@ -496,7 +503,7 @@
DataQueryParams params = new DataQueryParams();
params.setDataElements( getList( deA, deB, deC, deD ) );
params.setOrganisationUnits( getList( ouA, ouB, ouC, ouD, ouE ) );
- params.setPeriods( getList( createPeriod( "2000Q1" ), createPeriod( "2000Q2" ), createPeriod( "2000Q3" ), createPeriod( "2000Q4" ), createPeriod( "2001Q1" ), createPeriod( "2001Q2" ) ) );
+ params.setPeriods( getList( createPeriod( "200101" ), createPeriod( "200103" ), createPeriod( "200105" ), createPeriod( "200107" ), createPeriod( "2002Q3" ), createPeriod( "2002Q4" ) ) );
DataQueryGroups queryGroups = queryPlanner.planQuery( params, 4, ANALYTICS_TABLE_NAME );
@@ -581,7 +588,8 @@
}
/**
- * Splits on 3 data elements.
+ * Query spans 1 partition. Splits on 2 aggregation types, then splits one
+ * query on 3 days in period to satisfy optimal for a total of 4 queries.
*/
@Test
public void planQueryD()
@@ -594,9 +602,9 @@
DataQueryGroups queryGroups = queryPlanner.planQuery( params, 4, ANALYTICS_TABLE_NAME );
- assertEquals( 3, queryGroups.getAllQueries().size() );
+ assertEquals( 4, queryGroups.getAllQueries().size() );
assertEquals( 2, queryGroups.getSequentialQueries().size() );
- assertEquals( 2, queryGroups.getLargestGroupSize() );
+ assertEquals( 3, queryGroups.getLargestGroupSize() );
for ( DataQueryParams query : queryGroups.getAllQueries() )
{
@@ -607,7 +615,9 @@
}
/**
- * Splits on 3 data elements. No organisation units specified.
+ * Query spans 1 partition. Splits on 2 aggregation types, then splits one
+ * query on 3 days in period to satisfy optimal for a total of 4 queries. No
+ * organisation units specified.
*/
@Test
public void planQueryE()
@@ -619,9 +629,9 @@
DataQueryGroups queryGroups = queryPlanner.planQuery( params, 4, ANALYTICS_TABLE_NAME );
- assertEquals( 3, queryGroups.getAllQueries().size() );
+ assertEquals( 4, queryGroups.getAllQueries().size() );
assertEquals( 2, queryGroups.getSequentialQueries().size() );
- assertEquals( 2, queryGroups.getLargestGroupSize() );
+ assertEquals( 3, queryGroups.getLargestGroupSize() );
for ( DataQueryParams query : queryGroups.getAllQueries() )
{
@@ -632,7 +642,7 @@
}
/**
- * Splits on 3 queries on organisation units for an optimal of 4 queries. No
+ * Splits on 3 queries on organisation units for an optimal of 3 queries. No
* data elements specified.
*/
@Test
@@ -671,9 +681,9 @@
}
/**
- * Query filters span 2 partitions. Splits in 4 queries on data elements to
- * satisfy optimal for a total of 8 queries, because query has 2 different
- * aggregation types.
+ * Query filters span 2 partitions. Splits in 2 queries on data elements,
+ * then 2 queries on organisation units to satisfy optimal for a total of 8
+ * queries.
*/
@Test
public void planQueryH()
@@ -699,14 +709,15 @@
/**
* Query spans 3 period types. Splits in 3 queries for each period type, then
- * splits in 4 queries on data elements units to satisfy optimal for a total
- * of 12 queries, because query has 2 different aggregation types.
+ * splits in 2 queries on data type, then splits in 2 queries on data elements
+ * to satisfy optimal for a total of 12 queries, because query has 2 different
+ * aggregation types.
*/
@Test
public void planQueryI()
{
DataQueryParams params = new DataQueryParams();
- params.setDataElements( getList( deA, deB, deC, deD ) );
+ params.setDataElements( getList( deA, deB, deE, deF ) );
params.setOrganisationUnits( getList( ouA, ouB, ouC, ouD, ouE ) );
params.setPeriods( getList( createPeriod( "2000Q1" ), createPeriod( "2000Q2" ), createPeriod( "2000" ), createPeriod( "200002" ), createPeriod( "200003" ), createPeriod( "200004" ) ) );
@@ -763,9 +774,9 @@
}
/**
- * Splits in 2 queries for each aggregation type, then 2 queries for each
- * data type, then 2 queries for each organisation unit to satisfy optimal
- * for a total of 4 queries across 2 sequential queries.
+ * Splits in 2 queries for each data type, then 2 queries for each
+ * data element, then 2 queries for each organisation unit to satisfy optimal
+ * for a total of 8 queries with 4 queries across 2 sequential queries.
*/
@Test
public void planQueryL()
@@ -788,6 +799,32 @@
}
}
+ /**
+ * Query spans 1 partition. Splits on 2 queries for data types, then splits
+ * on 2 queries for data elements to satisfy optimal for a total of 4 queries.
+ */
+ @Test
+ public void planQueryM()
+ {
+ DataQueryParams params = new DataQueryParams();
+ params.setDataElements( getList( deA, deB, deG, deH ) );
+ params.setOrganisationUnits( getList( ouA ) );
+ params.setPeriods( getList( createPeriod( "200101" ), createPeriod( "200103" ) ) );
+
+ DataQueryGroups queryGroups = queryPlanner.planQuery( params, 4, ANALYTICS_TABLE_NAME );
+
+ assertEquals( 4, queryGroups.getAllQueries().size() );
+ assertEquals( 1, queryGroups.getSequentialQueries().size() );
+ assertEquals( 4, queryGroups.getLargestGroupSize() );
+
+ for ( DataQueryParams query : queryGroups.getAllQueries() )
+ {
+ assertTrue( samePeriodType( query.getPeriods() ) );
+ assertTrue( samePartition( query.getPeriods() ) );
+ assertDimensionNameNotNull( query );
+ }
+ }
+
// -------------------------------------------------------------------------
// Supportive methods
// -------------------------------------------------------------------------