← Back to team overview

dhis2-devs team mailing list archive

[Branch ~dhis2-devs-core/dhis2/trunk] Rev 19500: Analytics. Groups query by days in aggregation period for average aggregation queries. Fixes prob...

 

------------------------------------------------------------
revno: 19500
committer: Lars Helge Overland <larshelge@xxxxxxxxx>
branch nick: dhis2
timestamp: Tue 2015-06-23 14:28:45 +0200
message:
  Analytics. Groups query by days in aggregation period for average aggregation queries. Fixes problem with wrong factor for monthly periods
modified:
  dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/DataQueryGroups.java
  dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/DataQueryParams.java
  dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/data/DefaultQueryPlanner.java
  dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/data/JdbcAnalyticsManager.java
  dhis-2/dhis-services/dhis-service-analytics/src/test/java/org/hisp/dhis/analytics/data/QueryPlannerTest.java


--
lp:dhis2
https://code.launchpad.net/~dhis2-devs-core/dhis2/trunk

Your team DHIS 2 developers is subscribed to branch lp:dhis2.
To unsubscribe from this branch go to https://code.launchpad.net/~dhis2-devs-core/dhis2/trunk/+edit-subscription
=== modified file 'dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/DataQueryGroups.java'
--- dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/DataQueryGroups.java	2015-01-17 07:41:26 +0000
+++ dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/DataQueryGroups.java	2015-06-23 12:28:45 +0000
@@ -70,8 +70,8 @@
      * in sequence due to the typical indicator query, where few data elements
      * have the average aggregation operator and many have the sum. Performance
      * will increase if optimal number of queries can be run in parallel for the
-     * queries which take most time, which is in this case are the ones with
-     * sum aggregation type.
+     * queries which take most time, which in this case are the ones with sum 
+     * aggregation type.
      */
     public List<List<DataQueryParams>> getSequentialQueries()
     {

=== modified file 'dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/DataQueryParams.java'
--- dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/DataQueryParams.java	2015-06-19 13:27:23 +0000
+++ dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/DataQueryParams.java	2015-06-23 12:28:45 +0000
@@ -943,6 +943,22 @@
     }
     
     /**
+     * Returns the number of days in the first dimension period in this query.
+     * If no dimension periods exist, the frequency order of the period type of
+     * the query is returned. If no period type exists, -1 is returned.
+     * @return
+     */
+    public int getDaysInFirstPeriod()
+    {
+        List<NameableObject> periods = getPeriods();
+        
+        Period period = !periods.isEmpty() ? (Period) periods.get( 0 ) : null;
+        
+        return period != null ? period.getDaysInPeriod() : periodType != null ? 
+            PeriodType.getPeriodTypeByName( periodType ).getFrequencyOrder() : -1;
+    }
+    
+    /**
      * Indicates whether this params defines an identifier scheme different from
      * UID.
      */

=== modified file 'dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/data/DefaultQueryPlanner.java'
--- dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/data/DefaultQueryPlanner.java	2015-06-22 22:31:16 +0000
+++ dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/data/DefaultQueryPlanner.java	2015-06-23 12:28:45 +0000
@@ -70,6 +70,7 @@
 import org.hisp.dhis.dataelement.DataElementGroup;
 import org.hisp.dhis.organisationunit.OrganisationUnit;
 import org.hisp.dhis.organisationunit.OrganisationUnitService;
+import org.hisp.dhis.period.Period;
 import org.hisp.dhis.period.PeriodType;
 import org.hisp.dhis.setting.SystemSettingManager;
 import org.hisp.dhis.system.util.MathUtils;
@@ -265,17 +266,22 @@
                         List<DataQueryParams> groupedByAggregationType = groupByAggregationType( byDataType );
     
                         for ( DataQueryParams byAggregationType : groupedByAggregationType )
-                        {                            
-                            List<DataQueryParams> groupedByDataPeriodType = groupByDataPeriodType( byAggregationType );
+                        {
+                            List<DataQueryParams> groupedByDaysInPeriod = groupByDaysInPeriod( byAggregationType );
                             
-                            for ( DataQueryParams byDataPeriodType : groupedByDataPeriodType )
+                            for ( DataQueryParams byDaysInPeriod : groupedByDaysInPeriod )
                             {
-                                byDataPeriodType.setPartitions( byPartition.getPartitions() );
-                                byDataPeriodType.setPeriodType( byPeriodType.getPeriodType() );
-                                byDataPeriodType.setAggregationType( byAggregationType.getAggregationType() );
+                                List<DataQueryParams> groupedByDataPeriodType = groupByDataPeriodType( byDaysInPeriod );
                                 
-                                queries.add( byDataPeriodType );
-                            } 
+                                for ( DataQueryParams byDataPeriodType : groupedByDataPeriodType )
+                                {
+                                    byDataPeriodType.setPartitions( byPartition.getPartitions() );
+                                    byDataPeriodType.setPeriodType( byPeriodType.getPeriodType() );
+                                    byDataPeriodType.setAggregationType( byAggregationType.getAggregationType() );
+                                    
+                                    queries.add( byDataPeriodType );
+                                }
+                            }
                         }
                     }
                 }
@@ -361,7 +367,7 @@
 
         if ( subQueries.size() > queryGroups.getAllQueries().size() )
         {
-            log.debug( "Split on " + dimension + ": " + ( subQueries.size() / queryGroups.getAllQueries().size() ) );
+            log.debug( "Split on dimension " + dimension + ": " + ( subQueries.size() / queryGroups.getAllQueries().size() ) );
         }
         
         return new DataQueryGroups( subQueries );
@@ -636,6 +642,42 @@
     }
 
     /**
+     * Groups the given query into sub queries based on the number of days in the
+     * aggregation period. This only applies if the aggregation type is
+     * AVERAGE_SUM_INT and the query has at least one period as dimension option.
+     * This is necessary since the number of days in the aggregation period is 
+     * part of the expression for aggregating the value.
+     */
+    private List<DataQueryParams> groupByDaysInPeriod( DataQueryParams params )
+    {
+        List<DataQueryParams> queries = new ArrayList<>();
+        
+        if ( params.getPeriods().isEmpty() || !params.isAggregationType( AggregationType.AVERAGE_SUM_INT ) )
+        {
+            queries.add( params.instance() );
+            return queries;
+        }
+        
+        ListMap<Integer, NameableObject> daysPeriodMap = getDaysPeriodMap( params.getPeriods() );
+        
+        DimensionalObject periodDim = params.getDimension( PERIOD_DIM_ID );
+
+        for ( Integer days : daysPeriodMap.keySet() )
+        {
+            DataQueryParams query = params.instance();
+            query.setDimensionOptions( periodDim.getDimension(), periodDim.getDimensionType(), periodDim.getDimensionName(), daysPeriodMap.get( days ) );
+            queries.add( query );
+        }
+
+        if ( queries.size() > 1 )
+        {
+            log.debug( "Split on days in period: " + queries.size() );
+        }
+        
+        return queries;
+    }
+    
+    /**
      * Groups the given query in sub queries based on the period type of its
      * data elements. Sets the data period type on each query. This only applies
      * if the aggregation type of the query involves disaggregation.
@@ -697,7 +739,7 @@
     /**
      * Creates a mapping between data type and data element for the given data elements.
      */
-    private ListMap<DataType, NameableObject> getDataTypeDataElementMap(  Collection<NameableObject> dataElements )
+    private ListMap<DataType, NameableObject> getDataTypeDataElementMap( Collection<NameableObject> dataElements )
     {
         ListMap<DataType, NameableObject> map = new ListMap<>();
         
@@ -732,6 +774,26 @@
         
         return map;
     }
+    
+    /**
+     * Creates a mapping between the number of days in the period interval and period
+     * for the given periods.
+     */
+    private ListMap<Integer, NameableObject> getDaysPeriodMap( Collection<NameableObject> periods )
+    {
+        ListMap<Integer, NameableObject> map = new ListMap<>();
+        
+        for ( NameableObject period : periods )
+        {
+            Period pe = (Period) period;
+            
+            int days = pe.getDaysInPeriod();
+            
+            map.putValue( days, pe );
+        }
+        
+        return map;
+    }
 
     /**
      * Puts the given element into the map according to the value type, aggregation

=== modified file 'dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/data/JdbcAnalyticsManager.java'
--- dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/data/JdbcAnalyticsManager.java	2015-06-15 13:44:20 +0000
+++ dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/data/JdbcAnalyticsManager.java	2015-06-23 12:28:45 +0000
@@ -230,9 +230,7 @@
 
         if ( params.isAggregationType( AVERAGE_SUM_INT ) )
         {
-            int days = PeriodType.getPeriodTypeByName( params.getPeriodType() ).getFrequencyOrder();
-
-            sql += "sum(daysxvalue) / " + days;
+            sql += "sum(daysxvalue) / " + params.getDaysInFirstPeriod();
         }
         else if ( params.isAggregationType( AVERAGE_INT ) || params.isAggregationType( AVERAGE_INT_DISAGGREGATION ) )
         {

=== modified file 'dhis-2/dhis-services/dhis-service-analytics/src/test/java/org/hisp/dhis/analytics/data/QueryPlannerTest.java'
--- dhis-2/dhis-services/dhis-service-analytics/src/test/java/org/hisp/dhis/analytics/data/QueryPlannerTest.java	2015-04-10 11:18:20 +0000
+++ dhis-2/dhis-services/dhis-service-analytics/src/test/java/org/hisp/dhis/analytics/data/QueryPlannerTest.java	2015-06-23 12:28:45 +0000
@@ -118,6 +118,8 @@
     private DataElement deD;
     private DataElement deE;
     private DataElement deF;
+    private DataElement deG;
+    private DataElement deH;
     
     private DataSet dsA;
     private DataSet dsB;
@@ -153,12 +155,17 @@
         deD = createDataElement( 'D', VALUE_TYPE_INT, AGGREGATION_OPERATOR_AVERAGE_SUM );
         deE = createDataElement( 'E', VALUE_TYPE_STRING, AGGREGATION_OPERATOR_NONE );
         deF = createDataElement( 'F', VALUE_TYPE_STRING, AGGREGATION_OPERATOR_NONE );
+        deG = createDataElement( 'G', VALUE_TYPE_INT, AGGREGATION_OPERATOR_SUM );
+        deH = createDataElement( 'H', VALUE_TYPE_INT, AGGREGATION_OPERATOR_SUM );
         
         dataElementService.addDataElement( deA );
         dataElementService.addDataElement( deB );
         dataElementService.addDataElement( deC );
         dataElementService.addDataElement( deD );
         dataElementService.addDataElement( deE );
+        dataElementService.addDataElement( deF );
+        dataElementService.addDataElement( deG );
+        dataElementService.addDataElement( deH );
         
         dsA = createDataSet( 'A', pt );
         dsB = createDataSet( 'B', pt );
@@ -496,7 +503,7 @@
         DataQueryParams params = new DataQueryParams();
         params.setDataElements( getList( deA, deB, deC, deD ) );
         params.setOrganisationUnits( getList( ouA, ouB, ouC, ouD, ouE ) );
-        params.setPeriods( getList( createPeriod( "2000Q1" ), createPeriod( "2000Q2" ), createPeriod( "2000Q3" ), createPeriod( "2000Q4" ), createPeriod(  "2001Q1" ), createPeriod( "2001Q2" ) ) );
+        params.setPeriods( getList( createPeriod( "200101" ), createPeriod( "200103" ), createPeriod( "200105" ), createPeriod( "200107" ), createPeriod(  "2002Q3" ), createPeriod( "2002Q4" ) ) );
         
         DataQueryGroups queryGroups = queryPlanner.planQuery( params, 4, ANALYTICS_TABLE_NAME );
         
@@ -581,7 +588,8 @@
     }
     
     /**
-     * Splits on 3 data elements.
+     * Query spans 1 partition. Splits on 2 aggregation types, then splits one
+     * query on 3 days in period to satisfy optimal for a total of 4 queries.
      */
     @Test
     public void planQueryD()
@@ -594,9 +602,9 @@
         
         DataQueryGroups queryGroups = queryPlanner.planQuery( params, 4, ANALYTICS_TABLE_NAME );
         
-        assertEquals( 3, queryGroups.getAllQueries().size() );
+        assertEquals( 4, queryGroups.getAllQueries().size() );
         assertEquals( 2, queryGroups.getSequentialQueries().size() );
-        assertEquals( 2, queryGroups.getLargestGroupSize() );
+        assertEquals( 3, queryGroups.getLargestGroupSize() );
         
         for ( DataQueryParams query : queryGroups.getAllQueries() )
         {
@@ -607,7 +615,9 @@
     }
     
     /**
-     * Splits on 3 data elements. No organisation units specified.
+     * Query spans 1 partition. Splits on 2 aggregation types, then splits one
+     * query on 3 days in period to satisfy optimal for a total of 4 queries. No 
+     * organisation units specified.
      */
     @Test
     public void planQueryE()
@@ -619,9 +629,9 @@
 
         DataQueryGroups queryGroups = queryPlanner.planQuery( params, 4, ANALYTICS_TABLE_NAME );
 
-        assertEquals( 3, queryGroups.getAllQueries().size() );
+        assertEquals( 4, queryGroups.getAllQueries().size() );
         assertEquals( 2, queryGroups.getSequentialQueries().size() );
-        assertEquals( 2, queryGroups.getLargestGroupSize() );
+        assertEquals( 3, queryGroups.getLargestGroupSize() );
 
         for ( DataQueryParams query : queryGroups.getAllQueries() )
         {
@@ -632,7 +642,7 @@
     }
 
     /**
-     * Splits on 3 queries on organisation units for an optimal of 4 queries. No 
+     * Splits on 3 queries on organisation units for an optimal of 3 queries. No 
      * data elements specified.
      */
     @Test
@@ -671,9 +681,9 @@
     }
 
     /**
-     * Query filters span 2 partitions. Splits in 4 queries on data elements to 
-     * satisfy optimal for a total of 8 queries, because query has 2 different 
-     * aggregation types.
+     * Query filters span 2 partitions. Splits in 2 queries on data elements,
+     * then 2 queries on organisation units to satisfy optimal for a total of 8 
+     * queries.
      */
     @Test
     public void planQueryH()
@@ -699,14 +709,15 @@
 
     /**
      * Query spans 3 period types. Splits in 3 queries for each period type, then
-     * splits in 4 queries on data elements units to satisfy optimal for a total 
-     * of 12 queries, because query has 2 different aggregation types.
+     * splits in 2 queries on data type, then splits in 2 queries on data elements
+     * to satisfy optimal for a total of 12 queries, because query has 2 different 
+     * aggregation types.
      */
     @Test
     public void planQueryI()
     {
         DataQueryParams params = new DataQueryParams();
-        params.setDataElements( getList( deA, deB, deC, deD ) );
+        params.setDataElements( getList( deA, deB, deE, deF ) );
         params.setOrganisationUnits( getList( ouA, ouB, ouC, ouD, ouE ) );
         params.setPeriods( getList( createPeriod( "2000Q1" ), createPeriod( "2000Q2" ), createPeriod( "2000" ), createPeriod( "200002" ), createPeriod( "200003" ), createPeriod( "200004" ) ) );
         
@@ -763,9 +774,9 @@
     }
 
     /**
-     * Splits in 2 queries for each aggregation type, then 2 queries for each
-     * data type, then 2 queries for each organisation unit to satisfy optimal
-     * for a total of 4 queries across 2 sequential queries.
+     * Splits in 2 queries for each data type, then 2 queries for each
+     * data element, then 2 queries for each organisation unit to satisfy optimal
+     * for a total of 8 queries with 4 queries across 2 sequential queries.
      */
     @Test
     public void planQueryL()
@@ -788,6 +799,32 @@
         }
     }
 
+    /**
+     * Query spans 1 partition. Splits on 2 queries for data types, then splits 
+     * on 2 queries for data elements to satisfy optimal for a total of 4 queries.
+     */
+    @Test
+    public void planQueryM()
+    {
+        DataQueryParams params = new DataQueryParams();
+        params.setDataElements( getList( deA, deB, deG, deH ) );
+        params.setOrganisationUnits( getList( ouA ) );
+        params.setPeriods( getList( createPeriod( "200101" ), createPeriod( "200103" ) ) );
+        
+        DataQueryGroups queryGroups = queryPlanner.planQuery( params, 4, ANALYTICS_TABLE_NAME );
+        
+        assertEquals( 4, queryGroups.getAllQueries().size() );
+        assertEquals( 1, queryGroups.getSequentialQueries().size() );
+        assertEquals( 4, queryGroups.getLargestGroupSize() );
+        
+        for ( DataQueryParams query : queryGroups.getAllQueries() )
+        {
+            assertTrue( samePeriodType( query.getPeriods() ) );
+            assertTrue( samePartition( query.getPeriods() ) );
+            assertDimensionNameNotNull( query );
+        }
+    }
+    
     // -------------------------------------------------------------------------
     // Supportive methods
     // -------------------------------------------------------------------------