← Back to team overview

dhis2-devs team mailing list archive

[Branch ~dhis2-devs-core/dhis2/trunk] Rev 17894: Analytics. More accurate determination of which analytics partitions must be generated based on t...

 

------------------------------------------------------------
revno: 17894
committer: Lars Helge Overland <larshelge@xxxxxxxxx>
branch nick: dhis2
timestamp: Tue 2015-01-06 22:00:39 +0100
message:
  Analytics. More accurate determination of which analytics partitions must be generated based on the source data. Avoids having to generate a huge number of partitions when the database contains outlier data from long time ago.
modified:
  dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/AnalyticsTableManager.java
  dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/AbstractJdbcTableManager.java
  dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/DefaultAnalyticsTableService.java
  dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/JdbcAnalyticsTableManager.java
  dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/JdbcCompletenessTableManager.java
  dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/JdbcCompletenessTargetTableManager.java
  dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/JdbcEventAnalyticsTableManager.java
  dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/JdbcOrgUnitTargetTableManager.java
  dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/PartitionUtils.java


--
lp:dhis2
https://code.launchpad.net/~dhis2-devs-core/dhis2/trunk

Your team DHIS 2 developers is subscribed to branch lp:dhis2.
To unsubscribe from this branch go to https://code.launchpad.net/~dhis2-devs-core/dhis2/trunk/+edit-subscription
=== modified file 'dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/AnalyticsTableManager.java'
--- dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/AnalyticsTableManager.java	2014-06-04 16:18:39 +0000
+++ dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/AnalyticsTableManager.java	2015-01-06 21:00:39 +0000
@@ -47,21 +47,12 @@
     public static final String EVENT_ANALYTICS_TABLE_NAME = "analytics_event";
     
     /**
-     * Returns analytics tables which yearly partitions. Yearly partitions will
-     * be generated starting from the earliest existing data value until the
-     * latest existing data value.
-     * 
-     * @param lastYears the number of last years of data to include, null if all years.
-     */
-    List<AnalyticsTable> getTables( Integer lastYears );
-    
-    /**
      * Returns analytics tables which yearly partitions.
      * 
      * @param earliest the start date for the first year to generate table partitions.
      * @param latest the end date for the last year to generate table partitions.
      */
-    List<AnalyticsTable> getTables( Date earliest, Date latest );
+    List<AnalyticsTable> getTables( Date earliest );
     
     /**
      * Checks if the database content is in valid state for analytics table generation.
@@ -111,22 +102,11 @@
     Future<?> populateTableAsync( ConcurrentLinkedQueue<AnalyticsTable> tables );    
 
     /**
-     * Retrieves the start date of the period of the earliest data value row.
-     */
-    Date getEarliestData();
-    
-    /**
-     * Retrieves the end date of the period of the latest data value row.
-     */
-    Date getLatestData();
-    
-    /**
-     * Checks whether the given table has no rows, if so drops the table. Returns
-     * true if the table was empty and pruned, if not false.
+     * Returns all years for which it exists data values.
      * 
-     * @param table the analytics table.
+     * @param earliest the earliest date to include as data year, null if no restriction.
      */
-    boolean pruneTable( AnalyticsTable table );
+    List<Integer> getDataYears( Date earliest );
     
     /**
      * Drops the given table.

=== modified file 'dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/AbstractJdbcTableManager.java'
--- dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/AbstractJdbcTableManager.java	2014-11-11 03:58:32 +0000
+++ dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/AbstractJdbcTableManager.java	2015-01-06 21:00:39 +0000
@@ -29,7 +29,7 @@
  */
 
 import java.util.ArrayList;
-import java.util.Calendar;
+import java.util.Collections;
 import java.util.Date;
 import java.util.List;
 import java.util.Set;
@@ -49,13 +49,11 @@
 import org.hisp.dhis.jdbc.StatementBuilder;
 import org.hisp.dhis.organisationunit.OrganisationUnitGroupService;
 import org.hisp.dhis.organisationunit.OrganisationUnitService;
-import org.hisp.dhis.period.Cal;
 import org.hisp.dhis.period.Period;
 import org.hisp.dhis.setting.SystemSettingManager;
 import org.hisp.dhis.system.timer.SystemTimer;
 import org.hisp.dhis.system.timer.Timer;
 import org.hisp.dhis.system.util.ListUtils;
-import org.joda.time.DateTime;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.jdbc.BadSqlGrammarException;
 import org.springframework.jdbc.core.JdbcTemplate;
@@ -74,9 +72,6 @@
     public static final String PREFIX_ORGUNITLEVEL = "uidlevel";
     public static final String PREFIX_INDEX = "in_";
     
-    private static Date MIN_EARLIEST_DATE = new DateTime( 1800, 1, 1, 0, 0 ).toDate();
-    private static Date MAX_LATEST_DATE = new DateTime( 2100, 1, 1, 0, 0 ).toDate();
-    
     @Autowired
     protected OrganisationUnitService organisationUnitService;
     
@@ -124,45 +119,23 @@
 
     @Override
     @Transactional
-    public List<AnalyticsTable> getTables( Integer lastYears )
-    {
-        Date earliest;
-        
-        if ( lastYears != null )
-        {
-            earliest = new Cal().now().subtract( Calendar.YEAR, ( lastYears - 1 ) ).set( 1, 1 ).time();
-        }
-        else
-        {
-            earliest = getEarliestData();
-        }
-        
-        Date latest = getLatestData();
-        
-        earliest = earliest != null && earliest.before( MIN_EARLIEST_DATE ) ? MIN_EARLIEST_DATE : earliest;
-        latest = latest != null && latest.after( MAX_LATEST_DATE ) ? MAX_LATEST_DATE : latest;
-        
-        return getTables( earliest, latest );
-    }
-
-    @Override
-    @Transactional
-    public List<AnalyticsTable> getTables( Date earliest, Date latest )
-    {
-        log.info( "Get tables using earliest: " + earliest + ", latest: " + latest );
+    public List<AnalyticsTable> getTables( Date earliest )
+    {
+        log.info( "Get tables using earliest: " + earliest );
 
         List<AnalyticsTable> tables = new ArrayList<>();
         
-        if ( earliest != null && latest != null )
-        {        
-            String baseName = getTableName();
+        List<Integer> dataYears = getDataYears( earliest );
+
+        Collections.sort( dataYears );
+        
+        String baseName = getTableName();
+        
+        for ( Integer year : dataYears )
+        {
+            Period period = PartitionUtils.getPeriod( year );
             
-            List<Period> periods = PartitionUtils.getPeriods( earliest, latest );
-    
-            for ( Period period : periods )
-            {
-                tables.add( new AnalyticsTable( baseName, getDimensionColumns( null ), period ) );
-            }
+            tables.add( new AnalyticsTable( baseName, getDimensionColumns( null ), period ) );
         }
         
         return tables;
@@ -217,25 +190,6 @@
     }
 
     @Override
-    public boolean pruneTable( AnalyticsTable table )
-    {
-        String tableName = table.getTempTableName();
-        
-        if ( !hasRows( tableName ) )
-        {
-            final String sqlDrop = "drop table " + tableName;
-            
-            executeSilently( sqlDrop );
-            
-            log.info( "Drop SQL: " + sqlDrop );
-            
-            return true;
-        }
-        
-        return false;
-    }
-
-    @Override
     public void dropTable( String tableName )
     {
         final String realTable = tableName.replaceFirst( TABLE_TEMP_SUFFIX, "" );
@@ -345,7 +299,7 @@
      */
     protected void populateAndLog( String sql, String tableName )
     {
-        log.info( "Populate SQL for " + tableName + ": " + sql );
+        log.info( "Populating " + tableName );
 
         Timer t = new SystemTimer().start();
         

=== modified file 'dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/DefaultAnalyticsTableService.java'
--- dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/DefaultAnalyticsTableService.java	2014-12-15 18:08:16 +0000
+++ dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/DefaultAnalyticsTableService.java	2015-01-06 21:00:39 +0000
@@ -30,7 +30,7 @@
 
 import java.util.ArrayList;
 import java.util.Collection;
-import java.util.Iterator;
+import java.util.Date;
 import java.util.List;
 import java.util.concurrent.ConcurrentLinkedQueue;
 import java.util.concurrent.Future;
@@ -111,7 +111,9 @@
             return;
         }
         
-        final List<AnalyticsTable> tables = tableManager.getTables( lastYears );
+        Date earliest = PartitionUtils.getEarliestDate( lastYears );
+        
+        final List<AnalyticsTable> tables = tableManager.getTables( earliest );
         
         clock.logTime( "Partition tables: " + tables + ", last years: " + lastYears );        
         notifier.notify( taskId, "Creating analytics tables, processes: " + processNo + ", org unit levels: " + orgUnitLevelNo );
@@ -124,11 +126,6 @@
         populateTables( tables );
         
         clock.logTime( "Populated analytics tables" );
-        notifier.notify( taskId, "Pruned analytics tables" );
-        
-        pruneTables( tables );
-        
-        clock.logTime( "Pruned analytics tables" );
         notifier.notify( taskId, "Applying aggregation levels" );
         
         applyAggregationLevels( tables );
@@ -216,20 +213,7 @@
         
         ConcurrentUtils.waitForCompletion( futures );
     }
-    
-    private void pruneTables( List<AnalyticsTable> tables )
-    {
-        Iterator<AnalyticsTable> iterator = tables.iterator();
         
-        while ( iterator.hasNext() )
-        {
-            if ( tableManager.pruneTable( iterator.next() ) )
-            {
-                iterator.remove();
-            }
-        }
-    }
-    
     private void applyAggregationLevels( List<AnalyticsTable> tables )
     {
         int maxLevels = organisationUnitService.getMaxOfOrganisationUnitLevels();

=== modified file 'dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/JdbcAnalyticsTableManager.java'
--- dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/JdbcAnalyticsTableManager.java	2014-12-15 18:08:16 +0000
+++ dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/JdbcAnalyticsTableManager.java	2015-01-06 21:00:39 +0000
@@ -341,23 +341,20 @@
     }
     
     @Override
-    public Date getEarliestData()
-    {
-        final String sql = "select min(pe.startdate) from datavalue dv " +
-            "join period pe on dv.periodid=pe.periodid " +
-            "where pe.startdate is not null";
-        
-        return jdbcTemplate.queryForObject( sql, Date.class );
-    }
-
-    @Override
-    public Date getLatestData()
-    {
-        final String sql = "select max(pe.enddate) from datavalue dv " +
-            "join period pe on dv.periodid=pe.periodid " + 
-            "where pe.enddate is not null ";
-        
-        return jdbcTemplate.queryForObject( sql, Date.class );
+    public List<Integer> getDataYears( Date earliest )
+    {
+        String sql = 
+            "select distinct(extract(year from pe.startdate)) " +
+            "from datavalue dv " +
+            "inner join period pe on dv.periodid=pe.periodid " +
+            "where pe.startdate is not null ";
+        
+        if ( earliest != null )
+        {
+            sql += "and pe.startdate >= '" + DateUtils.getMediumDateString( earliest ) + "'";
+        }
+        
+        return jdbcTemplate.queryForList( sql, Integer.class );
     }
     
     @Override

=== modified file 'dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/JdbcCompletenessTableManager.java'
--- dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/JdbcCompletenessTableManager.java	2014-11-03 01:54:15 +0000
+++ dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/JdbcCompletenessTableManager.java	2015-01-06 21:00:39 +0000
@@ -192,25 +192,22 @@
     }
 
     @Override
-    public Date getEarliestData()
+    public List<Integer> getDataYears( Date earliest )
     {
-        final String sql = "select min(pe.startdate) from completedatasetregistration cdr " +
-            "join period pe on cdr.periodid=pe.periodid " +
+        String sql = 
+            "select distinct(extract(year from pe.startdate)) " +
+            "from completedatasetregistration cdr " +
+            "inner join period pe on cdr.periodid=pe.periodid " +
             "where pe.startdate is not null";
-        
-        return jdbcTemplate.queryForObject( sql, Date.class );
-    }
-
-    @Override
-    public Date getLatestData()
-    {
-        final String sql = "select max(pe.enddate) from completedatasetregistration cdr " +
-            "join period pe on cdr.periodid=pe.periodid " +
-            "where pe.enddate is not null";
-        
-        return jdbcTemplate.queryForObject( sql, Date.class );
-    }
-
+
+        if ( earliest != null )
+        {
+            sql += "and pe.startdate >= '" + DateUtils.getMediumDateString( earliest ) + "'";
+        }
+        
+        return jdbcTemplate.queryForList( sql, Integer.class );
+    }
+    
     @Override
     @Async
     public Future<?> applyAggregationLevels( ConcurrentLinkedQueue<AnalyticsTable> tables, Collection<String> dataElements, int aggregationLevel )

=== modified file 'dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/JdbcCompletenessTargetTableManager.java'
--- dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/JdbcCompletenessTargetTableManager.java	2014-11-03 01:54:15 +0000
+++ dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/JdbcCompletenessTargetTableManager.java	2015-01-06 21:00:39 +0000
@@ -49,7 +49,7 @@
 {
     @Override
     @Transactional
-    public List<AnalyticsTable> getTables( Integer lastYears )
+    public List<AnalyticsTable> getTables( Date earliest )
     {
         List<AnalyticsTable> tables = new ArrayList<>();
         tables.add( new AnalyticsTable( getTableName(), getDimensionColumns( null ) ) );
@@ -173,17 +173,11 @@
     }
 
     @Override
-    public Date getEarliestData()
-    {
-        return null; // Not relevant
-    }
-
-    @Override
-    public Date getLatestData()
-    {
-        return null; // Not relevant
-    }
-
+    public List<Integer> getDataYears( Date earliest )
+    {
+        return null; // Not relevant
+    }
+    
     @Override
     @Async
     public Future<?> applyAggregationLevels( ConcurrentLinkedQueue<AnalyticsTable> tables, Collection<String> dataElements, int aggregationLevel )

=== modified file 'dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/JdbcEventAnalyticsTableManager.java'
--- dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/JdbcEventAnalyticsTableManager.java	2014-11-03 01:54:15 +0000
+++ dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/JdbcEventAnalyticsTableManager.java	2015-01-06 21:00:39 +0000
@@ -33,6 +33,7 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.Date;
 import java.util.List;
 import java.util.concurrent.ConcurrentLinkedQueue;
@@ -68,27 +69,32 @@
 
     @Override
     @Transactional
-    public List<AnalyticsTable> getTables( Date earliest, Date latest )
+    public List<AnalyticsTable> getTables( Date earliest )
     {
-        log.info( "Get tables using earliest: " + earliest + ", latest: " + latest );
+        log.info( "Get tables using earliest: " + earliest );
 
         List<AnalyticsTable> tables = new ArrayList<>();
 
-        if ( earliest != null && latest != null )
+        List<Integer> dataYears = getDataYears( earliest );
+        
+        Collections.sort( dataYears );
+        
+        String baseName = getTableName();
+
+        for ( Integer year : dataYears )
         {
-            String baseName = getTableName();
-
-            List<Period> periods = PartitionUtils.getPeriods( earliest, latest );
-
-            for ( Period period : periods )
+            Period period = PartitionUtils.getPeriod( year );
+            
+            List<Integer> programs = getDataPrograms( period );
+            
+            for ( Integer id : programs )
             {
-                for ( Program program : programService.getAllPrograms() )
-                {
-                    AnalyticsTable table = new AnalyticsTable( baseName, null, period, program );
-                    List<String[]> dimensionColumns = getDimensionColumns( table );
-                    table.setDimensionColumns( dimensionColumns );
-                    tables.add( table );
-                }
+                Program program = programService.getProgram( id );
+                
+                AnalyticsTable table = new AnalyticsTable( baseName, null, period, program );
+                List<String[]> dimensionColumns = getDimensionColumns( table );
+                table.setDimensionColumns( dimensionColumns );
+                tables.add( table );
             }
         }
 
@@ -286,23 +292,38 @@
     }
 
     @Override
-    public Date getEarliestData()
-    {
-        final String sql = "select min(psi.executiondate) from programstageinstance psi "
-            + "where psi.executiondate is not null";
-
-        return jdbcTemplate.queryForObject( sql, Date.class );
-    }
-
-    @Override
-    public Date getLatestData()
-    {
-        final String sql = "select max(psi.executiondate) from programstageinstance psi "
-            + "where psi.executiondate is not null";
-
-        return jdbcTemplate.queryForObject( sql, Date.class );
-    }
-
+    public List<Integer> getDataYears( Date earliest )
+    {
+        String sql = 
+            "select distinct(extract(year from psi.executiondate)) " +
+            "from programstageinstance psi " +
+            "where psi.executiondate is not null";
+
+        if ( earliest != null )
+        {
+            sql += "and psi.executiondate >= '" + DateUtils.getMediumDateString( earliest ) + "'";
+        }
+        
+        return jdbcTemplate.queryForList( sql, Integer.class );
+    }
+    
+    private List<Integer> getDataPrograms( Period period )
+    {
+        final String start = DateUtils.getMediumDateString( period.getStartDate() );
+        final String end = DateUtils.getMediumDateString( period.getEndDate() );
+        
+        final String sql = 
+            "select distinct pi.programid " +
+            "from programstageinstance psi " +
+            "inner join programinstance pi on psi.programinstanceid = pi.programinstanceid " +
+            "where psi.executiondate >= '" + start + "' " + 
+            "and psi.executiondate <= '" + end + "' " +
+            "and psi.organisationunitid is not null " +
+            "and psi.executiondate is not null";
+        
+        return jdbcTemplate.queryForList( sql, Integer.class );
+    }
+    
     @Override
     @Async
     public Future<?> applyAggregationLevels( ConcurrentLinkedQueue<AnalyticsTable> tables,

=== modified file 'dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/JdbcOrgUnitTargetTableManager.java'
--- dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/JdbcOrgUnitTargetTableManager.java	2014-11-03 01:54:15 +0000
+++ dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/JdbcOrgUnitTargetTableManager.java	2015-01-06 21:00:39 +0000
@@ -48,7 +48,7 @@
 {
     @Override
     @Transactional
-    public List<AnalyticsTable> getTables( Integer lastYears )
+    public List<AnalyticsTable> getTables( Date earliest )
     {
         List<AnalyticsTable> tables = new ArrayList<>();
         tables.add( new AnalyticsTable( getTableName(), getDimensionColumns( null ) ) );
@@ -163,17 +163,11 @@
     }
 
     @Override
-    public Date getEarliestData()
-    {
-        return null; // Not relevant
-    }
-
-    @Override
-    public Date getLatestData()
-    {
-        return null; // Not relevant
-    }
-
+    public List<Integer> getDataYears( Date earliest )
+    {
+        return null; // Not relevant
+    }
+    
     @Override
     @Async
     public Future<?> applyAggregationLevels( ConcurrentLinkedQueue<AnalyticsTable> tables, Collection<String> dataElements, int aggregationLevel )

=== modified file 'dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/PartitionUtils.java'
--- dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/PartitionUtils.java	2014-09-22 09:56:45 +0000
+++ dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/PartitionUtils.java	2015-01-06 21:00:39 +0000
@@ -29,6 +29,7 @@
  */
 
 import java.util.ArrayList;
+import java.util.Calendar;
 import java.util.Collection;
 import java.util.Date;
 import java.util.List;
@@ -43,6 +44,7 @@
 import org.hisp.dhis.period.PeriodType;
 import org.hisp.dhis.period.YearlyPeriodType;
 import org.hisp.dhis.system.util.UniqueArrayList;
+import org.joda.time.DateTime;
 
 /**
  * @author Lars Helge Overland
@@ -52,22 +54,26 @@
     private static final YearlyPeriodType PERIODTYPE = new YearlyPeriodType();
 
     private static final String SEP = "_";
-
-    public static List<Period> getPeriods( Date earliest, Date latest )
-    {
-        List<Period> periods = new ArrayList<>();
-
-        Period period = PERIODTYPE.createPeriod( earliest );
-
-        while ( period != null && period.getStartDate().before( latest ) )
+        
+    public static Period getPeriod( Integer year )
+    {
+        DateTime time = new DateTime( year, 1, 1, 0, 0 );
+        
+        return PERIODTYPE.createPeriod( time.toDate() );
+    }
+
+    public static Date getEarliestDate( Integer lastYears )
+    {
+        Date earliest = null;
+        
+        if ( lastYears != null )
         {
-            periods.add( period );
-            period = PERIODTYPE.getNextPeriod( period );
+            earliest = new Cal().now().subtract( Calendar.YEAR, ( lastYears - 1 ) ).set( 1, 1 ).time();
         }
-
-        return periods;
+        
+        return earliest;
     }
-
+    
     //TODO optimize by including required filter periods only
 
     public static Partitions getPartitions( Period period, String tablePrefix, String tableSuffix, Set<String> validPartitions )