dhis2-devs team mailing list archive
-
dhis2-devs team
-
Mailing list archive
-
Message #34880
[Branch ~dhis2-devs-core/dhis2/trunk] Rev 17894: Analytics. More accurate determination of which analytics partitions must be generated based on t...
------------------------------------------------------------
revno: 17894
committer: Lars Helge Overland <larshelge@xxxxxxxxx>
branch nick: dhis2
timestamp: Tue 2015-01-06 22:00:39 +0100
message:
Analytics. More accurate determination of which analytics partitions must be generated based on the source data. Avoids having to generate a huge number of partitions when the database contains outlier data from long time ago.
modified:
dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/AnalyticsTableManager.java
dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/AbstractJdbcTableManager.java
dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/DefaultAnalyticsTableService.java
dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/JdbcAnalyticsTableManager.java
dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/JdbcCompletenessTableManager.java
dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/JdbcCompletenessTargetTableManager.java
dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/JdbcEventAnalyticsTableManager.java
dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/JdbcOrgUnitTargetTableManager.java
dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/PartitionUtils.java
--
lp:dhis2
https://code.launchpad.net/~dhis2-devs-core/dhis2/trunk
Your team DHIS 2 developers is subscribed to branch lp:dhis2.
To unsubscribe from this branch go to https://code.launchpad.net/~dhis2-devs-core/dhis2/trunk/+edit-subscription
=== modified file 'dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/AnalyticsTableManager.java'
--- dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/AnalyticsTableManager.java 2014-06-04 16:18:39 +0000
+++ dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/AnalyticsTableManager.java 2015-01-06 21:00:39 +0000
@@ -47,21 +47,12 @@
public static final String EVENT_ANALYTICS_TABLE_NAME = "analytics_event";
/**
- * Returns analytics tables which yearly partitions. Yearly partitions will
- * be generated starting from the earliest existing data value until the
- * latest existing data value.
- *
- * @param lastYears the number of last years of data to include, null if all years.
- */
- List<AnalyticsTable> getTables( Integer lastYears );
-
- /**
* Returns analytics tables which yearly partitions.
*
* @param earliest the start date for the first year to generate table partitions.
* @param latest the end date for the last year to generate table partitions.
*/
- List<AnalyticsTable> getTables( Date earliest, Date latest );
+ List<AnalyticsTable> getTables( Date earliest );
/**
* Checks if the database content is in valid state for analytics table generation.
@@ -111,22 +102,11 @@
Future<?> populateTableAsync( ConcurrentLinkedQueue<AnalyticsTable> tables );
/**
- * Retrieves the start date of the period of the earliest data value row.
- */
- Date getEarliestData();
-
- /**
- * Retrieves the end date of the period of the latest data value row.
- */
- Date getLatestData();
-
- /**
- * Checks whether the given table has no rows, if so drops the table. Returns
- * true if the table was empty and pruned, if not false.
+ * Returns all years for which it exists data values.
*
- * @param table the analytics table.
+ * @param earliest the earliest date to include as data year, null if no restriction.
*/
- boolean pruneTable( AnalyticsTable table );
+ List<Integer> getDataYears( Date earliest );
/**
* Drops the given table.
=== modified file 'dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/AbstractJdbcTableManager.java'
--- dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/AbstractJdbcTableManager.java 2014-11-11 03:58:32 +0000
+++ dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/AbstractJdbcTableManager.java 2015-01-06 21:00:39 +0000
@@ -29,7 +29,7 @@
*/
import java.util.ArrayList;
-import java.util.Calendar;
+import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.Set;
@@ -49,13 +49,11 @@
import org.hisp.dhis.jdbc.StatementBuilder;
import org.hisp.dhis.organisationunit.OrganisationUnitGroupService;
import org.hisp.dhis.organisationunit.OrganisationUnitService;
-import org.hisp.dhis.period.Cal;
import org.hisp.dhis.period.Period;
import org.hisp.dhis.setting.SystemSettingManager;
import org.hisp.dhis.system.timer.SystemTimer;
import org.hisp.dhis.system.timer.Timer;
import org.hisp.dhis.system.util.ListUtils;
-import org.joda.time.DateTime;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.jdbc.BadSqlGrammarException;
import org.springframework.jdbc.core.JdbcTemplate;
@@ -74,9 +72,6 @@
public static final String PREFIX_ORGUNITLEVEL = "uidlevel";
public static final String PREFIX_INDEX = "in_";
- private static Date MIN_EARLIEST_DATE = new DateTime( 1800, 1, 1, 0, 0 ).toDate();
- private static Date MAX_LATEST_DATE = new DateTime( 2100, 1, 1, 0, 0 ).toDate();
-
@Autowired
protected OrganisationUnitService organisationUnitService;
@@ -124,45 +119,23 @@
@Override
@Transactional
- public List<AnalyticsTable> getTables( Integer lastYears )
- {
- Date earliest;
-
- if ( lastYears != null )
- {
- earliest = new Cal().now().subtract( Calendar.YEAR, ( lastYears - 1 ) ).set( 1, 1 ).time();
- }
- else
- {
- earliest = getEarliestData();
- }
-
- Date latest = getLatestData();
-
- earliest = earliest != null && earliest.before( MIN_EARLIEST_DATE ) ? MIN_EARLIEST_DATE : earliest;
- latest = latest != null && latest.after( MAX_LATEST_DATE ) ? MAX_LATEST_DATE : latest;
-
- return getTables( earliest, latest );
- }
-
- @Override
- @Transactional
- public List<AnalyticsTable> getTables( Date earliest, Date latest )
- {
- log.info( "Get tables using earliest: " + earliest + ", latest: " + latest );
+ public List<AnalyticsTable> getTables( Date earliest )
+ {
+ log.info( "Get tables using earliest: " + earliest );
List<AnalyticsTable> tables = new ArrayList<>();
- if ( earliest != null && latest != null )
- {
- String baseName = getTableName();
+ List<Integer> dataYears = getDataYears( earliest );
+
+ Collections.sort( dataYears );
+
+ String baseName = getTableName();
+
+ for ( Integer year : dataYears )
+ {
+ Period period = PartitionUtils.getPeriod( year );
- List<Period> periods = PartitionUtils.getPeriods( earliest, latest );
-
- for ( Period period : periods )
- {
- tables.add( new AnalyticsTable( baseName, getDimensionColumns( null ), period ) );
- }
+ tables.add( new AnalyticsTable( baseName, getDimensionColumns( null ), period ) );
}
return tables;
@@ -217,25 +190,6 @@
}
@Override
- public boolean pruneTable( AnalyticsTable table )
- {
- String tableName = table.getTempTableName();
-
- if ( !hasRows( tableName ) )
- {
- final String sqlDrop = "drop table " + tableName;
-
- executeSilently( sqlDrop );
-
- log.info( "Drop SQL: " + sqlDrop );
-
- return true;
- }
-
- return false;
- }
-
- @Override
public void dropTable( String tableName )
{
final String realTable = tableName.replaceFirst( TABLE_TEMP_SUFFIX, "" );
@@ -345,7 +299,7 @@
*/
protected void populateAndLog( String sql, String tableName )
{
- log.info( "Populate SQL for " + tableName + ": " + sql );
+ log.info( "Populating " + tableName );
Timer t = new SystemTimer().start();
=== modified file 'dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/DefaultAnalyticsTableService.java'
--- dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/DefaultAnalyticsTableService.java 2014-12-15 18:08:16 +0000
+++ dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/DefaultAnalyticsTableService.java 2015-01-06 21:00:39 +0000
@@ -30,7 +30,7 @@
import java.util.ArrayList;
import java.util.Collection;
-import java.util.Iterator;
+import java.util.Date;
import java.util.List;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.Future;
@@ -111,7 +111,9 @@
return;
}
- final List<AnalyticsTable> tables = tableManager.getTables( lastYears );
+ Date earliest = PartitionUtils.getEarliestDate( lastYears );
+
+ final List<AnalyticsTable> tables = tableManager.getTables( earliest );
clock.logTime( "Partition tables: " + tables + ", last years: " + lastYears );
notifier.notify( taskId, "Creating analytics tables, processes: " + processNo + ", org unit levels: " + orgUnitLevelNo );
@@ -124,11 +126,6 @@
populateTables( tables );
clock.logTime( "Populated analytics tables" );
- notifier.notify( taskId, "Pruned analytics tables" );
-
- pruneTables( tables );
-
- clock.logTime( "Pruned analytics tables" );
notifier.notify( taskId, "Applying aggregation levels" );
applyAggregationLevels( tables );
@@ -216,20 +213,7 @@
ConcurrentUtils.waitForCompletion( futures );
}
-
- private void pruneTables( List<AnalyticsTable> tables )
- {
- Iterator<AnalyticsTable> iterator = tables.iterator();
- while ( iterator.hasNext() )
- {
- if ( tableManager.pruneTable( iterator.next() ) )
- {
- iterator.remove();
- }
- }
- }
-
private void applyAggregationLevels( List<AnalyticsTable> tables )
{
int maxLevels = organisationUnitService.getMaxOfOrganisationUnitLevels();
=== modified file 'dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/JdbcAnalyticsTableManager.java'
--- dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/JdbcAnalyticsTableManager.java 2014-12-15 18:08:16 +0000
+++ dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/JdbcAnalyticsTableManager.java 2015-01-06 21:00:39 +0000
@@ -341,23 +341,20 @@
}
@Override
- public Date getEarliestData()
- {
- final String sql = "select min(pe.startdate) from datavalue dv " +
- "join period pe on dv.periodid=pe.periodid " +
- "where pe.startdate is not null";
-
- return jdbcTemplate.queryForObject( sql, Date.class );
- }
-
- @Override
- public Date getLatestData()
- {
- final String sql = "select max(pe.enddate) from datavalue dv " +
- "join period pe on dv.periodid=pe.periodid " +
- "where pe.enddate is not null ";
-
- return jdbcTemplate.queryForObject( sql, Date.class );
+ public List<Integer> getDataYears( Date earliest )
+ {
+ String sql =
+ "select distinct(extract(year from pe.startdate)) " +
+ "from datavalue dv " +
+ "inner join period pe on dv.periodid=pe.periodid " +
+ "where pe.startdate is not null ";
+
+ if ( earliest != null )
+ {
+ sql += "and pe.startdate >= '" + DateUtils.getMediumDateString( earliest ) + "'";
+ }
+
+ return jdbcTemplate.queryForList( sql, Integer.class );
}
@Override
=== modified file 'dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/JdbcCompletenessTableManager.java'
--- dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/JdbcCompletenessTableManager.java 2014-11-03 01:54:15 +0000
+++ dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/JdbcCompletenessTableManager.java 2015-01-06 21:00:39 +0000
@@ -192,25 +192,22 @@
}
@Override
- public Date getEarliestData()
+ public List<Integer> getDataYears( Date earliest )
{
- final String sql = "select min(pe.startdate) from completedatasetregistration cdr " +
- "join period pe on cdr.periodid=pe.periodid " +
+ String sql =
+ "select distinct(extract(year from pe.startdate)) " +
+ "from completedatasetregistration cdr " +
+ "inner join period pe on cdr.periodid=pe.periodid " +
"where pe.startdate is not null";
-
- return jdbcTemplate.queryForObject( sql, Date.class );
- }
-
- @Override
- public Date getLatestData()
- {
- final String sql = "select max(pe.enddate) from completedatasetregistration cdr " +
- "join period pe on cdr.periodid=pe.periodid " +
- "where pe.enddate is not null";
-
- return jdbcTemplate.queryForObject( sql, Date.class );
- }
-
+
+ if ( earliest != null )
+ {
+ sql += "and pe.startdate >= '" + DateUtils.getMediumDateString( earliest ) + "'";
+ }
+
+ return jdbcTemplate.queryForList( sql, Integer.class );
+ }
+
@Override
@Async
public Future<?> applyAggregationLevels( ConcurrentLinkedQueue<AnalyticsTable> tables, Collection<String> dataElements, int aggregationLevel )
=== modified file 'dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/JdbcCompletenessTargetTableManager.java'
--- dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/JdbcCompletenessTargetTableManager.java 2014-11-03 01:54:15 +0000
+++ dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/JdbcCompletenessTargetTableManager.java 2015-01-06 21:00:39 +0000
@@ -49,7 +49,7 @@
{
@Override
@Transactional
- public List<AnalyticsTable> getTables( Integer lastYears )
+ public List<AnalyticsTable> getTables( Date earliest )
{
List<AnalyticsTable> tables = new ArrayList<>();
tables.add( new AnalyticsTable( getTableName(), getDimensionColumns( null ) ) );
@@ -173,17 +173,11 @@
}
@Override
- public Date getEarliestData()
- {
- return null; // Not relevant
- }
-
- @Override
- public Date getLatestData()
- {
- return null; // Not relevant
- }
-
+ public List<Integer> getDataYears( Date earliest )
+ {
+ return null; // Not relevant
+ }
+
@Override
@Async
public Future<?> applyAggregationLevels( ConcurrentLinkedQueue<AnalyticsTable> tables, Collection<String> dataElements, int aggregationLevel )
=== modified file 'dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/JdbcEventAnalyticsTableManager.java'
--- dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/JdbcEventAnalyticsTableManager.java 2014-11-03 01:54:15 +0000
+++ dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/JdbcEventAnalyticsTableManager.java 2015-01-06 21:00:39 +0000
@@ -33,6 +33,7 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
+import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.concurrent.ConcurrentLinkedQueue;
@@ -68,27 +69,32 @@
@Override
@Transactional
- public List<AnalyticsTable> getTables( Date earliest, Date latest )
+ public List<AnalyticsTable> getTables( Date earliest )
{
- log.info( "Get tables using earliest: " + earliest + ", latest: " + latest );
+ log.info( "Get tables using earliest: " + earliest );
List<AnalyticsTable> tables = new ArrayList<>();
- if ( earliest != null && latest != null )
+ List<Integer> dataYears = getDataYears( earliest );
+
+ Collections.sort( dataYears );
+
+ String baseName = getTableName();
+
+ for ( Integer year : dataYears )
{
- String baseName = getTableName();
-
- List<Period> periods = PartitionUtils.getPeriods( earliest, latest );
-
- for ( Period period : periods )
+ Period period = PartitionUtils.getPeriod( year );
+
+ List<Integer> programs = getDataPrograms( period );
+
+ for ( Integer id : programs )
{
- for ( Program program : programService.getAllPrograms() )
- {
- AnalyticsTable table = new AnalyticsTable( baseName, null, period, program );
- List<String[]> dimensionColumns = getDimensionColumns( table );
- table.setDimensionColumns( dimensionColumns );
- tables.add( table );
- }
+ Program program = programService.getProgram( id );
+
+ AnalyticsTable table = new AnalyticsTable( baseName, null, period, program );
+ List<String[]> dimensionColumns = getDimensionColumns( table );
+ table.setDimensionColumns( dimensionColumns );
+ tables.add( table );
}
}
@@ -286,23 +292,38 @@
}
@Override
- public Date getEarliestData()
- {
- final String sql = "select min(psi.executiondate) from programstageinstance psi "
- + "where psi.executiondate is not null";
-
- return jdbcTemplate.queryForObject( sql, Date.class );
- }
-
- @Override
- public Date getLatestData()
- {
- final String sql = "select max(psi.executiondate) from programstageinstance psi "
- + "where psi.executiondate is not null";
-
- return jdbcTemplate.queryForObject( sql, Date.class );
- }
-
+ public List<Integer> getDataYears( Date earliest )
+ {
+ String sql =
+ "select distinct(extract(year from psi.executiondate)) " +
+ "from programstageinstance psi " +
+ "where psi.executiondate is not null";
+
+ if ( earliest != null )
+ {
+ sql += "and psi.executiondate >= '" + DateUtils.getMediumDateString( earliest ) + "'";
+ }
+
+ return jdbcTemplate.queryForList( sql, Integer.class );
+ }
+
+ private List<Integer> getDataPrograms( Period period )
+ {
+ final String start = DateUtils.getMediumDateString( period.getStartDate() );
+ final String end = DateUtils.getMediumDateString( period.getEndDate() );
+
+ final String sql =
+ "select distinct pi.programid " +
+ "from programstageinstance psi " +
+ "inner join programinstance pi on psi.programinstanceid = pi.programinstanceid " +
+ "where psi.executiondate >= '" + start + "' " +
+ "and psi.executiondate <= '" + end + "' " +
+ "and psi.organisationunitid is not null " +
+ "and psi.executiondate is not null";
+
+ return jdbcTemplate.queryForList( sql, Integer.class );
+ }
+
@Override
@Async
public Future<?> applyAggregationLevels( ConcurrentLinkedQueue<AnalyticsTable> tables,
=== modified file 'dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/JdbcOrgUnitTargetTableManager.java'
--- dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/JdbcOrgUnitTargetTableManager.java 2014-11-03 01:54:15 +0000
+++ dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/JdbcOrgUnitTargetTableManager.java 2015-01-06 21:00:39 +0000
@@ -48,7 +48,7 @@
{
@Override
@Transactional
- public List<AnalyticsTable> getTables( Integer lastYears )
+ public List<AnalyticsTable> getTables( Date earliest )
{
List<AnalyticsTable> tables = new ArrayList<>();
tables.add( new AnalyticsTable( getTableName(), getDimensionColumns( null ) ) );
@@ -163,17 +163,11 @@
}
@Override
- public Date getEarliestData()
- {
- return null; // Not relevant
- }
-
- @Override
- public Date getLatestData()
- {
- return null; // Not relevant
- }
-
+ public List<Integer> getDataYears( Date earliest )
+ {
+ return null; // Not relevant
+ }
+
@Override
@Async
public Future<?> applyAggregationLevels( ConcurrentLinkedQueue<AnalyticsTable> tables, Collection<String> dataElements, int aggregationLevel )
=== modified file 'dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/PartitionUtils.java'
--- dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/PartitionUtils.java 2014-09-22 09:56:45 +0000
+++ dhis-2/dhis-services/dhis-service-analytics/src/main/java/org/hisp/dhis/analytics/table/PartitionUtils.java 2015-01-06 21:00:39 +0000
@@ -29,6 +29,7 @@
*/
import java.util.ArrayList;
+import java.util.Calendar;
import java.util.Collection;
import java.util.Date;
import java.util.List;
@@ -43,6 +44,7 @@
import org.hisp.dhis.period.PeriodType;
import org.hisp.dhis.period.YearlyPeriodType;
import org.hisp.dhis.system.util.UniqueArrayList;
+import org.joda.time.DateTime;
/**
* @author Lars Helge Overland
@@ -52,22 +54,26 @@
private static final YearlyPeriodType PERIODTYPE = new YearlyPeriodType();
private static final String SEP = "_";
-
- public static List<Period> getPeriods( Date earliest, Date latest )
- {
- List<Period> periods = new ArrayList<>();
-
- Period period = PERIODTYPE.createPeriod( earliest );
-
- while ( period != null && period.getStartDate().before( latest ) )
+
+ public static Period getPeriod( Integer year )
+ {
+ DateTime time = new DateTime( year, 1, 1, 0, 0 );
+
+ return PERIODTYPE.createPeriod( time.toDate() );
+ }
+
+ public static Date getEarliestDate( Integer lastYears )
+ {
+ Date earliest = null;
+
+ if ( lastYears != null )
{
- periods.add( period );
- period = PERIODTYPE.getNextPeriod( period );
+ earliest = new Cal().now().subtract( Calendar.YEAR, ( lastYears - 1 ) ).set( 1, 1 ).time();
}
-
- return periods;
+
+ return earliest;
}
-
+
//TODO optimize by including required filter periods only
public static Partitions getPartitions( Period period, String tablePrefix, String tableSuffix, Set<String> validPartitions )