← Back to team overview

anewt-developers team mailing list archive

[Branch ~uws/anewt/anewt.uws] Rev 1802: [core] Return early when parsing date strings

 

------------------------------------------------------------
revno: 1802
committer: Wouter Bolsterlee <uws@xxxxxxxxx>
branch nick: anewt
timestamp: Tue 2010-10-26 21:27:26 +0200
message:
  [core] Return early when parsing date strings
  
  AnewtDateTime::parse_string() applied every regex used to
  test date and time formats, even after a match had already
  been found. This change makes the function return as soon as
  a match is found. The most frequently encountered date
  formats are now also tested first, to avoid useless regex
  matching in common cases.
  
  And while we're at it: some code cleanups, including the use
  of proper variable names (clearer and a bit more verbose).
modified:
  core/datetime.lib.php
  core/datetime.test.php


--
lp:anewt
https://code.launchpad.net/~uws/anewt/anewt.uws

Your team Anewt developers is subscribed to branch lp:anewt.
To unsubscribe from this branch go to https://code.launchpad.net/~uws/anewt/anewt.uws/+edit-subscription
=== modified file 'core/datetime.lib.php'
--- core/datetime.lib.php	2010-10-26 19:09:10 +0000
+++ core/datetime.lib.php	2010-10-26 19:27:26 +0000
@@ -201,8 +201,8 @@
 	{
 		assert('is_int($timestamp);');
 
-		list ($y, $m, $d, $h, $i, $s) = explode(' ', date('Y m d H i s', $timestamp));
-		$d = new AnewtDateTimeAtom((int) $y, (int) $m, (int) $d, (int) $h, (int) $i, (int) $s);
+		list ($year, $month, $day, $hour, $minute, $second) = explode(' ', date('Y m d H i s', $timestamp));
+		$d = new AnewtDateTimeAtom($year, $month, $day, $hour, $minute, $second);
 		return $d;
 	}
 
@@ -225,130 +225,194 @@
 		if (strlen($date) == 0)
 			return null;
 
-
-		/* Defaults */
-
-		$y = false; $m = 1; $d = 1; // date
-		$h = 0; $i = 0; $s = 0; // time
-
-
-		/* Date only */
-
-		// ISO 8601: 2005
+		$date_parsed = AnewtDateTime::real_parse_string($date);
+
+		if (!$date_parsed)
+			return null;
+
+		list ($year, $month, $day, $hour, $minute, $second) = $date_parsed;
+
+		if (!AnewtDateTime::is_valid_date_ymd($year, $month, $day))
+			return null;
+
+		if (!AnewtDateTime::is_valid_time_hms($hour, $minute, $second))
+			return null;
+
+		return new AnewtDateTimeAtom($year, $month, $day, $hour, $minute, $second);
+	}
+
+	/**
+	 * Real implementation of parse_string.
+	 *
+	 * \param $date
+	 */
+	static private function real_parse_string($date)
+	{
+		/* Default values */
+
+		$year = 0; $month = 1; $day = 1;
+		$hour = 0; $minute = 0; $second = 0;
+
+		/* The most common formats (just a guess) are put first, so that no time
+		 * is wasted testing uncommon regular expressions */
+
+		/* SQL92 DATETIME, e.g. 2005-10-31 12:00:00 */
+		$pattern = sprintf(
+			'/^(%s)-(%s)-(%s) (%s):(%s):(%s)(\.\d+)?(%s)?$/',
+			DATETIME_RE_YEAR, DATETIME_RE_MONTH, DATETIME_RE_DAY,
+			DATETIME_RE_HOUR, DATETIME_RE_MINUTE, DATETIME_RE_SECOND,
+			DATETIME_RE_TIMEZONE);
+		if (preg_match($pattern, $date, $matches) === 1)
+		{
+			$year = $matches[1];
+			$month = $matches[3];
+			$day = $matches[5];
+			$hour = $matches[7];
+			$minute = $matches[9];
+			$second = $matches[11];
+			return array($year, $month, $day, $hour, $minute, $second);
+		}
+
+
+		/* ISO 8601 (year, month, and day), e.g. 2005-10-31 */
+		$pattern = sprintf(
+			'/^(%s)-?(%s)-?(%s)$/',
+			DATETIME_RE_YEAR, DATETIME_RE_MONTH, DATETIME_RE_DAY);
+		if (preg_match($pattern, $date, $matches) === 1)
+		{
+			$year = $matches[1];
+			$month = $matches[3];
+			$day = $matches[5];
+			return array($year, $month, $day, $hour, $minute, $second);
+		}
+
+
+		/* Hour, minute, second */
+		$pattern = sprintf(
+			'/^(%s):(%s):(%s)$/',
+			DATETIME_RE_HOUR, DATETIME_RE_MINUTE, DATETIME_RE_SECOND);
+		if (preg_match($pattern, $date, $matches) === 1)
+		{
+			list ($year, $month, $day) = explode('-', strftime('%Y-%m-%d'));
+			$hour = $matches[1];
+			$minute = $matches[3];
+			$second = $matches[5];
+			return array($year, $month, $day, $hour, $minute, $second);
+		}
+
+
+		/* Full ISO 8601, e.g. 2005-10-31T12:00:00 (with and without T) */
+		$pattern = sprintf(
+			'/^(%s)-?(%s)-?(%s)[T ](%s):?(%s):?(%s)$/',
+			DATETIME_RE_YEAR, DATETIME_RE_MONTH, DATETIME_RE_DAY,
+			DATETIME_RE_HOUR, DATETIME_RE_MINUTE, DATETIME_RE_SECOND);
+		if (preg_match($pattern, $date, $matches) === 1)
+		{
+			$year = $matches[1];
+			$month = $matches[3];
+			$day = $matches[5];
+			$hour = $matches[7];
+			$minute = $matches[9];
+			$second = $matches[11];
+			return array($year, $month, $day, $hour, $minute, $second);
+		}
+
+
+		/* Hour, minute (without seconds) */
+		$pattern = sprintf(
+			'/^(%s):(%s)$/',
+			DATETIME_RE_HOUR, DATETIME_RE_MINUTE);
+		if (preg_match($pattern, $date, $matches) === 1)
+		{
+			list ($year, $month, $day) = explode('-', strftime('%Y-%m-%d'));
+			$hour = $matches[1];
+			$minute = $matches[3];
+			$second = 0;
+			return array($year, $month, $day, $hour, $minute, $second);
+		}
+
+
+		/* ISO 8601 (year only), e.g. 2005 */
 		$pattern = sprintf('/^(%s)$/', DATETIME_RE_YEAR);
 		if (preg_match($pattern, $date, $matches) === 1)
-			$y = $matches[1];
-
-
-		// ISO 8601: 2005-10
-		$pattern = sprintf('/^(%s)-?(%s)$/', DATETIME_RE_YEAR,
-				DATETIME_RE_MONTH);
+		{
+			$year = $matches[1];
+			return array($year, $month, $day, $hour, $minute, $second);
+		}
+
+
+		/* ISO 8601 (year and month), e.g. 2005-10 */
+		$pattern = sprintf(
+			'/^(%s)-?(%s)$/',
+			DATETIME_RE_YEAR, DATETIME_RE_MONTH);
 		if (preg_match($pattern, $date, $matches))
 		{
-			$y = $matches[1];
-			$m = $matches[3];
-		}
-
-		// ISO 8601: 2005-10-31
-		$pattern = sprintf('/^(%s)-?(%s)-?(%s)$/', DATETIME_RE_YEAR,
-				DATETIME_RE_MONTH, DATETIME_RE_DAY);
-		if (preg_match($pattern, $date, $matches) === 1)
-		{
-			$y = $matches[1];
-			$m = $matches[3];
-			$d = $matches[5];
-		}
-
-
-		/* Time only */
-
-		/* Time without seconds */
-		$pattern = sprintf('/^(%s):(%s)$/', DATETIME_RE_HOUR, DATETIME_RE_MINUTE);
-		if (preg_match($pattern, $date, $matches) === 1)
-		{
-			list ($y, $m, $d) = explode('-', strftime('%Y-%m-%d'));
-			$y = (int) $y;
-			$m = (int) $m;
-			$d = (int) $d;
-			$h = $matches[1];
-			$i = $matches[3];
-			$s = 0;
-		}
-
-		/* Time with seconds */
-		$pattern = sprintf('/^(%s):(%s):(%s)$/', DATETIME_RE_HOUR,
-				DATETIME_RE_MINUTE, DATETIME_RE_SECOND);
-		if (preg_match($pattern, $date, $matches) === 1)
-		{
-			list ($y, $m, $d) = explode('-', strftime('%Y-%m-%d'));
-			$y = (int) $y;
-			$m = (int) $m;
-			$d = (int) $d;
-			$h = $matches[1];
-			$i = $matches[3];
-			$s = $matches[5];
-		}
-
-
-		/* Date and time */
-
-		// SQL92 DATETIME: 2005-10-31 12:00:00
-		$pattern = sprintf('/^(%s)-(%s)-(%s) (%s):(%s):(%s)(\.\d+)?(%s)?$/', DATETIME_RE_YEAR,
-				DATETIME_RE_MONTH, DATETIME_RE_DAY, DATETIME_RE_HOUR, DATETIME_RE_MINUTE,
-				DATETIME_RE_SECOND, DATETIME_RE_TIMEZONE);
-		if (preg_match($pattern, $date, $matches) === 1)
-		{
-			$y = $matches[1];
-			$m = $matches[3];
-			$d = $matches[5];
-			$h = $matches[7];
-			$i = $matches[9];
-			$s = $matches[11];
-		}
-
-		// ISO 8601: 2005-10-31T12:00:00 (with and without T)
-		$pattern = sprintf('/^(%s)-?(%s)-?(%s)[T ](%s):?(%s):?(%s)$/',
-				DATETIME_RE_YEAR, DATETIME_RE_MONTH, DATETIME_RE_DAY,
-				DATETIME_RE_HOUR, DATETIME_RE_MINUTE, DATETIME_RE_SECOND);
-		if (preg_match($pattern, $date, $matches) === 1)
-		{
-			$y = $matches[1];
-			$m = $matches[3];
-			$d = $matches[5];
-			$h = $matches[7];
-			$i = $matches[9];
-			$s = $matches[11];
-		}
-
-		// ISO 8601: 2005-10-31T12:00 (with and without T)
-		$pattern = sprintf('/^(%s)-?(%s)-?(%s)[T ](%s):?(%s)$/',
-				DATETIME_RE_YEAR, DATETIME_RE_MONTH, DATETIME_RE_DAY,
-				DATETIME_RE_HOUR, DATETIME_RE_MINUTE);
-		if (preg_match($pattern, $date, $matches) === 1)
-		{
-			$y = $matches[1];
-			$m = $matches[3];
-			$d = $matches[5];
-			$h = $matches[7];
-			$i = $matches[9];
-		}
-
-		// ISO 8601: 2005-123
-		$pattern = sprintf('/^(%s)-?(%s)$/', DATETIME_RE_YEAR, DATETIME_RE_DAY_OF_YEAR);
-		if (preg_match($pattern, $date, $matches) === 1)
-		{
-			$y = $matches[1];
+			$year = $matches[1];
+			$month = $matches[3];
+			return array($year, $month, $day, $hour, $minute, $second);
+		}
+
+
+		/* Full ISO 8601 without seconds, e.g. 2005-10-31T12:00 (with and without T) */
+		$pattern = sprintf(
+			'/^(%s)-?(%s)-?(%s)[T ](%s):?(%s)$/',
+			DATETIME_RE_YEAR, DATETIME_RE_MONTH, DATETIME_RE_DAY,
+			DATETIME_RE_HOUR, DATETIME_RE_MINUTE);
+		if (preg_match($pattern, $date, $matches) === 1)
+		{
+			$year = $matches[1];
+			$month = $matches[3];
+			$day = $matches[5];
+			$hour = $matches[7];
+			$minute = $matches[9];
+			return array($year, $month, $day, $hour, $minute, $second);
+		}
+
+
+		/* RFC 2822 format, e.g. Tue, 10 Jun 2003 04:00:00 GMT */
+		$pattern = sprintf(
+			'/^%s,? %s %s %s %s:%s:%s %s$/',
+			DATETIME_RE_DAY_NAMES_ABBR,
+			DATETIME_RE_DAY, DATETIME_RE_MONTH_NAMES_ABBR, DATETIME_RE_YEAR,
+			DATETIME_RE_HOUR, DATETIME_RE_MINUTE, DATETIME_RE_SECOND,
+			DATETIME_RE_TIMEZONE);
+		if (preg_match($pattern, $date, $matches) === 1)
+		{
+			$year = $matches[4];
+			$month_names_abbr = array(1 => 'Jan', 'Feb', 'Mar', 'Apr', 'May',
+					'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec');
+			$month_names_abbr = array_flip($month_names_abbr);
+			$month = $month_names_abbr[$matches[3]];
+			$day = $matches[2];
+			$hour = $matches[5];
+			$minute = $matches[6];
+			$second = $matches[7];
+			return array($year, $month, $day, $hour, $minute, $second);
+		}
+
+
+		/* ISO 8601 year and day-of-year, e.g. 2005-123 */
+		$pattern = sprintf(
+			'/^(%s)-?(%s)$/',
+			DATETIME_RE_YEAR, DATETIME_RE_DAY_OF_YEAR);
+		if (preg_match($pattern, $date, $matches) === 1)
+		{
+			$year = $matches[1];
 			$timestamp = mktime(0, 0, 0, 1, $matches[3], $matches[1]);
-			$m = (int) strftime('%m', $timestamp);
-			$d = (int) strftime('%d', $timestamp);
+			$month = strftime('%m', $timestamp);
+			$day = strftime('%d', $timestamp);
+			return array($year, $month, $day, $hour, $minute, $second);
 		}
 
-		// ISO 8601: 2005-W02
+
+		/* ISO 8601 year and week number, e.g. 2005-W02 */
 		/* TODO: Dates specified by year, week and day. */
-		$pattern = sprintf('/^(%s)-?W(%s)$/', DATETIME_RE_YEAR, DATETIME_RE_WEEK_OF_YEAR);
+		$pattern = sprintf(
+			'/^(%s)-?W(%s)$/',
+			DATETIME_RE_YEAR, DATETIME_RE_WEEK_OF_YEAR);
 		if (preg_match($pattern, $date, $matches) === 1)
 		{
-			$y_not_final = (int) $matches[1];
+			$year_not_final = (int) $matches[1];
 			$w = (int) $matches[3];
 
 			/* Too bad, it seems strptime() cannot parse %V correctly, so we
@@ -365,7 +429,7 @@
 			/* Find the highest possible week number for the year... */
 			$last_day_of_the_last_week_of_the_year = 31;
 			do {
-				$w_max = (int) strftime('%V', mktime(0, 0, 0, 12, $last_day_of_the_last_week_of_the_year, $y_not_final));
+				$w_max = (int) strftime('%V', mktime(0, 0, 0, 12, $last_day_of_the_last_week_of_the_year, $year_not_final));
 				$last_day_of_the_last_week_of_the_year--;
 			} while ($w_max == 1);
 
@@ -379,59 +443,39 @@
 				while ((int) strftime('%V', $ts) != $w)
 					$ts += 24 * 60 * 60;
 
-				list ($y, $m, $d) = explode('-', strftime('%Y-%m-%d', $ts));
+				list ($year, $month, $day) = explode('-', strftime('%Y-%m-%d', $ts));
 			}
-		}
-
-
-		// MS SQL default format: Mon Jan 23 00:00:00 2006
-		$pattern = sprintf('/^%s,? %s %s %s:?%s:?%s %s$/',
-				DATETIME_RE_DAY_NAMES_ABBR, DATETIME_RE_MONTH_NAMES_ABBR,
-				DATETIME_RE_DAY, DATETIME_RE_HOUR, DATETIME_RE_MINUTE,
-				DATETIME_RE_SECOND, DATETIME_RE_YEAR);
-		if (preg_match($pattern, $date, $matches) === 1)
-		{
-			$y = $matches[7];
-			$month_names_abbr = array(1 => 'Jan', 'Feb', 'Mar', 'Apr', 'May',
-				'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec');
-			$month_names_abbr = array_flip($month_names_abbr);
-			$m = $month_names_abbr[$matches[2]];
-			$d = $matches[3];
-
-			$h = $matches[4];
-			$i = $matches[5];
-			$s = $matches[6];
-		}
-		
-		// RFC 2822 format: Tue, 10 Jun 2003 04:00:00 GMT
-		$pattern = sprintf('/^%s,? %s %s %s %s:%s:%s %s$/',
-				DATETIME_RE_DAY_NAMES_ABBR, DATETIME_RE_DAY,
-				DATETIME_RE_MONTH_NAMES_ABBR, DATETIME_RE_YEAR,
-				DATETIME_RE_HOUR, DATETIME_RE_MINUTE,
-				DATETIME_RE_SECOND, DATETIME_RE_TIMEZONE);
-		if (preg_match($pattern, $date, $matches) === 1)
-		{
-			$y = $matches[4];
-			$month_names_abbr = array(1 => 'Jan', 'Feb', 'Mar', 'Apr', 'May',
-					'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec');
-			$month_names_abbr = array_flip($month_names_abbr);
-			$m = $month_names_abbr[$matches[3]];
-			$d = $matches[2];
-			
-			$h = $matches[5];
-			$i = $matches[6];
-			$s = $matches[7];
-		}
-
-		/* Parsing done */
-
-		if (!AnewtDateTime::is_valid_date_ymd($y, $m, $d))
-			return null;
-
-		if (!AnewtDateTime::is_valid_time_hms($h, $i, $s))
-			return null;
-
-		return new AnewtDateTimeAtom($y, $m, $d, $h, $i, $s);
+
+			/* Abort if no year was extracted */
+			if (!$year)
+				return null;
+
+			return array($year, $month, $day, $hour, $minute, $second);
+		}
+
+
+		/* MS SQL default format, e.g. Mon Jan 23 00:00:00 2006 */
+		$pattern = sprintf(
+			'/^%s,? %s %s %s:?%s:?%s %s$/',
+			DATETIME_RE_DAY_NAMES_ABBR, DATETIME_RE_MONTH_NAMES_ABBR, DATETIME_RE_DAY,
+			DATETIME_RE_HOUR, DATETIME_RE_MINUTE, DATETIME_RE_SECOND,
+			DATETIME_RE_YEAR);
+		if (preg_match($pattern, $date, $matches) === 1)
+		{
+			$year = $matches[7];
+			$month_names_abbr = array(1 => 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec');
+			$month_names_abbr = array_flip($month_names_abbr);
+			$month = $month_names_abbr[$matches[2]];
+			$day = $matches[3];
+			$hour = $matches[4];
+			$minute = $matches[5];
+			$second = $matches[6];
+			return array($year, $month, $day, $hour, $minute, $second);
+		}
+
+
+		/* Nothing matched */
+		return null;
 	}
 
 	/**
@@ -965,7 +1009,7 @@
 	 *
 	 * \return
 	 *   A string in yyyy-mm-dd hh:mm:ss format.
-	 * 
+	 *
 	 * \see AnewtDateTime::sql_date
 	 * \see AnewtDateTime::sql_time
 	 */
@@ -1011,7 +1055,7 @@
 	 *
 	 * \return
 	 *   A string in hh:mm:ss format.
-	 * 
+	 *
 	 * \see AnewtDateTime::sql
 	 * \see AnewtDateTime::sql_date
 	 */

=== modified file 'core/datetime.test.php'
--- core/datetime.test.php	2010-03-27 20:50:26 +0000
+++ core/datetime.test.php	2010-10-26 19:27:26 +0000
@@ -39,8 +39,6 @@
 f('1984-060');
 f('1984060');
 
-f('This one should fail');
-
 f('1983-01-15T18:30:00');
 
 f('Mon Jan 23 02:04:06 2006');
@@ -56,12 +54,13 @@
 f('2010-W01');
 f('2010-W02');
 f('2009W53');
-echo 'This one should be empty:', NL;
-f('2010-W53');
 f('2013-W01');
 
 echo 'There should only be empty lines below:', NL;
+f('this-is-not-a-date');
 f(null);
+f('2010-W53');
+f('2013-W62');
 
 
 assert('AnewtDateTime::date(AnewtDateTime::parse("2008-12-09")) === "2008-12-09";');