← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~cjwatson/launchpad/sorted-apache-logs-by-age into lp:launchpad

 

Colin Watson has proposed merging lp:~cjwatson/launchpad/sorted-apache-logs-by-age into lp:launchpad.

Commit message:
Parse Apache log files in ascending order of mtime.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/sorted-apache-logs-by-age/+merge/331890

It turns out that sorting by name isn't ideal when there's a large backlog, because we always end up working on the large ppa.launchpad.net-access.log first.  It would make more sense to work on the oldest files first, since they're most likely to be lost to log rotation soon.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/sorted-apache-logs-by-age into lp:launchpad.
=== modified file 'lib/lp/services/apachelogparser/base.py'
--- lib/lp/services/apachelogparser/base.py	2015-10-14 15:22:01 +0000
+++ lib/lp/services/apachelogparser/base.py	2017-10-05 19:07:01 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2017 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 from datetime import datetime
@@ -31,6 +31,7 @@
     :param file_paths: The paths to the files.
     """
     store = IStore(ParsedApacheLog)
+    file_paths = sorted(file_paths, key=lambda path: os.stat(path).st_mtime)
     for file_path in file_paths:
         fd, file_size = get_fd_and_file_size(file_path)
         first_line = unicode(fd.readline())

=== modified file 'lib/lp/services/apachelogparser/script.py'
--- lib/lp/services/apachelogparser/script.py	2017-10-04 18:48:36 +0000
+++ lib/lp/services/apachelogparser/script.py	2017-10-05 19:07:01 +0000
@@ -77,7 +77,7 @@
         # disappears before we get around to parsing it, which is
         # desirable behaviour.
         files_to_parse = list(get_files_to_parse(
-            sorted(glob.glob(os.path.join(self.root, self.log_file_glob)))))
+            glob.glob(os.path.join(self.root, self.log_file_glob))))
 
         country_set = getUtility(ICountrySet)
         parsed_lines = 0

=== modified file 'lib/lp/services/apachelogparser/tests/test_apachelogparser.py'
--- lib/lp/services/apachelogparser/tests/test_apachelogparser.py	2015-10-14 15:22:01 +0000
+++ lib/lp/services/apachelogparser/tests/test_apachelogparser.py	2017-10-05 19:07:01 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2011 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2017 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 from datetime import datetime
@@ -7,6 +7,9 @@
 import os
 from StringIO import StringIO
 import tempfile
+import time
+
+from fixtures import TempDir
 
 from lp.services.apachelogparser.base import (
     create_or_update_parsedlog_entry,
@@ -22,6 +25,7 @@
 from lp.services.database.interfaces import IStore
 from lp.services.librarianserver.apachelogparser import DBUSER
 from lp.services.log.logger import BufferLogger
+from lp.services.osutils import write_file
 from lp.testing import TestCase
 from lp.testing.dbuser import switch_dbuser
 from lp.testing.layers import (
@@ -385,6 +389,20 @@
         super(TestParsedFilesDetection, self).setUp()
         switch_dbuser(DBUSER)
 
+    def test_sorts_by_mtime(self):
+        # Files are sorted by ascending mtime.
+        root = self.useFixture(TempDir()).path
+        file_paths = [os.path.join(root, str(name)) for name in range(3)]
+        now = time.time()
+        for i, path in enumerate(file_paths):
+            write_file(path, '%s\n' % i)
+            os.utime(path, (now - i, now - i))
+        contents = []
+        for fd, _ in get_files_to_parse(file_paths):
+            fd.seek(0)
+            contents.append(fd.read())
+        self.assertEqual(['2\n', '1\n', '0\n'], contents)
+
     def test_not_parsed_file(self):
         # A file that has never been parsed will have to be parsed from the
         # start.


Follow ups