launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #21907
[Merge] lp:~cjwatson/launchpad/sorted-apache-logs-by-age into lp:launchpad
Colin Watson has proposed merging lp:~cjwatson/launchpad/sorted-apache-logs-by-age into lp:launchpad.
Commit message:
Parse Apache log files in ascending order of mtime.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/sorted-apache-logs-by-age/+merge/331890
It turns out that sorting by name isn't ideal when there's a large backlog, because we always end up working on the large ppa.launchpad.net-access.log first. It would make more sense to work on the oldest files first, since they're most likely to be lost to log rotation soon.
--
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/sorted-apache-logs-by-age into lp:launchpad.
=== modified file 'lib/lp/services/apachelogparser/base.py'
--- lib/lp/services/apachelogparser/base.py 2015-10-14 15:22:01 +0000
+++ lib/lp/services/apachelogparser/base.py 2017-10-05 19:07:01 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009 Canonical Ltd. This software is licensed under the
+# Copyright 2009-2017 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
from datetime import datetime
@@ -31,6 +31,7 @@
:param file_paths: The paths to the files.
"""
store = IStore(ParsedApacheLog)
+ file_paths = sorted(file_paths, key=lambda path: os.stat(path).st_mtime)
for file_path in file_paths:
fd, file_size = get_fd_and_file_size(file_path)
first_line = unicode(fd.readline())
=== modified file 'lib/lp/services/apachelogparser/script.py'
--- lib/lp/services/apachelogparser/script.py 2017-10-04 18:48:36 +0000
+++ lib/lp/services/apachelogparser/script.py 2017-10-05 19:07:01 +0000
@@ -77,7 +77,7 @@
# disappears before we get around to parsing it, which is
# desirable behaviour.
files_to_parse = list(get_files_to_parse(
- sorted(glob.glob(os.path.join(self.root, self.log_file_glob)))))
+ glob.glob(os.path.join(self.root, self.log_file_glob))))
country_set = getUtility(ICountrySet)
parsed_lines = 0
=== modified file 'lib/lp/services/apachelogparser/tests/test_apachelogparser.py'
--- lib/lp/services/apachelogparser/tests/test_apachelogparser.py 2015-10-14 15:22:01 +0000
+++ lib/lp/services/apachelogparser/tests/test_apachelogparser.py 2017-10-05 19:07:01 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2011 Canonical Ltd. This software is licensed under the
+# Copyright 2009-2017 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
from datetime import datetime
@@ -7,6 +7,9 @@
import os
from StringIO import StringIO
import tempfile
+import time
+
+from fixtures import TempDir
from lp.services.apachelogparser.base import (
create_or_update_parsedlog_entry,
@@ -22,6 +25,7 @@
from lp.services.database.interfaces import IStore
from lp.services.librarianserver.apachelogparser import DBUSER
from lp.services.log.logger import BufferLogger
+from lp.services.osutils import write_file
from lp.testing import TestCase
from lp.testing.dbuser import switch_dbuser
from lp.testing.layers import (
@@ -385,6 +389,20 @@
super(TestParsedFilesDetection, self).setUp()
switch_dbuser(DBUSER)
+ def test_sorts_by_mtime(self):
+ # Files are sorted by ascending mtime.
+ root = self.useFixture(TempDir()).path
+ file_paths = [os.path.join(root, str(name)) for name in range(3)]
+ now = time.time()
+ for i, path in enumerate(file_paths):
+ write_file(path, '%s\n' % i)
+ os.utime(path, (now - i, now - i))
+ contents = []
+ for fd, _ in get_files_to_parse(file_paths):
+ fd.seek(0)
+ contents.append(fd.read())
+ self.assertEqual(['2\n', '1\n', '0\n'], contents)
+
def test_not_parsed_file(self):
# A file that has never been parsed will have to be parsed from the
# start.
Follow ups