← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~wgrant/launchpad/bug-1410592 into lp:launchpad

 

William Grant has proposed merging lp:~wgrant/launchpad/bug-1410592 into lp:launchpad.

Commit message:
Fix librarian-gc to treat .migrated filenames like the originals, so migrated files can be GCed and not doom us.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  Bug #1410592 in Launchpad itself: "librarian-gc should treat .migrated files just like the originals"
  https://bugs.launchpad.net/launchpad/+bug/1410592

For more details, see:
https://code.launchpad.net/~wgrant/launchpad/bug-1410592/+merge/246389

librarian-gc was recently adjusted to ignore any files ending in ".migrated", so as to avoid whining about files that have been migrated to Swift. But it should instead treat them just like the original filename -- they still want to be expired if they still exist locally.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~wgrant/launchpad/bug-1410592 into lp:launchpad.
=== modified file 'lib/lp/services/librarianserver/librariangc.py'
--- lib/lp/services/librarianserver/librariangc.py	2014-12-16 10:54:15 +0000
+++ lib/lp/services/librarianserver/librariangc.py	2015-01-14 05:41:13 +0000
@@ -616,7 +616,7 @@
     removed_count = 0
     content_id = next_wanted_content_id = -1
 
-    hex_content_id_re = re.compile('^[0-9a-f]{8}$')
+    hex_content_id_re = re.compile('^([0-9a-f]{8})(\.migrated)?$')
     ONE_DAY = 24 * 60 * 60
 
     for dirpath, dirnames, filenames in os.walk(
@@ -627,8 +627,7 @@
             dirnames.remove('incoming')
         if 'lost+found' in dirnames:
             dirnames.remove('lost+found')
-        filenames = set(fn for fn in filenames
-                        if not fn.endswith('.migrated'))
+        filenames = set(filenames)
         filenames.discard('librarian.pid')
         filenames.discard('librarian.log')
 
@@ -660,12 +659,13 @@
         for filename in filenames:
             path = os.path.join(dirpath, filename)
             hex_content_id = ''.join(path.split(os.sep)[-4:])
-            if hex_content_id_re.search(hex_content_id) is None:
+            match = hex_content_id_re.search(hex_content_id)
+            if match is None:
                 log.warning(
                     "Ignoring invalid path %s" % path)
                 continue
 
-            content_id = int(hex_content_id, 16)
+            content_id = int(match.groups()[0], 16)
 
             while (next_wanted_content_id is not None
                     and content_id > next_wanted_content_id):

=== modified file 'lib/lp/services/librarianserver/tests/test_gc.py'
--- lib/lp/services/librarianserver/tests/test_gc.py	2014-12-16 10:51:41 +0000
+++ lib/lp/services/librarianserver/tests/test_gc.py	2015-01-14 05:41:13 +0000
@@ -19,6 +19,7 @@
 import sys
 import tempfile
 
+from contextlib import contextmanager
 from sqlobject import SQLObjectNotFound
 from swiftclient import client as swiftclient
 import transaction
@@ -437,6 +438,25 @@
                 len(results), 0, 'Too many results %r' % (results,)
                 )
 
+    @contextmanager
+    def librariangc_thinking_it_is_tomorrow(self):
+        org_time = librariangc.time
+        org_utcnow = librariangc._utcnow
+
+        def tomorrow_time():
+            return org_time() + 24 * 60 * 60 + 1
+
+        def tomorrow_utcnow():
+            return datetime.utcnow() + timedelta(days=1, seconds=1)
+
+        try:
+            librariangc.time = tomorrow_time
+            librariangc._utcnow = tomorrow_utcnow
+            yield
+        finally:
+            librariangc.time = org_time
+            librariangc._utcnow = org_utcnow
+
     def test_deleteUnwantedFiles(self):
         self.ztm.begin()
         cur = cursor()
@@ -473,22 +493,8 @@
 
         # To test removal does occur when we want it to, we need to trick
         # the garbage collector into thinking it is tomorrow.
-        org_time = librariangc.time
-        org_utcnow = librariangc._utcnow
-
-        def tomorrow_time():
-            return org_time() + 24 * 60 * 60 + 1
-
-        def tomorrow_utcnow():
-            return datetime.utcnow() + timedelta(days=1, seconds=1)
-
-        try:
-            librariangc.time = tomorrow_time
-            librariangc._utcnow = tomorrow_utcnow
+        with self.librariangc_thinking_it_is_tomorrow():
             librariangc.delete_unwanted_files(self.con)
-        finally:
-            librariangc.time = org_time
-            librariangc._utcnow = org_utcnow
 
         self.failIf(self.file_exists(content_id))
 
@@ -578,18 +584,10 @@
 
         # To test this function raises an excption when it should,
         # the garbage collector into thinking it is tomorrow.
-        org_time = librariangc.time
-
-        def tomorrow_time():
-            return org_time() + 24 * 60 * 60 + 1
-
-        try:
-            librariangc.time = tomorrow_time
+        with self.librariangc_thinking_it_is_tomorrow():
             self.assertRaises(
                 Exception, librariangc.confirm_no_clock_skew, (self.con,)
                 )
-        finally:
-            librariangc.time = org_time
 
 
 class TestDiskLibrarianGarbageCollection(
@@ -603,6 +601,39 @@
         path = librariangc.get_file_path(content_id)
         os.unlink(path)
 
+    def test_delete_unwanted_files_handles_migrated(self):
+        # Files that have been uploaded to Swift have ".migrated"
+        # appended to their names. These are treated just like the
+        # original file, ignoring the extension.
+        switch_dbuser('testadmin')
+        content = 'foo'
+        lfa = LibraryFileAlias.get(self.client.addFile(
+            'foo.txt', len(content), StringIO(content), 'text/plain'))
+        id_aborted = lfa.contentID
+        # Roll back the database changes, leaving the file on disk.
+        transaction.abort()
+
+        lfa = LibraryFileAlias.get(self.client.addFile(
+            'bar.txt', len(content), StringIO(content), 'text/plain'))
+        transaction.commit()
+        id_committed = lfa.contentID
+
+        switch_dbuser(config.librarian_gc.dbuser)
+
+        # Now, we will move the directory containing the trash somewhere else
+        # and make a symlink to it.
+        path_aborted = librariangc.get_file_path(id_aborted)
+        os.rename(path_aborted, path_aborted + '.migrated')
+
+        path_committed = librariangc.get_file_path(id_committed)
+        os.rename(path_committed, path_committed + '.migrated')
+
+        with self.librariangc_thinking_it_is_tomorrow():
+            librariangc.delete_unwanted_files(self.con)
+
+        self.assertFalse(os.path.exists(path_aborted + '.migrated'))
+        self.assertTrue(os.path.exists(path_committed + '.migrated'))
+
     def test_deleteUnwantedFilesIgnoresNoise(self):
         # Directories with invalid names in the storage area are
         # ignored. They are reported as warnings though.
@@ -616,8 +647,10 @@
         # Long non-hexadecimal number
         noisedir3_path = os.path.join(config.librarian_server.root, '11.bak')
 
-        # A file we will migrate to Swift using the --rename feed-swift option.
-        migrated_path = librariangc.get_file_path(8769786) + '.migrated'
+        # A file with the ".migrated" suffix has migrated to Swift but
+        # may still be removed from disk as unwanted by the GC. Other
+        # suffixes are unknown and stay around.
+        migrated_path = librariangc.get_file_path(8769786) + '.noise'
 
         try:
             os.mkdir(noisedir1_path)
@@ -636,16 +669,8 @@
 
             # Pretend it is tomorrow to ensure the files don't count as
             # recently created, and run the delete_unwanted_files process.
-            org_time = librariangc.time
-
-            def tomorrow_time():
-                return org_time() + 24 * 60 * 60 + 1
-
-            try:
-                librariangc.time = tomorrow_time
+            with self.librariangc_thinking_it_is_tomorrow():
                 librariangc.delete_unwanted_files(self.con)
-            finally:
-                librariangc.time = org_time
 
             # None of the rubbish we created has been touched.
             self.assert_(os.path.isdir(noisedir1_path))


Follow ups