← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] ~cjwatson/launchpad:swift-files-deduplicate into launchpad:master

 

Colin Watson has proposed merging ~cjwatson/launchpad:swift-files-deduplicate into launchpad:master.

Commit message:
Tolerate duplicate files from Swift get_container

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/+git/launchpad/+merge/387835

The librarian's GC process has been failing with errors such as this for some time:

  ClientException: Object DELETE failed: ... 404 Not Found  [first 60 chars of response] <html><h1>Not Found</h1><p>The resource could not be found.

It's not completely clear why this is happening, but one possibility is that swift_connection.get_container is returning duplicate files when iterating over multiple batches or similar, so deduplicate its response.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/launchpad:swift-files-deduplicate into launchpad:master.
diff --git a/lib/lp/services/librarianserver/librariangc.py b/lib/lp/services/librarianserver/librariangc.py
index 19900e4..88c9af9 100644
--- a/lib/lp/services/librarianserver/librariangc.py
+++ b/lib/lp/services/librarianserver/librariangc.py
@@ -1,4 +1,4 @@
-# Copyright 2009-2018 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2020 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """Librarian garbage collection routines"""
@@ -750,15 +750,18 @@ def swift_files(max_lfc_id):
         while container != final_container:
             container_num += 1
             container = swift.SWIFT_CONTAINER_PREFIX + str(container_num)
+            seen_names = set()
             try:
-                names = sorted(
+                objs = sorted(
                     swift.quiet_swiftclient(
                         swift_connection.get_container,
                         container, full_listing=True)[1],
                     key=lambda x: [
                         int(segment) for segment in x['name'].split('/')])
-                for name in names:
-                    yield (container, name)
+                for obj in objs:
+                    if obj['name'] not in seen_names:
+                        yield (container, obj)
+                    seen_names.add(obj['name'])
             except swiftclient.ClientException as x:
                 if x.http_status == 404:
                     continue
diff --git a/lib/lp/services/librarianserver/tests/test_gc.py b/lib/lp/services/librarianserver/tests/test_gc.py
index 479c51c..d59dde0 100644
--- a/lib/lp/services/librarianserver/tests/test_gc.py
+++ b/lib/lp/services/librarianserver/tests/test_gc.py
@@ -1,4 +1,4 @@
-# Copyright 2009-2017 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2020 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """Librarian garbage collection tests"""
@@ -23,6 +23,7 @@ from subprocess import (
 import sys
 import tempfile
 
+from fixtures import MockPatchObject
 import pytz
 from sqlobject import SQLObjectNotFound
 from storm.store import Store
@@ -796,6 +797,64 @@ class TestSwiftLibrarianGarbageCollection(
         self.assertEqual([False, False, False], segment_existence(big1_id))
         self.assertEqual([True, True, True], segment_existence(big2_id))
 
+    def test_delete_unwanted_files_handles_duplicates(self):
+        # GC tolerates swift_connection.get_container returning duplicate
+        # names (perhaps across multiple batches).  It's not clear whether
+        # this happens in practice, but some strange deletion failures
+        # suggest that it might happen.
+        switch_dbuser('testadmin')
+        content = b'uploading to swift'
+        f1_lfa = LibraryFileAlias.get(self.client.addFile(
+            'foo.txt', len(content), io.BytesIO(content), 'text/plain'))
+        f1_id = f1_lfa.contentID
+
+        f2_lfa = LibraryFileAlias.get(self.client.addFile(
+            'bar.txt', len(content), io.BytesIO(content), 'text/plain'))
+        f2_id = f2_lfa.contentID
+        transaction.commit()
+
+        for lfc_id in (f1_id, f2_id):
+            # Make the files old so they don't look in-progress.
+            os.utime(swift.filesystem_path(lfc_id), (0, 0))
+
+        switch_dbuser(config.librarian_gc.dbuser)
+
+        # Force swift_connection.get_container to return duplicate results.
+        orig_get_container = swiftclient.get_container
+
+        def duplicating_get_container(*args, **kwargs):
+            rv = orig_get_container(*args, **kwargs)
+            if not kwargs.get('full_listing', False):
+                rv = (rv[0], rv[1] * 2)
+            return rv
+
+        self.useFixture(MockPatchObject(
+            swiftclient, 'get_container', duplicating_get_container))
+
+        swift.to_swift(BufferLogger(), remove_func=os.unlink)
+
+        # Both files exist in Swift.
+        self.assertTrue(self.file_exists(f1_id))
+        self.assertTrue(self.file_exists(f2_id))
+
+        # Both files survive the first purge.
+        with self.librariangc_thinking_it_is_tomorrow():
+            librariangc.delete_unwanted_files(self.con)
+        self.assertTrue(self.file_exists(f1_id))
+        self.assertTrue(self.file_exists(f2_id))
+
+        # Remove the first file from the DB.
+        content = f1_lfa.content
+        Store.of(f1_lfa).remove(f1_lfa)
+        Store.of(content).remove(content)
+        transaction.commit()
+
+        # The first file is removed, but the second is intact.
+        with self.librariangc_thinking_it_is_tomorrow():
+            librariangc.delete_unwanted_files(self.con)
+        self.assertFalse(self.file_exists(f1_id))
+        self.assertTrue(self.file_exists(f2_id))
+
 
 class TestBlobCollection(TestCase):
     layer = LaunchpadZopelessLayer
diff --git a/lib/lp/testing/swift/fakeswift.py b/lib/lp/testing/swift/fakeswift.py
index eedaa77..86716cd 100644
--- a/lib/lp/testing/swift/fakeswift.py
+++ b/lib/lp/testing/swift/fakeswift.py
@@ -1,4 +1,4 @@
-# Copyright 2013-2017 Canonical Ltd.  This software is licensed under the
+# Copyright 2013-2020 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """An OpenStack Swift server mock using Twisted.
@@ -359,8 +359,8 @@ class SwiftContainer(resource.Resource):
             child = self.container_children.get(name, None)
         elif request.method == b"DELETE":
             child = self.container_children.get(name, None)
-            if child is None: # delete unknown object
-                return EmptyPage(http.NO_CONTENT)
+            if child is None:  # delete unknown object
+                return EmptyPage(http.NOT_FOUND)
             del self.container_children[name]
             return EmptyPage(http.NO_CONTENT)
         else: