launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #25068
[Merge] ~cjwatson/launchpad:swift-files-deduplicate into launchpad:master
Colin Watson has proposed merging ~cjwatson/launchpad:swift-files-deduplicate into launchpad:master.
Commit message:
Tolerate duplicate files from Swift get_container
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/+git/launchpad/+merge/387835
The librarian's GC process has been failing with errors such as this for some time:
ClientException: Object DELETE failed: ... 404 Not Found [first 60 chars of response] <html><h1>Not Found</h1><p>The resource could not be found.
It's not completely clear why this is happening, but one possibility is that swift_connection.get_container is returning duplicate files when iterating over multiple batches or similar, so deduplicate its response.
--
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/launchpad:swift-files-deduplicate into launchpad:master.
diff --git a/lib/lp/services/librarianserver/librariangc.py b/lib/lp/services/librarianserver/librariangc.py
index 19900e4..88c9af9 100644
--- a/lib/lp/services/librarianserver/librariangc.py
+++ b/lib/lp/services/librarianserver/librariangc.py
@@ -1,4 +1,4 @@
-# Copyright 2009-2018 Canonical Ltd. This software is licensed under the
+# Copyright 2009-2020 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""Librarian garbage collection routines"""
@@ -750,15 +750,18 @@ def swift_files(max_lfc_id):
while container != final_container:
container_num += 1
container = swift.SWIFT_CONTAINER_PREFIX + str(container_num)
+ seen_names = set()
try:
- names = sorted(
+ objs = sorted(
swift.quiet_swiftclient(
swift_connection.get_container,
container, full_listing=True)[1],
key=lambda x: [
int(segment) for segment in x['name'].split('/')])
- for name in names:
- yield (container, name)
+ for obj in objs:
+ if obj['name'] not in seen_names:
+ yield (container, obj)
+ seen_names.add(obj['name'])
except swiftclient.ClientException as x:
if x.http_status == 404:
continue
diff --git a/lib/lp/services/librarianserver/tests/test_gc.py b/lib/lp/services/librarianserver/tests/test_gc.py
index 479c51c..d59dde0 100644
--- a/lib/lp/services/librarianserver/tests/test_gc.py
+++ b/lib/lp/services/librarianserver/tests/test_gc.py
@@ -1,4 +1,4 @@
-# Copyright 2009-2017 Canonical Ltd. This software is licensed under the
+# Copyright 2009-2020 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""Librarian garbage collection tests"""
@@ -23,6 +23,7 @@ from subprocess import (
import sys
import tempfile
+from fixtures import MockPatchObject
import pytz
from sqlobject import SQLObjectNotFound
from storm.store import Store
@@ -796,6 +797,64 @@ class TestSwiftLibrarianGarbageCollection(
self.assertEqual([False, False, False], segment_existence(big1_id))
self.assertEqual([True, True, True], segment_existence(big2_id))
+ def test_delete_unwanted_files_handles_duplicates(self):
+ # GC tolerates swift_connection.get_container returning duplicate
+ # names (perhaps across multiple batches). It's not clear whether
+ # this happens in practice, but some strange deletion failures
+ # suggest that it might happen.
+ switch_dbuser('testadmin')
+ content = b'uploading to swift'
+ f1_lfa = LibraryFileAlias.get(self.client.addFile(
+ 'foo.txt', len(content), io.BytesIO(content), 'text/plain'))
+ f1_id = f1_lfa.contentID
+
+ f2_lfa = LibraryFileAlias.get(self.client.addFile(
+ 'bar.txt', len(content), io.BytesIO(content), 'text/plain'))
+ f2_id = f2_lfa.contentID
+ transaction.commit()
+
+ for lfc_id in (f1_id, f2_id):
+ # Make the files old so they don't look in-progress.
+ os.utime(swift.filesystem_path(lfc_id), (0, 0))
+
+ switch_dbuser(config.librarian_gc.dbuser)
+
+ # Force swift_connection.get_container to return duplicate results.
+ orig_get_container = swiftclient.get_container
+
+ def duplicating_get_container(*args, **kwargs):
+ rv = orig_get_container(*args, **kwargs)
+ if not kwargs.get('full_listing', False):
+ rv = (rv[0], rv[1] * 2)
+ return rv
+
+ self.useFixture(MockPatchObject(
+ swiftclient, 'get_container', duplicating_get_container))
+
+ swift.to_swift(BufferLogger(), remove_func=os.unlink)
+
+ # Both files exist in Swift.
+ self.assertTrue(self.file_exists(f1_id))
+ self.assertTrue(self.file_exists(f2_id))
+
+ # Both files survive the first purge.
+ with self.librariangc_thinking_it_is_tomorrow():
+ librariangc.delete_unwanted_files(self.con)
+ self.assertTrue(self.file_exists(f1_id))
+ self.assertTrue(self.file_exists(f2_id))
+
+ # Remove the first file from the DB.
+ content = f1_lfa.content
+ Store.of(f1_lfa).remove(f1_lfa)
+ Store.of(content).remove(content)
+ transaction.commit()
+
+ # The first file is removed, but the second is intact.
+ with self.librariangc_thinking_it_is_tomorrow():
+ librariangc.delete_unwanted_files(self.con)
+ self.assertFalse(self.file_exists(f1_id))
+ self.assertTrue(self.file_exists(f2_id))
+
class TestBlobCollection(TestCase):
layer = LaunchpadZopelessLayer
diff --git a/lib/lp/testing/swift/fakeswift.py b/lib/lp/testing/swift/fakeswift.py
index eedaa77..86716cd 100644
--- a/lib/lp/testing/swift/fakeswift.py
+++ b/lib/lp/testing/swift/fakeswift.py
@@ -1,4 +1,4 @@
-# Copyright 2013-2017 Canonical Ltd. This software is licensed under the
+# Copyright 2013-2020 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""An OpenStack Swift server mock using Twisted.
@@ -359,8 +359,8 @@ class SwiftContainer(resource.Resource):
child = self.container_children.get(name, None)
elif request.method == b"DELETE":
child = self.container_children.get(name, None)
- if child is None: # delete unknown object
- return EmptyPage(http.NO_CONTENT)
+ if child is None: # delete unknown object
+ return EmptyPage(http.NOT_FOUND)
del self.container_children[name]
return EmptyPage(http.NO_CONTENT)
else: