← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] ~cjwatson/launchpad:librarian-gc-delete-tolerate-404 into launchpad:master

 

Colin Watson has proposed merging ~cjwatson/launchpad:librarian-gc-delete-tolerate-404 into launchpad:master.

Commit message:
Ignore 404 when deleting unwanted Swift files

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/+git/launchpad/+merge/389749

librarian-gc has been consistently failing for some time with the following pattern:

  2020-08-19 03:22:44 DEBUG   http://10.34.0.139:8080 "GET /v1/AUTH_xxx/librarian_6539?format=json&marker=3269994338 HTTP/1.1" 200 552359
  2020-08-19 03:22:44 DEBUG   http://10.34.0.139:8080 "GET /v1/AUTH_xxx/librarian_6539?format=json&marker=3269999999 HTTP/1.1" 200 2
  2020-08-19 03:22:45 DEBUG   http://10.34.0.139:8080 "DELETE /v1/AUTH_xxx/librarian_6539/3269500785 HTTP/1.1" 401 131
  2020-08-19 03:22:46 DEBUG   http://10.34.0.136:5000 "POST /v2.0/tokens HTTP/1.1" 200 3410
  2020-08-19 03:22:46 DEBUG   http://10.34.0.139:8080 "DELETE /v1/AUTH_xxx/librarian_6539/3269500785 HTTP/1.1" 404 70

The DELETE may happen in a different swiftclient.client.Connection instance from the GETs, and the sequence of GETs to enumerate all the files in Swift up to that point takes almost two hours on production at the moment which is longer than Keystone's default token expiry time of one hour, so it makes sense that the first DELETE might return 401.  However, it's not at all clear why the retried DELETE of the same object gets 404 (nothing else in librarian-gc seems to have deleted that object).  Although I'm concerned about this odd pattern, I've been unable to reproduce this with the same version of Swift, so perhaps it's best to just ignore it (since DELETE returning 404 still at least indicates that the object is no longer present) and allow librarian-gc to run to completion.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/launchpad:librarian-gc-delete-tolerate-404 into launchpad:master.
diff --git a/lib/lp/services/librarianserver/librariangc.py b/lib/lp/services/librarianserver/librariangc.py
index 88c9af9..a16e769 100644
--- a/lib/lp/services/librarianserver/librariangc.py
+++ b/lib/lp/services/librarianserver/librariangc.py
@@ -832,7 +832,11 @@ def delete_unwanted_swift_files(con):
                     "File %d not removed - created too recently", content_id)
             else:
                 with swift.connection() as swift_connection:
-                    swift_connection.delete_object(container, name)
+                    try:
+                        swift_connection.delete_object(container, name)
+                    except swiftclient.ClientException as e:
+                        if e.http_status != 404:
+                            raise
                 log.debug3(
                     'Deleted ({0}, {1}) from Swift'.format(container, name))
                 removed_count += 1
diff --git a/lib/lp/services/librarianserver/tests/test_gc.py b/lib/lp/services/librarianserver/tests/test_gc.py
index d59dde0..71fb0c4 100644
--- a/lib/lp/services/librarianserver/tests/test_gc.py
+++ b/lib/lp/services/librarianserver/tests/test_gc.py
@@ -25,6 +25,8 @@ import tempfile
 
 from fixtures import MockPatchObject
 import pytz
+import requests
+from six.moves.urllib.parse import urljoin
 from sqlobject import SQLObjectNotFound
 from storm.store import Store
 from swiftclient import client as swiftclient
@@ -855,6 +857,64 @@ class TestSwiftLibrarianGarbageCollection(
         self.assertFalse(self.file_exists(f1_id))
         self.assertTrue(self.file_exists(f2_id))
 
+    def test_delete_unwanted_files_handles_missing(self):
+        # GC tolerates a file being already missing from Swift when it tries
+        # to delete it.  It's not clear why this happens in practice.
+        switch_dbuser('testadmin')
+        content = b'uploading to swift'
+        f1_lfa = LibraryFileAlias.get(self.client.addFile(
+            'foo.txt', len(content), io.BytesIO(content), 'text/plain'))
+        f1_id = f1_lfa.contentID
+
+        f2_lfa = LibraryFileAlias.get(self.client.addFile(
+            'bar.txt', len(content), io.BytesIO(content), 'text/plain'))
+        f2_id = f2_lfa.contentID
+        transaction.commit()
+
+        for lfc_id in (f1_id, f2_id):
+            # Make the files old so they don't look in-progress.
+            os.utime(swift.filesystem_path(lfc_id), (0, 0))
+
+        switch_dbuser(config.librarian_gc.dbuser)
+
+        swift.to_swift(BufferLogger(), remove_func=os.unlink)
+
+        # Both files exist in Swift.
+        self.assertTrue(self.file_exists(f1_id))
+        self.assertTrue(self.file_exists(f2_id))
+
+        # Remove the first file from the DB.
+        content = f1_lfa.content
+        Store.of(f1_lfa).remove(f1_lfa)
+        Store.of(content).remove(content)
+        transaction.commit()
+
+        # In production, we see a sequence of DELETE 401 / authenticate /
+        # DELETE 404.  Since it isn't clear why this happens, we don't know
+        # exactly how to simulate it here, but do our best by deleting the
+        # object and forcibly expiring the client's token just before the
+        # first expected DELETE.
+        real_delete_object = swiftclient.Connection.delete_object
+
+        def fake_delete_object(connection, *args, **kwargs):
+            real_delete_object(connection, *args, **kwargs)
+            self.assertIsNotNone(connection.token)
+            requests.delete(urljoin(
+                config.librarian_server.os_auth_url,
+                'tokens/%s' % connection.token))
+            return real_delete_object(connection, *args, **kwargs)
+
+        self.patch(
+            swiftclient.Connection, 'delete_object', fake_delete_object)
+
+        # delete_unwanted_files completes successfully, and the second file
+        # is intact.
+        with self.librariangc_thinking_it_is_tomorrow():
+            swift.quiet_swiftclient(
+                librariangc.delete_unwanted_files, self.con)
+        self.assertFalse(self.file_exists(f1_id))
+        self.assertTrue(self.file_exists(f2_id))
+
 
 class TestBlobCollection(TestCase):
     layer = LaunchpadZopelessLayer
diff --git a/lib/lp/testing/swift/fakeswift.py b/lib/lp/testing/swift/fakeswift.py
index 86716cd..3536326 100644
--- a/lib/lp/testing/swift/fakeswift.py
+++ b/lib/lp/testing/swift/fakeswift.py
@@ -74,8 +74,8 @@ class FakeKeystone(resource.Resource):
     def getValidToken(self, tenant_name, expected_token=None):
         """Get a valid token for the given tenant name."""
         if expected_token is not None:
-            token = self.tokens[expected_token]
-            if self._isValidToken(token, tenant_name):
+            token = self.tokens.get(expected_token)
+            if token is not None and self._isValidToken(token, tenant_name):
                 return token
         else:
             for id, token in six.iteritems(self.tokens):
@@ -100,10 +100,21 @@ class FakeKeystone(resource.Resource):
         valid_token = self.getValidToken(tenant_name, token)
         return valid_token is not None
 
-    def getChild(self, path, request):
+    def getChild(self, name, request):
         """See `twisted.web.resource.Resource.getChild`."""
-        if path in (b"v2.0", b"tokens"):
+        if name in (b"v2.0", b"tokens"):
             return self
+
+        token = self.tokens.get(name, None)
+
+        if request.method == b"DELETE":
+            if not self.validateToken(request, token["tenant"]["name"]):
+                return EmptyPage(http.UNAUTHORIZED)
+            if token is None:  # delete unknown token
+                return EmptyPage(http.NOT_FOUND)
+            del self.tokens[name]
+            return EmptyPage(http.NO_CONTENT)
+
         return resource.NoResource("Not a valid keystone URL.")
 
     def render_POST(self, request):
@@ -531,7 +542,7 @@ class FakeSwift(resource.Resource):
             return resource
 
         if not self.root.keystone.validateToken(request, tenant_name):
-            return EmptyPage(http.FORBIDDEN)
+            return EmptyPage(http.UNAUTHORIZED)
 
         return resource