← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~cjwatson/launchpad/librarian-swift-assertion-errors into lp:launchpad

 

Colin Watson has proposed merging lp:~cjwatson/launchpad/librarian-swift-assertion-errors into lp:launchpad.

Commit message:
Fix librarian OOPSes when closing a TxSwiftStream just before the end.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  Bug #1420046 in Launchpad itself: "Librarian requests occasionally fail: 'NoneType' object has no attribute 'get_object'"
  https://bugs.launchpad.net/launchpad/+bug/1420046

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/librarian-swift-assertion-errors/+merge/370960

The following sequence of events caused an assertion error:

 * TxSwiftStream.read called successively, reading all non-empty data chunks from the Swift connection.
 * TxSwiftStream.read entered again, returning a Deferred while it tries to fetch the final chunk from Swift (which will be None due to EOF, but it doesn't know that yet).
 * The stream is closed, e.g. as a result of an HTTP client closing its connection prematurely.
 * The Swift client returns control to TxSwiftStream.read.  self._chunk and self._swift_connection are now both None, so it tries to put a connection back into the pool which is in fact None.

An extra None check is enough to avoid this.

I also fixed a couple of other small problems, including being more careful about closing Swift connections.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/librarian-swift-assertion-errors into lp:launchpad.
=== modified file 'lib/lp/services/librarianserver/storage.py'
--- lib/lp/services/librarianserver/storage.py	2018-04-05 16:58:48 +0000
+++ lib/lp/services/librarianserver/storage.py	2019-08-05 18:02:46 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2018 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2019 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 __metaclass__ = type
@@ -33,7 +33,6 @@
     # _relFileLocation needed by other modules in this package.
     # Listed here to keep the import fascist happy
     '_relFileLocation',
-    '_sameFile',
     ]
 
 
@@ -174,8 +173,9 @@
                 # If we have drained the data successfully,
                 # the connection can be reused saving on auth
                 # handshakes.
-                swift.connection_pool.put(self._swift_connection)
-                self._swift_connection = None
+                if self._swift_connection is not None:
+                    swift.connection_pool.put(self._swift_connection)
+                    self._swift_connection = None
                 self._chunks = None
                 defer.returnValue('')
         return_chunk = self._chunk[:size]
@@ -304,18 +304,6 @@
         fsync_path(os.path.dirname(location), dir=True)
 
 
-def _sameFile(path1, path2):
-    file1 = open(path1, 'rb')
-    file2 = open(path2, 'rb')
-
-    blk = 1024 * 64
-    chunksIter = iter(lambda: (file1.read(blk), file2.read(blk)), ('', ''))
-    for chunk1, chunk2 in chunksIter:
-        if chunk1 != chunk2:
-            return False
-    return True
-
-
 def _relFileLocation(file_id):
     """Return the relative location for the given file_id.
 

=== modified file 'lib/lp/services/librarianserver/swift.py'
--- lib/lp/services/librarianserver/swift.py	2018-05-06 08:52:34 +0000
+++ lib/lp/services/librarianserver/swift.py	2019-08-05 18:02:46 +0000
@@ -1,4 +1,4 @@
-# Copyright 2013-2018 Canonical Ltd.  This software is licensed under the
+# Copyright 2013-2019 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """Move files from Librarian disk storage into Swift."""
@@ -316,7 +316,9 @@
 
     def close(self):
         self.closed = True
-        self._swift_connection = None
+        if self._swift_connection is not None:
+            self._swift_connection.close()
+            self._swift_connection = None
 
     def seek(self, offset):
         if offset < self._offset:

=== modified file 'lib/lp/services/librarianserver/tests/test_storage.py'
--- lib/lp/services/librarianserver/tests/test_storage.py	2018-01-02 10:54:31 +0000
+++ lib/lp/services/librarianserver/tests/test_storage.py	2019-08-05 18:02:46 +0000
@@ -1,8 +1,7 @@
-# Copyright 2009 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2019 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 import hashlib
-import os
 import shutil
 import tempfile
 import unittest
@@ -12,7 +11,6 @@
 from lp.services.librarianserver import db
 from lp.services.librarianserver.storage import (
     _relFileLocation,
-    _sameFile,
     LibrarianStorage,
     )
 from lp.testing.layers import LaunchpadZopelessLayer
@@ -54,49 +52,6 @@
         # Make sure hasFile returns False when a file is missing
         self.assertFalse(self.storage.hasFile(9999999))
 
-    def _sameFileTestHelper(self, data1, data2):
-        # Make two temporary files
-        fd1, path1 = tempfile.mkstemp()
-        fd2, path2 = tempfile.mkstemp()
-        file1 = os.fdopen(fd1, 'wb')
-        file2 = os.fdopen(fd2, 'wb')
-
-        # Put the test data in them, and close them
-        file1.write(data1)
-        file2.write(data2)
-        file1.close()
-        file2.close()
-
-        # Do the test, and clean up afterwards
-        try:
-            return _sameFile(path1, path2)
-        finally:
-            os.remove(path1)
-            os.remove(path2)
-
-    def test_sameFile(self):
-        # Make sure sameFile returns True when the files are the same
-        self.assertTrue(
-            self._sameFileTestHelper('data ' * 5000, 'data ' * 5000))
-
-    def test_notSameFile(self):
-        # Make sure sameFile returns False when the files are different, even
-        # if they are the same length.
-        self.assertFalse(
-            self._sameFileTestHelper('data ' * 5000, 'fred ' * 5000))
-
-    def test_differentFileShorter(self):
-        # Make sure sameFile returns False when the second file is shorter
-        # than the first, even if they were the same up to that length.
-        self.assertFalse(
-            self._sameFileTestHelper('data ' * 5000, 'data ' * 4999))
-
-    def test_differentFileLonger(self):
-        # Make sure sameFile returns False when the second file is longer than
-        # the first, even if they were the same up to that length.
-        self.assertFalse(
-            self._sameFileTestHelper('data ' * 5000, 'data ' * 5001))
-
     def test_prefixDirectories(self):
         # _relFileLocation splits eight hex digits across four path segments
         self.assertEqual('12/34/56/78', _relFileLocation(0x12345678))

=== modified file 'lib/lp/services/librarianserver/tests/test_storage_db.py'
--- lib/lp/services/librarianserver/tests/test_storage_db.py	2018-01-02 10:54:31 +0000
+++ lib/lp/services/librarianserver/tests/test_storage_db.py	2019-08-05 18:02:46 +0000
@@ -1,35 +1,45 @@
-# Copyright 2009 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2019 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 import hashlib
-import shutil
-import tempfile
-import unittest
+import os.path
+import time
+
+from fixtures import TempDir
+from testtools.testcase import ExpectedException
+from testtools.twistedsupport import AsynchronousDeferredRunTest
+import transaction
+from twisted.internet import defer
 
 from lp.services.database.sqlbase import flush_database_updates
+from lp.services.features.testing import FeatureFixture
 from lp.services.librarian.model import LibraryFileContent
-from lp.services.librarianserver import db
+from lp.services.librarianserver import (
+    db,
+    swift,
+    )
 from lp.services.librarianserver.storage import (
     DigestMismatchError,
     DuplicateFileIDError,
     LibrarianStorage,
     LibraryFileUpload,
     )
+from lp.services.log.logger import DevNullLogger
+from lp.testing import TestCase
 from lp.testing.dbuser import switch_dbuser
 from lp.testing.layers import LaunchpadZopelessLayer
-
-
-class LibrarianStorageDBTests(unittest.TestCase):
+from lp.testing.swift.fixture import SwiftFixture
+
+
+class LibrarianStorageDBTests(TestCase):
     layer = LaunchpadZopelessLayer
 
     def setUp(self):
+        super(LibrarianStorageDBTests, self).setUp()
         switch_dbuser('librarian')
-        self.directory = tempfile.mkdtemp()
+        self.directory = self.useFixture(TempDir()).path
         self.storage = LibrarianStorage(self.directory, db.Library())
 
-    def tearDown(self):
-        shutil.rmtree(self.directory, ignore_errors=True)
-
     def test_addFile(self):
         data = 'data ' * 50
         digest = hashlib.sha1(data).hexdigest()
@@ -129,3 +139,98 @@
 
         flush_database_updates()
         # And no errors should have been raised!
+
+
+class LibrarianStorageSwiftTests(TestCase):
+
+    layer = LaunchpadZopelessLayer
+    run_tests_with = AsynchronousDeferredRunTest.make_factory(timeout=10)
+
+    def setUp(self):
+        super(LibrarianStorageSwiftTests, self).setUp()
+        switch_dbuser('librarian')
+        self.swift_fixture = self.useFixture(SwiftFixture())
+        self.useFixture(FeatureFixture({'librarian.swift.enabled': True}))
+        self.directory = self.useFixture(TempDir()).path
+        self.pushConfig('librarian_server', root=self.directory)
+        self.storage = LibrarianStorage(self.directory, db.Library())
+        transaction.commit()
+        self.addCleanup(swift.connection_pool.clear)
+
+    def moveToSwift(self, lfc_id):
+        # Move a file to Swift so that we know it can't accidentally be
+        # retrieved from the local file system.  We set its modification
+        # time far enough in the past that it isn't considered potentially
+        # in progress.
+        path = swift.filesystem_path(lfc_id)
+        mtime = time.time() - 25 * 60 * 60
+        os.utime(path, (mtime, mtime))
+        self.assertTrue(os.path.exists(path))
+        swift.to_swift(DevNullLogger(), remove_func=os.unlink)
+        self.assertFalse(os.path.exists(path))
+
+    @defer.inlineCallbacks
+    def test_completed_fetch_reuses_connection(self):
+        # A completed fetch returns the expected data and reuses the Swift
+        # connection.
+        data = b'x' * (self.storage.CHUNK_SIZE * 4 + 1)
+        newfile = self.storage.startAddFile('file', len(data))
+        newfile.mimetype = 'text/plain'
+        newfile.append(data)
+        lfc_id, _ = newfile.store()
+        self.moveToSwift(lfc_id)
+        stream = yield self.storage.open(lfc_id)
+        self.assertIsNotNone(stream)
+        chunks = []
+        while True:
+            chunk = yield stream.read(self.storage.CHUNK_SIZE)
+            if not chunk:
+                break
+            chunks.append(chunk)
+        self.assertEqual(b''.join(chunks), data)
+        self.assertEqual(1, len(swift.connection_pool._pool))
+
+    @defer.inlineCallbacks
+    def test_partial_fetch_does_not_reuse_connection(self):
+        # If only part of a file is fetched, the Swift connection is not
+        # reused.
+        data = b'x' * self.storage.CHUNK_SIZE * 4
+        newfile = self.storage.startAddFile('file', len(data))
+        newfile.mimetype = 'text/plain'
+        newfile.append(data)
+        lfc_id, _ = newfile.store()
+        self.moveToSwift(lfc_id)
+        stream = yield self.storage.open(lfc_id)
+        self.assertIsNotNone(stream)
+        chunk = yield stream.read(self.storage.CHUNK_SIZE)
+        self.assertEqual(b'x' * self.storage.CHUNK_SIZE, chunk)
+        stream.close()
+        with ExpectedException(ValueError, 'I/O operation on closed file'):
+            yield stream.read(self.storage.CHUNK_SIZE)
+        self.assertEqual(0, len(swift.connection_pool._pool))
+
+    @defer.inlineCallbacks
+    def test_fetch_with_close_at_end_does_not_reuse_connection(self):
+        num_chunks = 4
+        data = b'x' * self.storage.CHUNK_SIZE * num_chunks
+        newfile = self.storage.startAddFile('file', len(data))
+        newfile.mimetype = 'text/plain'
+        newfile.append(data)
+        lfc_id, _ = newfile.store()
+        self.moveToSwift(lfc_id)
+        stream = yield self.storage.open(lfc_id)
+        self.assertIsNotNone(stream)
+        # Read exactly the number of chunks we expect to make up the file.
+        for _ in range(num_chunks):
+            chunk = yield stream.read(self.storage.CHUNK_SIZE)
+            self.assertEqual(b'x' * self.storage.CHUNK_SIZE, chunk)
+        # Start the read that should return an empty byte string (indicating
+        # EOF), but close the stream before finishing it.  This exercises
+        # the connection-reuse path in TxSwiftStream.read.
+        d = stream.read(self.storage.CHUNK_SIZE)
+        stream.close()
+        chunk = yield d
+        self.assertEqual(b'', chunk)
+        # In principle we might be able to reuse the connection here, but
+        # SwiftStream.close doesn't know that.
+        self.assertEqual(0, len(swift.connection_pool._pool))


Follow ups