launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #23841
[Merge] lp:~cjwatson/launchpad/librarian-swift-assertion-errors into lp:launchpad
Colin Watson has proposed merging lp:~cjwatson/launchpad/librarian-swift-assertion-errors into lp:launchpad.
Commit message:
Fix librarian OOPSes when closing a TxSwiftStream just before the end.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
Related bugs:
Bug #1420046 in Launchpad itself: "Librarian requests occasionally fail: 'NoneType' object has no attribute 'get_object'"
https://bugs.launchpad.net/launchpad/+bug/1420046
For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/librarian-swift-assertion-errors/+merge/370960
The following sequence of events caused an assertion error:
* TxSwiftStream.read called successively, reading all non-empty data chunks from the Swift connection.
* TxSwiftStream.read entered again, returning a Deferred while it tries to fetch the final chunk from Swift (which will be None due to EOF, but it doesn't know that yet).
* The stream is closed, e.g. as a result of an HTTP client closing its connection prematurely.
* The Swift client returns control to TxSwiftStream.read. self._chunk and self._swift_connection are now both None, so it tries to put a connection back into the pool which is in fact None.
An extra None check is enough to avoid this.
I also fixed a couple of other small problems, including being more careful about closing Swift connections.
--
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/librarian-swift-assertion-errors into lp:launchpad.
=== modified file 'lib/lp/services/librarianserver/storage.py'
--- lib/lp/services/librarianserver/storage.py 2018-04-05 16:58:48 +0000
+++ lib/lp/services/librarianserver/storage.py 2019-08-05 18:02:46 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2018 Canonical Ltd. This software is licensed under the
+# Copyright 2009-2019 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
__metaclass__ = type
@@ -33,7 +33,6 @@
# _relFileLocation needed by other modules in this package.
# Listed here to keep the import fascist happy
'_relFileLocation',
- '_sameFile',
]
@@ -174,8 +173,9 @@
# If we have drained the data successfully,
# the connection can be reused saving on auth
# handshakes.
- swift.connection_pool.put(self._swift_connection)
- self._swift_connection = None
+ if self._swift_connection is not None:
+ swift.connection_pool.put(self._swift_connection)
+ self._swift_connection = None
self._chunks = None
defer.returnValue('')
return_chunk = self._chunk[:size]
@@ -304,18 +304,6 @@
fsync_path(os.path.dirname(location), dir=True)
-def _sameFile(path1, path2):
- file1 = open(path1, 'rb')
- file2 = open(path2, 'rb')
-
- blk = 1024 * 64
- chunksIter = iter(lambda: (file1.read(blk), file2.read(blk)), ('', ''))
- for chunk1, chunk2 in chunksIter:
- if chunk1 != chunk2:
- return False
- return True
-
-
def _relFileLocation(file_id):
"""Return the relative location for the given file_id.
=== modified file 'lib/lp/services/librarianserver/swift.py'
--- lib/lp/services/librarianserver/swift.py 2018-05-06 08:52:34 +0000
+++ lib/lp/services/librarianserver/swift.py 2019-08-05 18:02:46 +0000
@@ -1,4 +1,4 @@
-# Copyright 2013-2018 Canonical Ltd. This software is licensed under the
+# Copyright 2013-2019 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""Move files from Librarian disk storage into Swift."""
@@ -316,7 +316,9 @@
def close(self):
self.closed = True
- self._swift_connection = None
+ if self._swift_connection is not None:
+ self._swift_connection.close()
+ self._swift_connection = None
def seek(self, offset):
if offset < self._offset:
=== modified file 'lib/lp/services/librarianserver/tests/test_storage.py'
--- lib/lp/services/librarianserver/tests/test_storage.py 2018-01-02 10:54:31 +0000
+++ lib/lp/services/librarianserver/tests/test_storage.py 2019-08-05 18:02:46 +0000
@@ -1,8 +1,7 @@
-# Copyright 2009 Canonical Ltd. This software is licensed under the
+# Copyright 2009-2019 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
import hashlib
-import os
import shutil
import tempfile
import unittest
@@ -12,7 +11,6 @@
from lp.services.librarianserver import db
from lp.services.librarianserver.storage import (
_relFileLocation,
- _sameFile,
LibrarianStorage,
)
from lp.testing.layers import LaunchpadZopelessLayer
@@ -54,49 +52,6 @@
# Make sure hasFile returns False when a file is missing
self.assertFalse(self.storage.hasFile(9999999))
- def _sameFileTestHelper(self, data1, data2):
- # Make two temporary files
- fd1, path1 = tempfile.mkstemp()
- fd2, path2 = tempfile.mkstemp()
- file1 = os.fdopen(fd1, 'wb')
- file2 = os.fdopen(fd2, 'wb')
-
- # Put the test data in them, and close them
- file1.write(data1)
- file2.write(data2)
- file1.close()
- file2.close()
-
- # Do the test, and clean up afterwards
- try:
- return _sameFile(path1, path2)
- finally:
- os.remove(path1)
- os.remove(path2)
-
- def test_sameFile(self):
- # Make sure sameFile returns True when the files are the same
- self.assertTrue(
- self._sameFileTestHelper('data ' * 5000, 'data ' * 5000))
-
- def test_notSameFile(self):
- # Make sure sameFile returns False when the files are different, even
- # if they are the same length.
- self.assertFalse(
- self._sameFileTestHelper('data ' * 5000, 'fred ' * 5000))
-
- def test_differentFileShorter(self):
- # Make sure sameFile returns False when the second file is shorter
- # than the first, even if they were the same up to that length.
- self.assertFalse(
- self._sameFileTestHelper('data ' * 5000, 'data ' * 4999))
-
- def test_differentFileLonger(self):
- # Make sure sameFile returns False when the second file is longer than
- # the first, even if they were the same up to that length.
- self.assertFalse(
- self._sameFileTestHelper('data ' * 5000, 'data ' * 5001))
-
def test_prefixDirectories(self):
# _relFileLocation splits eight hex digits across four path segments
self.assertEqual('12/34/56/78', _relFileLocation(0x12345678))
=== modified file 'lib/lp/services/librarianserver/tests/test_storage_db.py'
--- lib/lp/services/librarianserver/tests/test_storage_db.py 2018-01-02 10:54:31 +0000
+++ lib/lp/services/librarianserver/tests/test_storage_db.py 2019-08-05 18:02:46 +0000
@@ -1,35 +1,45 @@
-# Copyright 2009 Canonical Ltd. This software is licensed under the
+# Copyright 2009-2019 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
import hashlib
-import shutil
-import tempfile
-import unittest
+import os.path
+import time
+
+from fixtures import TempDir
+from testtools.testcase import ExpectedException
+from testtools.twistedsupport import AsynchronousDeferredRunTest
+import transaction
+from twisted.internet import defer
from lp.services.database.sqlbase import flush_database_updates
+from lp.services.features.testing import FeatureFixture
from lp.services.librarian.model import LibraryFileContent
-from lp.services.librarianserver import db
+from lp.services.librarianserver import (
+ db,
+ swift,
+ )
from lp.services.librarianserver.storage import (
DigestMismatchError,
DuplicateFileIDError,
LibrarianStorage,
LibraryFileUpload,
)
+from lp.services.log.logger import DevNullLogger
+from lp.testing import TestCase
from lp.testing.dbuser import switch_dbuser
from lp.testing.layers import LaunchpadZopelessLayer
-
-
-class LibrarianStorageDBTests(unittest.TestCase):
+from lp.testing.swift.fixture import SwiftFixture
+
+
+class LibrarianStorageDBTests(TestCase):
layer = LaunchpadZopelessLayer
def setUp(self):
+ super(LibrarianStorageDBTests, self).setUp()
switch_dbuser('librarian')
- self.directory = tempfile.mkdtemp()
+ self.directory = self.useFixture(TempDir()).path
self.storage = LibrarianStorage(self.directory, db.Library())
- def tearDown(self):
- shutil.rmtree(self.directory, ignore_errors=True)
-
def test_addFile(self):
data = 'data ' * 50
digest = hashlib.sha1(data).hexdigest()
@@ -129,3 +139,98 @@
flush_database_updates()
# And no errors should have been raised!
+
+
+class LibrarianStorageSwiftTests(TestCase):
+
+ layer = LaunchpadZopelessLayer
+ run_tests_with = AsynchronousDeferredRunTest.make_factory(timeout=10)
+
+ def setUp(self):
+ super(LibrarianStorageSwiftTests, self).setUp()
+ switch_dbuser('librarian')
+ self.swift_fixture = self.useFixture(SwiftFixture())
+ self.useFixture(FeatureFixture({'librarian.swift.enabled': True}))
+ self.directory = self.useFixture(TempDir()).path
+ self.pushConfig('librarian_server', root=self.directory)
+ self.storage = LibrarianStorage(self.directory, db.Library())
+ transaction.commit()
+ self.addCleanup(swift.connection_pool.clear)
+
+ def moveToSwift(self, lfc_id):
+ # Move a file to Swift so that we know it can't accidentally be
+ # retrieved from the local file system. We set its modification
+ # time far enough in the past that it isn't considered potentially
+ # in progress.
+ path = swift.filesystem_path(lfc_id)
+ mtime = time.time() - 25 * 60 * 60
+ os.utime(path, (mtime, mtime))
+ self.assertTrue(os.path.exists(path))
+ swift.to_swift(DevNullLogger(), remove_func=os.unlink)
+ self.assertFalse(os.path.exists(path))
+
+ @defer.inlineCallbacks
+ def test_completed_fetch_reuses_connection(self):
+ # A completed fetch returns the expected data and reuses the Swift
+ # connection.
+ data = b'x' * (self.storage.CHUNK_SIZE * 4 + 1)
+ newfile = self.storage.startAddFile('file', len(data))
+ newfile.mimetype = 'text/plain'
+ newfile.append(data)
+ lfc_id, _ = newfile.store()
+ self.moveToSwift(lfc_id)
+ stream = yield self.storage.open(lfc_id)
+ self.assertIsNotNone(stream)
+ chunks = []
+ while True:
+ chunk = yield stream.read(self.storage.CHUNK_SIZE)
+ if not chunk:
+ break
+ chunks.append(chunk)
+ self.assertEqual(b''.join(chunks), data)
+ self.assertEqual(1, len(swift.connection_pool._pool))
+
+ @defer.inlineCallbacks
+ def test_partial_fetch_does_not_reuse_connection(self):
+ # If only part of a file is fetched, the Swift connection is not
+ # reused.
+ data = b'x' * self.storage.CHUNK_SIZE * 4
+ newfile = self.storage.startAddFile('file', len(data))
+ newfile.mimetype = 'text/plain'
+ newfile.append(data)
+ lfc_id, _ = newfile.store()
+ self.moveToSwift(lfc_id)
+ stream = yield self.storage.open(lfc_id)
+ self.assertIsNotNone(stream)
+ chunk = yield stream.read(self.storage.CHUNK_SIZE)
+ self.assertEqual(b'x' * self.storage.CHUNK_SIZE, chunk)
+ stream.close()
+ with ExpectedException(ValueError, 'I/O operation on closed file'):
+ yield stream.read(self.storage.CHUNK_SIZE)
+ self.assertEqual(0, len(swift.connection_pool._pool))
+
+ @defer.inlineCallbacks
+ def test_fetch_with_close_at_end_does_not_reuse_connection(self):
+ num_chunks = 4
+ data = b'x' * self.storage.CHUNK_SIZE * num_chunks
+ newfile = self.storage.startAddFile('file', len(data))
+ newfile.mimetype = 'text/plain'
+ newfile.append(data)
+ lfc_id, _ = newfile.store()
+ self.moveToSwift(lfc_id)
+ stream = yield self.storage.open(lfc_id)
+ self.assertIsNotNone(stream)
+ # Read exactly the number of chunks we expect to make up the file.
+ for _ in range(num_chunks):
+ chunk = yield stream.read(self.storage.CHUNK_SIZE)
+ self.assertEqual(b'x' * self.storage.CHUNK_SIZE, chunk)
+ # Start the read that should return an empty byte string (indicating
+ # EOF), but close the stream before finishing it. This exercises
+ # the connection-reuse path in TxSwiftStream.read.
+ d = stream.read(self.storage.CHUNK_SIZE)
+ stream.close()
+ chunk = yield d
+ self.assertEqual(b'', chunk)
+ # In principle we might be able to reuse the connection here, but
+ # SwiftStream.close doesn't know that.
+ self.assertEqual(0, len(swift.connection_pool._pool))
Follow ups