← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~wgrant/launchpad/librarian-missing-500 into lp:launchpad

 

William Grant has proposed merging lp:~wgrant/launchpad/librarian-missing-500 into lp:launchpad.

Commit message:
Fix Librarian to generate non-cachable 500s on missing storage files.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  Bug #1529428 in Launchpad itself: "Librarian sets aggressive cache headers on missing files"
  https://bugs.launchpad.net/launchpad/+bug/1529428

For more details, see:
https://code.launchpad.net/~wgrant/launchpad/librarian-missing-500/+merge/281370

Fix Librarian to generate non-cachable 500s on missing storage files.

Previously we would return a 404 that was cachable for a year, which is silly for what is usually a temporary (otherwise disastrous) internal error.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~wgrant/launchpad/librarian-missing-500 into lp:launchpad.
=== modified file 'lib/lp/services/librarianserver/tests/test_web.py'
--- lib/lp/services/librarianserver/tests/test_web.py	2015-12-09 06:24:43 +0000
+++ lib/lp/services/librarianserver/tests/test_web.py	2015-12-27 04:00:12 +0000
@@ -5,6 +5,7 @@
 from datetime import datetime
 import hashlib
 import httplib
+import os
 import unittest
 from urllib2 import (
     HTTPError,
@@ -37,6 +38,7 @@
     LibraryFileAlias,
     TimeLimitedToken,
     )
+from lp.services.librarianserver.storage import LibrarianStorage
 from lp.testing.dbuser import switch_dbuser
 from lp.testing.layers import (
     LaunchpadFunctionalLayer,
@@ -239,6 +241,40 @@
         self.failUnlessEqual(
             last_modified_header, 'Tue, 30 Jan 2001 13:45:59 GMT')
 
+    def test_missing_storage(self):
+        # When a file exists in the DB but is missing from disk, a 404
+        # is just confusing. It's an internal error, so 500 instead.
+        client = LibrarianClient()
+
+        # Upload a file so we can retrieve it.
+        sample_data = 'blah'
+        file_alias_id = client.addFile(
+            'sample', len(sample_data), StringIO(sample_data),
+            contentType='text/plain')
+        url = client.getURLForAlias(file_alias_id)
+
+        # Change the date_created to a known value that doesn't match
+        # the disk timestamp. The timestamp on disk cannot be trusted.
+        file_alias = IMasterStore(LibraryFileAlias).get(
+            LibraryFileAlias, file_alias_id)
+
+        # Commit so the file is available from the Librarian.
+        self.commit()
+
+        # Fetch the file via HTTP.
+        urlopen(url)
+
+        # Delete the on-disk file.
+        storage = LibrarianStorage(config.librarian_server.root, None)
+        os.remove(storage._fileLocation(file_alias.contentID))
+
+        # The URL now 500s, since the DB says it should exist.
+        exception = self.assertRaises(HTTPError, urlopen, url)
+        self.assertEqual(500, exception.code)
+        self.assertIn('Server', exception.info())
+        self.assertNotIn('Last-Modified', exception.info())
+        self.assertNotIn('Cache-Control', exception.info())
+
     def get_restricted_file_and_public_url(self, filename='sample'):
         # Use a regular LibrarianClient to ensure we speak to the
         # nonrestricted port on the librarian which is where secured

=== modified file 'lib/lp/services/librarianserver/web.py'
--- lib/lp/services/librarianserver/web.py	2015-07-10 02:42:42 +0000
+++ lib/lp/services/librarianserver/web.py	2015-12-27 04:00:12 +0000
@@ -176,25 +176,25 @@
                 % (dbfilename.encode('utf-8'), filename))
             return fourOhFour
 
-        if not restricted:
-            # Set our caching headers. Librarian files can be cached forever.
-            request.setHeader('Cache-Control', 'max-age=31536000, public')
-        else:
-            # Restricted files require revalidation every time. For now,
-            # until the deployment details are completely reviewed, the
-            # simplest, most cautious approach is taken: no caching permited.
-            request.setHeader('Cache-Control', 'max-age=0, private')
-
-        if self.storage.hasFile(dbcontentID) or self.upstreamHost is None:
+        if self.storage.hasFile(dbcontentID):
             # XXX: Brad Crittenden 2007-12-05 bug=174204: When encodings are
             # stored as part of a file's metadata this logic will be replaced.
             encoding, mimetype = guess_librarian_encoding(filename, mimetype)
+            # Set our caching headers. Public Librarian files can be
+            # cached forever, while private ones mustn't be at all.
+            request.setHeader(
+                'Cache-Control',
+                'max-age=31536000, public'
+                if not restricted else 'max-age=0, private')
             return File(
                 mimetype, encoding, date_created,
                 self.storage._fileLocation(dbcontentID))
-        else:
+        elif self.upstreamHost is not None:
             return proxy.ReverseProxyResource(self.upstreamHost,
                                               self.upstreamPort, request.path)
+        else:
+            raise AssertionError(
+                "Content %d missing from storage." % dbcontentID)
 
     @defer.inlineCallbacks
     def _cb_getFileAlias_swift(
@@ -212,26 +212,27 @@
                 % (dbfilename.encode('utf-8'), filename))
             defer.returnValue(fourOhFour)
 
-        if not restricted:
-            # Set our caching headers. Librarian files can be cached forever.
-            request.setHeader('Cache-Control', 'max-age=31536000, public')
-        else:
-            # Restricted files require revalidation every time. For now,
-            # until the deployment details are completely reviewed, the
-            # simplest, most cautious approach is taken: no caching permited.
-            request.setHeader('Cache-Control', 'max-age=0, private')
-
         stream = yield self.storage.open(dbcontentID)
-        if stream is not None or self.upstreamHost is None:
+        if stream is not None:
             # XXX: Brad Crittenden 2007-12-05 bug=174204: When encodings are
             # stored as part of a file's metadata this logic will be replaced.
             encoding, mimetype = guess_librarian_encoding(filename, mimetype)
-            defer.returnValue(File_swift(
-                mimetype, encoding, date_created, stream, size))
-        else:
+            file = File_swift(mimetype, encoding, date_created, stream, size)
+            assert file.exists
+            # Set our caching headers. Public Librarian files can be
+            # cached forever, while private ones mustn't be at all.
+            request.setHeader(
+                'Cache-Control',
+                'max-age=31536000, public'
+                if not restricted else 'max-age=0, private')
+            defer.returnValue(file)
+        elif self.upstreamHost is not None:
             defer.returnValue(
                 proxy.ReverseProxyResource(
                     self.upstreamHost, self.upstreamPort, request.path))
+        else:
+            raise AssertionError(
+                "Content %d missing from storage." % dbcontentID)
 
     def render_GET(self, request):
         return defaultResource.render(request)


Follow ups