launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #19821
[Merge] lp:~wgrant/launchpad/librarian-missing-500 into lp:launchpad
William Grant has proposed merging lp:~wgrant/launchpad/librarian-missing-500 into lp:launchpad.
Commit message:
Fix Librarian to generate non-cachable 500s on missing storage files.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
Related bugs:
Bug #1529428 in Launchpad itself: "Librarian sets aggressive cache headers on missing files"
https://bugs.launchpad.net/launchpad/+bug/1529428
For more details, see:
https://code.launchpad.net/~wgrant/launchpad/librarian-missing-500/+merge/281370
Fix Librarian to generate non-cachable 500s on missing storage files.
Previously we would return a 404 that was cachable for a year, which is silly for what is usually a temporary (otherwise disastrous) internal error.
--
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~wgrant/launchpad/librarian-missing-500 into lp:launchpad.
=== modified file 'lib/lp/services/librarianserver/tests/test_web.py'
--- lib/lp/services/librarianserver/tests/test_web.py 2015-12-09 06:24:43 +0000
+++ lib/lp/services/librarianserver/tests/test_web.py 2015-12-27 04:00:12 +0000
@@ -5,6 +5,7 @@
from datetime import datetime
import hashlib
import httplib
+import os
import unittest
from urllib2 import (
HTTPError,
@@ -37,6 +38,7 @@
LibraryFileAlias,
TimeLimitedToken,
)
+from lp.services.librarianserver.storage import LibrarianStorage
from lp.testing.dbuser import switch_dbuser
from lp.testing.layers import (
LaunchpadFunctionalLayer,
@@ -239,6 +241,40 @@
self.failUnlessEqual(
last_modified_header, 'Tue, 30 Jan 2001 13:45:59 GMT')
+ def test_missing_storage(self):
+ # When a file exists in the DB but is missing from disk, a 404
+ # is just confusing. It's an internal error, so 500 instead.
+ client = LibrarianClient()
+
+ # Upload a file so we can retrieve it.
+ sample_data = 'blah'
+ file_alias_id = client.addFile(
+ 'sample', len(sample_data), StringIO(sample_data),
+ contentType='text/plain')
+ url = client.getURLForAlias(file_alias_id)
+
+ # Change the date_created to a known value that doesn't match
+ # the disk timestamp. The timestamp on disk cannot be trusted.
+ file_alias = IMasterStore(LibraryFileAlias).get(
+ LibraryFileAlias, file_alias_id)
+
+ # Commit so the file is available from the Librarian.
+ self.commit()
+
+ # Fetch the file via HTTP.
+ urlopen(url)
+
+ # Delete the on-disk file.
+ storage = LibrarianStorage(config.librarian_server.root, None)
+ os.remove(storage._fileLocation(file_alias.contentID))
+
+ # The URL now 500s, since the DB says it should exist.
+ exception = self.assertRaises(HTTPError, urlopen, url)
+ self.assertEqual(500, exception.code)
+ self.assertIn('Server', exception.info())
+ self.assertNotIn('Last-Modified', exception.info())
+ self.assertNotIn('Cache-Control', exception.info())
+
def get_restricted_file_and_public_url(self, filename='sample'):
# Use a regular LibrarianClient to ensure we speak to the
# nonrestricted port on the librarian which is where secured
=== modified file 'lib/lp/services/librarianserver/web.py'
--- lib/lp/services/librarianserver/web.py 2015-07-10 02:42:42 +0000
+++ lib/lp/services/librarianserver/web.py 2015-12-27 04:00:12 +0000
@@ -176,25 +176,25 @@
% (dbfilename.encode('utf-8'), filename))
return fourOhFour
- if not restricted:
- # Set our caching headers. Librarian files can be cached forever.
- request.setHeader('Cache-Control', 'max-age=31536000, public')
- else:
- # Restricted files require revalidation every time. For now,
- # until the deployment details are completely reviewed, the
- # simplest, most cautious approach is taken: no caching permited.
- request.setHeader('Cache-Control', 'max-age=0, private')
-
- if self.storage.hasFile(dbcontentID) or self.upstreamHost is None:
+ if self.storage.hasFile(dbcontentID):
# XXX: Brad Crittenden 2007-12-05 bug=174204: When encodings are
# stored as part of a file's metadata this logic will be replaced.
encoding, mimetype = guess_librarian_encoding(filename, mimetype)
+ # Set our caching headers. Public Librarian files can be
+ # cached forever, while private ones mustn't be at all.
+ request.setHeader(
+ 'Cache-Control',
+ 'max-age=31536000, public'
+ if not restricted else 'max-age=0, private')
return File(
mimetype, encoding, date_created,
self.storage._fileLocation(dbcontentID))
- else:
+ elif self.upstreamHost is not None:
return proxy.ReverseProxyResource(self.upstreamHost,
self.upstreamPort, request.path)
+ else:
+ raise AssertionError(
+ "Content %d missing from storage." % dbcontentID)
@defer.inlineCallbacks
def _cb_getFileAlias_swift(
@@ -212,26 +212,27 @@
% (dbfilename.encode('utf-8'), filename))
defer.returnValue(fourOhFour)
- if not restricted:
- # Set our caching headers. Librarian files can be cached forever.
- request.setHeader('Cache-Control', 'max-age=31536000, public')
- else:
- # Restricted files require revalidation every time. For now,
- # until the deployment details are completely reviewed, the
- # simplest, most cautious approach is taken: no caching permited.
- request.setHeader('Cache-Control', 'max-age=0, private')
-
stream = yield self.storage.open(dbcontentID)
- if stream is not None or self.upstreamHost is None:
+ if stream is not None:
# XXX: Brad Crittenden 2007-12-05 bug=174204: When encodings are
# stored as part of a file's metadata this logic will be replaced.
encoding, mimetype = guess_librarian_encoding(filename, mimetype)
- defer.returnValue(File_swift(
- mimetype, encoding, date_created, stream, size))
- else:
+ file = File_swift(mimetype, encoding, date_created, stream, size)
+ assert file.exists
+ # Set our caching headers. Public Librarian files can be
+ # cached forever, while private ones mustn't be at all.
+ request.setHeader(
+ 'Cache-Control',
+ 'max-age=31536000, public'
+ if not restricted else 'max-age=0, private')
+ defer.returnValue(file)
+ elif self.upstreamHost is not None:
defer.returnValue(
proxy.ReverseProxyResource(
self.upstreamHost, self.upstreamPort, request.path))
+ else:
+ raise AssertionError(
+ "Content %d missing from storage." % dbcontentID)
def render_GET(self, request):
return defaultResource.render(request)
Follow ups