launchpad-reviewers team mailing list archive

Thread
Date

[Merge] lp:~wgrant/launchpad/bug-677270 into lp:launchpad

To: mp+279974@xxxxxxxxxxxxxxxxxx
From: William Grant <me@xxxxxxxxxxxxxxxxxx>
Date: Wed, 09 Dec 2015 06:53:25 -0000
Reply-to: mp+279974@xxxxxxxxxxxxxxxxxx
Sender: bounces@xxxxxxxxxxxxx

William Grant has proposed merging lp:~wgrant/launchpad/bug-677270 into lp:launchpad.

Commit message:
Canonicalise path encoding before checking a librarian TimeLimitedToken.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  Bug #677270 in Launchpad itself: "restricted librarian urls give a 404 if normalised (e.g. by apache, chromium, often shows up on private PPA build logs)"
  https://bugs.launchpad.net/launchpad/+bug/677270

For more details, see:
https://code.launchpad.net/~wgrant/launchpad/bug-677270/+merge/279974

Canonicalise path encoding before checking a librarian TimeLimitedToken, and leave tildes unescaped in librarian URLs to appease RFC 3986 and Chromium.

I'm also tempted to whitelist +, as it has no special meaning in the path. Thoughts welcome.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~wgrant/launchpad/bug-677270 into lp:launchpad.

=== modified file 'lib/lp/services/librarian/client.py'
--- lib/lp/services/librarian/client.py	2015-07-08 16:05:11 +0000
+++ lib/lp/services/librarian/client.py	2015-12-09 06:53:03 +0000
@@ -53,9 +53,8 @@
 
 def url_path_quote(filename):
     """Quote `filename` for use in a URL."""
-    # XXX RobertCollins 2004-09-21: Perhaps filenames with / in them
-    # should be disallowed?
-    return urllib.quote(filename).replace('/', '%2F')
+    # This needs to match Library.getAlias' TimeLimitedToken handling.
+    return urllib.quote(filename, safe='~')
 
 
 def get_libraryfilealias_download_path(aliasID, filename):

=== modified file 'lib/lp/services/librarianserver/db.py'
--- lib/lp/services/librarianserver/db.py	2014-09-02 02:03:37 +0000
+++ lib/lp/services/librarianserver/db.py	2015-12-09 06:53:03 +0000
@@ -9,9 +9,11 @@
     ]
 
 import hashlib
+import urllib
 
 from storm.expr import (
     And,
+    Or,
     SQL,
     )
 
@@ -56,13 +58,29 @@
         """
         restricted = self.restricted
         if token and path:
-            # with a token and a path we may be able to serve restricted files
+            # With a token and a path we may be able to serve restricted files
             # on the public port.
+            #
+            # The URL-encoding of the path may have changed somewhere
+            # along the line, so reencode it canonically. LFA.filename
+            # can't contain slashes, so they're safe to leave unencoded.
+            # And urllib.quote erroneously excludes ~ from its safe set,
+            # while RFC 3986 says it should be unescaped and Chromium
+            # forcibly decodes it in any URL that it sees.
+            #
+            # This needs to match url_path_quote.
+            plain_tilde_path = urllib.quote(urllib.unquote(path), safe='/~')
+            # XXX wgrant 2015-12-09: We used to generate URLs with
+            # escaped tildes, so support those until the tokens are all
+            # expired.
+            encoded_tilde_path = urllib.quote(urllib.unquote(path), safe='/')
             store = session_store()
             token_found = store.find(TimeLimitedToken,
                 SQL("age(created) < interval '1 day'"),
                 TimeLimitedToken.token == hashlib.sha256(token).hexdigest(),
-                TimeLimitedToken.path == path).is_empty()
+                Or(
+                    TimeLimitedToken.path == plain_tilde_path,
+                    TimeLimitedToken.path == encoded_tilde_path)).is_empty()
             store.reset()
             if token_found:
                 raise LookupError("Token stale/pruned/path mismatch")

=== modified file 'lib/lp/services/librarianserver/tests/test_web.py'
--- lib/lp/services/librarianserver/tests/test_web.py	2015-10-14 15:22:01 +0000
+++ lib/lp/services/librarianserver/tests/test_web.py	2015-12-09 06:53:03 +0000
@@ -15,6 +15,8 @@
 from lazr.uri import URI
 import pytz
 from storm.expr import SQL
+import testtools
+from testtools.matchers import EndsWith
 import transaction
 from zope.component import getUtility
 
@@ -48,7 +50,7 @@
     return str(parsed.replace(path=parsed.path.replace(old, new)))
 
 
-class LibrarianWebTestCase(unittest.TestCase):
+class LibrarianWebTestCase(testtools.TestCase):
     """Test the librarian's web interface."""
     layer = LaunchpadFunctionalLayer
     dbuser = 'librarian'
@@ -237,13 +239,13 @@
         self.failUnlessEqual(
             last_modified_header, 'Tue, 30 Jan 2001 13:45:59 GMT')
 
-    def get_restricted_file_and_public_url(self):
+    def get_restricted_file_and_public_url(self, filename='sample'):
         # Use a regular LibrarianClient to ensure we speak to the
         # nonrestricted port on the librarian which is where secured
         # restricted files are served from.
         client = LibrarianClient()
         fileAlias = client.addFile(
-            'sample', 12, StringIO('a' * 12), contentType='text/plain')
+            filename, 12, StringIO('a' * 12), contentType='text/plain')
         # Note: We're deliberately using the wrong url here: we should be
         # passing secure=True to getURLForAlias, but to use the returned URL
         # we would need a wildcard DNS facility patched into urlopen; instead
@@ -333,6 +335,30 @@
         finally:
             fileObj.close()
 
+    def test_restricted_with_token_encoding(self):
+        fileAlias, url = self.get_restricted_file_and_public_url('foo~%')
+        self.assertThat(url, EndsWith('/foo~%25'))
+
+        # We have the base url for a restricted file; grant access to it
+        # for a short time.
+        token = TimeLimitedToken.allocate(url)
+
+        # Now we should be able to access the file.
+        fileObj = urlopen(url + "?token=%s" % token)
+        try:
+            self.assertEqual("a" * 12, fileObj.read())
+        finally:
+            fileObj.close()
+
+        # The token is valid even if the filename is encoded differently.
+        mangled_url = url.replace('~', '%7E')
+        self.assertNotEqual(mangled_url, url)
+        fileObj = urlopen(mangled_url + "?token=%s" % token)
+        try:
+            self.assertEqual("a" * 12, fileObj.read())
+        finally:
+            fileObj.close()
+
     def test_restricted_with_expired_token(self):
         fileAlias, url = self.get_restricted_file_and_public_url()
         # We have the base url for a restricted file; grant access to it
@@ -384,6 +410,7 @@
     layer = LaunchpadZopelessLayer
 
     def setUp(self):
+        super(LibrarianZopelessWebTestCase, self).setUp()
         switch_dbuser(config.librarian.dbuser)
 
     def commit(self):
@@ -409,6 +436,7 @@
     layer = LaunchpadZopelessLayer
 
     def setUp(self):
+        super(DeletedContentTestCase, self).setUp()
         switch_dbuser(config.librarian.dbuser)
 
     def test_deletedContentNotFound(self):

Follow ups

[Merge] lp:~wgrant/launchpad/bug-677270 into lp:launchpad
From: noreply, 2015-12-10
Re: [Merge] lp:~wgrant/launchpad/bug-677270 into lp:launchpad
From: Colin Watson, 2015-12-10