← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~cjwatson/launchpad/explicit-proxy-github into lp:launchpad

 

Colin Watson has proposed merging lp:~cjwatson/launchpad/explicit-proxy-github into lp:launchpad.

Commit message:
Convert the GitHub external bug tracker to urlfetch with explicit proxy configuration.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/explicit-proxy-github/+merge/348427

This pushes down most of the code from the GitHub tracker that dealt with using requests rather than urlopen to lp.bugs.externalbugtracker.base, and beefs it up to handle POSTs and to use urlfetch.  This is in a temporary subclass of ExternalBugTracker until all trackers have been converted.

(I've converted all the trackers locally in order to prove to myself that this is a suitable interface, but the resulting diff is pretty big so I need to break it into several MPs.)
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/explicit-proxy-github into lp:launchpad.
=== modified file 'lib/lp/bugs/externalbugtracker/__init__.py'
--- lib/lp/bugs/externalbugtracker/__init__.py	2016-07-04 17:11:29 +0000
+++ lib/lp/bugs/externalbugtracker/__init__.py	2018-06-22 22:02:03 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2016 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2018 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """__init__ module for the externalbugtracker package."""
@@ -14,6 +14,7 @@
     'DebBugs',
     'DebBugsDatabaseNotFound',
     'ExternalBugTracker',
+    'ExternalBugTrackerRequests',
     'GitHub',
     'InvalidBugId',
     'LookupTree',
@@ -39,6 +40,7 @@
     BugWatchUpdateError,
     BugWatchUpdateWarning,
     ExternalBugTracker,
+    ExternalBugTrackerRequests,
     InvalidBugId,
     LookupTree,
     PrivateRemoteBug,

=== modified file 'lib/lp/bugs/externalbugtracker/base.py'
--- lib/lp/bugs/externalbugtracker/base.py	2016-07-04 17:11:29 +0000
+++ lib/lp/bugs/externalbugtracker/base.py	2018-06-22 22:02:03 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2016 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2018 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """External bugtrackers."""
@@ -12,10 +12,12 @@
     'BugWatchUpdateError',
     'BugWatchUpdateWarning',
     'ExternalBugTracker',
+    'ExternalBugTrackerRequests',
     'InvalidBugId',
     'LookupTree',
     'LP_USER_AGENT',
     'PrivateRemoteBug',
+    'repost_on_redirect_hook',
     'UnknownBugTrackerTypeError',
     'UnknownRemoteImportanceError',
     'UnknownRemoteStatusError',
@@ -28,8 +30,12 @@
 
 import urllib
 import urllib2
-from urlparse import urlparse
 
+import requests
+from six.moves.urllib_parse import (
+    urljoin,
+    urlparse,
+    )
 from zope.interface import implementer
 
 from lp.bugs.adapters import treelookup
@@ -42,6 +48,10 @@
     )
 from lp.services.config import config
 from lp.services.database.isolation import ensure_no_transaction
+from lp.services.timeout import (
+    override_timeout,
+    urlfetch,
+    )
 
 # The user agent we send in our requests
 LP_USER_AGENT = "Launchpad Bugscraper/0.2 (https://bugs.launchpad.net/)"
@@ -84,7 +94,7 @@
     def __init__(self, url, error):
         BugWatchUpdateError.__init__(self)
         self.url = url
-        self.error = str(error)
+        self.error = error
 
     def __str__(self):
         return "%s: %s" % (self.url, self.error)
@@ -135,6 +145,18 @@
     """Raised when a bug is marked private on the remote bugtracker."""
 
 
+def repost_on_redirect_hook(response, *args, **kwargs):
+    # The hook facilities in requests currently only let us modify the
+    # response, so we need to cheat a bit in order to persuade it to make a
+    # POST request to the target URL of a redirection.  The simplest
+    # approach is to pretend that the status code of a redirection response
+    # is 307 Temporary Redirect, which requires the request method to remain
+    # unchanged.
+    if response.status_code in (301, 302, 303):
+        response.status_code = 307
+    return response
+
+
 @implementer(IExternalBugTracker)
 class ExternalBugTracker:
     """Base class for an external bug tracker."""
@@ -283,6 +305,71 @@
         return response.read()
 
 
+class ExternalBugTrackerRequests(ExternalBugTracker):
+    """An external bug tracker that uses `requests`.
+
+    This is temporary until all bug tracker types have been converted.
+    """
+
+    @ensure_no_transaction
+    def makeRequest(self, method, url, **kwargs):
+        """Make a request.
+
+        :param method: The HTTP request method.
+        :param url: The URL to request.
+        :return: A `requests.Response` object.
+        :raises requests.RequestException: if the request fails.
+        """
+        with override_timeout(self.timeout):
+            return urlfetch(
+                url, method=method, trust_env=False, use_proxy=True, **kwargs)
+
+    def _getPage(self, page, **kwargs):
+        """GET the specified page on the remote HTTP server.
+
+        :return: A `requests.Response` object.
+        """
+        try:
+            url = self.baseurl
+            if not url.endswith("/"):
+                url += "/"
+            url = urljoin(url, page)
+            response = self.makeRequest(
+                "GET", url, headers=self._getHeaders(), **kwargs)
+            response.raise_for_status()
+            return response
+        except requests.RequestException as e:
+            raise BugTrackerConnectError(self.baseurl, e)
+
+    def _postPage(self, page, form, repost_on_redirect=False):
+        """POST to the specified page and form.
+
+        :param form: is a dict of form variables being POSTed.
+        :param repost_on_redirect: override RFC-compliant redirect handling.
+            By default, if the POST receives a redirect response, the
+            request to the redirection's target URL will be a GET.  If
+            `repost_on_redirect` is True, this method will do a second POST
+            instead.  Do this only if you are sure that repeated POST to
+            this page is safe, as is usually the case with search forms.
+        :return: A `requests.Response` object.
+        """
+        hooks = (
+            {'response': repost_on_redirect_hook}
+            if repost_on_redirect else None)
+        try:
+            url = self.baseurl
+            if not url.endswith("/"):
+                url += "/"
+            url = urljoin(url, page)
+            response = self.makeRequest(
+                "POST", url, headers=self._getHeaders(), data=form,
+                hooks=hooks)
+            response.raise_for_status()
+            return response
+        except requests.RequestException as e:
+            raise BugTrackerConnectError(self.baseurl, e)
+
+
 class LookupBranch(treelookup.LookupBranch):
     """A lookup branch customised for documenting external bug trackers."""
 

=== modified file 'lib/lp/bugs/externalbugtracker/github.py'
--- lib/lp/bugs/externalbugtracker/github.py	2016-12-15 07:20:26 +0000
+++ lib/lp/bugs/externalbugtracker/github.py	2018-06-22 22:02:03 +0000
@@ -1,4 +1,4 @@
-# Copyright 2016 Canonical Ltd.  This software is licensed under the
+# Copyright 2016-2018 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """GitHub ExternalBugTracker utility."""
@@ -11,13 +11,11 @@
     'IGitHubRateLimit',
     ]
 
+from contextlib import contextmanager
 import httplib
 import time
 from urllib import urlencode
-from urlparse import (
-    urljoin,
-    urlunsplit,
-    )
+from urlparse import urlunsplit
 
 import pytz
 import requests
@@ -27,7 +25,7 @@
 from lp.bugs.externalbugtracker import (
     BugTrackerConnectError,
     BugWatchUpdateError,
-    ExternalBugTracker,
+    ExternalBugTrackerRequests,
     UnknownRemoteStatusError,
     UnparsableBugTrackerVersion,
     )
@@ -39,6 +37,10 @@
 from lp.bugs.interfaces.externalbugtracker import UNKNOWN_REMOTE_IMPORTANCE
 from lp.services.config import config
 from lp.services.database.isolation import ensure_no_transaction
+from lp.services.timeout import (
+    override_timeout,
+    urlfetch,
+    )
 from lp.services.webapp.url import urlsplit
 
 
@@ -56,14 +58,14 @@
 class IGitHubRateLimit(Interface):
     """Interface for rate-limit tracking for the GitHub Issues API."""
 
-    def makeRequest(method, url, token=None, **kwargs):
-        """Make a request, but only if the remote host's rate limit permits it.
+    def checkLimit(url, token=None):
+        """A context manager that checks the remote host's rate limit.
 
-        :param method: The HTTP request method.
-        :param url: The URL to request.
+        :param url: The URL being requested.
         :param token: If not None, an OAuth token to use as authentication
             to the remote host when asking it for the current rate limit.
-        :return: A `requests.Response` object.
+        :return: A suitable `Authorization` header (from the context
+            manager's `__enter__` method).
         :raises GitHubExceededRateLimit: if the rate limit was exceeded.
         """
 
@@ -77,7 +79,8 @@
     def __init__(self):
         self.clearCache()
 
-    def _update(self, host, auth_header=None):
+    @ensure_no_transaction
+    def _update(self, host, timeout, auth_header=None):
         headers = {
             "User-Agent": LP_USER_AGENT,
             "Host": host,
@@ -87,35 +90,26 @@
             headers["Authorization"] = auth_header
         url = "https://%s/rate_limit"; % host
         try:
-            response = requests.get(url, headers=headers)
-            response.raise_for_status()
-            return response.json()["resources"]["core"]
+            with override_timeout(timeout):
+                response = urlfetch(
+                    url, headers=headers, trust_env=False, use_proxy=True)
+                return response.json()["resources"]["core"]
         except requests.RequestException as e:
             raise BugTrackerConnectError(url, e)
 
-    @ensure_no_transaction
-    def makeRequest(self, method, url, token=None, **kwargs):
+    @contextmanager
+    def checkLimit(self, url, timeout, token=None):
         """See `IGitHubRateLimit`."""
-        if token is not None:
-            auth_header = "token %s" % token
-            if "headers" in kwargs:
-                kwargs["headers"] = kwargs["headers"].copy()
-            else:
-                kwargs["headers"] = {}
-            kwargs["headers"]["Authorization"] = auth_header
-        else:
-            auth_header = None
-
+        auth_header = "token %s" % token if token is not None else None
         host = urlsplit(url).netloc
         if (host, token) not in self._limits:
             self._limits[(host, token)] = self._update(
-                host, auth_header=auth_header)
+                host, timeout, auth_header=auth_header)
         limit = self._limits[(host, token)]
         if not limit["remaining"]:
             raise GitHubExceededRateLimit(host, limit["reset"])
-        response = requests.request(method, url, **kwargs)
+        yield auth_header
         limit["remaining"] -= 1
-        return response
 
     def clearCache(self):
         """See `IGitHubRateLimit`."""
@@ -126,7 +120,7 @@
     """The GitHub Issues URL is malformed."""
 
 
-class GitHub(ExternalBugTracker):
+class GitHub(ExternalBugTrackerRequests):
     """An `ExternalBugTracker` for dealing with GitHub issues."""
 
     # Avoid eating through our rate limit unnecessarily.
@@ -224,30 +218,32 @@
         else:
             raise UnknownRemoteStatusError(remote_status)
 
-    def _getHeaders(self, last_accessed=None):
+    def _getHeaders(self):
         """See `ExternalBugTracker`."""
         headers = super(GitHub, self)._getHeaders()
         headers["Accept"] = "application/vnd.github.v3+json"
+        return headers
+
+    def makeRequest(self, method, url, headers=None, last_accessed=None,
+                    **kwargs):
+        """See `ExternalBugTracker`."""
+        if headers is None:
+            headers = {}
         if last_accessed is not None:
             headers["If-Modified-Since"] = (
                 last_accessed.astimezone(pytz.UTC).strftime(
                     "%a, %d %b %Y %H:%M:%S GMT"))
-        return headers
+        with getUtility(IGitHubRateLimit).checkLimit(
+                url, self.timeout,
+                token=self.credentials["token"]) as auth_header:
+            headers["Authorization"] = auth_header
+            return super(GitHub, self).makeRequest(
+                method, url, headers=headers)
 
-    def _getPage(self, page, last_accessed=None):
+    def _getPage(self, page, last_accessed=None, **kwargs):
         """See `ExternalBugTracker`."""
-        # We prefer to use requests here because it knows how to parse Link
-        # headers.  Note that this returns a `requests.Response`, not the
-        # page data.
-        try:
-            response = getUtility(IGitHubRateLimit).makeRequest(
-                "GET", urljoin(self.baseurl + "/", page),
-                headers=self._getHeaders(last_accessed=last_accessed),
-                token=self.credentials["token"])
-            response.raise_for_status()
-            return response
-        except requests.RequestException as e:
-            raise BugTrackerConnectError(self.baseurl, e)
+        return super(GitHub, self)._getPage(
+            page, last_accessed=last_accessed, token=self.credentials["token"])
 
     def _getCollection(self, base_page, last_accessed=None):
         """Yield each item from a batched remote collection.
@@ -260,8 +256,8 @@
             try:
                 response = self._getPage(page, last_accessed=last_accessed)
             except BugTrackerConnectError as e:
-                if (e.response is not None and
-                        e.response.status_code == httplib.NOT_MODIFIED):
+                if (e.error.response is not None and
+                        e.error.response.status_code == httplib.NOT_MODIFIED):
                     return
                 else:
                     raise

=== modified file 'lib/lp/bugs/externalbugtracker/tests/test_externalbugtracker.py'
--- lib/lp/bugs/externalbugtracker/tests/test_externalbugtracker.py	2015-10-15 14:09:50 +0000
+++ lib/lp/bugs/externalbugtracker/tests/test_externalbugtracker.py	2018-06-22 22:02:03 +0000
@@ -1,4 +1,4 @@
-# Copyright 2010-2012 Canonical Ltd.  This software is licensed under the
+# Copyright 2010-2018 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """Test the externalbugtracker package."""
@@ -8,11 +8,20 @@
 from StringIO import StringIO
 import urllib2
 
+import responses
+from testtools.matchers import (
+    ContainsDict,
+    Equals,
+    MatchesListwise,
+    MatchesStructure,
+    )
+import transaction
 from zope.interface import implementer
 
 from lp.bugs.externalbugtracker.base import (
     BugTrackerConnectError,
     ExternalBugTracker,
+    ExternalBugTrackerRequests,
     LP_USER_AGENT,
     )
 from lp.bugs.externalbugtracker.debbugs import DebBugs
@@ -26,7 +35,10 @@
     TestCase,
     )
 from lp.testing.fakemethod import FakeMethod
-from lp.testing.layers import ZopelessLayer
+from lp.testing.layers import (
+    ZopelessDatabaseLayer,
+    ZopelessLayer,
+    )
 
 
 @implementer(ISupportsBackLinking)
@@ -46,6 +58,7 @@
 
 class TestCheckwatchesConfig(TestCase):
 
+    layer = ZopelessDatabaseLayer
     base_url = "http://www.example.com/";
 
     def test_sync_comments_enabled(self):
@@ -151,6 +164,66 @@
         last_args, last_kwargs = bugtracker._post.calls[-1]
         self.assertEqual((fake_form.url, ), last_args)
 
+    @responses.activate
+    def test_requests_postPage_returns_response_page(self):
+        # _postPage posts, then returns the page text it gets back from
+        # the server.
+        base_url = "http://example.com/";
+        form = self.factory.getUniqueString()
+        fake_form = "<bugzilla>%s</bugzilla>" % self.factory.getUniqueString()
+        bugtracker = ExternalBugTrackerRequests(base_url)
+        transaction.commit()
+        responses.add("POST", base_url + form, body=fake_form)
+        self.assertEqual(fake_form, bugtracker._postPage(form, {}).text)
+
+    @responses.activate
+    def test_requests_postPage_does_not_repost_on_redirect(self):
+        # By default, if the POST redirects, _postPage leaves requests to
+        # handle it in the normal, RFC-compliant way.
+        base_url = "http://example.com/";
+        form = self.factory.getUniqueString()
+        target = self.factory.getUniqueString()
+        fake_form = "<bugzilla>%s</bugzilla>" % self.factory.getUniqueString()
+        bugtracker = ExternalBugTrackerRequests(base_url)
+        transaction.commit()
+        responses.add(
+            "POST", base_url + form, status=302,
+            headers={"Location": base_url + target})
+        responses.add("GET", base_url + target, body=fake_form)
+
+        bugtracker._postPage(form, {})
+
+        requests = [call.request for call in responses.calls]
+        self.assertThat(requests, MatchesListwise([
+            MatchesStructure.byEquality(method="POST", path_url="/" + form),
+            MatchesStructure.byEquality(method="GET", path_url="/" + target),
+            ]))
+
+    @responses.activate
+    def test_requests_postPage_can_repost_on_redirect(self):
+        # Some pages (that means you, BugZilla bug-search page!) can
+        # redirect on POST, but without honouring the POST.  Standard
+        # requests behaviour is to redirect to a GET, but if the caller
+        # says it's safe, _postPage can re-do the POST at the new URL.
+        base_url = "http://example.com/";
+        form = self.factory.getUniqueString()
+        target = self.factory.getUniqueString()
+        fake_form = "<bugzilla>%s</bugzilla>" % self.factory.getUniqueString()
+        bugtracker = ExternalBugTrackerRequests(base_url)
+        transaction.commit()
+        responses.add(
+            "POST", base_url + form, status=302,
+            headers={"Location": base_url + target})
+        responses.add("POST", base_url + target, body=fake_form)
+
+        bugtracker._postPage(form, form={}, repost_on_redirect=True)
+
+        requests = [call.request for call in responses.calls]
+        self.assertThat(requests, MatchesListwise([
+            MatchesStructure.byEquality(method="POST", path_url="/" + form),
+            MatchesStructure.byEquality(method="POST", path_url="/" + target),
+            ]))
+
 
 class TestExternalBugTracker(TestCase):
     """Tests for various methods of the ExternalBugTracker."""
@@ -183,3 +256,35 @@
 
         with monkey_patch(urllib2, urlopen=assert_headers):
             bugtracker._post('some-url', {'post-data': 'here'})
+
+
+class TestExternalBugTrackerRequests(TestCase):
+    """Tests for various methods of the ExternalBugTrackerRequests."""
+
+    layer = ZopelessDatabaseLayer
+
+    @responses.activate
+    def test_postPage_raises_on_404(self):
+        # When posting, a 404 is converted to a BugTrackerConnectError.
+        base_url = "http://example.com/";
+        bugtracker = ExternalBugTrackerRequests(base_url)
+        transaction.commit()
+        responses.add("POST", base_url + "some-url", status=404)
+        self.assertRaises(
+            BugTrackerConnectError,
+            bugtracker._postPage, 'some-url', {'post-data': 'here'})
+
+    @responses.activate
+    def test_postPage_sends_host(self):
+        # When posting, a Host header is sent.
+        base_host = 'example.com'
+        base_url = 'http://%s/' % base_host
+        bugtracker = ExternalBugTrackerRequests(base_url)
+        transaction.commit()
+        responses.add("POST", base_url + "some-url")
+        bugtracker._postPage('some-url', {'post-data': 'here'})
+        self.assertThat(responses.calls[-1].request, MatchesStructure(
+            headers=ContainsDict({
+                "User-Agent": Equals(LP_USER_AGENT),
+                "Host": Equals(base_host),
+                })))

=== modified file 'lib/lp/bugs/externalbugtracker/tests/test_github.py'
--- lib/lp/bugs/externalbugtracker/tests/test_github.py	2018-05-31 10:23:03 +0000
+++ lib/lp/bugs/externalbugtracker/tests/test_github.py	2018-06-22 22:02:03 +0000
@@ -17,6 +17,7 @@
     urlsplit,
     urlunsplit,
     )
+from testtools import ExpectedException
 from testtools.matchers import (
     Contains,
     ContainsDict,
@@ -73,60 +74,60 @@
         self.addCleanup(self.rate_limit.clearCache)
 
     @responses.activate
-    def test_makeRequest_no_token(self):
+    def test_checkLimit_no_token(self):
         _add_rate_limit_response("example.org", limit=60, remaining=50)
-        responses.add("GET", "http://example.org/";, body="test")
-        response = self.rate_limit.makeRequest("GET", "http://example.org/";)
+        with self.rate_limit.checkLimit("http://example.org/";, 30):
+            pass
         self.assertThat(responses.calls[0].request, MatchesStructure(
             path_url=Equals("/rate_limit"),
             headers=Not(Contains("Authorization"))))
-        self.assertEqual(b"test", response.content)
         limit = self.rate_limit._limits[("example.org", None)]
         self.assertEqual(49, limit["remaining"])
         self.assertEqual(1000000000, limit["reset"])
 
         limit["remaining"] = 0
         responses.reset()
-        self.assertRaisesWithContent(
-            GitHubExceededRateLimit,
-            "Rate limit for example.org exceeded "
-            "(resets at Sun Sep  9 07:16:40 2001)",
-            self.rate_limit.makeRequest,
-            "GET", "http://example.org/";)
+        with ExpectedException(
+                GitHubExceededRateLimit,
+                r"Rate limit for example\.org exceeded "
+                r"\(resets at Sun Sep  9 07:16:40 2001\)"):
+            with self.rate_limit.checkLimit("http://example.org/";, 30):
+                pass
         self.assertEqual(0, len(responses.calls))
         self.assertEqual(0, limit["remaining"])
 
     @responses.activate
-    def test_makeRequest_check_token(self):
+    def test_checkLimit_check_token(self):
         _add_rate_limit_response("example.org")
         responses.add("GET", "http://example.org/";, body="test")
-        response = self.rate_limit.makeRequest(
-            "GET", "http://example.org/";, token="abc")
+        with self.rate_limit.checkLimit(
+                "http://example.org/";, 30, token="abc"):
+            pass
         self.assertThat(responses.calls[0].request, MatchesStructure(
             path_url=Equals("/rate_limit"),
             headers=ContainsDict({"Authorization": Equals("token abc")})))
-        self.assertEqual(b"test", response.content)
         limit = self.rate_limit._limits[("example.org", "abc")]
         self.assertEqual(3999, limit["remaining"])
         self.assertEqual(1000000000, limit["reset"])
 
         limit["remaining"] = 0
         responses.reset()
-        self.assertRaisesWithContent(
-            GitHubExceededRateLimit,
-            "Rate limit for example.org exceeded "
-            "(resets at Sun Sep  9 07:16:40 2001)",
-            self.rate_limit.makeRequest,
-            "GET", "http://example.org/";, token="abc")
+        with ExpectedException(
+                GitHubExceededRateLimit,
+                r"Rate limit for example\.org exceeded "
+                r"\(resets at Sun Sep  9 07:16:40 2001\)"):
+            with self.rate_limit.checkLimit(
+                    "http://example.org/";, 30, token="abc"):
+                pass
         self.assertEqual(0, len(responses.calls))
         self.assertEqual(0, limit["remaining"])
 
     @responses.activate
-    def test_makeRequest_check_503(self):
+    def test_checkLimit_check_503(self):
         responses.add("GET", "https://example.org/rate_limit";, status=503)
-        self.assertRaises(
-            BugTrackerConnectError, self.rate_limit.makeRequest,
-            "GET", "http://example.org/";)
+        with ExpectedException(BugTrackerConnectError):
+            with self.rate_limit.checkLimit("http://example.org/";, 30):
+                pass
 
 
 class TestGitHub(TestCase):


Follow ups