← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] ~pappacena/launchpad:https-mirror-prober-proxy-fix into launchpad:master

 

Thiago F. Pappacena has proposed merging ~pappacena/launchpad:https-mirror-prober-proxy-fix into launchpad:master.

Commit message:
Add an HTTPS proxy-CONNECT agent for Twisted, and use it in the HTTPS mirror prober.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~pappacena/launchpad/+git/launchpad/+merge/380650

- Added a new type of HTTP Agent at lp.services.httpproxy.connect_tunneling, so we can reuse it in future implementations that need to make HTTPS requests through a proxy
- Changed the prober to use this new type of agent
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of ~pappacena/launchpad:https-mirror-prober-proxy-fix into launchpad:master.
diff --git a/lib/lp/registry/scripts/distributionmirror_prober.py b/lib/lp/registry/scripts/distributionmirror_prober.py
index c633fb8..fd40f8d 100644
--- a/lib/lp/registry/scripts/distributionmirror_prober.py
+++ b/lib/lp/registry/scripts/distributionmirror_prober.py
@@ -13,6 +13,10 @@ import os.path
 from StringIO import StringIO
 
 import OpenSSL
+from OpenSSL.SSL import (
+    Context,
+    TLSv1_1_METHOD,
+    )
 import requests
 from six.moves import http_client
 from six.moves.urllib.parse import (
@@ -31,13 +35,11 @@ from twisted.internet.defer import (
     CancelledError,
     DeferredSemaphore,
     )
-from twisted.internet.endpoints import HostnameEndpoint
 from twisted.internet.ssl import VerificationError
 from twisted.python.failure import Failure
 from twisted.web.client import (
     Agent,
     BrowserLikePolicyForHTTPS,
-    ProxyAgent,
     ResponseNeverReceived,
     )
 from twisted.web.http import HTTPClient
@@ -53,6 +55,7 @@ from lp.registry.interfaces.distributionmirror import (
     )
 from lp.registry.interfaces.distroseries import IDistroSeries
 from lp.services.config import config
+from lp.services.httpproxy.connect_tunneling import TunnelingAgent
 from lp.services.librarian.interfaces import ILibraryFileAliasSet
 from lp.services.timeout import urlfetch
 from lp.services.webapp import canonical_url
@@ -205,9 +208,8 @@ class HTTPSProbeFailureHandler:
         if self.isInvalidCertificateError(error):
             invalid_certificate_hosts.add(
                 (self.factory.request_host, self.factory.request_port))
-            reason = InvalidHTTPSCertificate(
+            raise InvalidHTTPSCertificate(
                 self.factory.request_host, self.factory.request_port)
-            raise reason
         if self.isTimeout(error):
             raise ProberTimeout(self.factory.url, self.factory.timeout)
         raise error
@@ -304,6 +306,7 @@ class ProberFactory(protocol.ClientFactory):
         self.timeoutCall = None
         self.setURL(url.encode('ascii'))
         self.logger = logging.getLogger('distributionmirror-prober')
+        self._https_client = None
 
     @property
     def is_https(self):
@@ -339,15 +342,20 @@ class ProberFactory(protocol.ClientFactory):
         return self._deferred
 
     def getHttpsClient(self):
+        if self._https_client is not None:
+            return self._https_client
         # Should we use a proxy?
         if not config.launchpad.http_proxy:
             agent = Agent(
                 reactor=reactor, contextFactory=self.https_agent_policy())
         else:
-            endpoint = HostnameEndpoint(
-                reactor, self.connect_host, self.connect_port)
-            agent = ProxyAgent(endpoint)
-        return TreqHTTPClient(agent)
+            contextFactory = self.https_agent_policy()
+            contextFactory.getContext = lambda: Context(TLSv1_1_METHOD)
+            agent = TunnelingAgent(
+                reactor, (self.connect_host, self.connect_port, None),
+                contextFactory=contextFactory)
+        self._https_client = TreqHTTPClient(agent)
+        return self._https_client
 
     def connect(self):
         """Starts the connection and sets the self._deferred to the proper
diff --git a/lib/lp/registry/tests/test_distributionmirror_prober.py b/lib/lp/registry/tests/test_distributionmirror_prober.py
index c3e363c..af890a5 100644
--- a/lib/lp/registry/tests/test_distributionmirror_prober.py
+++ b/lib/lp/registry/tests/test_distributionmirror_prober.py
@@ -31,12 +31,13 @@ from twisted.internet import (
     reactor,
     ssl,
     )
+from twisted.internet.defer import (
+    CancelledError,
+    inlineCallbacks,
+    )
 from twisted.python.failure import Failure
 from twisted.web import server
-from twisted.web.client import (
-    BrowserLikePolicyForHTTPS,
-    ProxyAgent,
-    )
+from twisted.web.client import BrowserLikePolicyForHTTPS
 from zope.component import getUtility
 from zope.security.proxy import removeSecurityProxy
 
@@ -77,6 +78,7 @@ from lp.registry.tests.distributionmirror_http_server import (
     )
 from lp.services.config import config
 from lp.services.daemons.tachandler import TacTestSetup
+from lp.services.httpproxy.connect_tunneling import TunnelingAgent
 from lp.services.timeout import default_timeout
 from lp.testing import (
     clean_up_reactor,
@@ -219,40 +221,19 @@ class TestProberHTTPSProtocolAndFactory(TestCase):
         return deferred.addCallback(got_result)
 
     def test_https_prober_uses_proxy(self):
-        root = DistributionMirrorTestSecureHTTPServer()
-        site = server.Site(root)
-        proxy_listen_port = reactor.listenTCP(0, site)
-        proxy_port = proxy_listen_port.getHost().port
+        proxy_port = 6654
         self.pushConfig(
-            'launchpad', http_proxy='http://localhost:%s/valid-mirror/file'
-                                    % proxy_port)
+            'launchpad', http_proxy='http://localhost:%s'% proxy_port)
 
         url = 'https://localhost:%s/valid-mirror/file' % self.port
-        prober = RedirectAwareProberFactory(url)
+        prober = RedirectAwareProberFactory(url, timeout=0.5)
         self.assertEqual(prober.url, url)
-        deferred = prober.probe()
 
-        def got_result(result):
-            # We basically don't care about the result here. We just want to
-            # check that it did the request to the correct URI,
-            # and ProxyAgent was used pointing to the correct proxy.
-            agent = prober.getHttpsClient()._agent
-            self.assertIsInstance(agent, ProxyAgent)
-            self.assertEqual('localhost', agent._proxyEndpoint._hostText)
-            self.assertEqual(proxy_port, agent._proxyEndpoint._port)
-
-            self.assertEqual(
-                'https://localhost:%s/valid-mirror/file' % self.port,
-                result.value.response.request.absoluteURI)
-
-        def cleanup(*args, **kwargs):
-            proxy_listen_port.stopListening()
-
-        # Doing the proxy checks on the error callback because the
-        # proxy is dummy and always returns 404.
-        deferred.addErrback(got_result)
-        deferred.addBoth(cleanup)
-        return deferred
+        # We just want to check that it did the request using the correct
+        # Agent, pointing to the correct proxy config.
+        agent = prober.getHttpsClient()._agent
+        self.assertIsInstance(agent, TunnelingAgent)
+        self.assertEqual(('localhost', proxy_port, None), agent._proxyConf)
 
     def test_https_fails_on_invalid_certificates(self):
         """Changes set back the default browser-like policy for HTTPS
diff --git a/lib/lp/services/httpproxy/__init__.py b/lib/lp/services/httpproxy/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/lib/lp/services/httpproxy/__init__.py
diff --git a/lib/lp/services/httpproxy/connect_tunneling.py b/lib/lp/services/httpproxy/connect_tunneling.py
new file mode 100644
index 0000000..b3c960a
--- /dev/null
+++ b/lib/lp/services/httpproxy/connect_tunneling.py
@@ -0,0 +1,105 @@
+# Copyright 2009-2020 Canonical Ltd.  This software is licensed under the
+# GNU Affero General Public License version 3 (see the file LICENSE).
+
+"""CONNECT proxy to help on HTTPS connections when using twisted.
+
+See https://twistedmatrix.com/trac/ticket/8806 (and reference
+implementation at https://github.com/scrapy/scrapy/pull/397/files)."""
+
+__metaclass__ = type
+
+__all__ = [
+    'TunnelingAgent',
+    ]
+
+import re
+
+from twisted.internet import defer
+from twisted.internet.endpoints import TCP4ClientEndpoint
+from twisted.web.client import Agent
+
+
+class TunnelError(Exception):
+    """An HTTP CONNECT tunnel could not be established by the proxy."""
+
+
+class TunnelingTCP4ClientEndpoint(TCP4ClientEndpoint):
+    """An endpoint that tunnels through proxies to allow HTTPS requests.
+
+    To accomplish that, this endpoint sends an HTTP CONNECT to the proxy.
+    """
+
+    _responseMatcher = re.compile('HTTP/1\.. 200')
+
+    def __init__(self, reactor, host, port, proxyConf, contextFactory,
+                 timeout=30, bindAddress=None):
+        proxyHost, proxyPort, self._proxyAuthHeader = proxyConf
+        super(TunnelingTCP4ClientEndpoint, self).__init__(reactor, proxyHost,
+            proxyPort, timeout, bindAddress)
+        self._tunneledHost = host
+        self._tunneledPort = port
+        self._contextFactory = contextFactory
+        self._tunnelReadyDeferred = defer.Deferred()
+        self._connectDeferred = None
+        self._protocol = None
+
+    def requestTunnel(self, protocol):
+        """Asks the proxy to open a tunnel."""
+        tunnelReq = 'CONNECT %s:%s HTTP/1.1\n' % (self._tunneledHost,
+                                                  self._tunneledPort)
+        if self._proxyAuthHeader:
+            tunnelReq += 'Proxy-Authorization: %s \n\n' % self._proxyAuthHeader
+        else:
+            tunnelReq += '\n'
+        protocol.transport.write(tunnelReq)
+        self._protocolDataReceived = protocol.dataReceived
+        protocol.dataReceived = self.processProxyResponse
+        self._protocol = protocol
+        return protocol
+
+    def processProxyResponse(self, bytes):
+        """Processes the response from the proxy. If the tunnel is successfully
+        created, notifies the client that we are ready to send requests. If not
+        raises a TunnelError.
+        """
+        self._protocol.dataReceived = self._protocolDataReceived
+        if TunnelingTCP4ClientEndpoint._responseMatcher.match(bytes):
+            self._protocol.transport.startTLS(
+                    self._contextFactory, self._protocolFactory)
+            self._tunnelReadyDeferred.callback(self._protocol)
+        else:
+            self._tunnelReadyDeferred.errback(
+                TunnelError('Could not open CONNECT tunnel.'))
+
+    def connectFailed(self, reason):
+        """Propagates the errback to the appropriate deferred."""
+        self._tunnelReadyDeferred.errback(reason)
+
+    def connect(self, protocolFactory):
+        self._protocolFactory = protocolFactory
+        self._connectDeferred = super(
+            TunnelingTCP4ClientEndpoint, self).connect(protocolFactory)
+        self._connectDeferred.addCallback(self.requestTunnel)
+        self._connectDeferred.addErrback(self.connectFailed)
+        return self._tunnelReadyDeferred
+
+
+class TunnelingAgent(Agent):
+    """An agent that uses a L{TunnelingTCP4ClientEndpoint} to make HTTPS
+    requests.
+    """
+
+    def __init__(self, reactor, proxyConf, contextFactory=None,
+                 connectTimeout=None, bindAddress=None, pool=None):
+        super(TunnelingAgent, self).__init__(reactor, contextFactory,
+            connectTimeout, bindAddress, pool)
+        self._contextFactory = contextFactory
+        self._connectTimeout = connectTimeout
+        self._bindAddress = bindAddress
+        self._proxyConf = proxyConf
+
+    def _getEndpoint(self, url):
+        return TunnelingTCP4ClientEndpoint(
+            self._reactor, url.host, url.port,
+            self._proxyConf, self._contextFactory, self._connectTimeout,
+            self._bindAddress)