launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #24468
[Merge] ~pappacena/launchpad:https-mirror-prober-proxy-fix into launchpad:master
Thiago F. Pappacena has proposed merging ~pappacena/launchpad:https-mirror-prober-proxy-fix into launchpad:master.
Commit message:
Adding an HTTPS proxy-CONNECT agent for Twisted, and using it for the HTTPS mirror prober.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~pappacena/launchpad/+git/launchpad/+merge/380650
- Added a new type of HTTP Agent at lp.services.httpproxy.connect_tunneling, so we can reuse it for future implementations that need to make HTTPS requests through a proxy
- Changed the prober to use this new type of agent
--
Your team Launchpad code reviewers is requested to review the proposed merge of ~pappacena/launchpad:https-mirror-prober-proxy-fix into launchpad:master.
diff --git a/lib/lp/registry/scripts/distributionmirror_prober.py b/lib/lp/registry/scripts/distributionmirror_prober.py
index c633fb8..fd40f8d 100644
--- a/lib/lp/registry/scripts/distributionmirror_prober.py
+++ b/lib/lp/registry/scripts/distributionmirror_prober.py
@@ -13,6 +13,10 @@ import os.path
from StringIO import StringIO
import OpenSSL
+from OpenSSL.SSL import (
+ Context,
+ TLSv1_1_METHOD,
+ )
import requests
from six.moves import http_client
from six.moves.urllib.parse import (
@@ -31,13 +35,11 @@ from twisted.internet.defer import (
CancelledError,
DeferredSemaphore,
)
-from twisted.internet.endpoints import HostnameEndpoint
from twisted.internet.ssl import VerificationError
from twisted.python.failure import Failure
from twisted.web.client import (
Agent,
BrowserLikePolicyForHTTPS,
- ProxyAgent,
ResponseNeverReceived,
)
from twisted.web.http import HTTPClient
@@ -53,6 +55,7 @@ from lp.registry.interfaces.distributionmirror import (
)
from lp.registry.interfaces.distroseries import IDistroSeries
from lp.services.config import config
+from lp.services.httpproxy.connect_tunneling import TunnelingAgent
from lp.services.librarian.interfaces import ILibraryFileAliasSet
from lp.services.timeout import urlfetch
from lp.services.webapp import canonical_url
@@ -205,9 +208,8 @@ class HTTPSProbeFailureHandler:
if self.isInvalidCertificateError(error):
invalid_certificate_hosts.add(
(self.factory.request_host, self.factory.request_port))
- reason = InvalidHTTPSCertificate(
+ raise InvalidHTTPSCertificate(
self.factory.request_host, self.factory.request_port)
- raise reason
if self.isTimeout(error):
raise ProberTimeout(self.factory.url, self.factory.timeout)
raise error
@@ -304,6 +306,7 @@ class ProberFactory(protocol.ClientFactory):
self.timeoutCall = None
self.setURL(url.encode('ascii'))
self.logger = logging.getLogger('distributionmirror-prober')
+ self._https_client = None
@property
def is_https(self):
@@ -339,15 +342,20 @@ class ProberFactory(protocol.ClientFactory):
return self._deferred
def getHttpsClient(self):
+ if self._https_client is not None:
+ return self._https_client
# Should we use a proxy?
if not config.launchpad.http_proxy:
agent = Agent(
reactor=reactor, contextFactory=self.https_agent_policy())
else:
- endpoint = HostnameEndpoint(
- reactor, self.connect_host, self.connect_port)
- agent = ProxyAgent(endpoint)
- return TreqHTTPClient(agent)
+ contextFactory = self.https_agent_policy()
+ contextFactory.getContext = lambda: Context(TLSv1_1_METHOD)
+ agent = TunnelingAgent(
+ reactor, (self.connect_host, self.connect_port, None),
+ contextFactory=contextFactory)
+ self._https_client = TreqHTTPClient(agent)
+ return self._https_client
def connect(self):
"""Starts the connection and sets the self._deferred to the proper
diff --git a/lib/lp/registry/tests/test_distributionmirror_prober.py b/lib/lp/registry/tests/test_distributionmirror_prober.py
index c3e363c..af890a5 100644
--- a/lib/lp/registry/tests/test_distributionmirror_prober.py
+++ b/lib/lp/registry/tests/test_distributionmirror_prober.py
@@ -31,12 +31,13 @@ from twisted.internet import (
reactor,
ssl,
)
+from twisted.internet.defer import (
+ CancelledError,
+ inlineCallbacks,
+ )
from twisted.python.failure import Failure
from twisted.web import server
-from twisted.web.client import (
- BrowserLikePolicyForHTTPS,
- ProxyAgent,
- )
+from twisted.web.client import BrowserLikePolicyForHTTPS
from zope.component import getUtility
from zope.security.proxy import removeSecurityProxy
@@ -77,6 +78,7 @@ from lp.registry.tests.distributionmirror_http_server import (
)
from lp.services.config import config
from lp.services.daemons.tachandler import TacTestSetup
+from lp.services.httpproxy.connect_tunneling import TunnelingAgent
from lp.services.timeout import default_timeout
from lp.testing import (
clean_up_reactor,
@@ -219,40 +221,19 @@ class TestProberHTTPSProtocolAndFactory(TestCase):
return deferred.addCallback(got_result)
def test_https_prober_uses_proxy(self):
- root = DistributionMirrorTestSecureHTTPServer()
- site = server.Site(root)
- proxy_listen_port = reactor.listenTCP(0, site)
- proxy_port = proxy_listen_port.getHost().port
+ proxy_port = 6654
self.pushConfig(
- 'launchpad', http_proxy='http://localhost:%s/valid-mirror/file'
- % proxy_port)
+ 'launchpad', http_proxy='http://localhost:%s'% proxy_port)
url = 'https://localhost:%s/valid-mirror/file' % self.port
- prober = RedirectAwareProberFactory(url)
+ prober = RedirectAwareProberFactory(url, timeout=0.5)
self.assertEqual(prober.url, url)
- deferred = prober.probe()
- def got_result(result):
- # We basically don't care about the result here. We just want to
- # check that it did the request to the correct URI,
- # and ProxyAgent was used pointing to the correct proxy.
- agent = prober.getHttpsClient()._agent
- self.assertIsInstance(agent, ProxyAgent)
- self.assertEqual('localhost', agent._proxyEndpoint._hostText)
- self.assertEqual(proxy_port, agent._proxyEndpoint._port)
-
- self.assertEqual(
- 'https://localhost:%s/valid-mirror/file' % self.port,
- result.value.response.request.absoluteURI)
-
- def cleanup(*args, **kwargs):
- proxy_listen_port.stopListening()
-
- # Doing the proxy checks on the error callback because the
- # proxy is dummy and always returns 404.
- deferred.addErrback(got_result)
- deferred.addBoth(cleanup)
- return deferred
+ # We just want to check that it did the request using the correct
+ # Agent, pointing to the correct proxy config.
+ agent = prober.getHttpsClient()._agent
+ self.assertIsInstance(agent, TunnelingAgent)
+ self.assertEqual(('localhost', proxy_port, None), agent._proxyConf)
def test_https_fails_on_invalid_certificates(self):
"""Changes set back the default browser-like policy for HTTPS
diff --git a/lib/lp/services/httpproxy/__init__.py b/lib/lp/services/httpproxy/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/lib/lp/services/httpproxy/__init__.py
diff --git a/lib/lp/services/httpproxy/connect_tunneling.py b/lib/lp/services/httpproxy/connect_tunneling.py
new file mode 100644
index 0000000..b3c960a
--- /dev/null
+++ b/lib/lp/services/httpproxy/connect_tunneling.py
@@ -0,0 +1,105 @@
+# Copyright 2009-2020 Canonical Ltd. This software is licensed under the
+# GNU Affero General Public License version 3 (see the file LICENSE).
+
+"""CONNECT proxy to help on HTTPS connections when using twisted.
+
+See https://twistedmatrix.com/trac/ticket/8806 (and reference
+implementation at https://github.com/scrapy/scrapy/pull/397/files)."""
+
+__metaclass__ = type
+
+__all__ = [
+ 'TunnelingAgent',
+ ]
+
+import re
+
+from twisted.internet import defer
+from twisted.internet.endpoints import TCP4ClientEndpoint
+from twisted.web.client import Agent
+
+
+class TunnelError(Exception):
+ """An HTTP CONNECT tunnel could not be established by the proxy.
+
+ Delivered through the errback of the deferred returned by
+ TunnelingTCP4ClientEndpoint.connect when the proxy's answer to the
+ CONNECT request does not start with an "HTTP/1.x 200" status line.
+ """
+
+
+class TunnelingTCP4ClientEndpoint(TCP4ClientEndpoint):
+ """An endpoint that tunnels through proxies to allow HTTPS requests.
+
+ To accomplish that, this endpoint sends an HTTP CONNECT to the proxy.
+ """
+
+ _responseMatcher = re.compile('HTTP/1\.. 200')
+
+ def __init__(self, reactor, host, port, proxyConf, contextFactory,
+ timeout=30, bindAddress=None):
+ proxyHost, proxyPort, self._proxyAuthHeader = proxyConf
+ super(TunnelingTCP4ClientEndpoint, self).__init__(reactor, proxyHost,
+ proxyPort, timeout, bindAddress)
+ self._tunneledHost = host
+ self._tunneledPort = port
+ self._contextFactory = contextFactory
+ self._tunnelReadyDeferred = defer.Deferred()
+ self._connectDeferred = None
+ self._protocol = None
+
+ def requestTunnel(self, protocol):
+ """Asks the proxy to open a tunnel."""
+ tunnelReq = 'CONNECT %s:%s HTTP/1.1\n' % (self._tunneledHost,
+ self._tunneledPort)
+ if self._proxyAuthHeader:
+ tunnelReq += 'Proxy-Authorization: %s \n\n' % self._proxyAuthHeader
+ else:
+ tunnelReq += '\n'
+ protocol.transport.write(tunnelReq)
+ self._protocolDataReceived = protocol.dataReceived
+ protocol.dataReceived = self.processProxyResponse
+ self._protocol = protocol
+ return protocol
+
+ def processProxyResponse(self, bytes):
+ """Processes the response from the proxy. If the tunnel is successfully
+ created, notifies the client that we are ready to send requests. If not
+ raises a TunnelError.
+ """
+ self._protocol.dataReceived = self._protocolDataReceived
+ if TunnelingTCP4ClientEndpoint._responseMatcher.match(bytes):
+ self._protocol.transport.startTLS(
+ self._contextFactory, self._protocolFactory)
+ self._tunnelReadyDeferred.callback(self._protocol)
+ else:
+ self._tunnelReadyDeferred.errback(
+ TunnelError('Could not open CONNECT tunnel.'))
+
+ def connectFailed(self, reason):
+ """Propagates the errback to the appropriate deferred."""
+ self._tunnelReadyDeferred.errback(reason)
+
+ def connect(self, protocolFactory):
+ self._protocolFactory = protocolFactory
+ self._connectDeferred = super(
+ TunnelingTCP4ClientEndpoint, self).connect(protocolFactory)
+ self._connectDeferred.addCallback(self.requestTunnel)
+ self._connectDeferred.addErrback(self.connectFailed)
+ return self._tunnelReadyDeferred
+
+
+class TunnelingAgent(Agent):
+ """An agent that uses a L{TunnelingTCP4ClientEndpoint} to make HTTPS
+ requests.
+ """
+
+ def __init__(self, reactor, proxyConf, contextFactory=None,
+ connectTimeout=None, bindAddress=None, pool=None):
+ super(TunnelingAgent, self).__init__(reactor, contextFactory,
+ connectTimeout, bindAddress, pool)
+ self._contextFactory = contextFactory
+ self._connectTimeout = connectTimeout
+ self._bindAddress = bindAddress
+ self._proxyConf = proxyConf
+
+ def _getEndpoint(self, url):
+ return TunnelingTCP4ClientEndpoint(
+ self._reactor, url.host, url.port,
+ self._proxyConf, self._contextFactory, self._connectTimeout,
+ self._bindAddress)