launchpad-reviewers team mailing list archive

[Merge] lp:~cjwatson/launchpad/explicit-proxy-mirror-prober into lp:launchpad

Colin Watson has proposed merging lp:~cjwatson/launchpad/explicit-proxy-mirror-prober into lp:launchpad with lp:~cjwatson/launchpad/no-default-http-proxy as a prerequisite.

Commit message:
Convert the mirror prober to use explicit proxy configuration.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/explicit-proxy-mirror-prober/+merge/348543
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/explicit-proxy-mirror-prober into lp:launchpad.
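
As context for the diff below: proxy selection moves from the http_proxy
environment variable to Launchpad's own config.launchpad.http_proxy, and the
cdimage file list is now fetched with lp.services.timeout.urlfetch rather than
urllib2. A minimal sketch of the new fetch path, using only calls that appear
in the diff (the timeout value is arbitrary and the manifest URL is the one
used in the tests):

    from lp.services.timeout import default_timeout, urlfetch

    # use_proxy=True routes the request through config.launchpad.http_proxy;
    # trust_env=False stops requests from also honouring http_proxy/no_proxy
    # set in the environment, which is what makes the proxy handling explicit.
    with default_timeout(30.0):
        response = urlfetch(
            'http://releases.ubuntu.com/.manifest',
            headers={'Pragma': 'no-cache', 'Cache-control': 'no-cache'},
            trust_env=False, use_proxy=True)
        for line in response.iter_lines():
            flavour, seriesname, path, size = line.split('\t')
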
=== modified file 'cronscripts/distributionmirror-prober.py'
--- cronscripts/distributionmirror-prober.py	2013-01-07 02:40:55 +0000
+++ cronscripts/distributionmirror-prober.py	2018-06-26 15:14:17 +0000
@@ -1,14 +1,12 @@
 #!/usr/bin/python -S
 #
-# Copyright 2009-2011 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2018 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """Script to probe distribution mirrors and check how up-to-date they are."""
 
 import _pythonpath
 
-import os
-
 from lp.registry.interfaces.distributionmirror import MirrorContent
 from lp.registry.scripts.distributionmirror_prober import DistroMirrorProber
 from lp.services.config import config
@@ -16,6 +14,7 @@
     LaunchpadCronScript,
     LaunchpadScriptFailure,
     )
+from lp.services.timeout import set_default_timeout_function
 
 
 class DistroMirrorProberScript(LaunchpadCronScript):
@@ -49,12 +48,8 @@
                 'Wrong value for argument --content-type: %s'
                 % self.options.content_type)
 
-        if config.distributionmirrorprober.use_proxy:
-            os.environ['http_proxy'] = config.launchpad.http_proxy
-            self.logger.debug("Using %s as proxy." % os.environ['http_proxy'])
-        else:
-            self.logger.debug("Not using any proxy.")
-
+        set_default_timeout_function(
+            lambda: config.distributionmirrorprober.timeout)
         DistroMirrorProber(self.txn, self.logger).probe(
             content_type, self.options.no_remote_hosts, self.options.force,
             self.options.max_mirrors, not self.options.no_owner_notification)
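
With the use_proxy switch and the environment fiddling gone, the only new
wiring the script needs is a default timeout, since urlfetch() further down
the call chain asks for one. A sketch of that registration on its own, using
only names from the hunk above; a callable is registered rather than a plain
value so the configured timeout is looked up each time it is needed:

    from lp.services.config import config
    from lp.services.timeout import set_default_timeout_function

    # urlfetch() calls made during the probe run will now time out after
    # config.distributionmirrorprober.timeout seconds.
    set_default_timeout_function(
        lambda: config.distributionmirrorprober.timeout)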

=== modified file 'lib/lp/registry/scripts/distributionmirror_prober.py'
--- lib/lp/registry/scripts/distributionmirror_prober.py	2017-05-22 09:33:45 +0000
+++ lib/lp/registry/scripts/distributionmirror_prober.py	2018-06-26 15:14:17 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2017 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2018 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 __metaclass__ = type
@@ -10,12 +10,12 @@
 import httplib
 import itertools
 import logging
-import os
+import os.path
 from StringIO import StringIO
 import urllib
-import urllib2
 import urlparse
 
+import requests
 from twisted.internet import (
     defer,
     protocol,
@@ -36,6 +36,7 @@
 from lp.registry.interfaces.distroseries import IDistroSeries
 from lp.services.config import config
 from lp.services.librarian.interfaces import ILibraryFileAliasSet
+from lp.services.timeout import urlfetch
 from lp.services.webapp import canonical_url
 from lp.soyuz.interfaces.distroarchseries import IDistroArchSeries
 
@@ -202,9 +203,9 @@
     request_path = None
 
     # Details of the URL of the host in which we'll connect, which will only
-    # be different from request_* in case we have an http_proxy environment
-    # variable --in that case the scheme, host and port will be the ones
-    # extracted from http_proxy and the path will be self.url
+    # be different from request_* in case we have a configured http_proxy --
+    # in that case the scheme, host and port will be the ones extracted from
+    # http_proxy and the path will be self.url.
     connect_scheme = None
     connect_host = None
     connect_port = None
@@ -279,7 +280,7 @@
         # XXX Guilherme Salgado 2006-09-19:
         # We don't actually know how to handle FTP responses, but we
         # expect to be behind a squid HTTP proxy with the patch at
-        # http://www.squid-cache.org/bugs/show_bug.cgi?id=1758 applied. So, if
+        # https://bugs.squid-cache.org/show_bug.cgi?id=1758 applied. So, if
         # you encounter any problems with FTP URLs you'll probably have to nag
         # the sysadmins to fix squid for you.
         if scheme not in ('http', 'ftp'):
@@ -296,9 +297,9 @@
         if self.request_host not in host_timeouts:
             host_timeouts[self.request_host] = 0
 
-        # If the http_proxy variable is set, we want to use it as the host
-        # we're going to connect to.
-        proxy = os.getenv('http_proxy')
+        # If launchpad.http_proxy is set in our configuration, we want to
+        # use it as the host we're going to connect to.
+        proxy = config.launchpad.http_proxy
         if proxy:
             scheme, host, port, dummy = _parse(proxy)
             path = url
@@ -612,31 +613,25 @@
         return failure
 
 
-def _build_request_for_cdimage_file_list(url):
-    headers = {'Pragma': 'no-cache', 'Cache-control': 'no-cache'}
-    return urllib2.Request(url, headers=headers)
-
-
 def _get_cdimage_file_list():
     url = config.distributionmirrorprober.cdimage_file_list_url
+    # In test environments, this may be a file: URL.  Adjust it to be in a
+    # form that requests can cope with (i.e. using an absolute path).
+    parsed_url = urlparse.urlparse(url)
+    if parsed_url.scheme == 'file' and not os.path.isabs(parsed_url.path):
+        assert parsed_url.path == parsed_url[2]
+        parsed_url = list(parsed_url)
+        parsed_url[2] = os.path.join(config.root, parsed_url[2])
+    url = urlparse.urlunparse(parsed_url)
     try:
-        return urllib2.urlopen(_build_request_for_cdimage_file_list(url))
-    except urllib2.URLError as e:
+        return urlfetch(
+            url, headers={'Pragma': 'no-cache', 'Cache-control': 'no-cache'},
+            trust_env=False, use_proxy=True, allow_file=True)
+    except requests.RequestException as e:
         raise UnableToFetchCDImageFileList(
             'Unable to fetch %s: %s' % (url, e))
 
 
-def restore_http_proxy(http_proxy):
-    """Restore the http_proxy environment variable to the given value."""
-    if http_proxy is None:
-        try:
-            del os.environ['http_proxy']
-        except KeyError:
-            pass
-    else:
-        os.environ['http_proxy'] = http_proxy
-
-
 def get_expected_cdimage_paths():
     """Get all paths where we can find CD image files on a cdimage mirror.
 
@@ -648,7 +643,7 @@
     UnableToFetchCDImageFileList exception will be raised.
     """
     d = {}
-    for line in _get_cdimage_file_list().readlines():
+    for line in _get_cdimage_file_list().iter_lines():
         flavour, seriesname, path, size = line.split('\t')
         paths = d.setdefault((flavour, seriesname), [])
         paths.append(path.lstrip('/'))
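
The comment about connect_* versus request_* above is the heart of this
change: the request_* attributes keep describing the mirror being probed,
while the connect_* attributes describe where the TCP connection actually
goes. A rough illustration of the proxied case (this is not the module's real
_parse() helper, and the mirror URL is made up):

    import urlparse

    url = 'http://archive.example.org/ubuntu/dists/Release'
    proxy = 'http://squid.internal:3128'  # i.e. config.launchpad.http_proxy

    request_host = urlparse.urlparse(url).hostname    # 'archive.example.org'
    connect_scheme = urlparse.urlparse(proxy).scheme  # 'http'
    connect_host = urlparse.urlparse(proxy).hostname  # 'squid.internal'
    connect_port = urlparse.urlparse(proxy).port      # 3128
    connect_path = url  # the absolute URL is what gets asked of the proxy

With no proxy configured, each connect_* value simply matches the
corresponding request_* value, which is what the new test_config_no_http_proxy
test asserts.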

=== modified file 'lib/lp/registry/tests/test_distributionmirror_prober.py'
--- lib/lp/registry/tests/test_distributionmirror_prober.py	2018-01-02 16:10:26 +0000
+++ lib/lp/registry/tests/test_distributionmirror_prober.py	2018-06-26 15:14:17 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2016 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2018 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """distributionmirror-prober tests."""
@@ -13,13 +13,22 @@
 from StringIO import StringIO
 
 from lazr.uri import URI
+import responses
 from sqlobject import SQLObjectNotFound
+from testtools.matchers import (
+    ContainsDict,
+    Equals,
+    MatchesStructure,
+    )
+from testtools.twistedsupport import (
+    AsynchronousDeferredRunTest,
+    AsynchronousDeferredRunTestForBrokenTwisted,
+    )
 from twisted.internet import (
     defer,
     reactor,
     )
 from twisted.python.failure import Failure
-from twisted.trial.unittest import TestCase as TrialTestCase
 from twisted.web import server
 from zope.component import getUtility
 from zope.security.proxy import removeSecurityProxy
@@ -29,7 +38,7 @@
 from lp.registry.model.distributionmirror import DistributionMirror
 from lp.registry.scripts import distributionmirror_prober
 from lp.registry.scripts.distributionmirror_prober import (
-    _build_request_for_cdimage_file_list,
+    _get_cdimage_file_list,
     ArchiveMirrorProberCallbacks,
     BadResponseCode,
     ConnectionSkipped,
@@ -50,7 +59,6 @@
     RedirectAwareProberProtocol,
     RedirectToDifferentFile,
     RequestManager,
-    restore_http_proxy,
     should_skip_host,
     UnknownURLSchemeAfterRedirect,
     )
@@ -59,6 +67,7 @@
     )
 from lp.services.config import config
 from lp.services.daemons.tachandler import TacTestSetup
+from lp.services.timeout import default_timeout
 from lp.testing import (
     clean_up_reactor,
     TestCase,
@@ -93,39 +102,70 @@
         return os.path.join(self.root, 'distributionmirror_http_server.log')
 
 
-class TestProberProtocolAndFactory(TrialTestCase):
+class AssertFailureMixin:
+
+    @defer.inlineCallbacks
+    def assertFailure(self, deferred, *expectedFailures):
+        try:
+            result = yield deferred
+        except expectedFailures:
+            pass
+        except Exception as e:
+            self.fail('Expected %r, got %s' % (expectedFailures, e))
+        else:
+            self.fail(
+                'Returned %r instead of raising %r' %
+                (result, expectedFailures))
+
+
+class TestProberProtocolAndFactory(AssertFailureMixin, TestCase):
 
     layer = TwistedLayer
+    run_tests_with = AsynchronousDeferredRunTestForBrokenTwisted.make_factory(
+        timeout=30)
 
     def setUp(self):
-        self.orig_proxy = os.getenv('http_proxy')
+        super(TestProberProtocolAndFactory, self).setUp()
         root = DistributionMirrorTestHTTPServer()
         site = server.Site(root)
         site.displayTracebacks = False
         self.listening_port = reactor.listenTCP(0, site)
+        self.addCleanup(self.listening_port.stopListening)
         self.port = self.listening_port.getHost().port
         self.urls = {'timeout': u'http://localhost:%s/timeout' % self.port,
                      '200': u'http://localhost:%s/valid-mirror' % self.port,
                      '500': u'http://localhost:%s/error' % self.port,
                      '404': u'http://localhost:%s/invalid-mirror' % self.port}
-
-    def tearDown(self):
-        restore_http_proxy(self.orig_proxy)
-        return self.listening_port.stopListening()
+        self.pushConfig('launchpad', http_proxy=None)
 
     def _createProberAndProbe(self, url):
         prober = ProberFactory(url)
         return prober.probe()
 
-    def test_environment_http_proxy_is_handled_correctly(self):
-        os.environ['http_proxy'] = 'http://squid.internal:3128'
-        prober = ProberFactory(self.urls['200'])
-        self.assertEqual(prober.request_host, 'localhost')
-        self.assertEqual(prober.request_port, self.port)
-        self.assertEqual(prober.request_path, '/valid-mirror')
-        self.assertEqual(prober.connect_host, 'squid.internal')
-        self.assertEqual(prober.connect_port, 3128)
-        self.assertEqual(prober.connect_path, self.urls['200'])
+    def test_config_no_http_proxy(self):
+        prober = ProberFactory(self.urls['200'])
+        self.assertThat(prober, MatchesStructure.byEquality(
+            request_scheme='http',
+            request_host='localhost',
+            request_port=self.port,
+            request_path='/valid-mirror',
+            connect_scheme='http',
+            connect_host='localhost',
+            connect_port=self.port,
+            connect_path='/valid-mirror'))
+
+    def test_config_http_proxy(self):
+        self.pushConfig('launchpad', http_proxy='http://squid.internal:3128')
+        prober = ProberFactory(self.urls['200'])
+        self.assertThat(prober, MatchesStructure.byEquality(
+            request_scheme='http',
+            request_host='localhost',
+            request_port=self.port,
+            request_path='/valid-mirror',
+            connect_scheme='http',
+            connect_host='squid.internal',
+            connect_port=3128,
+            connect_path=self.urls['200']))
 
     def test_connect_cancels_existing_timeout_call(self):
         prober = ProberFactory(self.urls['200'])
@@ -161,14 +201,10 @@
         return deferred.addCallback(restore_connect, orig_connect)
 
     def test_connect_to_proxy_when_http_proxy_exists(self):
-        os.environ['http_proxy'] = 'http://squid.internal:3128'
+        self.pushConfig('launchpad', http_proxy='http://squid.internal:3128')
         self._test_connect_to_host(self.urls['200'], 'squid.internal')
 
     def test_connect_to_host_when_http_proxy_does_not_exist(self):
-        try:
-            del os.environ['http_proxy']
-        except KeyError:
-            pass
         self._test_connect_to_host(self.urls['200'], 'localhost')
 
     def test_probe_sets_up_timeout_call(self):
@@ -380,7 +416,8 @@
         self.assertFalse(should_skip_host(self.host))
 
 
-class TestProberFactoryRequestTimeoutRatioWithTwisted(TrialTestCase):
+class TestProberFactoryRequestTimeoutRatioWithTwisted(
+        AssertFailureMixin, TestCase):
     """Tests to ensure we stop issuing requests on a given host if the
     requests/timeouts ratio on that host is too low.
 
@@ -392,25 +429,29 @@
     """
 
     layer = TwistedLayer
+    run_tests_with = AsynchronousDeferredRunTest.make_factory(timeout=30)
 
     def setUp(self):
-        self.orig_host_requests = dict(
-            distributionmirror_prober.host_requests)
-        self.orig_host_timeouts = dict(
-            distributionmirror_prober.host_timeouts)
+        super(TestProberFactoryRequestTimeoutRatioWithTwisted, self).setUp()
+        orig_host_requests = dict(distributionmirror_prober.host_requests)
+        orig_host_timeouts = dict(distributionmirror_prober.host_timeouts)
         distributionmirror_prober.host_requests = {}
         distributionmirror_prober.host_timeouts = {}
+
+        def restore_prober_globals():
+            # Restore the globals that our tests fiddle with.
+            distributionmirror_prober.host_requests = orig_host_requests
+            distributionmirror_prober.host_timeouts = orig_host_timeouts
+
+        self.addCleanup(restore_prober_globals)
+
         root = DistributionMirrorTestHTTPServer()
         site = server.Site(root)
         site.displayTracebacks = False
         self.listening_port = reactor.listenTCP(0, site)
+        self.addCleanup(self.listening_port.stopListening)
         self.port = self.listening_port.getHost().port
-
-    def tearDown(self):
-        # Restore the globals that our tests fiddle with.
-        distributionmirror_prober.host_requests = self.orig_host_requests
-        distributionmirror_prober.host_timeouts = self.orig_host_timeouts
-        return self.listening_port.stopListening()
+        self.pushConfig('launchpad', http_proxy=None)
 
     def _createProberAndProbe(self, url):
         prober = ProberFactory(url)
@@ -842,7 +883,8 @@
         self.assertNotEqual(0, len(mirror.cdimage_series))
         # Note that calling this function won't actually probe any mirrors; we
         # need to call reactor.run() to actually start the probing.
-        probe_cdimage_mirror(mirror, StringIO(), [], logging)
+        with default_timeout(15.0):
+            probe_cdimage_mirror(mirror, StringIO(), [], logging)
         self.assertEqual(0, len(mirror.cdimage_series))
 
     def test_archive_mirror_probe_function(self):
@@ -856,8 +898,9 @@
         mirror1 = DistributionMirror.byName('releases-mirror')
         mirror2 = DistributionMirror.byName('releases-mirror2')
         mirror3 = DistributionMirror.byName('canonical-releases')
-        self._test_one_semaphore_for_each_host(
-            mirror1, mirror2, mirror3, probe_cdimage_mirror)
+        with default_timeout(15.0):
+            self._test_one_semaphore_for_each_host(
+                mirror1, mirror2, mirror3, probe_cdimage_mirror)
 
     def _test_one_semaphore_for_each_host(
             self, mirror1, mirror2, mirror3, probe_function):
@@ -905,20 +948,24 @@
         # When using an http_proxy, even though we'll actually connect to the
         # proxy, we'll use the mirror's host as the key to find the semaphore
         # that should be used
-        orig_proxy = os.getenv('http_proxy')
-        os.environ['http_proxy'] = 'http://squid.internal:3128/'
+        self.pushConfig('launchpad', http_proxy='http://squid.internal:3128/')
         probe_function(mirror3, StringIO(), [], logging)
         self.assertEqual(len(request_manager.host_locks), 2)
-        restore_http_proxy(orig_proxy)
 
 
 class TestCDImageFileListFetching(TestCase):
 
+    @responses.activate
     def test_no_cache(self):
         url = 'http://releases.ubuntu.com/.manifest'
-        request = _build_request_for_cdimage_file_list(url)
-        self.assertEqual(request.headers['Pragma'], 'no-cache')
-        self.assertEqual(request.headers['Cache-control'], 'no-cache')
+        self.pushConfig('distributionmirrorprober', cdimage_file_list_url=url)
+        responses.add('GET', url)
+        with default_timeout(1.0):
+            _get_cdimage_file_list()
+        self.assertThat(responses.calls[0].request.headers, ContainsDict({
+            'Pragma': Equals('no-cache'),
+            'Cache-control': Equals('no-cache'),
+            }))
 
 
 class TestLoggingMixin(TestCase):
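
One note on the test changes: these classes now run on testtools' TestCase
with testtools.twistedsupport instead of twisted.trial, so Trial's
assertFailure helper is no longer inherited; AssertFailureMixin fills that
gap. A self-contained sketch of how the pattern runs end to end (the class
and test names here are invented purely for illustration):

    from testtools import TestCase
    from testtools.twistedsupport import AsynchronousDeferredRunTest
    from twisted.internet import defer


    class AssertFailureDemo(TestCase):

        run_tests_with = AsynchronousDeferredRunTest.make_factory(timeout=5)

        @defer.inlineCallbacks
        def assertFailure(self, deferred, *expectedFailures):
            # Same shape as AssertFailureMixin above: pass only if the
            # Deferred errbacks with one of the expected exception types.
            try:
                result = yield deferred
            except expectedFailures:
                pass
            else:
                self.fail('Returned %r instead of failing' % (result,))

        def test_errback_is_expected(self):
            # The test returns a Deferred; the run-test machinery waits on it.
            return self.assertFailure(
                defer.fail(ValueError('boom')), ValueError)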

=== modified file 'lib/lp/services/tests/test_timeout.py'
--- lib/lp/services/tests/test_timeout.py	2018-06-05 01:50:30 +0000
+++ lib/lp/services/tests/test_timeout.py	2018-06-26 15:14:17 +0000
@@ -367,26 +367,6 @@
             MatchesStructure.byEquality(status_code=200, content='Success.'))
         t.join()
 
-    def test_urlfetch_does_not_support_ftp_urls(self):
-        """urlfetch() does not support ftp urls."""
-        set_default_timeout_function(lambda: 1)
-        self.addCleanup(set_default_timeout_function, None)
-        url = 'ftp://localhost/'
-        e = self.assertRaises(InvalidSchema, urlfetch, url)
-        self.assertEqual(
-            "No connection adapters were found for '%s'" % url, str(e))
-
-    def test_urlfetch_does_not_support_file_urls_by_default(self):
-        """urlfetch() does not support file urls by default."""
-        set_default_timeout_function(lambda: 1)
-        self.addCleanup(set_default_timeout_function, None)
-        test_path = self.useFixture(TempDir()).join('file')
-        write_file(test_path, '')
-        url = 'file://' + test_path
-        e = self.assertRaises(InvalidSchema, urlfetch, url)
-        self.assertEqual(
-            "No connection adapters were found for '%s'" % url, str(e))
-
     def test_urlfetch_no_proxy_by_default(self):
         """urlfetch does not use a proxy by default."""
         self.pushConfig('launchpad', http_proxy='http://proxy.example:3128/')
@@ -418,6 +398,56 @@
             {scheme: proxy for scheme in ('http', 'https')},
             fake_send.calls[0][1]['proxies'])
 
+    def test_urlfetch_does_not_support_ftp_urls_by_default(self):
+        """urlfetch() does not support ftp urls by default."""
+        set_default_timeout_function(lambda: 1)
+        self.addCleanup(set_default_timeout_function, None)
+        url = 'ftp://localhost/'
+        e = self.assertRaises(InvalidSchema, urlfetch, url)
+        self.assertEqual(
+            "No connection adapters were found for '%s'" % url, str(e))
+
+    def test_urlfetch_supports_ftp_urls_if_allow_ftp(self):
+        """urlfetch() supports ftp urls via a proxy if explicitly asked."""
+        sock, http_server_url = self.make_test_socket()
+        sock.listen(1)
+
+        def success_result():
+            (client_sock, client_addr) = sock.accept()
+            # We only provide a test HTTP proxy, not anything beyond that.
+            client_sock.sendall(dedent("""\
+                HTTP/1.0 200 Ok
+                Content-Type: text/plain
+                Content-Length: 8
+
+                Success."""))
+            client_sock.close()
+
+        self.pushConfig('launchpad', http_proxy=http_server_url)
+        t = threading.Thread(target=success_result)
+        t.start()
+        set_default_timeout_function(lambda: 1)
+        self.addCleanup(set_default_timeout_function, None)
+        response = urlfetch(
+            'ftp://example.com/', trust_env=False, use_proxy=True,
+            allow_ftp=True)
+        self.assertThat(response, MatchesStructure(
+            status_code=Equals(200),
+            headers=ContainsDict({'content-length': Equals('8')}),
+            content=Equals('Success.')))
+        t.join()
+
+    def test_urlfetch_does_not_support_file_urls_by_default(self):
+        """urlfetch() does not support file urls by default."""
+        set_default_timeout_function(lambda: 1)
+        self.addCleanup(set_default_timeout_function, None)
+        test_path = self.useFixture(TempDir()).join('file')
+        write_file(test_path, '')
+        url = 'file://' + test_path
+        e = self.assertRaises(InvalidSchema, urlfetch, url)
+        self.assertEqual(
+            "No connection adapters were found for '%s'" % url, str(e))
+
     def test_urlfetch_supports_file_urls_if_allow_file(self):
         """urlfetch() supports file urls if explicitly asked to do so."""
         set_default_timeout_function(lambda: 1)

=== modified file 'lib/lp/services/timeout.py'
--- lib/lp/services/timeout.py	2018-06-22 22:07:42 +0000
+++ lib/lp/services/timeout.py	2018-06-26 15:14:17 +0000
@@ -323,8 +323,8 @@
         self.session = None
 
     @with_timeout(cleanup='cleanup')
-    def fetch(self, url, trust_env=None, use_proxy=False, allow_file=False,
-              **request_kwargs):
+    def fetch(self, url, trust_env=None, use_proxy=False, allow_ftp=False,
+              allow_file=False, **request_kwargs):
         """Fetch the URL using a custom HTTP handler supporting timeout.
 
         :param url: The URL to fetch.
@@ -332,6 +332,7 @@
             to determine whether it fetches proxy configuration from the
             environment.
         :param use_proxy: If True, use Launchpad's configured proxy.
+        :param allow_ftp: If True, allow ftp:// URLs.
         :param allow_file: If True, allow file:// URLs.  (Be careful to only
             pass this if the URL is trusted.)
         :param request_kwargs: Additional keyword arguments passed on to
@@ -343,6 +344,9 @@
         # Mount our custom adapters.
         self.session.mount("https://", CleanableHTTPAdapter())
         self.session.mount("http://", CleanableHTTPAdapter())
+        # We can do FTP, but currently only via an HTTP proxy.
+        if allow_ftp and use_proxy:
+            self.session.mount("ftp://", CleanableHTTPAdapter())
         if allow_file:
             self.session.mount("file://", FileAdapter())
 
@@ -351,6 +355,8 @@
             request_kwargs.setdefault("proxies", {})
             request_kwargs["proxies"]["http"] = config.launchpad.http_proxy
             request_kwargs["proxies"]["https"] = config.launchpad.http_proxy
+            if allow_ftp:
+                request_kwargs["proxies"]["ftp"] = config.launchpad.http_proxy
         response = self.session.request(url=url, **request_kwargs)
         response.raise_for_status()
         # Make sure the content has been consumed before returning.
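
A design note on the ftp handling above: requests has no FTP support of its
own, so the ftp:// mount is only useful together with use_proxy, where the
HTTP proxy (Squid, in Launchpad's case) performs the actual FTP transfer. A
standalone sketch of the same trick with plain requests rather than
Launchpad's session wrapper (the proxy and target URLs are illustrative):

    import requests
    from requests.adapters import HTTPAdapter

    proxy = 'http://squid.internal:3128'

    session = requests.Session()
    session.trust_env = False  # ignore http_proxy/no_proxy from the environment
    # Mounting a plain HTTP adapter for ftp:// works only because the absolute
    # ftp URL is sent to the HTTP proxy, which fetches it on our behalf.
    session.mount('ftp://', HTTPAdapter())
    response = session.get(
        'ftp://example.com/', proxies={'ftp': proxy}, timeout=10)
    response.raise_for_status()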

