launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #22685
[Merge] lp:~cjwatson/launchpad/explicit-proxy-mirror-prober into lp:launchpad
Colin Watson has proposed merging lp:~cjwatson/launchpad/explicit-proxy-mirror-prober into lp:launchpad with lp:~cjwatson/launchpad/no-default-http-proxy as a prerequisite.
Commit message:
Convert the mirror prober to use explicit proxy configuration.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/explicit-proxy-mirror-prober/+merge/348543
--
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/explicit-proxy-mirror-prober into lp:launchpad.
=== modified file 'cronscripts/distributionmirror-prober.py'
--- cronscripts/distributionmirror-prober.py 2013-01-07 02:40:55 +0000
+++ cronscripts/distributionmirror-prober.py 2018-06-26 15:14:17 +0000
@@ -1,14 +1,12 @@
#!/usr/bin/python -S
#
-# Copyright 2009-2011 Canonical Ltd. This software is licensed under the
+# Copyright 2009-2018 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""Script to probe distribution mirrors and check how up-to-date they are."""
import _pythonpath
-import os
-
from lp.registry.interfaces.distributionmirror import MirrorContent
from lp.registry.scripts.distributionmirror_prober import DistroMirrorProber
from lp.services.config import config
@@ -16,6 +14,7 @@
LaunchpadCronScript,
LaunchpadScriptFailure,
)
+from lp.services.timeout import set_default_timeout_function
class DistroMirrorProberScript(LaunchpadCronScript):
@@ -49,12 +48,8 @@
'Wrong value for argument --content-type: %s'
% self.options.content_type)
- if config.distributionmirrorprober.use_proxy:
- os.environ['http_proxy'] = config.launchpad.http_proxy
- self.logger.debug("Using %s as proxy." % os.environ['http_proxy'])
- else:
- self.logger.debug("Not using any proxy.")
-
+ set_default_timeout_function(
+ lambda: config.distributionmirrorprober.timeout)
DistroMirrorProber(self.txn, self.logger).probe(
content_type, self.options.no_remote_hosts, self.options.force,
self.options.max_mirrors, not self.options.no_owner_notification)
=== modified file 'lib/lp/registry/scripts/distributionmirror_prober.py'
--- lib/lp/registry/scripts/distributionmirror_prober.py 2017-05-22 09:33:45 +0000
+++ lib/lp/registry/scripts/distributionmirror_prober.py 2018-06-26 15:14:17 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2017 Canonical Ltd. This software is licensed under the
+# Copyright 2009-2018 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
__metaclass__ = type
@@ -10,12 +10,12 @@
import httplib
import itertools
import logging
-import os
+import os.path
from StringIO import StringIO
import urllib
-import urllib2
import urlparse
+import requests
from twisted.internet import (
defer,
protocol,
@@ -36,6 +36,7 @@
from lp.registry.interfaces.distroseries import IDistroSeries
from lp.services.config import config
from lp.services.librarian.interfaces import ILibraryFileAliasSet
+from lp.services.timeout import urlfetch
from lp.services.webapp import canonical_url
from lp.soyuz.interfaces.distroarchseries import IDistroArchSeries
@@ -202,9 +203,9 @@
request_path = None
# Details of the URL of the host in which we'll connect, which will only
- # be different from request_* in case we have an http_proxy environment
- # variable --in that case the scheme, host and port will be the ones
- # extracted from http_proxy and the path will be self.url
+ # be different from request_* in case we have a configured http_proxy --
+ # in that case the scheme, host and port will be the ones extracted from
+ # http_proxy and the path will be self.url.
connect_scheme = None
connect_host = None
connect_port = None
@@ -279,7 +280,7 @@
# XXX Guilherme Salgado 2006-09-19:
# We don't actually know how to handle FTP responses, but we
# expect to be behind a squid HTTP proxy with the patch at
- # http://www.squid-cache.org/bugs/show_bug.cgi?id=1758 applied. So, if
+ # https://bugs.squid-cache.org/show_bug.cgi?id=1758 applied. So, if
# you encounter any problems with FTP URLs you'll probably have to nag
# the sysadmins to fix squid for you.
if scheme not in ('http', 'ftp'):
@@ -296,9 +297,9 @@
if self.request_host not in host_timeouts:
host_timeouts[self.request_host] = 0
- # If the http_proxy variable is set, we want to use it as the host
- # we're going to connect to.
- proxy = os.getenv('http_proxy')
+ # If launchpad.http_proxy is set in our configuration, we want to
+ # use it as the host we're going to connect to.
+ proxy = config.launchpad.http_proxy
if proxy:
scheme, host, port, dummy = _parse(proxy)
path = url
@@ -612,31 +613,25 @@
return failure
-def _build_request_for_cdimage_file_list(url):
- headers = {'Pragma': 'no-cache', 'Cache-control': 'no-cache'}
- return urllib2.Request(url, headers=headers)
-
-
def _get_cdimage_file_list():
url = config.distributionmirrorprober.cdimage_file_list_url
+ # In test environments, this may be a file: URL. Adjust it to be in a
+ # form that requests can cope with (i.e. using an absolute path).
+ parsed_url = urlparse.urlparse(url)
+ if parsed_url.scheme == 'file' and not os.path.isabs(parsed_url.path):
+ assert parsed_url.path == parsed_url[2]
+ parsed_url = list(parsed_url)
+ parsed_url[2] = os.path.join(config.root, parsed_url[2])
+ url = urlparse.urlunparse(parsed_url)
try:
- return urllib2.urlopen(_build_request_for_cdimage_file_list(url))
- except urllib2.URLError as e:
+ return urlfetch(
+ url, headers={'Pragma': 'no-cache', 'Cache-control': 'no-cache'},
+ trust_env=False, use_proxy=True, allow_file=True)
+ except requests.RequestException as e:
raise UnableToFetchCDImageFileList(
'Unable to fetch %s: %s' % (url, e))
-def restore_http_proxy(http_proxy):
- """Restore the http_proxy environment variable to the given value."""
- if http_proxy is None:
- try:
- del os.environ['http_proxy']
- except KeyError:
- pass
- else:
- os.environ['http_proxy'] = http_proxy
-
-
def get_expected_cdimage_paths():
"""Get all paths where we can find CD image files on a cdimage mirror.
@@ -648,7 +643,7 @@
UnableToFetchCDImageFileList exception will be raised.
"""
d = {}
- for line in _get_cdimage_file_list().readlines():
+ for line in _get_cdimage_file_list().iter_lines():
flavour, seriesname, path, size = line.split('\t')
paths = d.setdefault((flavour, seriesname), [])
paths.append(path.lstrip('/'))
=== modified file 'lib/lp/registry/tests/test_distributionmirror_prober.py'
--- lib/lp/registry/tests/test_distributionmirror_prober.py 2018-01-02 16:10:26 +0000
+++ lib/lp/registry/tests/test_distributionmirror_prober.py 2018-06-26 15:14:17 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2016 Canonical Ltd. This software is licensed under the
+# Copyright 2009-2018 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""distributionmirror-prober tests."""
@@ -13,13 +13,22 @@
from StringIO import StringIO
from lazr.uri import URI
+import responses
from sqlobject import SQLObjectNotFound
+from testtools.matchers import (
+ ContainsDict,
+ Equals,
+ MatchesStructure,
+ )
+from testtools.twistedsupport import (
+ AsynchronousDeferredRunTest,
+ AsynchronousDeferredRunTestForBrokenTwisted,
+ )
from twisted.internet import (
defer,
reactor,
)
from twisted.python.failure import Failure
-from twisted.trial.unittest import TestCase as TrialTestCase
from twisted.web import server
from zope.component import getUtility
from zope.security.proxy import removeSecurityProxy
@@ -29,7 +38,7 @@
from lp.registry.model.distributionmirror import DistributionMirror
from lp.registry.scripts import distributionmirror_prober
from lp.registry.scripts.distributionmirror_prober import (
- _build_request_for_cdimage_file_list,
+ _get_cdimage_file_list,
ArchiveMirrorProberCallbacks,
BadResponseCode,
ConnectionSkipped,
@@ -50,7 +59,6 @@
RedirectAwareProberProtocol,
RedirectToDifferentFile,
RequestManager,
- restore_http_proxy,
should_skip_host,
UnknownURLSchemeAfterRedirect,
)
@@ -59,6 +67,7 @@
)
from lp.services.config import config
from lp.services.daemons.tachandler import TacTestSetup
+from lp.services.timeout import default_timeout
from lp.testing import (
clean_up_reactor,
TestCase,
@@ -93,39 +102,70 @@
return os.path.join(self.root, 'distributionmirror_http_server.log')
-class TestProberProtocolAndFactory(TrialTestCase):
+class AssertFailureMixin:
+
+ @defer.inlineCallbacks
+ def assertFailure(self, deferred, *expectedFailures):
+ try:
+ result = yield deferred
+ except expectedFailures:
+ pass
+ except Exception as e:
+ self.fail('Expected %r, got %s' % (expectedFailures, e))
+ else:
+ self.fail(
+ 'Returned %r instead of raising %r' %
+ (result, expectedFailures))
+
+
+class TestProberProtocolAndFactory(AssertFailureMixin, TestCase):
layer = TwistedLayer
+ run_tests_with = AsynchronousDeferredRunTestForBrokenTwisted.make_factory(
+ timeout=30)
def setUp(self):
- self.orig_proxy = os.getenv('http_proxy')
+ super(TestProberProtocolAndFactory, self).setUp()
root = DistributionMirrorTestHTTPServer()
site = server.Site(root)
site.displayTracebacks = False
self.listening_port = reactor.listenTCP(0, site)
+ self.addCleanup(self.listening_port.stopListening)
self.port = self.listening_port.getHost().port
self.urls = {'timeout': u'http://localhost:%s/timeout' % self.port,
'200': u'http://localhost:%s/valid-mirror' % self.port,
'500': u'http://localhost:%s/error' % self.port,
'404': u'http://localhost:%s/invalid-mirror' % self.port}
-
- def tearDown(self):
- restore_http_proxy(self.orig_proxy)
- return self.listening_port.stopListening()
+ self.pushConfig('launchpad', http_proxy=None)
def _createProberAndProbe(self, url):
prober = ProberFactory(url)
return prober.probe()
- def test_environment_http_proxy_is_handled_correctly(self):
- os.environ['http_proxy'] = 'http://squid.internal:3128'
- prober = ProberFactory(self.urls['200'])
- self.assertEqual(prober.request_host, 'localhost')
- self.assertEqual(prober.request_port, self.port)
- self.assertEqual(prober.request_path, '/valid-mirror')
- self.assertEqual(prober.connect_host, 'squid.internal')
- self.assertEqual(prober.connect_port, 3128)
- self.assertEqual(prober.connect_path, self.urls['200'])
+ def test_config_no_http_proxy(self):
+ prober = ProberFactory(self.urls['200'])
+ self.assertThat(prober, MatchesStructure.byEquality(
+ request_scheme='http',
+ request_host='localhost',
+ request_port=self.port,
+ request_path='/valid-mirror',
+ connect_scheme='http',
+ connect_host='localhost',
+ connect_port=self.port,
+ connect_path='/valid-mirror'))
+
+ def test_config_http_proxy(self):
+ self.pushConfig('launchpad', http_proxy='http://squid.internal:3128')
+ prober = ProberFactory(self.urls['200'])
+ self.assertThat(prober, MatchesStructure.byEquality(
+ request_scheme='http',
+ request_host='localhost',
+ request_port=self.port,
+ request_path='/valid-mirror',
+ connect_scheme='http',
+ connect_host='squid.internal',
+ connect_port=3128,
+ connect_path=self.urls['200']))
def test_connect_cancels_existing_timeout_call(self):
prober = ProberFactory(self.urls['200'])
@@ -161,14 +201,10 @@
return deferred.addCallback(restore_connect, orig_connect)
def test_connect_to_proxy_when_http_proxy_exists(self):
- os.environ['http_proxy'] = 'http://squid.internal:3128'
+ self.pushConfig('launchpad', http_proxy='http://squid.internal:3128')
self._test_connect_to_host(self.urls['200'], 'squid.internal')
def test_connect_to_host_when_http_proxy_does_not_exist(self):
- try:
- del os.environ['http_proxy']
- except KeyError:
- pass
self._test_connect_to_host(self.urls['200'], 'localhost')
def test_probe_sets_up_timeout_call(self):
@@ -380,7 +416,8 @@
self.assertFalse(should_skip_host(self.host))
-class TestProberFactoryRequestTimeoutRatioWithTwisted(TrialTestCase):
+class TestProberFactoryRequestTimeoutRatioWithTwisted(
+ AssertFailureMixin, TestCase):
"""Tests to ensure we stop issuing requests on a given host if the
requests/timeouts ratio on that host is too low.
@@ -392,25 +429,29 @@
"""
layer = TwistedLayer
+ run_tests_with = AsynchronousDeferredRunTest.make_factory(timeout=30)
def setUp(self):
- self.orig_host_requests = dict(
- distributionmirror_prober.host_requests)
- self.orig_host_timeouts = dict(
- distributionmirror_prober.host_timeouts)
+ super(TestProberFactoryRequestTimeoutRatioWithTwisted, self).setUp()
+ orig_host_requests = dict(distributionmirror_prober.host_requests)
+ orig_host_timeouts = dict(distributionmirror_prober.host_timeouts)
distributionmirror_prober.host_requests = {}
distributionmirror_prober.host_timeouts = {}
+
+ def restore_prober_globals():
+ # Restore the globals that our tests fiddle with.
+ distributionmirror_prober.host_requests = orig_host_requests
+ distributionmirror_prober.host_timeouts = orig_host_timeouts
+
+ self.addCleanup(restore_prober_globals)
+
root = DistributionMirrorTestHTTPServer()
site = server.Site(root)
site.displayTracebacks = False
self.listening_port = reactor.listenTCP(0, site)
+ self.addCleanup(self.listening_port.stopListening)
self.port = self.listening_port.getHost().port
-
- def tearDown(self):
- # Restore the globals that our tests fiddle with.
- distributionmirror_prober.host_requests = self.orig_host_requests
- distributionmirror_prober.host_timeouts = self.orig_host_timeouts
- return self.listening_port.stopListening()
+ self.pushConfig('launchpad', http_proxy=None)
def _createProberAndProbe(self, url):
prober = ProberFactory(url)
@@ -842,7 +883,8 @@
self.assertNotEqual(0, len(mirror.cdimage_series))
# Note that calling this function won't actually probe any mirrors; we
# need to call reactor.run() to actually start the probing.
- probe_cdimage_mirror(mirror, StringIO(), [], logging)
+ with default_timeout(15.0):
+ probe_cdimage_mirror(mirror, StringIO(), [], logging)
self.assertEqual(0, len(mirror.cdimage_series))
def test_archive_mirror_probe_function(self):
@@ -856,8 +898,9 @@
mirror1 = DistributionMirror.byName('releases-mirror')
mirror2 = DistributionMirror.byName('releases-mirror2')
mirror3 = DistributionMirror.byName('canonical-releases')
- self._test_one_semaphore_for_each_host(
- mirror1, mirror2, mirror3, probe_cdimage_mirror)
+ with default_timeout(15.0):
+ self._test_one_semaphore_for_each_host(
+ mirror1, mirror2, mirror3, probe_cdimage_mirror)
def _test_one_semaphore_for_each_host(
self, mirror1, mirror2, mirror3, probe_function):
@@ -905,20 +948,24 @@
# When using an http_proxy, even though we'll actually connect to the
# proxy, we'll use the mirror's host as the key to find the semaphore
# that should be used
- orig_proxy = os.getenv('http_proxy')
- os.environ['http_proxy'] = 'http://squid.internal:3128/'
+ self.pushConfig('launchpad', http_proxy='http://squid.internal:3128/')
probe_function(mirror3, StringIO(), [], logging)
self.assertEqual(len(request_manager.host_locks), 2)
- restore_http_proxy(orig_proxy)
class TestCDImageFileListFetching(TestCase):
+ @responses.activate
def test_no_cache(self):
url = 'http://releases.ubuntu.com/.manifest'
- request = _build_request_for_cdimage_file_list(url)
- self.assertEqual(request.headers['Pragma'], 'no-cache')
- self.assertEqual(request.headers['Cache-control'], 'no-cache')
+ self.pushConfig('distributionmirrorprober', cdimage_file_list_url=url)
+ responses.add('GET', url)
+ with default_timeout(1.0):
+ _get_cdimage_file_list()
+ self.assertThat(responses.calls[0].request.headers, ContainsDict({
+ 'Pragma': Equals('no-cache'),
+ 'Cache-control': Equals('no-cache'),
+ }))
class TestLoggingMixin(TestCase):
=== modified file 'lib/lp/services/tests/test_timeout.py'
--- lib/lp/services/tests/test_timeout.py 2018-06-05 01:50:30 +0000
+++ lib/lp/services/tests/test_timeout.py 2018-06-26 15:14:17 +0000
@@ -367,26 +367,6 @@
MatchesStructure.byEquality(status_code=200, content='Success.'))
t.join()
- def test_urlfetch_does_not_support_ftp_urls(self):
- """urlfetch() does not support ftp urls."""
- set_default_timeout_function(lambda: 1)
- self.addCleanup(set_default_timeout_function, None)
- url = 'ftp://localhost/'
- e = self.assertRaises(InvalidSchema, urlfetch, url)
- self.assertEqual(
- "No connection adapters were found for '%s'" % url, str(e))
-
- def test_urlfetch_does_not_support_file_urls_by_default(self):
- """urlfetch() does not support file urls by default."""
- set_default_timeout_function(lambda: 1)
- self.addCleanup(set_default_timeout_function, None)
- test_path = self.useFixture(TempDir()).join('file')
- write_file(test_path, '')
- url = 'file://' + test_path
- e = self.assertRaises(InvalidSchema, urlfetch, url)
- self.assertEqual(
- "No connection adapters were found for '%s'" % url, str(e))
-
def test_urlfetch_no_proxy_by_default(self):
"""urlfetch does not use a proxy by default."""
self.pushConfig('launchpad', http_proxy='http://proxy.example:3128/')
@@ -418,6 +398,56 @@
{scheme: proxy for scheme in ('http', 'https')},
fake_send.calls[0][1]['proxies'])
+ def test_urlfetch_does_not_support_ftp_urls_by_default(self):
+ """urlfetch() does not support ftp urls by default."""
+ set_default_timeout_function(lambda: 1)
+ self.addCleanup(set_default_timeout_function, None)
+ url = 'ftp://localhost/'
+ e = self.assertRaises(InvalidSchema, urlfetch, url)
+ self.assertEqual(
+ "No connection adapters were found for '%s'" % url, str(e))
+
+ def test_urlfetch_supports_ftp_urls_if_allow_ftp(self):
+ """urlfetch() supports ftp urls via a proxy if explicitly asked."""
+ sock, http_server_url = self.make_test_socket()
+ sock.listen(1)
+
+ def success_result():
+ (client_sock, client_addr) = sock.accept()
+ # We only provide a test HTTP proxy, not anything beyond that.
+ client_sock.sendall(dedent("""\
+ HTTP/1.0 200 Ok
+ Content-Type: text/plain
+ Content-Length: 8
+
+ Success."""))
+ client_sock.close()
+
+ self.pushConfig('launchpad', http_proxy=http_server_url)
+ t = threading.Thread(target=success_result)
+ t.start()
+ set_default_timeout_function(lambda: 1)
+ self.addCleanup(set_default_timeout_function, None)
+ response = urlfetch(
+ 'ftp://example.com/', trust_env=False, use_proxy=True,
+ allow_ftp=True)
+ self.assertThat(response, MatchesStructure(
+ status_code=Equals(200),
+ headers=ContainsDict({'content-length': Equals('8')}),
+ content=Equals('Success.')))
+ t.join()
+
+ def test_urlfetch_does_not_support_file_urls_by_default(self):
+ """urlfetch() does not support file urls by default."""
+ set_default_timeout_function(lambda: 1)
+ self.addCleanup(set_default_timeout_function, None)
+ test_path = self.useFixture(TempDir()).join('file')
+ write_file(test_path, '')
+ url = 'file://' + test_path
+ e = self.assertRaises(InvalidSchema, urlfetch, url)
+ self.assertEqual(
+ "No connection adapters were found for '%s'" % url, str(e))
+
def test_urlfetch_supports_file_urls_if_allow_file(self):
"""urlfetch() supports file urls if explicitly asked to do so."""
set_default_timeout_function(lambda: 1)
=== modified file 'lib/lp/services/timeout.py'
--- lib/lp/services/timeout.py 2018-06-22 22:07:42 +0000
+++ lib/lp/services/timeout.py 2018-06-26 15:14:17 +0000
@@ -323,8 +323,8 @@
self.session = None
@with_timeout(cleanup='cleanup')
- def fetch(self, url, trust_env=None, use_proxy=False, allow_file=False,
- **request_kwargs):
+ def fetch(self, url, trust_env=None, use_proxy=False, allow_ftp=False,
+ allow_file=False, **request_kwargs):
"""Fetch the URL using a custom HTTP handler supporting timeout.
:param url: The URL to fetch.
@@ -332,6 +332,7 @@
to determine whether it fetches proxy configuration from the
environment.
:param use_proxy: If True, use Launchpad's configured proxy.
+ :param allow_ftp: If True, allow ftp:// URLs.
:param allow_file: If True, allow file:// URLs. (Be careful to only
pass this if the URL is trusted.)
:param request_kwargs: Additional keyword arguments passed on to
@@ -343,6 +344,9 @@
# Mount our custom adapters.
self.session.mount("https://", CleanableHTTPAdapter())
self.session.mount("http://", CleanableHTTPAdapter())
+ # We can do FTP, but currently only via an HTTP proxy.
+ if allow_ftp and use_proxy:
+ self.session.mount("ftp://", CleanableHTTPAdapter())
if allow_file:
self.session.mount("file://", FileAdapter())
@@ -351,6 +355,8 @@
request_kwargs.setdefault("proxies", {})
request_kwargs["proxies"]["http"] = config.launchpad.http_proxy
request_kwargs["proxies"]["https"] = config.launchpad.http_proxy
+ if allow_ftp:
+ request_kwargs["proxies"]["ftp"] = config.launchpad.http_proxy
response = self.session.request(url=url, **request_kwargs)
response.raise_for_status()
# Make sure the content has been consumed before returning.
Follow ups