[Merge] lp:~cjwatson/launchpad/bugs-remote-finders-requests into lp:launchpad
Colin Watson has proposed merging lp:~cjwatson/launchpad/bugs-remote-finders-requests into lp:launchpad with lp:~cjwatson/launchpad/responses as a prerequisite.
Commit message:
Convert update-bugzilla-remote-components and update-sourceforge-remote-products to urlfetch with explicit proxy configuration.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/bugs-remote-finders-requests/+merge/347198
--
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/bugs-remote-finders-requests into lp:launchpad.
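For reviewers skimming the diff: both scripts now fetch remote pages with the same pattern, sketched below. This is a minimal reconstruction from the diff that follows (fetch_page and the timeout argument are illustrative names; the config key and the urlfetch/override_timeout calls are the ones used in the branch):

    from lp.services.config import config
    from lp.services.timeout import override_timeout, urlfetch

    def fetch_page(url, timeout):
        # Pass the configured proxy explicitly rather than relying on
        # urllib2 picking up proxy settings from the environment.
        proxies = {}
        if config.launchpad.http_proxy:
            # The same proxy is used for both HTTP and HTTPS.
            proxies['http'] = config.launchpad.http_proxy
            proxies['https'] = config.launchpad.http_proxy
        # urlfetch consults the default timeout function, which
        # override_timeout temporarily replaces.
        with override_timeout(timeout):
            return urlfetch(url, proxies=proxies).content

Failed fetches raise requests.HTTPError, which the callers log and then skip.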
=== modified file 'cronscripts/nightly.sh'
--- cronscripts/nightly.sh 2012-12-05 17:02:44 +0000
+++ cronscripts/nightly.sh 2018-05-31 12:52:21 +0000
@@ -1,15 +1,12 @@
#!/bin/sh
#
-# Copyright 2009 Canonical Ltd. This software is licensed under the
+# Copyright 2009-2018 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
# This script performs nightly chores. It should be run from
# cron as the launchpad user once a day. Typically the output
# will be sent to an email address for inspection.
-# Note that http/ftp proxies are needed by the product
-# release finder
-
LOGDIR=$1
LOGFILE=$LOGDIR/nightly.log
=== modified file 'lib/lp/bugs/doc/sourceforge-remote-products.txt'
--- lib/lp/bugs/doc/sourceforge-remote-products.txt 2012-06-06 13:44:50 +0000
+++ lib/lp/bugs/doc/sourceforge-remote-products.txt 2018-05-31 12:52:21 +0000
@@ -37,14 +37,51 @@
... print product.name, product.sourceforgeproject
my-first-product fronobulator
-We'll use a test version of SourceForgeRemoteProductFinder that won't
-try to access SourceForge.
-
+Define some request mocks so that we don't try to access SourceForge.
+
+ >>> import os.path
+ >>> import re
+ >>> import responses
+ >>> from six.moves.urllib_parse import urlsplit
+
+ >>> def project_callback(request):
+ ... url = urlsplit(request.url)
+ ... project = re.match(r'.*/projects/([a-z]+)', url.path).group(1)
+ ... file_path = os.path.join(
+ ... os.path.dirname(__file__), os.pardir, 'tests', 'testfiles',
+ ... 'sourceforge-project-%s.html' % project)
+ ... with open(file_path) as test_file:
+ ... return (200, {}, test_file.read())
+ >>> def add_project_response(requests_mock):
+ ... requests_mock.add_callback(
+ ... 'GET', re.compile(r'.*/projects/[a-z]+'),
+ ... callback=project_callback)
+
+ >>> def tracker_callback(request):
+ ... url = urlsplit(request.url)
+ ... group_id = re.match(r'group_id=([0-9]+)', url.query).group(1)
+ ... file_path = os.path.join(
+ ... os.path.dirname(__file__), os.pardir, 'tests', 'testfiles',
+ ... 'sourceforge-tracker-%s.html' % group_id)
+ ... with open(file_path) as test_file:
+ ... return (200, {}, test_file.read())
+ >>> def add_tracker_response(requests_mock):
+ ... requests_mock.add_callback(
+ ... 'GET', re.compile(r'.*/tracker/\?group_id=[0-9]+'),
+ ... match_querystring=True, callback=tracker_callback)
+
+ >>> def print_calls(calls):
+ ... for call in calls:
+ ... url = urlsplit(call.request.url)
+ ... print('Got page %s%s' % (
+ ... url.path, '?%s' % url.query if url.query else ''))
+
+ >>> from lp.bugs.scripts.sfremoteproductfinder import (
+ ... SourceForgeRemoteProductFinder,
+ ... )
>>> from lp.services.log.logger import FakeLogger
- >>> from lp.bugs.tests.sfremoteproductfinder import (
- ... TestSFRemoteProductFinder)
>>> from lp.testing.layers import LaunchpadZopelessLayer
- >>> finder = TestSFRemoteProductFinder(
+ >>> finder = SourceForgeRemoteProductFinder(
... txn=LaunchpadZopelessLayer.txn, logger=FakeLogger())
SourceForgeRemoteProductFinder has a method,
@@ -55,10 +92,14 @@
extracts the URL of the project's bug tracker and returns the group_id and
atid therein as an ampersand-separated string.
- >>> remote_product = finder.getRemoteProductFromSourceForge(
- ... 'fronobulator')
- DEBUG...Getting page projects/fronobulator
- DEBUG...Getting page tracker/?group_id=5570
+ >>> with responses.RequestsMock() as requests_mock:
+ ... add_project_response(requests_mock)
+ ... add_tracker_response(requests_mock)
+ ... remote_product = finder.getRemoteProductFromSourceForge(
+ ... 'fronobulator')
+ ... print_calls(requests_mock.calls)
+ Got page /projects/fronobulator
+ Got page /tracker/?group_id=5570
>>> print remote_product
5570&105570
@@ -66,23 +107,25 @@
If an error is raised when trying to fetch the project pages from the
remote server, it will be logged.
- >>> from lp.bugs.tests.sfremoteproductfinder import (
- ... TestBrokenSFRemoteProductFinder)
- >>> broken_finder = TestBrokenSFRemoteProductFinder(
- ... txn=LaunchpadZopelessLayer.txn, logger=FakeLogger())
- >>> broken_finder.getRemoteProductFromSourceForge('fronobulator')
- ERROR...Error fetching project...: HTTP Error 500: This is an error
+ >>> with responses.RequestsMock() as requests_mock:
+ ... requests_mock.add('GET', re.compile(r'.*'), status=500)
+ ... finder.getRemoteProductFromSourceForge('fronobulator')
+ ERROR...Error fetching project...: 500 Server Error: Internal Server Error
SourceForgeRemoteProductFinder.setRemoteProductsFromSourceForge()
iterates over the list of products returned by
getSFLinkedProductsWithNoneRemoteProduct() and then calls
getRemoteProductFromSourceForge() to fetch their remote products.
- >>> finder.setRemoteProductsFromSourceForge()
+ >>> with responses.RequestsMock() as requests_mock:
+ ... add_project_response(requests_mock)
+ ... add_tracker_response(requests_mock)
+ ... finder.setRemoteProductsFromSourceForge()
+ ... print_calls(requests_mock.calls)
INFO...Updating 1 Products using SourceForge project data
DEBUG...Updating remote_product for Product 'my-first-product'
- DEBUG...Getting page projects/fronobulator
- DEBUG...Getting page tracker/?group_id=5570
+ Got page /projects/fronobulator
+ Got page /tracker/?group_id=5570
The product that was linked to SourceForge without a remote_product now has
its remote_product set.
=== modified file 'lib/lp/bugs/scripts/bzremotecomponentfinder.py'
--- lib/lp/bugs/scripts/bzremotecomponentfinder.py 2017-10-21 18:14:14 +0000
+++ lib/lp/bugs/scripts/bzremotecomponentfinder.py 2018-05-31 12:52:21 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2011 Canonical Ltd. This software is licensed under the
+# Copyright 2009-2018 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""Utilities for the update-bugzilla-remote-components cronscript"""
@@ -10,11 +10,8 @@
]
import re
-from urllib2 import (
- HTTPError,
- urlopen,
- )
+import requests
import transaction
from zope.component import getUtility
@@ -24,9 +21,14 @@
)
from lp.bugs.model.bugtracker import BugTrackerComponent
from lp.services.beautifulsoup import BeautifulSoup
+from lp.services.config import config
from lp.services.database import bulk
from lp.services.database.interfaces import IStore
from lp.services.scripts.logger import log as default_log
+from lp.services.timeout import (
+ override_timeout,
+ urlfetch,
+ )
def dictFromCSV(line):
@@ -54,7 +56,12 @@
def getPage(self):
"""Download and return content from the Bugzilla page"""
- return urlopen(self.url).read()
+ proxies = {}
+ if config.launchpad.http_proxy:
+ proxies['http'] = config.launchpad.http_proxy
+ proxies['https'] = config.launchpad.http_proxy
+ with override_timeout(config.updatebugzillaremotecomponents.timeout):
+ return urlfetch(self.url, proxies=proxies).content
def parsePage(self, page_text):
"""Builds self.product using HTML content in page_text"""
@@ -106,20 +113,14 @@
u"mozilla.org",
]
- def __init__(self, logger=None, static_bugzilla_scraper=None):
+ def __init__(self, logger=None):
"""Instantiates object, without performing any parsing.
:param logger: A logger object
- :param static_bugzilla_scraper: Substitute this custom bugzilla
- scraper object instead of constructing a new
- BugzillaRemoteComponentScraper for each bugtracker's URL. This
- is intended for testing purposes to avoid needing to make remote
- web connections.
"""
self.logger = logger
if logger is None:
self.logger = default_log
- self.static_bugzilla_scraper = static_bugzilla_scraper
def getRemoteProductsAndComponents(self, bugtracker_name=None):
"""Retrieves, parses, and stores component data for each bugtracker"""
@@ -141,20 +142,17 @@
self.logger.info("%s: %s" % (
lp_bugtracker.name, lp_bugtracker.baseurl))
- if self.static_bugzilla_scraper is not None:
- bz_bugtracker = self.static_bugzilla_scraper
- else:
- bz_bugtracker = BugzillaRemoteComponentScraper(
- base_url=lp_bugtracker.baseurl)
+ bz_bugtracker = BugzillaRemoteComponentScraper(
+ base_url=lp_bugtracker.baseurl)
try:
self.logger.debug("...Fetching page")
page_text = bz_bugtracker.getPage()
- except HTTPError as error:
+ except requests.HTTPError as error:
self.logger.warning("Could not fetch %s: %s" % (
lp_bugtracker.baseurl, error))
continue
- except:
+ except Exception:
self.logger.warning("Failed to access %s" % (
lp_bugtracker.baseurl))
continue
=== modified file 'lib/lp/bugs/scripts/sfremoteproductfinder.py'
--- lib/lp/bugs/scripts/sfremoteproductfinder.py 2017-10-21 18:14:14 +0000
+++ lib/lp/bugs/scripts/sfremoteproductfinder.py 2018-05-31 12:52:21 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009 Canonical Ltd. This software is licensed under the
+# Copyright 2009-2018 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""Utilities for the sfremoteproductfinder cronscript"""
@@ -9,17 +9,19 @@
]
import urllib
-from urllib2 import (
- HTTPError,
- urlopen,
- )
+import requests
from zope.component import getUtility
from lp.app.interfaces.launchpad import ILaunchpadCelebrities
from lp.registry.interfaces.product import IProductSet
from lp.services.beautifulsoup import BeautifulSoup
+from lp.services.config import config
from lp.services.scripts.logger import log as default_log
+from lp.services.timeout import (
+ override_timeout,
+ urlfetch,
+ )
from lp.services.webapp import (
urlappend,
urlsplit,
@@ -43,7 +45,12 @@
def _getPage(self, page):
"""GET the specified page on the remote HTTP server."""
page_url = urlappend(self.sourceforge_baseurl, page)
- return urlopen(page_url).read()
+ proxies = {}
+ if config.launchpad.http_proxy:
+ proxies['http'] = config.launchpad.http_proxy
+ proxies['https'] = config.launchpad.http_proxy
+ with override_timeout(config.updatesourceforgeremoteproduct.timeout):
+ return urlfetch(page_url, proxies=proxies).content
def getRemoteProductFromSourceForge(self, sf_project):
"""Return the remote product of a SourceForge project.
@@ -55,7 +62,7 @@
# First, fetch the project page.
try:
soup = BeautifulSoup(self._getPage("projects/%s" % sf_project))
- except HTTPError as error:
+ except requests.HTTPError as error:
self.logger.error(
"Error fetching project %s: %s" %
(sf_project, error))
@@ -75,7 +82,7 @@
tracker_url = tracker_url.lstrip('/')
try:
soup = BeautifulSoup(self._getPage(tracker_url))
- except HTTPError as error:
+ except requests.HTTPError as error:
self.logger.error(
"Error fetching project %s: %s" %
(sf_project, error))
=== removed file 'lib/lp/bugs/tests/sfremoteproductfinder.py'
--- lib/lp/bugs/tests/sfremoteproductfinder.py 2011-12-19 15:09:08 +0000
+++ lib/lp/bugs/tests/sfremoteproductfinder.py 1970-01-01 00:00:00 +0000
@@ -1,50 +0,0 @@
-# Copyright 2009-2011 Canonical Ltd. This software is licensed under the
-# GNU Affero General Public License version 3 (see the file LICENSE).
-
-"""Testing helpers for sfremoteproductfinder."""
-
-__metaclass__ = type
-__all__ = ['TestSFRemoteProductFinder']
-
-import os
-import re
-from urllib2 import HTTPError
-
-from lp.bugs.scripts.sfremoteproductfinder import (
- SourceForgeRemoteProductFinder,
- )
-
-
-class TestSFRemoteProductFinder(SourceForgeRemoteProductFinder):
-
- def _getPage(self, page):
- self.logger.debug("Getting page %s" % page)
-
- project_re = re.compile('projects/([a-z]+)')
- tracker_re = re.compile('/?tracker/\?group_id=([0-9]+)')
-
- project_match = project_re.match(page)
- tracker_match = tracker_re.match(page)
-
- if project_match is not None:
- project = project_match.groups()[0]
- file_path = os.path.join(
- os.path.dirname(__file__), 'testfiles',
- 'sourceforge-project-%s.html' % project)
- elif tracker_match is not None:
- group_id = tracker_match.groups()[0]
- file_path = os.path.join(
- os.path.dirname(__file__), 'testfiles',
- 'sourceforge-tracker-%s.html' % group_id)
- else:
- raise AssertionError(
- "The requested page '%s' isn't a project or tracker page."
- % page)
-
- return open(file_path, 'r').read()
-
-
-class TestBrokenSFRemoteProductFinder(SourceForgeRemoteProductFinder):
-
- def _getPage(self, page):
- raise HTTPError(page, 500, "This is an error", None, None)
=== modified file 'lib/lp/bugs/tests/test_bzremotecomponentfinder.py'
--- lib/lp/bugs/tests/test_bzremotecomponentfinder.py 2015-10-06 06:48:01 +0000
+++ lib/lp/bugs/tests/test_bzremotecomponentfinder.py 2018-05-31 12:52:21 +0000
@@ -1,4 +1,4 @@
-# Copyright 2010-2014 Canonical Ltd. This software is licensed under the
+# Copyright 2010-2018 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""Tests cronscript for retriving components from remote Bugzillas"""
@@ -8,8 +8,9 @@
__all__ = []
import os
-from urllib2 import HTTPError
+import re
+import responses
import transaction
from lp.bugs.scripts.bzremotecomponentfinder import (
@@ -32,33 +33,8 @@
Test files are located in lib/canonical/launchpad/ftests/testfiles
"""
file_path = os.path.join(os.path.dirname(__file__), 'testfiles', name)
- test_file = open(file_path, 'r')
- return test_file.read()
-
-
-class StaticTextBugzillaRemoteComponentScraper(
- BugzillaRemoteComponentScraper):
- """A scraper that just returns static text for getPage()"""
- def __init__(self):
- BugzillaRemoteComponentScraper.__init__(
- self, "http://www.example.com")
-
- def getPage(self):
- return read_test_file("bugzilla-fdo-advanced-query.html")
-
-
-class FaultyBugzillaRemoteComponentScraper(
- BugzillaRemoteComponentScraper):
- """A scraper that trips asserts when getPage() is called"""
-
- def __init__(self, error=None):
- BugzillaRemoteComponentScraper.__init__(
- self, "http://www.example.com")
- self.error = error
-
- def getPage(self):
- raise self.error
- return None
+ with open(file_path, 'r') as test_file:
+ return test_file.read()
class TestBugzillaRemoteComponentScraper(TestCaseWithFactory):
@@ -130,6 +106,7 @@
asserted = e
self.assertIs(None, asserted)
+ @responses.activate
def test_store(self):
"""Check that already-parsed data gets stored to database"""
lp_bugtracker = self.factory.makeBugTracker()
@@ -173,17 +150,19 @@
comp = comp_group.getComponent(u'four')
self.assertEqual(u'four', comp.name)
+ @responses.activate
def test_get_remote_products_and_components(self):
"""Does a full retrieve and storing of data."""
lp_bugtracker = self.factory.makeBugTracker(
title="fdo-example",
name="fdo-example")
transaction.commit()
- bz_scraper = StaticTextBugzillaRemoteComponentScraper()
- finder = BugzillaRemoteComponentFinder(
- logger=BufferLogger(),
- static_bugzilla_scraper=bz_scraper)
+ finder = BugzillaRemoteComponentFinder(logger=BufferLogger())
+ responses.add(
+ "GET", re.compile(r".*/query\.cgi\?format=advanced"),
+ match_querystring=True, content_type="text/html",
+ body=read_test_file("bugzilla-fdo-advanced-query.html"))
finder.getRemoteProductsAndComponents(bugtracker_name="fdo-example")
self.assertEqual(
@@ -195,48 +174,60 @@
self.assertIsNot(None, comp)
self.assertEqual(u'Driver/Radeon', comp.name)
+ @responses.activate
def test_get_remote_products_and_components_encounters_301(self):
- self.factory.makeBugTracker()
+ def redirect_callback(request):
+ new_url = request.url.replace("query.cgi", "newquery.cgi")
+ return (301, {"Location": new_url}, "")
+
+ lp_bugtracker = self.factory.makeBugTracker(
+ title="fdo-example",
+ name="fdo-example")
transaction.commit()
- bz_scraper = FaultyBugzillaRemoteComponentScraper(
- error=HTTPError("http://bugzilla.example.com",
- 301, 'Moved Permanently', {}, None))
- finder = BugzillaRemoteComponentFinder(
- logger=BufferLogger(), static_bugzilla_scraper=bz_scraper)
-
- self.assertGetRemoteProductsAndComponentsDoesNotAssert(finder)
-
+
+ finder = BugzillaRemoteComponentFinder(logger=BufferLogger())
+ responses.add_callback(
+ "GET", re.compile(r".*/query\.cgi"), callback=redirect_callback)
+ responses.add(
+ "GET", re.compile(r".*/newquery\.cgi\?format=advanced"),
+ match_querystring=True, content_type="text/html",
+ body=read_test_file("bugzilla-fdo-advanced-query.html"))
+ finder.getRemoteProductsAndComponents(bugtracker_name="fdo-example")
+
+ self.assertEqual(
+ 109, len(list(lp_bugtracker.getAllRemoteComponentGroups())))
+ comp_group = lp_bugtracker.getRemoteComponentGroup(u'xorg')
+ self.assertIsNot(None, comp_group)
+ self.assertEqual(146, len(list(comp_group.components)))
+ comp = comp_group.getComponent(u'Driver/Radeon')
+ self.assertIsNot(None, comp)
+ self.assertEqual(u'Driver/Radeon', comp.name)
+
+ @responses.activate
def test_get_remote_products_and_components_encounters_400(self):
self.factory.makeBugTracker()
transaction.commit()
- bz_scraper = FaultyBugzillaRemoteComponentScraper(
- error=HTTPError("http://bugzilla.example.com",
- 400, 'Bad Request', {}, None))
- finder = BugzillaRemoteComponentFinder(
- logger=BufferLogger(), static_bugzilla_scraper=bz_scraper)
+ finder = BugzillaRemoteComponentFinder(logger=BufferLogger())
+ responses.add("GET", re.compile(r".*/query\.cgi"), status=400)
self.assertGetRemoteProductsAndComponentsDoesNotAssert(finder)
+ @responses.activate
def test_get_remote_products_and_components_encounters_404(self):
self.factory.makeBugTracker()
transaction.commit()
- bz_scraper = FaultyBugzillaRemoteComponentScraper(
- error=HTTPError("http://bugzilla.example.com",
- 404, 'Not Found', {}, None))
- finder = BugzillaRemoteComponentFinder(
- logger=BufferLogger(), static_bugzilla_scraper=bz_scraper)
+ finder = BugzillaRemoteComponentFinder(logger=BufferLogger())
+ responses.add("GET", re.compile(r".*/query\.cgi"), status=404)
self.assertGetRemoteProductsAndComponentsDoesNotAssert(finder)
+ @responses.activate
def test_get_remote_products_and_components_encounters_500(self):
self.factory.makeBugTracker()
transaction.commit()
- bz_scraper = FaultyBugzillaRemoteComponentScraper(
- error=HTTPError("http://bugzilla.example.com",
- 500, 'Internal Server Error', {}, None))
- finder = BugzillaRemoteComponentFinder(
- logger=BufferLogger(), static_bugzilla_scraper=bz_scraper)
+ finder = BugzillaRemoteComponentFinder(logger=BufferLogger())
+ responses.add("GET", re.compile(r".*/query\.cgi"), status=500)
self.assertGetRemoteProductsAndComponentsDoesNotAssert(finder)
# FIXME: This takes ~9 sec to run, but mars says new testsuites need to
=== modified file 'lib/lp/services/config/schema-lazr.conf'
--- lib/lp/services/config/schema-lazr.conf 2018-05-21 20:30:16 +0000
+++ lib/lp/services/config/schema-lazr.conf 2018-05-31 12:52:21 +0000
@@ -1667,12 +1667,18 @@
# datatype: string
dbuser: updatesourceforgeremoteproduct
+# datatype: integer
+timeout: 30
+
[updatebugzillaremotecomponents]
# The database user to run this process as.
# datatype: string
dbuser: updatebugzillaremotecomponents
+# datatype: integer
+timeout: 30
+
[uploader]
# The database user which will be used by this process.
=== modified file 'lib/lp/services/tests/test_timeout.py'
--- lib/lp/services/tests/test_timeout.py 2017-04-29 23:51:28 +0000
+++ lib/lp/services/tests/test_timeout.py 2018-05-31 12:52:21 +0000
@@ -1,4 +1,4 @@
-# Copyright 2012-2016 Canonical Ltd. This software is licensed under the
+# Copyright 2012-2018 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""timeout.py tests.
@@ -24,6 +24,7 @@
from lp.services.timeout import (
default_timeout,
get_default_timeout_function,
+ override_timeout,
reduced_timeout,
set_default_timeout_function,
TimeoutError,
@@ -247,6 +248,15 @@
finally:
clear_request_started()
+ def test_override_timeout(self):
+ """override_timeout temporarily overrides the default timeout."""
+ self.addCleanup(set_default_timeout_function, None)
+ with override_timeout(1.0):
+ self.assertEqual(1.0, get_default_timeout_function()())
+ set_default_timeout_function(lambda: 5.0)
+ with override_timeout(1.0):
+ self.assertEqual(1.0, get_default_timeout_function()())
+
def make_test_socket(self):
"""One common use case for timing out is when making an HTTP request
to an external site to fetch content. To this end, the timeout
=== modified file 'lib/lp/services/timeout.py'
--- lib/lp/services/timeout.py 2018-03-31 16:02:54 +0000
+++ lib/lp/services/timeout.py 2018-05-31 12:52:21 +0000
@@ -7,6 +7,7 @@
__all__ = [
"default_timeout",
"get_default_timeout_function",
+ "override_timeout",
"reduced_timeout",
"SafeTransportWithTimeout",
"set_default_timeout_function",
@@ -110,6 +111,21 @@
set_default_timeout_function(original_timeout_function)
+@contextmanager
+def override_timeout(timeout):
+ """A context manager that temporarily overrides the default timeout.
+
+ :param timeout: The new timeout to use.
+ """
+ original_timeout_function = get_default_timeout_function()
+
+ set_default_timeout_function(lambda: timeout)
+ try:
+ yield
+ finally:
+ set_default_timeout_function(original_timeout_function)
+
+
class TimeoutError(Exception):
"""Exception raised when a function doesn't complete within time."""