launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #22376
[Merge] lp:~maxiberta/launchpad/sitesearch-cleanup-2 into lp:launchpad
Maximiliano Bertacchini has proposed merging lp:~maxiberta/launchpad/sitesearch-cleanup-2 into lp:launchpad with lp:~maxiberta/launchpad/sitesearch-cleanup-1 as a prerequisite.
Commit message:
Assorted sitesearch fixes and improvements (part 2).
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~maxiberta/launchpad/sitesearch-cleanup-2/+merge/343236
Assorted sitesearch fixes and improvements (part 2).
- Add sitesearch unittests based on doctests.
- sitesearch/doc/google-searchservice.txt => sitesearch/tests/test_google.py
- sitesearch/doc/bing-searchservice.txt => sitesearch/tests/test_bing.py
- Add PageMatch and PageMatches unittests based on doctests.
- sitesearch/doc/*-searchservice.txt => sitesearch/tests/test_pagematch.py
--
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~maxiberta/launchpad/sitesearch-cleanup-2 into lp:launchpad.
=== modified file 'lib/lp/services/sitesearch/tests/test_bing.py'
--- lib/lp/services/sitesearch/tests/test_bing.py 2018-04-12 19:42:23 +0000
+++ lib/lp/services/sitesearch/tests/test_bing.py 2018-04-13 19:42:12 +0000
@@ -1,4 +1,4 @@
-# Copyright 2011-2018 Canonical Ltd. This software is licensed under the
+# Copyright 2018 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""Test the bing search service."""
@@ -7,31 +7,177 @@
__metaclass__ = type
+import json
+from os import path
+
from fixtures import MockPatch
from requests.exceptions import (
ConnectionError,
HTTPError,
)
-from testtools.matchers import MatchesStructure
+from testtools.matchers import (
+ HasLength,
+ MatchesStructure,
+)
+from lp.services.config import config
from lp.services.sitesearch import BingSearchService
from lp.services.sitesearch.interfaces import SiteSearchResponseError
from lp.services.timeout import TimeoutError
from lp.testing import TestCase
-from lp.testing.layers import (
- BingLaunchpadFunctionalLayer,
- LaunchpadFunctionalLayer,
- )
+from lp.testing.layers import BingLaunchpadFunctionalLayer
class TestBingSearchService(TestCase):
"""Test BingSearchService."""
- layer = LaunchpadFunctionalLayer
-
def setUp(self):
super(TestBingSearchService, self).setUp()
self.search_service = BingSearchService()
+ self.base_path = path.normpath(
+ path.join(path.dirname(__file__), 'data'))
+
+ def test_configuration(self):
+ self.assertEqual(config.bing.site, self.search_service.site)
+ self.assertEqual(
+ config.bing.subscription_key, self.search_service.subscription_key)
+ self.assertEqual(
+ config.bing.custom_config_id, self.search_service.custom_config_id)
+
+ def test_create_search_url(self):
+ self.assertEndsWith(
+ self.search_service.create_search_url(terms='svg +bugs'),
+ '&offset=0&q=svg+%2Bbugs')
+
+ def test_create_search_url_escapes_unicode_chars(self):
+ self.assertEndsWith(
+ self.search_service.create_search_url('Carlo Perell\xf3 Mar\xedn'),
+ '&offset=0&q=Carlo+Perell%C3%B3+Mar%C3%ADn')
+
+ def test_create_search_url_with_offset(self):
+ self.assertEndsWith(
+ self.search_service.create_search_url(terms='svg +bugs', start=20),
+ '&offset=20&q=svg+%2Bbugs')
+
+ def test_create_search_url_empty_terms(self):
+ e = self.assertRaises(
+ ValueError, self.search_service.create_search_url, '')
+ self.assertEqual("Missing value for parameter 'q'.", str(e))
+
+ def test_create_search_url_null_terms(self):
+ e = self.assertRaises(
+ ValueError, self.search_service.create_search_url, None)
+ self.assertEqual("Missing value for parameter 'q'.", str(e))
+
+ def test_create_search_url_requires_start(self):
+ e = self.assertRaises(
+ ValueError, self.search_service.create_search_url, 'bugs', 'true')
+ self.assertEqual("Value for parameter 'offset' is not an int.", str(e))
+
+ def test_parse_search_response_invalid_total(self):
+ """The PageMatches's total attribute comes from the
+ `webPages.totalEstimatedMatches` JSON element.
+ When it cannot be found and the value cast to an int,
+ an error is raised. If Bing were to redefine the meaning of the
+ element to use a '~' to indicate an approximate total, an error would
+ be raised.
+ """
+ file_name = path.join(
+ self.base_path, 'bingsearchservice-incompatible-matches.json')
+ with open(file_name, 'r') as response_file:
+ response = response_file.read()
+ assert (
+ json.loads(response)['webPages']['totalEstimatedMatches'] == '~25')
+
+ e = self.assertRaises(
+ SiteSearchResponseError,
+ self.search_service._parse_search_response, response)
+ self.assertEqual(
+ "Could not get the total from the Bing JSON response.", str(e))
+
+ def test_parse_search_response_negative_total(self):
+ """If the total is ever less than zero (see bug 683115),
+ this is expected: we simply return a total of 0.
+ """
+ file_name = path.join(
+ self.base_path, 'bingsearchservice-negative-total.json')
+ with open(file_name, 'r') as response_file:
+ response = response_file.read()
+ assert json.loads(response)['webPages']['totalEstimatedMatches'] == -25
+
+ matches = self.search_service._parse_search_response(response)
+ self.assertEqual(0, matches.total)
+
+ def test_parse_search_response_missing_title(self):
+ """A PageMatch requires a title, url, and a summary. If those elements
+ cannot be found, a PageMatch cannot be made. A missing title ('name')
+ indicates a bad page on Launchpad, so it is ignored. In this example,
+ the first match is missing a title, so only the second page is present
+ in the PageMatches.
+ """
+ file_name = path.join(
+ self.base_path, 'bingsearchservice-missing-title.json')
+ with open(file_name, 'r') as response_file:
+ response = response_file.read()
+ assert len(json.loads(response)['webPages']['value']) == 2
+
+ matches = self.search_service._parse_search_response(response)
+ self.assertThat(matches, HasLength(1))
+ self.assertEqual('GCleaner in Launchpad', matches[0].title)
+ self.assertEqual('http://launchpad.dev/gcleaner', matches[0].url)
+
+ def test_parse_search_response_missing_summary(self):
+ """When a match is missing a summary ('snippet'), the match is skipped
+ because there is no information about why it matched. This appears to
+ relate to pages that are in the index, but should be removed. In this
+ example taken from real data, the links are to the same page on
+ different vhosts. The edge vhost has no summary, so it is skipped.
+ """
+ file_name = path.join(
+ self.base_path, 'bingsearchservice-missing-summary.json')
+ with open(file_name, 'r') as response_file:
+ response = response_file.read()
+ assert len(json.loads(response)['webPages']['value']) == 2
+
+ matches = self.search_service._parse_search_response(response)
+ self.assertThat(matches, HasLength(1))
+ self.assertEqual('BugExpiry - Launchpad Help', matches[0].title)
+ self.assertEqual(
+ 'https://help.launchpad.net/BugExpiry', matches[0].url)
+
+ def test_parse_search_response_missing_url(self):
+ """When the URL ('url') cannot be found the match is skipped. There are
+ no examples of this. We do not want this hypothetical situation to give
+ users a bad experience.
+ """
+ file_name = path.join(
+ self.base_path, 'bingsearchservice-missing-url.json')
+ with open(file_name, 'r') as response_file:
+ response = response_file.read()
+ assert len(json.loads(response)['webPages']['value']) == 2
+
+ matches = self.search_service._parse_search_response(response)
+ self.assertThat(matches, HasLength(1))
+ self.assertEqual('LongoMatch in Launchpad', matches[0].title)
+ self.assertEqual('http://launchpad.dev/longomatch', matches[0].url)
+
+ def test_parse_search_response_with_no_meaningful_results(self):
+ """If no matches are found in the response, and there are 20 or fewer
+ results, an Empty PageMatches is returned. This happens when the
+ results are missing titles and summaries. This is not considered to be
+ a problem because the small number implies that Bing did a poor job
+ of indexing pages or indexed the wrong Launchpad server. In this
+ example, there is only one match, but the results is missing a title so
+ there is not enough information to make a PageMatch.
+ """
+ file_name = path.join(
+ self.base_path, 'bingsearchservice-no-meaningful-results.json')
+ with open(file_name, 'r') as response_file:
+ response = response_file.read()
+ assert len(json.loads(response)['webPages']['value']) == 1
+
+ matches = self.search_service._parse_search_response(response)
+ self.assertThat(matches, HasLength(0))
def test_search_converts_HTTPError(self):
# The method converts HTTPError to SiteSearchResponseError.
@@ -76,13 +222,13 @@
self.search_service._parse_search_response, '{}')
-class FunctionalTestBingSearchService(TestCase):
+class FunctionalBingSearchServiceTests(TestCase):
"""Test BingSearchService."""
layer = BingLaunchpadFunctionalLayer
def setUp(self):
- super(FunctionalTestBingSearchService, self).setUp()
+ super(FunctionalBingSearchServiceTests, self).setUp()
self.search_service = BingSearchService()
def test_search_with_results(self):
@@ -96,6 +242,13 @@
self.assertEqual(20, matches.start)
self.assertEqual(25, matches.total)
self.assertEqual(5, len(matches))
+ self.assertEqual([
+ 'https://help.launchpad.net/Bugs',
+ 'http://blog.launchpad.dev/general/of-bugs-and-statuses',
+ 'http://launchpad.dev/mahara/+milestone/1.8.0',
+ 'http://launchpad.dev/mb',
+ 'http://launchpad.dev/bugs'],
+ [match.url for match in matches])
def test_search_no_results(self):
matches = self.search_service.search('fnord')
=== modified file 'lib/lp/services/sitesearch/tests/test_google.py'
--- lib/lp/services/sitesearch/tests/test_google.py 2018-04-12 19:46:41 +0000
+++ lib/lp/services/sitesearch/tests/test_google.py 2018-04-13 19:42:12 +0000
@@ -3,30 +3,214 @@
"""Test the google search service."""
+from __future__ import absolute_import, print_function, unicode_literals
+
__metaclass__ = type
+from os import path
from fixtures import MockPatch
from requests.exceptions import (
ConnectionError,
HTTPError,
)
+from testtools.matchers import HasLength
+from lp.services.config import config
from lp.services.sitesearch import GoogleSearchService
-from lp.services.sitesearch.interfaces import SiteSearchResponseError
+from lp.services.sitesearch.interfaces import (
+ GoogleWrongGSPVersion,
+ SiteSearchResponseError,
+ )
from lp.services.timeout import TimeoutError
from lp.testing import TestCase
-from lp.testing.layers import LaunchpadFunctionalLayer
+from lp.testing.layers import GoogleLaunchpadFunctionalLayer
class TestGoogleSearchService(TestCase):
"""Test GoogleSearchService."""
- layer = LaunchpadFunctionalLayer
-
def setUp(self):
super(TestGoogleSearchService, self).setUp()
self.search_service = GoogleSearchService()
+ self.base_path = path.normpath(
+ path.join(path.dirname(__file__), 'data'))
+
+ def test_configuration(self):
+ self.assertEqual(config.google.site, self.search_service.site)
+ self.assertEqual(
+ config.google.client_id, self.search_service.client_id)
+
+ def test_create_search_url(self):
+ self.assertEndsWith(
+ self.search_service.create_search_url(terms='svg +bugs'),
+ '&q=svg+%2Bbugs&start=0')
+
+ def test_create_search_url_escapes_unicode_chars(self):
+ self.assertEndsWith(
+ self.search_service.create_search_url('Carlo Perell\xf3 Mar\xedn'),
+ '&q=Carlo+Perell%C3%B3+Mar%C3%ADn&start=0')
+
+ def test_create_search_url_with_offset(self):
+ self.assertEndsWith(
+ self.search_service.create_search_url(terms='svg +bugs', start=20),
+ '&q=svg+%2Bbugs&start=20')
+
+ def test_create_search_url_empty_terms(self):
+ e = self.assertRaises(
+ ValueError, self.search_service.create_search_url, '')
+ self.assertEqual("Missing value for parameter 'q'.", str(e))
+
+ def test_create_search_url_null_terms(self):
+ e = self.assertRaises(
+ ValueError, self.search_service.create_search_url, None)
+ self.assertEqual("Missing value for parameter 'q'.", str(e))
+
+ def test_create_search_url_requires_start(self):
+ e = self.assertRaises(
+ ValueError, self.search_service.create_search_url, 'bugs', 'true')
+ self.assertEqual("Value for parameter 'start' is not an int.", str(e))
+
+ def test_parse_search_response_incompatible_param(self):
+ """The PageMatches's start attribute comes from the GSP XML element
+ '<PARAM name="start" value="0" original_value="0"/>'. When it cannot
+ be found and the value cast to an int, an error is raised. There is
+ nothing in the value attribute in the next test, so an error is raised.
+ """
+ file_name = path.join(
+ self.base_path, 'googlesearchservice-incompatible-param.xml')
+ with open(file_name, 'r') as response_file:
+ response = response_file.read()
+ assert '<M>' not in response
+
+ e = self.assertRaises(
+ GoogleWrongGSPVersion,
+ self.search_service._parse_search_response, response)
+ self.assertEqual(
+ "Could not get the 'start' from the GSP XML response.", str(e))
+
+ def test_parse_search_response_invalid_total(self):
+ """The PageMatches's total attribute comes from the GSP XML element
+ '<M>5</M>'. When it cannot be found and the value cast to an int,
+ an error is raised. If Google were to redefine the meaning of the M
+ element to use a '~' to indicate an approximate total, an error would
+ be raised.
+ """
+ file_name = path.join(
+ self.base_path, 'googlesearchservice-incompatible-matches.xml')
+ with open(file_name, 'r') as response_file:
+ response = response_file.read()
+ assert '<M>~1</M>' in response
+
+ e = self.assertRaises(
+ GoogleWrongGSPVersion,
+ self.search_service._parse_search_response, response)
+ self.assertEqual(
+ "Could not get the 'total' from the GSP XML response.", str(e))
+
+ def test_parse_search_response_negative_total(self):
+ """If the total is ever less than zero (see bug 683115),
+ this is expected: we simply return a total of 0.
+ """
+ file_name = path.join(
+ self.base_path, 'googlesearchservice-negative-total.xml')
+ with open(file_name, 'r') as response_file:
+ response = response_file.read()
+ assert '<M>-1</M>' in response
+
+ matches = self.search_service._parse_search_response(response)
+ self.assertEqual(0, matches.total)
+
+ def test_parse_search_response_missing_title(self):
+ """A PageMatch requires a title, url, and a summary. If those elements
+ ('<T>', '<U>', '<S>') cannot be found nested in an '<R>' a PageMatch
+ cannot be made. A missing title (<T>) indicates a bad page on Launchpad
+ so it is ignored. In this example, The first match is missing a title,
+ so only the second page is present in the PageMatches.
+ """
+ file_name = path.join(
+ self.base_path, 'googlesearchservice-missing-title.xml')
+ with open(file_name, 'r') as response_file:
+ response = response_file.read()
+
+ matches = self.search_service._parse_search_response(response)
+ self.assertThat(matches, HasLength(1))
+ self.assertStartsWith(matches[0].title, 'Bug #205991 in Ubuntu:')
+ self.assertEqual(
+ 'http://bugs.launchpad.dev/bugs/205991', matches[0].url)
+
+ def test_parse_search_response_missing_summary(self):
+ """When a match is missing a summary (<S>), it is skipped because
+ there is no information about why it matched. This appears to relate to
+ pages that are in the index, but should be removed. In this example
+ taken from real data, the links are to the same page on different
+ vhosts. The edge vhost has no summary, so it is skipped.
+ """
+ file_name = path.join(
+ self.base_path, 'googlesearchservice-missing-summary.xml')
+ with open(file_name, 'r') as response_file:
+ response = response_file.read()
+
+ matches = self.search_service._parse_search_response(response)
+ self.assertThat(matches, HasLength(1))
+ self.assertEqual('Blueprint: <b>Gobuntu</b> 8.04', matches[0].title)
+ self.assertEqual(
+ 'http://blueprints.launchpad.dev/ubuntu/+spec/gobuntu-hardy',
+ matches[0].url)
+
+ def test_parse_search_response_missing_url(self):
+ """When the URL (<U>) cannot be found the match is skipped. There are
+ no examples of this. We do not want this hypothetical situation to give
+ users a bad experience.
+ """
+ file_name = path.join(
+ self.base_path, 'googlesearchservice-missing-url.xml')
+ with open(file_name, 'r') as response_file:
+ response = response_file.read()
+
+ matches = self.search_service._parse_search_response(response)
+ self.assertThat(matches, HasLength(1))
+ self.assertEqual('Blueprint: <b>Gobuntu</b> 8.04', matches[0].title)
+ self.assertEqual(
+ 'http://blueprints.launchpad.dev/ubuntu/+spec/gobuntu-hardy',
+ matches[0].url)
+
+ def test_parse_search_response_with_no_meaningful_results(self):
+ """If no matches are found in the response, and there are 20 or fewer
+ results, an Empty PageMatches is returned. This happens when the
+ results are missing titles and summaries. This is not considered to be
+ a problem because the small number implies that Google did a poor job
+ of indexing pages or indexed the wrong Launchpad server. In this
+ example, there is only one match, but the results is missing a title so
+ there is not enough information to make a PageMatch.
+ """
+ file_name = path.join(
+ self.base_path, 'googlesearchservice-no-meaningful-results.xml')
+ with open(file_name, 'r') as response_file:
+ response = response_file.read()
+ assert '<M>1</M>' in response
+
+ matches = self.search_service._parse_search_response(response)
+ self.assertThat(matches, HasLength(0))
+
+ def test_parse_search_response_with_incompatible_result(self):
+ """If no matches are found in the response, and there are more than 20
+ possible matches, an error is raised. Unlike the previous example there
+ are lots of results; there is a possibility that the GSP version is
+ incompatible. This example says it has 1000 matches, but none of the R
+ tags can be parsed (because the markup was changed to use RESULT).
+ """
+ file_name = path.join(
+ self.base_path, 'googlesearchservice-incompatible-result.xml')
+ with open(file_name, 'r') as response_file:
+ response = response_file.read()
+ assert '<M>1000</M>' in response
+
+ e = self.assertRaises(
+ GoogleWrongGSPVersion,
+ self.search_service._parse_search_response, response)
+ self.assertEqual(
+ "Could not get any PageMatches from the GSP XML response.", str(e))
def test_search_converts_HTTPError(self):
# The method converts HTTPError to SiteSearchResponseError.
@@ -71,3 +255,49 @@
self.assertRaises(
SiteSearchResponseError,
self.search_service._parse_search_response, data)
+
+
+class FunctionalGoogleSearchServiceTests(TestCase):
+ """Test GoogleSearchService."""
+
+ layer = GoogleLaunchpadFunctionalLayer
+
+ def setUp(self):
+ super(FunctionalGoogleSearchServiceTests, self).setUp()
+ self.search_service = GoogleSearchService()
+
+ def test_search_with_results(self):
+ matches = self.search_service.search('bug')
+ self.assertEqual(0, matches.start)
+ self.assertEqual(25, matches.total)
+ self.assertEqual(20, len(matches))
+
+ def test_search_with_results_and_offset(self):
+ matches = self.search_service.search('bug', start=20)
+ self.assertEqual(20, matches.start)
+ self.assertEqual(25, matches.total)
+ self.assertEqual(5, len(matches))
+ self.assertEqual([
+ 'http://bugs.launchpad.dev/ubuntu/hoary/+bug/2',
+ 'http://bugs.launchpad.dev/debian/+source/mozilla-firefox/+bug/2',
+ 'http://bugs.launchpad.dev/debian/+source/mozilla-firefox/+bug/3',
+ 'http://bugs.launchpad.dev/bugs/bugtrackers',
+ 'http://bugs.launchpad.dev/bugs/bugtrackers/debbugs'],
+ [match.url for match in matches])
+
+ def test_search_no_results(self):
+ matches = self.search_service.search('fnord')
+ self.assertEqual(0, matches.start)
+ self.assertEqual(0, matches.total)
+ self.assertEqual(0, len(matches))
+
+ def test_search_no_meaningful_results(self):
+ matches = self.search_service.search('no-meaningful')
+ self.assertEqual(0, matches.start)
+ self.assertEqual(1, matches.total)
+ self.assertEqual(0, len(matches))
+
+ def test_search_incomplete_response(self):
+ self.assertRaises(
+ SiteSearchResponseError,
+ self.search_service.search, 'gnomebaker')
=== modified file 'lib/lp/services/sitesearch/tests/test_pagematch.py'
--- lib/lp/services/sitesearch/tests/test_pagematch.py 2018-03-16 14:02:16 +0000
+++ lib/lp/services/sitesearch/tests/test_pagematch.py 2018-04-13 19:42:12 +0000
@@ -5,14 +5,59 @@
__metaclass__ = type
-from lp.services.sitesearch import PageMatch
-from lp.testing import TestCaseWithFactory
-from lp.testing.layers import DatabaseFunctionalLayer
-
-
-class TestPageMatchURLHandling(TestCaseWithFactory):
-
- layer = DatabaseFunctionalLayer
+from lp.services.sitesearch import (
+ PageMatch,
+ PageMatches,
+ )
+from lp.testing import TestCase
+
+
+class TestPageMatchURLHandling(TestCase):
+
+ def test_attributes(self):
+ p = PageMatch(
+ u'Unicode Titles in Launchpad',
+ 'http://example.com/unicode-titles',
+ u'Unicode Titles is a modest project dedicated to using Unicode.')
+ self.assertEqual(u'Unicode Titles in Launchpad', p.title)
+ self.assertEqual(
+ u'Unicode Titles is a modest project dedicated to using Unicode.',
+ p.summary)
+ self.assertEqual('http://example.com/unicode-titles', p.url)
+
+ def test_rewrite_url(self):
+ """The URL scheme used in the rewritten URL is configured via
+ config.vhosts.use_https. The hostname is set in the shared
+ key config.vhost.mainsite.hostname.
+ """
+ p = PageMatch(
+ u'Bug #456 in Unicode title: "testrunner hates Unicode"',
+ 'https://bugs.launchpad.net/unicode-titles/+bug/456',
+ u'The Zope testrunner likes ASCII more than Unicode.')
+ self.assertEqual(
+ 'http://bugs.launchpad.dev/unicode-titles/+bug/456', p.url)
+
+ def test_rewrite_url_with_trailing_slash(self):
+ """A URL's trailing slash is removed; Launchpad does not use trailing
+ slashes.
+ """
+ p = PageMatch(
+ u'Ubuntu in Launchpad',
+ 'https://launchpad.net/ubuntu/',
+ u'Ubuntu also includes more software than any other operating')
+ self.assertEqual('http://launchpad.dev/ubuntu', p.url)
+
+ def test_rewrite_url_exceptions(self):
+ """There is a list of URLs that are not rewritten configured in
+ config.sitesearch.url_rewrite_exceptions. For example,
+ help.launchpad.net is only run in one environment, so links to
+ that site will be preserved.
+ """
+ p = PageMatch(
+ u'OpenID',
+ 'https://help.launchpad.net/OpenID',
+ u'Launchpad uses OpenID.')
+ self.assertEqual('https://help.launchpad.net/OpenID', p.url)
def test_rewrite_url_handles_invalid_data(self):
# Given a bad url, pagematch can get a valid one.
@@ -37,3 +82,24 @@
"field.text=WUSB54GC+%2Bkarmic&"
"field.actions.search=Search")
self.assertEqual(expected, p.url)
+
+
+class TestPageMatches(TestCase):
+
+ def test_initialisation(self):
+ matches = PageMatches([], start=12, total=15)
+ self.assertEqual(12, matches.start)
+ self.assertEqual(15, matches.total)
+
+ def test_len(self):
+ matches = PageMatches(['match1', 'match2', 'match3'], 12, 15)
+ self.assertEqual(3, len(matches))
+
+ def test_getitem(self):
+ matches = PageMatches(['match1', 'match2', 'match3'], 12, 15)
+ self.assertEqual('match2', matches[1])
+
+ def test_iter(self):
+ matches = PageMatches(['match1', 'match2', 'match3'], 12, 15)
+ self.assertEqual(
+ ['match1', 'match2', 'match3'], [match for match in matches])
Follow ups