launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #23569
[Merge] lp:~cjwatson/launchpad/bs4-initial into lp:launchpad
Colin Watson has proposed merging lp:~cjwatson/launchpad/bs4-initial into lp:launchpad.
Commit message:
Add beautifulsoup4 and convert some initial tests to it.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/bs4-initial/+merge/366479
There's some slightly ugly duplication here, but it'll be fixed once we finish the conversion.
I've been meaning to get started on this for a while, but moved it up my list when I found that we're currently pulling in (via soupmatchers) an older version of beautifulsoup4 that is incompatible with bionic's html5lib (https://bugs.launchpad.net/beautifulsoup/+bug/1603299).
--
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/bs4-initial into lp:launchpad.
=== modified file 'constraints.txt'
--- constraints.txt 2019-04-23 14:02:52 +0000
+++ constraints.txt 2019-04-24 16:24:33 +0000
@@ -227,9 +227,11 @@
auditorfixture==0.0.7
Automat==0.6.0
Babel==2.5.1
+backports.functools-lru-cache==1.5
backports.lzma==0.0.3
bcrypt==3.1.4
BeautifulSoup==3.2.1
+beautifulsoup4[lxml]==4.7.1
billiard==3.5.0.5
bson==0.3.3
bzr==2.6.0.lp.3
@@ -352,6 +354,7 @@
six==1.12.0
snowballstemmer==1.2.1
soupmatchers==0.4
+soupsieve==1.9
sphinxcontrib-websupport==1.0.1
# lp:~launchpad-committers/storm/lp
storm==0.20.0.99-lp-r411
=== modified file 'lib/lp/answers/tests/test_question_webservice.py'
--- lib/lp/answers/tests/test_question_webservice.py 2017-11-10 11:28:43 +0000
+++ lib/lp/answers/tests/test_question_webservice.py 2019-04-24 16:24:33 +0000
@@ -1,4 +1,4 @@
-# Copyright 2011-2016 Canonical Ltd. This software is licensed under the
+# Copyright 2011-2019 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""Webservice unit tests related to Launchpad Questions."""
@@ -29,7 +29,7 @@
NotQuestionOwnerError,
QuestionTargetError,
)
-from lp.services.beautifulsoup import BeautifulSoup
+from lp.services.beautifulsoup import BeautifulSoup4 as BeautifulSoup
from lp.services.webapp.interfaces import OAuthPermission
from lp.testing import (
admin_logged_in,
@@ -109,7 +109,7 @@
def findQuestionTitle(self, response):
"""Find the question title field in an XHTML document fragment."""
soup = BeautifulSoup(response.body)
- dt = soup.find('dt', text="title").parent
+ dt = soup.find('dt', text="title")
dd = dt.findNextSibling('dd')
return str(dd.contents.pop())
=== modified file 'lib/lp/app/browser/tests/test_base_layout.py'
--- lib/lp/app/browser/tests/test_base_layout.py 2017-10-21 18:14:14 +0000
+++ lib/lp/app/browser/tests/test_base_layout.py 2019-04-24 16:24:33 +0000
@@ -1,4 +1,4 @@
-# Copyright 2010 Canonical Ltd. This software is licensed under the
+# Copyright 2010-2019 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""Tests for base-layout.pt and its macros.
@@ -16,7 +16,7 @@
from z3c.ptcompat import ViewPageTemplateFile
from lp.registry.interfaces.person import PersonVisibility
-from lp.services.beautifulsoup import BeautifulSoup
+from lp.services.beautifulsoup import BeautifulSoup4 as BeautifulSoup
from lp.services.webapp.publisher import LaunchpadView
from lp.services.webapp.servers import LaunchpadTestRequest
from lp.testing import (
@@ -85,7 +85,7 @@
self.assertTrue(head.title.string.startswith(view.page_title))
# The shortcut icon for the browser chrome is provided.
link_tag = head.link
- self.assertEqual('shortcut icon', link_tag['rel'])
+ self.assertEqual(['shortcut', 'icon'], link_tag['rel'])
self.assertEqual('/@@/launchpad.png', link_tag['href'])
# The template loads the common scripts.
load_script = find_tag_by_id(head, 'base-layout-load-scripts').name
@@ -97,17 +97,17 @@
yui_layout = document.find('div', 'yui-d0')
self.assertTrue(yui_layout is not None)
self.assertEqual(
- 'login-logout', yui_layout.find(True, id='locationbar')['class'])
- self.assertEqual(
- 'yui-main', yui_layout.find(True, id='maincontent')['class'])
- self.assertEqual(
- 'footer', yui_layout.find(True, id='footer')['class'])
+ ['login-logout'], yui_layout.find(True, id='locationbar')['class'])
+ self.assertEqual(
+ ['yui-main'], yui_layout.find(True, id='maincontent')['class'])
+ self.assertEqual(
+ ['footer'], yui_layout.find(True, id='footer')['class'])
def verify_watermark(self, document):
# Verify the parts of a watermark.
yui_layout = document.find('div', 'yui-d0')
watermark = yui_layout.find(True, id='watermark')
- self.assertEqual('watermark-apps-portlet', watermark['class'])
+ self.assertEqual(['watermark-apps-portlet'], watermark['class'])
if self.context.is_team:
self.assertEqual('/@@/team-logo', watermark.img['src'])
self.assertEqual(
@@ -115,7 +115,7 @@
else:
self.assertEqual('/@@/person-logo', watermark.img['src'])
self.assertEqual('Waffles', watermark.h2.a.string)
- self.assertEqual('facetmenu', watermark.ul['class'])
+ self.assertEqual(['facetmenu'], watermark.ul['class'])
def test_main_side(self):
# The main_side layout has everything.
@@ -127,10 +127,10 @@
document = find_tag_by_id(content, 'document')
self.verify_base_layout_body_parts(document)
classes = 'tab-overview main_side public yui3-skin-sam'.split()
- self.assertEqual(classes, document['class'].split())
+ self.assertEqual(classes, document['class'])
self.verify_watermark(document)
self.assertEqual(
- 'registering', document.find(True, id='registration')['class'])
+ ['registering'], document.find(True, id='registration')['class'])
self.assertEqual(
'Registered on 2005-09-16 by Illuminati',
document.find(True, id='registration').string.strip(),
@@ -139,7 +139,8 @@
extract_text(document.find(True, id='maincontent')),
'Main content of the page.')
self.assertEqual(
- 'yui-b side', document.find(True, id='side-portlets')['class'])
+ ['yui-b', 'side'],
+ document.find(True, id='side-portlets')['class'])
self.assertEqual('form', document.find(True, id='globalsearch').name)
def test_main_only(self):
@@ -151,10 +152,10 @@
document = find_tag_by_id(content, 'document')
self.verify_base_layout_body_parts(document)
classes = 'tab-overview main_only public yui3-skin-sam'.split()
- self.assertEqual(classes, document['class'].split())
+ self.assertEqual(classes, document['class'])
self.verify_watermark(document)
self.assertEqual(
- 'registering', document.find(True, id='registration')['class'])
+ ['registering'], document.find(True, id='registration')['class'])
self.assertEqual(None, document.find(True, id='side-portlets'))
self.assertEqual('form', document.find(True, id='globalsearch').name)
@@ -168,9 +169,9 @@
self.verify_base_layout_body_parts(document)
self.verify_watermark(document)
classes = 'tab-overview searchless public yui3-skin-sam'.split()
- self.assertEqual(classes, document['class'].split())
+ self.assertEqual(classes, document['class'])
self.assertEqual(
- 'registering', document.find(True, id='registration')['class'])
+ ['registering'], document.find(True, id='registration')['class'])
self.assertEqual(None, document.find(True, id='side-portlets'))
self.assertEqual(None, document.find(True, id='globalsearch'))
@@ -180,7 +181,7 @@
view._user = self.user
content = BeautifulSoup(view())
footer = find_tag_by_id(content, 'footer')
- link = footer.find('a', text='Contact Launchpad Support').parent
+ link = footer.find('a', text='Contact Launchpad Support')
self.assertEqual('/support', link['href'])
def test_contact_support_anonymous(self):
@@ -189,7 +190,7 @@
view._user = None
content = BeautifulSoup(view())
footer = find_tag_by_id(content, 'footer')
- link = footer.find('a', text='Contact Launchpad Support').parent
+ link = footer.find('a', text='Contact Launchpad Support')
self.assertEqual('/feedback', link['href'])
def test_user_without_launchpad_view(self):
=== modified file 'lib/lp/app/browser/tests/test_launchpadroot.py'
--- lib/lp/app/browser/tests/test_launchpadroot.py 2018-01-02 16:10:26 +0000
+++ lib/lp/app/browser/tests/test_launchpadroot.py 2019-04-24 16:24:33 +0000
@@ -1,4 +1,4 @@
-# Copyright 2010-2017 Canonical Ltd. This software is licensed under the
+# Copyright 2010-2019 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""Tests related to ILaunchpadRoot."""
@@ -14,8 +14,8 @@
from lp.registry.interfaces.person import IPersonSet
from lp.registry.interfaces.pillar import IPillarNameSet
from lp.services.beautifulsoup import (
- BeautifulSoup,
- SoupStrainer,
+ BeautifulSoup4 as BeautifulSoup,
+ SoupStrainer4 as SoupStrainer,
)
from lp.services.config import config
from lp.services.features.testing import FeatureFixture
@@ -93,7 +93,7 @@
# Stub out the getRecentBlogPosts which fetches a blog feed using
# urlfetch.
view.getRecentBlogPosts = lambda: []
- content = BeautifulSoup(view(), parseOnlyThese=SoupStrainer('a'))
+ content = BeautifulSoup(view(), parse_only=SoupStrainer('a'))
self.assertTrue(
content.find('a', href='+featuredprojects'),
"Cannot find the +featuredprojects link on the first page")
@@ -142,8 +142,7 @@
view = create_initialized_view(root, 'index.html', principal=user)
# Replace the blog posts so the view does not make a network request.
view.getRecentBlogPosts = lambda: []
- markup = BeautifulSoup(
- view(), parseOnlyThese=SoupStrainer(id='document'))
+ markup = BeautifulSoup(view(), parse_only=SoupStrainer(id='document'))
self.assertIs(False, view.has_watermark)
self.assertIs(None, markup.find(True, id='watermark'))
logo = markup.find(True, id='launchpad-logo-and-name')
@@ -177,8 +176,8 @@
view = create_initialized_view(root, 'index.html')
view.getRecentBlogPosts = _get_blog_posts
result = view()
- markup = BeautifulSoup(result,
- parseOnlyThese=SoupStrainer(id='homepage-blogposts'))
+ markup = BeautifulSoup(
+ result, parse_only=SoupStrainer(id='homepage-blogposts'))
self.assertEqual(['called'], calls)
items = markup.findAll('li', 'news')
# Notice about launchpad being opened is always added at the end
@@ -204,7 +203,7 @@
view = create_initialized_view(root, 'index.html', principal=user)
view.getRecentBlogPosts = _get_blog_posts
markup = BeautifulSoup(
- view(), parseOnlyThese=SoupStrainer(id='homepage'))
+ view(), parse_only=SoupStrainer(id='homepage'))
self.assertEqual([], calls)
self.assertIs(None, markup.find(True, id='homepage-blogposts'))
# Even logged in users should get the launchpad intro text in the left
@@ -225,8 +224,8 @@
with anonymous_logged_in():
view = create_initialized_view(root, 'index.html')
result = view()
- markup = BeautifulSoup(result,
- parseOnlyThese=SoupStrainer(id='homepage-blogposts'))
+ markup = BeautifulSoup(
+ result, parse_only=SoupStrainer(id='homepage-blogposts'))
items = markup.findAll('li', 'news')
self.assertEqual(3, len(items))
=== modified file 'lib/lp/app/widgets/doc/launchpad-radio-widget.txt'
--- lib/lp/app/widgets/doc/launchpad-radio-widget.txt 2017-10-21 18:14:14 +0000
+++ lib/lp/app/widgets/doc/launchpad-radio-widget.txt 2019-04-24 16:24:33 +0000
@@ -10,7 +10,7 @@
The LaunchpadRadioWidget is mostly used to display items from
an enumerated type.
- >>> from lp.services.beautifulsoup import BeautifulSoup
+ >>> from lp.services.beautifulsoup import BeautifulSoup4 as BeautifulSoup
>>> from lp.services.webapp.servers import LaunchpadTestRequest
>>> from lp.code.interfaces.branch import IBranch
>>> branch = factory.makeAnyBranch()
@@ -25,15 +25,15 @@
>>> html = BeautifulSoup(radio_widget())
>>> for label in html.findAll('label'):
- ... print label.renderContents()
- <input class="radioType" checked="checked" id="field.branch_type.0"
- name="field.branch_type" type="radio" value="HOSTED" /> Hosted
+ ... print label.encode_contents(formatter='html')
+ <input checked="checked" class="radioType" id="field.branch_type.0"
+ name="field.branch_type" type="radio" value="HOSTED"/> Hosted
<input class="radioType" id="field.branch_type.1" name="field.branch_type"
- type="radio" value="MIRRORED" /> Mirrored
+ type="radio" value="MIRRORED"/> Mirrored
<input class="radioType" id="field.branch_type.2" name="field.branch_type"
- type="radio" value="IMPORTED" /> Imported
+ type="radio" value="IMPORTED"/> Imported
<input class="radioType" id="field.branch_type.3" name="field.branch_type"
- type="radio" value="REMOTE" /> Remote
+ type="radio" value="REMOTE"/> Remote
LaunchpadRadioWidgetWithDescription
=== modified file 'lib/lp/blueprints/stories/sprints/xx-sprint-meeting-export.txt'
--- lib/lp/blueprints/stories/sprints/xx-sprint-meeting-export.txt 2013-04-16 01:18:10 +0000
+++ lib/lp/blueprints/stories/sprints/xx-sprint-meeting-export.txt 2019-04-24 16:24:33 +0000
@@ -30,9 +30,9 @@
The attendees element contains a list of person elements.
- >>> from BeautifulSoup import BeautifulStoneSoup as BSS
>>> import operator
- >>> soup = BSS(browser.contents)
+ >>> from lp.services.beautifulsoup import BeautifulSoup4 as BeautifulSoup
+ >>> soup = BeautifulSoup(browser.contents, 'xml')
>>> people = soup.find('attendees').findAll('person')
>>> for person in sorted(people, key=operator.itemgetter("displayname")):
... print "%(displayname)s, %(name)s, %(start)s -> %(end)s" % person
@@ -42,7 +42,7 @@
The <unscheduled /> element contains a list of meetings. Each of these
actually refers to a Specification.
- >>> soup = BSS(browser.contents)
+ >>> soup = BeautifulSoup(browser.contents, 'xml')
>>> meetings = soup.find('unscheduled').findAll('meeting')
>>> for meeting in meetings:
... print "%(id)s: %(name)s, %(lpurl)s" % meeting
=== modified file 'lib/lp/bugs/browser/tests/test_bug_views.py'
--- lib/lp/bugs/browser/tests/test_bug_views.py 2018-01-02 16:10:26 +0000
+++ lib/lp/bugs/browser/tests/test_bug_views.py 2019-04-24 16:24:33 +0000
@@ -1,4 +1,4 @@
-# Copyright 2011-2012 Canonical Ltd. This software is licensed under the
+# Copyright 2011-2019 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""Tests for Bug Views."""
@@ -37,7 +37,7 @@
IAccessPolicySource,
)
from lp.registry.interfaces.person import PersonVisibility
-from lp.services.beautifulsoup import BeautifulSoup
+from lp.services.beautifulsoup import BeautifulSoup4 as BeautifulSoup
from lp.services.webapp.interfaces import IOpenLaunchBag
from lp.services.webapp.publisher import canonical_url
from lp.services.webapp.servers import LaunchpadTestRequest
@@ -247,7 +247,7 @@
# a CSS class `css_class`.
soup = BeautifulSoup(html)
element = soup.find(attrs={'id': element_id})
- return css_class in element.get('class', '').split(' ')
+ return css_class in element.get('class', [])
def test_bug_mute_for_individual_structural_subscription(self):
# If the person has a structural subscription to the pillar,
@@ -540,7 +540,7 @@
html = view.render()
soup = BeautifulSoup(html)
self.assertEqual(
- u'Private', soup.find('label', text="Private"))
+ u'Private', soup.find('label', text="Private").string)
def test_bugtask_view_user_with_grant_on_bug_for_private_product(self):
# The regular bug view is properly rendered even if the user
=== modified file 'lib/lp/bugs/tests/test_bugs_webservice.py'
--- lib/lp/bugs/tests/test_bugs_webservice.py 2018-01-02 10:54:31 +0000
+++ lib/lp/bugs/tests/test_bugs_webservice.py 2019-04-24 16:24:33 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2012 Canonical Ltd. This software is licensed under the
+# Copyright 2009-2019 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""Webservice unit tests related to Launchpad Bugs."""
@@ -31,7 +31,7 @@
from lp.bugs.interfaces.bug import IBug
from lp.registry.enums import BugSharingPolicy
from lp.registry.interfaces.product import License
-from lp.services.beautifulsoup import BeautifulSoup
+from lp.services.beautifulsoup import BeautifulSoup4 as BeautifulSoup
from lp.services.webapp import snapshot
from lp.services.webapp.interfaces import OAuthPermission
from lp.services.webapp.servers import LaunchpadTestRequest
@@ -108,7 +108,7 @@
def findBugDescription(self, response):
"""Find the bug description field in an XHTML document fragment."""
soup = BeautifulSoup(response.body)
- dt = soup.find('dt', text="description").parent
+ dt = soup.find('dt', text="description")
dd = dt.findNextSibling('dd')
return str(dd.contents.pop())
@@ -121,7 +121,7 @@
self.assertEqual(
self.findBugDescription(response),
u'<p>Useless bugs are useless. '
- 'See <a href="/bugs/%d" class="bug-link">Bug %d</a>.</p>' % (
+ 'See <a class="bug-link" href="/bugs/%d">Bug %d</a>.</p>' % (
self.bug_one.id, self.bug_one.id))
def test_PATCH_xhtml_representation(self):
@@ -140,7 +140,7 @@
self.assertEqual(
self.findBugDescription(response),
- u'<p>See <a href="/bugs/%d" class="bug-link">bug %d</a></p>' % (
+ u'<p>See <a class="bug-link" href="/bugs/%d">bug %d</a></p>' % (
self.bug_one.id, self.bug_one.id))
=== modified file 'lib/lp/registry/stories/person/xx-person-rdf.txt'
--- lib/lp/registry/stories/person/xx-person-rdf.txt 2018-01-26 22:18:38 +0000
+++ lib/lp/registry/stories/person/xx-person-rdf.txt 2019-04-24 16:24:33 +0000
@@ -4,14 +4,14 @@
We export FOAF RDF metadata from the /~Person.name/+index document.
>>> from lp.services.beautifulsoup import (
- ... BeautifulSoup,
- ... SoupStrainer,
+ ... BeautifulSoup4 as BeautifulSoup,
+ ... SoupStrainer4 as SoupStrainer,
... )
>>> anon_browser.open("http://launchpad.dev/~name16")
>>> strainer = SoupStrainer(['link'], {'type': ['application/rdf+xml']})
- >>> soup = BeautifulSoup(anon_browser.contents, parseOnlyThese=strainer)
+ >>> soup = BeautifulSoup(anon_browser.contents, parse_only=strainer)
>>> print soup.renderContents()
- <link rel="meta" type="application/rdf+xml" title="FOAF" href="+rdf" />
+ <link href="+rdf" rel="meta" title="FOAF" type="application/rdf+xml"/>
Individual RDF
@@ -102,7 +102,7 @@
>>> anon_browser.open("http://launchpad.dev/~carlos/+rdf")
>>> strainer = SoupStrainer(['foaf:name'])
- >>> soup = BeautifulSoup(anon_browser.contents, parseOnlyThese=strainer)
+ >>> soup = BeautifulSoup(anon_browser.contents, parse_only=strainer)
>>> for tag in soup:
... tag.renderContents()
'Carlos Perell\xc3\xb3 Mar\xc3\xadn'
@@ -112,7 +112,7 @@
>>> anon_browser.open("http://launchpad.dev/~name21/+rdf")
>>> strainer = SoupStrainer(['foaf:member'])
- >>> soup = BeautifulSoup(anon_browser.contents, parseOnlyThese=strainer)
+ >>> soup = BeautifulSoup(anon_browser.contents, parse_only=strainer)
>>> len(soup)
0
=== modified file 'lib/lp/registry/stories/team/xx-team-contactemail.txt'
--- lib/lp/registry/stories/team/xx-team-contactemail.txt 2017-10-21 18:14:14 +0000
+++ lib/lp/registry/stories/team/xx-team-contactemail.txt 2019-04-24 16:24:33 +0000
@@ -24,9 +24,9 @@
A warning is rendered about the privacy implications of using a mailing list or
external contact address.
- >>> from lp.services.beautifulsoup import BeautifulSoup
+ >>> from lp.services.beautifulsoup import BeautifulSoup4 as BeautifulSoup
>>> soup = BeautifulSoup(browser.contents)
- >>> soup.find(id='email-warning')
+ >>> print(soup.find(id='email-warning').decode())
<p ... Email sent to a mailing list or external contact address may ...
As we can see, the landscape-developers team has no contact address.
=== modified file 'lib/lp/services/beautifulsoup.py'
--- lib/lp/services/beautifulsoup.py 2017-10-21 18:14:14 +0000
+++ lib/lp/services/beautifulsoup.py 2019-04-24 16:24:33 +0000
@@ -1,10 +1,10 @@
-# Copyright 2017 Canonical Ltd. This software is licensed under the
+# Copyright 2017-2019 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""Beautiful Soup wrapper for Launchpad.
With Beautiful Soup 3, this is mostly for future migration convenience.
-With Beautiful Soup 4, it will do a little more work to avoid warnings.
+With Beautiful Soup 4, it does a little more work to avoid warnings.
"""
from __future__ import absolute_import, print_function, unicode_literals
@@ -12,7 +12,9 @@
__metaclass__ = type
__all__ = [
'BeautifulSoup',
+ 'BeautifulSoup4',
'SoupStrainer',
+ 'SoupStrainer4',
]
@@ -20,6 +22,8 @@
BeautifulSoup as _BeautifulSoup,
SoupStrainer,
)
+from bs4 import BeautifulSoup as _BeautifulSoup4
+from bs4.element import SoupStrainer as SoupStrainer4
class BeautifulSoup(_BeautifulSoup):
@@ -28,3 +32,12 @@
if not isinstance(markup, unicode) and "fromEncoding" not in kwargs:
kwargs["fromEncoding"] = "UTF-8"
super(BeautifulSoup, self).__init__(markup=markup, **kwargs)
+
+
+class BeautifulSoup4(_BeautifulSoup4):
+
+ def __init__(self, markup="", features="html.parser", **kwargs):
+ if not isinstance(markup, unicode) and "from_encoding" not in kwargs:
+ kwargs["from_encoding"] = "UTF-8"
+ super(BeautifulSoup4, self).__init__(
+ markup=markup, features=features, **kwargs)
=== modified file 'lib/lp/services/oauth/doc/oauth-pages.txt'
--- lib/lp/services/oauth/doc/oauth-pages.txt 2017-10-21 18:14:14 +0000
+++ lib/lp/services/oauth/doc/oauth-pages.txt 2019-04-24 16:24:33 +0000
@@ -26,15 +26,15 @@
... return view, token
>>> from lp.services.beautifulsoup import (
- ... BeautifulSoup,
- ... SoupStrainer,
+ ... BeautifulSoup4 as BeautifulSoup,
+ ... SoupStrainer4 as SoupStrainer,
... )
>>> def print_hidden_fields(html):
... soup = BeautifulSoup(
- ... html, parseOnlyThese=SoupStrainer(attrs={'type': 'hidden'}))
+ ... html, parse_only=SoupStrainer(attrs={'type': 'hidden'}))
... for tag in soup.findAll(attrs={'type': 'hidden'}):
- ... if tag.attrMap['value']:
- ... print tag.attrMap['name'], tag.attrMap['value']
+ ... if tag['value']:
+ ... print tag['name'], tag['value']
When the client doesn't specify a duration, the resulting request
token will have no expiration date set.
=== modified file 'lib/lp/soyuz/browser/tests/test_archive_packages.py'
--- lib/lp/soyuz/browser/tests/test_archive_packages.py 2018-02-01 18:44:21 +0000
+++ lib/lp/soyuz/browser/tests/test_archive_packages.py 2019-04-24 16:24:33 +0000
@@ -1,4 +1,4 @@
-# Copyright 2010-2018 Canonical Ltd. This software is licensed under the
+# Copyright 2010-2019 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""Unit tests for TestP3APackages."""
@@ -25,7 +25,7 @@
from lp.app.utilities.celebrities import ILaunchpadCelebrities
from lp.registry.interfaces.pocket import PackagePublishingPocket
-from lp.services.beautifulsoup import BeautifulSoup
+from lp.services.beautifulsoup import BeautifulSoup4 as BeautifulSoup
from lp.services.webapp import canonical_url
from lp.services.webapp.authentication import LaunchpadPrincipal
from lp.soyuz.browser.archive import ArchiveNavigationMenu
@@ -397,9 +397,10 @@
self.assertEqual([],
soup.findAll(
'div', attrs={'class': 'pending-job', 'job_id': jobs[-1].id}))
+ showing_tags = soup.find_all(
+ 'span', text=re.compile('Showing 5 of .'))
self.assertEqual(
- ['Showing 5 of 7'],
- soup.findAll('span', text=re.compile('Showing 5 of .')))
+ ['Showing 5 of 7'], [tag.string for tag in showing_tags])
def test_job_notifications_display_owner_is_team(self):
team = self.factory.makeTeam()
=== modified file 'lib/lp/testing/pages.py'
--- lib/lp/testing/pages.py 2018-12-10 13:54:34 +0000
+++ lib/lp/testing/pages.py 2019-04-24 16:24:33 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2018 Canonical Ltd. This software is licensed under the
+# Copyright 2009-2019 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""Testing infrastructure for page tests."""
@@ -25,6 +25,15 @@
ProcessingInstruction,
Tag,
)
+from bs4.element import (
+ Comment as Comment4,
+ Declaration as Declaration4,
+ Doctype as Doctype4,
+ NavigableString as NavigableString4,
+ PageElement as PageElement4,
+ ProcessingInstruction as ProcessingInstruction4,
+ Tag as Tag4,
+ )
from contrib.oauth import (
OAuthConsumer,
OAuthRequest,
@@ -195,6 +204,8 @@
"""Find and return the tag with the given ID"""
if isinstance(content, PageElement):
elements_with_id = content.findAll(True, {'id': id})
+ elif isinstance(content, PageElement4):
+ elements_with_id = content.find_all(True, {'id': id})
else:
elements_with_id = [
tag for tag in BeautifulSoup(
@@ -272,10 +283,10 @@
return [extract_text(tag) for tag in soup]
-def print_feedback_messages(content):
+def print_feedback_messages(content, formatter='minimal'):
"""Print out the feedback messages."""
for message in get_feedback_messages(content):
- print extract_text(message)
+ print extract_text(message, formatter=formatter)
def print_table(content, columns=None, skip_rows=None, sep="\t"):
@@ -337,7 +348,10 @@
return label.replace('\xC2', '').replace('\xA0', '').strip()
-IGNORED_ELEMENTS = [Comment, Declaration, ProcessingInstruction]
+IGNORED_ELEMENTS = [
+ Comment, Declaration, ProcessingInstruction,
+ Comment4, Declaration4, Doctype4, ProcessingInstruction4,
+ ]
ELEMENTS_INTRODUCING_NEWLINE = [
'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'ul', 'ol', 'pre', 'dl',
'div', 'noscript', 'blockquote', 'form', 'hr', 'table', 'fieldset',
@@ -348,7 +362,7 @@
LEADING_AND_TRAILING_SPACES_RE = re.compile(
u'(^[ \t]+)|([ \t]$)', re.MULTILINE)
TABS_AND_SPACES_RE = re.compile(u'[ \t]+')
-NBSP_RE = re.compile(u'&nbsp;|&#160;')
+NBSP_RE = re.compile(u'&nbsp;|&#160;|\xa0')
def extract_link_from_tag(tag, base=None):
@@ -357,7 +371,7 @@
A `tag` should contain a 'href' attribute, and `base` will commonly
be extracted from browser.url.
"""
- if not isinstance(tag, PageElement):
+ if not isinstance(tag, (PageElement, PageElement4)):
link = BeautifulSoup(tag)
else:
link = tag
@@ -369,7 +383,8 @@
return urljoin(base, href)
-def extract_text(content, extract_image_text=False, skip_tags=None):
+def extract_text(content, extract_image_text=False, skip_tags=None,
+ formatter='minimal'):
"""Return the text stripped of all tags.
All runs of tabs and spaces are replaced by a single space and runs of
@@ -378,7 +393,7 @@
"""
if skip_tags is None:
skip_tags = ['script']
- if not isinstance(content, PageElement):
+ if not isinstance(content, (PageElement, PageElement4)):
soup = BeautifulSoup(content)
else:
soup = content
@@ -409,10 +424,15 @@
result.append(unicode(node[:]))
elif isinstance(node, NavigableString):
result.append(unicode(node))
+ elif isinstance(node, NavigableString4):
+ result.append(node.format_string(node, formatter=formatter))
else:
- if isinstance(node, Tag):
+ if isinstance(node, (Tag, Tag4)):
# If the node has the class "sortkey" then it is invisible.
- if node.get('class') == 'sortkey':
+ if isinstance(node, Tag) and node.get('class') == 'sortkey':
+ continue
+ elif (isinstance(node, Tag4) and
+ node.get('class') == ['sortkey']):
continue
elif getattr(node, 'name', '') in skip_tags:
continue
@@ -622,8 +642,12 @@
else:
for tab in location_apps:
tab_text = extract_text(tab)
- if tab['class'].find('active') != -1:
- tab_text += ' (selected)'
+ if isinstance(tab['class'], list): # BeautifulSoup 4
+ if 'active' in tab['class']:
+ tab_text += ' (selected)'
+ else: # BeautifulSoup 3
+ if tab['class'].find('active') != -1:
+ tab_text += ' (selected)'
if tab.a:
link = tab.a['href']
else:
=== modified file 'setup.py'
--- setup.py 2019-04-16 14:30:40 +0000
+++ setup.py 2019-04-24 16:24:33 +0000
@@ -148,6 +148,7 @@
'auditorfixture',
'backports.lzma',
'BeautifulSoup',
+ 'beautifulsoup4[lxml]',
'bzr',
'celery',
'cssselect',
=== modified file 'utilities/snakefood/Makefile'
--- utilities/snakefood/Makefile 2011-12-29 05:29:36 +0000
+++ utilities/snakefood/Makefile 2019-04-24 16:24:33 +0000
@@ -8,7 +8,7 @@
-I $(LIB_DIR)/devscripts -I $(LIB_DIR)/contrib \
-I $(LIB_DIR)/canonical/not-used $(LIB_DIR)/canonical \
$(LIB_DIR)/lp 2>/dev/null | grep -v contrib/ \
- | grep -v sqlobject | grep -v BeautifulSoup | grep -v psycopg \
+ | grep -v sqlobject | egrep -v 'BeautifulSoup|bs4' | grep -v psycopg \
| grep -v schoolbell | grep -v '/tests/' | grep -v '/ftests/' \
| grep -v 'lp/services/config' > lp.sfood.tmp
mv lp.sfood.tmp lp.sfood
Follow ups