launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #24256
[Merge] ~cjwatson/launchpad:remove-bs3 into launchpad:master
Colin Watson has proposed merging ~cjwatson/launchpad:remove-bs3 into launchpad:master with ~cjwatson/launchpad:bs4 as a prerequisite.
Commit message:
Finish port to Beautiful Soup 4
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/+git/launchpad/+merge/378025
Port a few remaining tests and some bits of external bug tracker code to Beautiful Soup 4, and remove Beautiful Soup 3.
--
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/launchpad:remove-bs3 into launchpad:master.
diff --git a/constraints.txt b/constraints.txt
index 163e70e..389c6a6 100644
--- a/constraints.txt
+++ b/constraints.txt
@@ -170,7 +170,6 @@ Automat==0.6.0
Babel==2.5.1
backports.functools-lru-cache==1.5
backports.lzma==0.0.3
-BeautifulSoup==3.2.1
beautifulsoup4[lxml]==4.7.1
billiard==3.5.0.5
bleach==3.1.0
diff --git a/lib/lp/answers/stories/question-add-in-other-languages.txt b/lib/lp/answers/stories/question-add-in-other-languages.txt
index a788d13..61c02c2 100644
--- a/lib/lp/answers/stories/question-add-in-other-languages.txt
+++ b/lib/lp/answers/stories/question-add-in-other-languages.txt
@@ -91,7 +91,7 @@ markup. Search engine robots and browsers will use the lang and dir
attributes for indexing and rendering respectively. Users will find
the language in the question details portlet.
- >>> from BeautifulSoup import BeautifulSoup
+ >>> from lp.services.beautifulsoup import BeautifulSoup
>>> soup = BeautifulSoup(browser.contents)
>>> print(soup.find('div', id='question')['lang'])
pt-BR
diff --git a/lib/lp/answers/tests/test_question_webservice.py b/lib/lp/answers/tests/test_question_webservice.py
index 6738532..11fc14d 100644
--- a/lib/lp/answers/tests/test_question_webservice.py
+++ b/lib/lp/answers/tests/test_question_webservice.py
@@ -29,7 +29,7 @@ from lp.answers.errors import (
NotQuestionOwnerError,
QuestionTargetError,
)
-from lp.services.beautifulsoup import BeautifulSoup4 as BeautifulSoup
+from lp.services.beautifulsoup import BeautifulSoup
from lp.services.webapp.interfaces import OAuthPermission
from lp.testing import (
admin_logged_in,
diff --git a/lib/lp/app/__init__.py b/lib/lp/app/__init__.py
index fc252a8..bd4a213 100644
--- a/lib/lp/app/__init__.py
+++ b/lib/lp/app/__init__.py
@@ -19,11 +19,6 @@ from zope.formlib import itemswidgets
itemswidgets.EXPLICIT_EMPTY_SELECTION = False
-# Monkeypatch our embedded BeautifulSoup to teach it that wbr (new in HTML5,
-# but widely supported forever) is self-closing.
-import BeautifulSoup
-BeautifulSoup.BeautifulSoup.SELF_CLOSING_TAGS['wbr'] = None
-
# Load versioninfo.py so that we get errors on start-up rather than waiting
# for first page load.
import lp.app.versioninfo
diff --git a/lib/lp/app/browser/tests/test_base_layout.py b/lib/lp/app/browser/tests/test_base_layout.py
index c77f75c..247726e 100644
--- a/lib/lp/app/browser/tests/test_base_layout.py
+++ b/lib/lp/app/browser/tests/test_base_layout.py
@@ -16,7 +16,7 @@ __metaclass__ = type
from zope.browserpage import ViewPageTemplateFile
from lp.registry.interfaces.person import PersonVisibility
-from lp.services.beautifulsoup import BeautifulSoup4 as BeautifulSoup
+from lp.services.beautifulsoup import BeautifulSoup
from lp.services.webapp.publisher import LaunchpadView
from lp.services.webapp.servers import LaunchpadTestRequest
from lp.testing import (
diff --git a/lib/lp/app/browser/tests/test_launchpadroot.py b/lib/lp/app/browser/tests/test_launchpadroot.py
index 390992a..ece132e 100644
--- a/lib/lp/app/browser/tests/test_launchpadroot.py
+++ b/lib/lp/app/browser/tests/test_launchpadroot.py
@@ -14,8 +14,8 @@ from lp.app.interfaces.launchpad import ILaunchpadCelebrities
from lp.registry.interfaces.person import IPersonSet
from lp.registry.interfaces.pillar import IPillarNameSet
from lp.services.beautifulsoup import (
- BeautifulSoup4 as BeautifulSoup,
- SoupStrainer4 as SoupStrainer,
+ BeautifulSoup,
+ SoupStrainer,
)
from lp.services.config import config
from lp.services.features.testing import FeatureFixture
diff --git a/lib/lp/app/widgets/doc/launchpad-radio-widget.txt b/lib/lp/app/widgets/doc/launchpad-radio-widget.txt
index bd2152a..03b2817 100644
--- a/lib/lp/app/widgets/doc/launchpad-radio-widget.txt
+++ b/lib/lp/app/widgets/doc/launchpad-radio-widget.txt
@@ -10,7 +10,7 @@ and one that doesn't.
The LaunchpadRadioWidget is mostly used to display items from
an enumerated type.
- >>> from lp.services.beautifulsoup import BeautifulSoup4 as BeautifulSoup
+ >>> from lp.services.beautifulsoup import BeautifulSoup
>>> from lp.services.webapp.servers import LaunchpadTestRequest
>>> from lp.code.interfaces.branch import IBranch
>>> branch = factory.makeAnyBranch()
diff --git a/lib/lp/blueprints/stories/sprints/xx-sprint-meeting-export.txt b/lib/lp/blueprints/stories/sprints/xx-sprint-meeting-export.txt
index bd1cfaf..72969d8 100644
--- a/lib/lp/blueprints/stories/sprints/xx-sprint-meeting-export.txt
+++ b/lib/lp/blueprints/stories/sprints/xx-sprint-meeting-export.txt
@@ -31,7 +31,7 @@ element name:
The attendees element contains a list of person elements.
>>> import operator
- >>> from lp.services.beautifulsoup import BeautifulSoup4 as BeautifulSoup
+ >>> from lp.services.beautifulsoup import BeautifulSoup
>>> soup = BeautifulSoup(browser.contents, 'xml')
>>> people = soup.find('attendees').findAll('person')
>>> for person in sorted(people, key=operator.itemgetter("displayname")):
diff --git a/lib/lp/bugs/browser/tests/test_bug_views.py b/lib/lp/bugs/browser/tests/test_bug_views.py
index 628a599..1877018 100644
--- a/lib/lp/bugs/browser/tests/test_bug_views.py
+++ b/lib/lp/bugs/browser/tests/test_bug_views.py
@@ -37,7 +37,7 @@ from lp.registry.interfaces.accesspolicy import (
IAccessPolicySource,
)
from lp.registry.interfaces.person import PersonVisibility
-from lp.services.beautifulsoup import BeautifulSoup4 as BeautifulSoup
+from lp.services.beautifulsoup import BeautifulSoup
from lp.services.webapp.interfaces import IOpenLaunchBag
from lp.services.webapp.publisher import canonical_url
from lp.services.webapp.servers import LaunchpadTestRequest
diff --git a/lib/lp/bugs/externalbugtracker/mantis.py b/lib/lp/bugs/externalbugtracker/mantis.py
index 0884aec..9cb8203 100644
--- a/lib/lp/bugs/externalbugtracker/mantis.py
+++ b/lib/lp/bugs/externalbugtracker/mantis.py
@@ -12,7 +12,7 @@ __all__ = [
import csv
import logging
-from BeautifulSoup import Comment
+from bs4.element import Comment
from requests.cookies import RequestsCookieJar
from six.moves.urllib_parse import (
parse_qsl,
@@ -305,8 +305,7 @@ class Mantis(ExternalBugTracker):
# specific than this.
bug_page = BeautifulSoup(
self._getPage('view.php?id=%s' % bug_id).content,
- convertEntities=BeautifulSoup.HTML_ENTITIES,
- parseOnlyThese=SoupStrainer('table'))
+ parse_only=SoupStrainer('table'))
app_error = self._checkForApplicationError(bug_page)
if app_error:
@@ -355,7 +354,7 @@ class Mantis(ExternalBugTracker):
"""
app_error = page_soup.find(
text=lambda node: (node.startswith('APPLICATION ERROR ')
- and node.parent['class'] == 'form-title'
+ and 'form-title' in node.parent.get('class', [])
and not isinstance(node, Comment)))
if app_error:
app_error_code = ''.join(c for c in app_error if c.isdigit())
diff --git a/lib/lp/bugs/externalbugtracker/sourceforge.py b/lib/lp/bugs/externalbugtracker/sourceforge.py
index 98bdf29..b61c4b1 100644
--- a/lib/lp/bugs/externalbugtracker/sourceforge.py
+++ b/lib/lp/bugs/externalbugtracker/sourceforge.py
@@ -94,7 +94,7 @@ class SourceForge(ExternalBugTracker):
query_dict = {}
bugtracker_link = soup.find('a', text='Bugs')
if bugtracker_link:
- href = bugtracker_link.findParent()['href']
+ href = bugtracker_link['href']
# We need to replace encoded ampersands in the URL since
# SourceForge occasionally encodes them.
diff --git a/lib/lp/bugs/scripts/sfremoteproductfinder.py b/lib/lp/bugs/scripts/sfremoteproductfinder.py
index 4b76d70..b6342f3 100644
--- a/lib/lp/bugs/scripts/sfremoteproductfinder.py
+++ b/lib/lp/bugs/scripts/sfremoteproductfinder.py
@@ -71,7 +71,7 @@ class SourceForgeRemoteProductFinder:
"No tracker link for project '%s'" % sf_project)
return None
- tracker_url = tracker_link.findParent()['href']
+ tracker_url = tracker_link['href']
# Clean any leading '/' from tracker_url so that urlappend
# doesn't choke on it.
@@ -91,7 +91,7 @@ class SourceForgeRemoteProductFinder:
"No bug tracker link for project '%s'" % sf_project)
return None
- bugtracker_url = bugtracker_link.findParent()['href']
+ bugtracker_url = bugtracker_link['href']
# We need to replace encoded ampersands in the URL since
# SourceForge usually encodes them.
diff --git a/lib/lp/bugs/stories/feeds/xx-bug-atom.txt b/lib/lp/bugs/stories/feeds/xx-bug-atom.txt
index 840b943..0620bef 100644
--- a/lib/lp/bugs/stories/feeds/xx-bug-atom.txt
+++ b/lib/lp/bugs/stories/feeds/xx-bug-atom.txt
@@ -4,8 +4,8 @@ Atom feeds produce XML not HTML. Therefore we must parse the output as XML
by asking BeautifulSoup to use lxml.
>>> from lp.services.beautifulsoup import (
- ... BeautifulSoup4 as BeautifulSoup,
- ... SoupStrainer4 as SoupStrainer,
+ ... BeautifulSoup,
+ ... SoupStrainer,
... )
>>> from lp.services.feeds.tests.helper import (
... parse_entries, parse_links, validate_feed)
diff --git a/lib/lp/bugs/stories/feeds/xx-bug-html.txt b/lib/lp/bugs/stories/feeds/xx-bug-html.txt
index fa02653..6d2744b 100644
--- a/lib/lp/bugs/stories/feeds/xx-bug-html.txt
+++ b/lib/lp/bugs/stories/feeds/xx-bug-html.txt
@@ -5,8 +5,8 @@ The content of an HTML feed is very similar to an Atom feed, but is formatted
as HTML instead of Atom.
>>> from lp.services.beautifulsoup import (
- ... BeautifulSoup4 as BeautifulSoup,
- ... SoupStrainer4 as SoupStrainer,
+ ... BeautifulSoup,
+ ... SoupStrainer,
... )
Define a helper function for parsing the entries:
diff --git a/lib/lp/bugs/tests/test_bugs_webservice.py b/lib/lp/bugs/tests/test_bugs_webservice.py
index 6c3ca84..b795785 100644
--- a/lib/lp/bugs/tests/test_bugs_webservice.py
+++ b/lib/lp/bugs/tests/test_bugs_webservice.py
@@ -31,7 +31,7 @@ from lp.bugs.browser.bugtask import get_comments_for_bugtask
from lp.bugs.interfaces.bug import IBug
from lp.registry.enums import BugSharingPolicy
from lp.registry.interfaces.product import License
-from lp.services.beautifulsoup import BeautifulSoup4 as BeautifulSoup
+from lp.services.beautifulsoup import BeautifulSoup
from lp.services.webapp import snapshot
from lp.services.webapp.interfaces import OAuthPermission
from lp.services.webapp.servers import LaunchpadTestRequest
diff --git a/lib/lp/code/stories/feeds/xx-branch-atom.txt b/lib/lp/code/stories/feeds/xx-branch-atom.txt
index 74db8e1..7c91bca 100644
--- a/lib/lp/code/stories/feeds/xx-branch-atom.txt
+++ b/lib/lp/code/stories/feeds/xx-branch-atom.txt
@@ -4,8 +4,8 @@ Atom feeds produce XML not HTML. Therefore we must parse the output as XML
by asking BeautifulSoup to use lxml.
>>> from lp.services.beautifulsoup import (
- ... BeautifulSoup4 as BeautifulSoup,
- ... SoupStrainer4 as SoupStrainer,
+ ... BeautifulSoup,
+ ... SoupStrainer,
... )
>>> from lp.services.feeds.tests.helper import (
... parse_ids, parse_links, validate_feed)
diff --git a/lib/lp/code/stories/feeds/xx-revision-atom.txt b/lib/lp/code/stories/feeds/xx-revision-atom.txt
index 3638b09..0fadb05 100644
--- a/lib/lp/code/stories/feeds/xx-revision-atom.txt
+++ b/lib/lp/code/stories/feeds/xx-revision-atom.txt
@@ -3,7 +3,7 @@
Atom feeds produce XML not HTML. Therefore we must parse the output as XML
by asking BeautifulSoup to use lxml.
- >>> from lp.services.beautifulsoup import BeautifulSoup4 as BeautifulSoup
+ >>> from lp.services.beautifulsoup import BeautifulSoup
>>> from lp.services.feeds.tests.helper import (
... parse_ids, parse_links, validate_feed)
diff --git a/lib/lp/registry/browser/tests/test_distroseries.py b/lib/lp/registry/browser/tests/test_distroseries.py
index 48b71ae..de45a78 100644
--- a/lib/lp/registry/browser/tests/test_distroseries.py
+++ b/lib/lp/registry/browser/tests/test_distroseries.py
@@ -1392,7 +1392,7 @@ class TestDistroSeriesLocalDifferences(TestCaseWithFactory,
parent_dsp = dsd.parent_series.distribution.getSourcePackage(
dsd.source_package_name)
expected_url = urlappend(canonical_url(parent_dsp), '+changelog')
- self.assertEqual(expected_url, link.attrs[0][1])
+ self.assertEqual(expected_url, link['href'])
def test_getUpgrades_shows_updates_in_parent(self):
# The view's getUpgrades methods lists packages that can be
diff --git a/lib/lp/registry/browser/tests/test_distroseriesdifference_views.py b/lib/lp/registry/browser/tests/test_distroseriesdifference_views.py
index 5ea95af..a436fed 100644
--- a/lib/lp/registry/browser/tests/test_distroseriesdifference_views.py
+++ b/lib/lp/registry/browser/tests/test_distroseriesdifference_views.py
@@ -511,9 +511,9 @@ class DistroSeriesDifferenceTemplateTestCase(TestCaseWithFactory):
soup = BeautifulSoup(view())
self.assertEqual(
- 1, len(soup.findAll('pre', text="I'm working on this.")))
+ 1, len(soup.findAll('p', text="I'm working on this.")))
self.assertEqual(
- 1, len(soup.findAll('pre', text="Here's another comment.")))
+ 1, len(soup.findAll('p', text="Here's another comment.")))
def test_last_common_version_is_linked(self):
# The "Last Common Version" version text should link to the
diff --git a/lib/lp/registry/doc/product-widgets.txt b/lib/lp/registry/doc/product-widgets.txt
index f5fb0e6..dd2e6c5 100644
--- a/lib/lp/registry/doc/product-widgets.txt
+++ b/lib/lp/registry/doc/product-widgets.txt
@@ -43,7 +43,7 @@ Firefox has not yet selected a bug tracker.
None
>>> from bs4.element import Tag
- >>> from lp.services.beautifulsoup import BeautifulSoup4 as BeautifulSoup
+ >>> from lp.services.beautifulsoup import BeautifulSoup
>>> from lp.testing.pages import extract_text
>>> def print_items(html):
... soup = BeautifulSoup(html)
diff --git a/lib/lp/registry/stories/announcements/xx-announcements.txt b/lib/lp/registry/stories/announcements/xx-announcements.txt
index 3a8008d..ca0f13e 100644
--- a/lib/lp/registry/stories/announcements/xx-announcements.txt
+++ b/lib/lp/registry/stories/announcements/xx-announcements.txt
@@ -7,8 +7,8 @@ dedicated batched page showing all announcements, and as an RSS/Atom
news feed.
>>> from lp.services.beautifulsoup import (
- ... BeautifulSoup4 as BeautifulSoup,
- ... SoupStrainer4 as SoupStrainer,
+ ... BeautifulSoup,
+ ... SoupStrainer,
... )
>>> from lp.services.feeds.tests.helper import (
... parse_ids, parse_links, validate_feed)
diff --git a/lib/lp/registry/stories/person/xx-person-rdf.txt b/lib/lp/registry/stories/person/xx-person-rdf.txt
index 04269cd..849853f 100644
--- a/lib/lp/registry/stories/person/xx-person-rdf.txt
+++ b/lib/lp/registry/stories/person/xx-person-rdf.txt
@@ -4,8 +4,8 @@ Person RDF Pages
We export FOAF RDF metadata from the /~Person.name/+index document.
>>> from lp.services.beautifulsoup import (
- ... BeautifulSoup4 as BeautifulSoup,
- ... SoupStrainer4 as SoupStrainer,
+ ... BeautifulSoup,
+ ... SoupStrainer,
... )
>>> anon_browser.open("http://launchpad.test/~name16")
>>> strainer = SoupStrainer(['link'], {'type': ['application/rdf+xml']})
diff --git a/lib/lp/registry/stories/team/xx-team-contactemail.txt b/lib/lp/registry/stories/team/xx-team-contactemail.txt
index 04b5804..2d7f77a 100644
--- a/lib/lp/registry/stories/team/xx-team-contactemail.txt
+++ b/lib/lp/registry/stories/team/xx-team-contactemail.txt
@@ -24,7 +24,7 @@ send notifications to that team. The possible contact methods are:
A warning is rendered about the privacy implications of using a mailing list or
external contact address.
- >>> from lp.services.beautifulsoup import BeautifulSoup4 as BeautifulSoup
+ >>> from lp.services.beautifulsoup import BeautifulSoup
>>> soup = BeautifulSoup(browser.contents)
>>> print(soup.find(id='email-warning').decode())
<p ... Email sent to a mailing list or external contact address may ...
diff --git a/lib/lp/services/beautifulsoup.py b/lib/lp/services/beautifulsoup.py
index 68daa36..5a1862a 100644
--- a/lib/lp/services/beautifulsoup.py
+++ b/lib/lp/services/beautifulsoup.py
@@ -1,43 +1,25 @@
# Copyright 2017-2019 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
-"""Beautiful Soup wrapper for Launchpad.
-
-With Beautiful Soup 3, this is mostly for future migration convenience.
-With Beautiful Soup 4, it does a little more work to avoid warnings.
-"""
+"""Beautiful Soup wrapper for Launchpad."""
from __future__ import absolute_import, print_function, unicode_literals
__metaclass__ = type
__all__ = [
'BeautifulSoup',
- 'BeautifulSoup4',
'SoupStrainer',
- 'SoupStrainer4',
]
-from BeautifulSoup import (
- BeautifulSoup as _BeautifulSoup,
- SoupStrainer,
- )
-from bs4 import BeautifulSoup as _BeautifulSoup4
-from bs4.element import SoupStrainer as SoupStrainer4
+from bs4 import BeautifulSoup as _BeautifulSoup
+from bs4.element import SoupStrainer
class BeautifulSoup(_BeautifulSoup):
- def __init__(self, markup="", **kwargs):
- if not isinstance(markup, unicode) and "fromEncoding" not in kwargs:
- kwargs["fromEncoding"] = "UTF-8"
- super(BeautifulSoup, self).__init__(markup=markup, **kwargs)
-
-
-class BeautifulSoup4(_BeautifulSoup4):
-
def __init__(self, markup="", features="html.parser", **kwargs):
if not isinstance(markup, unicode) and "from_encoding" not in kwargs:
kwargs["from_encoding"] = "UTF-8"
- super(BeautifulSoup4, self).__init__(
+ super(BeautifulSoup, self).__init__(
markup=markup, features=features, **kwargs)
diff --git a/lib/lp/services/feeds/feed.py b/lib/lp/services/feeds/feed.py
index 143121c..6b041a9 100644
--- a/lib/lp/services/feeds/feed.py
+++ b/lib/lp/services/feeds/feed.py
@@ -27,7 +27,7 @@ from zope.component import getUtility
from zope.datetime import rfc1123_date
from zope.interface import implementer
-from lp.services.beautifulsoup import BeautifulSoup4 as BeautifulSoup
+from lp.services.beautifulsoup import BeautifulSoup
from lp.services.config import config
from lp.services.feeds.interfaces.feed import (
IFeed,
diff --git a/lib/lp/services/feeds/stories/xx-links.txt b/lib/lp/services/feeds/stories/xx-links.txt
index 23e467b..6f0b0ba 100644
--- a/lib/lp/services/feeds/stories/xx-links.txt
+++ b/lib/lp/services/feeds/stories/xx-links.txt
@@ -11,7 +11,7 @@ launchpad.test to provide links to corresponding Atom feeds.
The root launchpad.test url will have a link to the Atom feed which
displays the most recent announcements for all the projects.
- >>> from lp.services.beautifulsoup import BeautifulSoup4 as BeautifulSoup
+ >>> from lp.services.beautifulsoup import BeautifulSoup
>>> browser.open('http://launchpad.test/')
>>> soup = BeautifulSoup(browser.contents)
>>> soup.head.findAll('link', type='application/atom+xml')
diff --git a/lib/lp/services/feeds/stories/xx-security.txt b/lib/lp/services/feeds/stories/xx-security.txt
index 2e441df..e19cb85 100644
--- a/lib/lp/services/feeds/stories/xx-security.txt
+++ b/lib/lp/services/feeds/stories/xx-security.txt
@@ -8,7 +8,7 @@ First, set all the bugs to private.
>>> from zope.security.interfaces import Unauthorized
>>> from lp.app.enums import InformationType
>>> from lp.bugs.model.bug import Bug
- >>> from lp.services.beautifulsoup import BeautifulSoup4 as BeautifulSoup
+ >>> from lp.services.beautifulsoup import BeautifulSoup
>>> from lp.services.database.interfaces import IStore
>>> IStore(Bug).find(Bug).set(information_type=InformationType.USERDATA)
>>> transaction.commit()
diff --git a/lib/lp/services/feeds/tests/helper.py b/lib/lp/services/feeds/tests/helper.py
index 826df83..ca7c741 100644
--- a/lib/lp/services/feeds/tests/helper.py
+++ b/lib/lp/services/feeds/tests/helper.py
@@ -33,8 +33,8 @@ from zope.interface import (
)
from lp.services.beautifulsoup import (
- BeautifulSoup4 as BeautifulSoup,
- SoupStrainer4 as SoupStrainer,
+ BeautifulSoup,
+ SoupStrainer,
)
from lp.services.webapp.publisher import LaunchpadView
diff --git a/lib/lp/services/oauth/doc/oauth-pages.txt b/lib/lp/services/oauth/doc/oauth-pages.txt
index da25341..14d20a6 100644
--- a/lib/lp/services/oauth/doc/oauth-pages.txt
+++ b/lib/lp/services/oauth/doc/oauth-pages.txt
@@ -26,8 +26,8 @@ consumer's request to access Launchpad on their behalf.
... return view, token
>>> from lp.services.beautifulsoup import (
- ... BeautifulSoup4 as BeautifulSoup,
- ... SoupStrainer4 as SoupStrainer,
+ ... BeautifulSoup,
+ ... SoupStrainer,
... )
>>> def print_hidden_fields(html):
... soup = BeautifulSoup(
diff --git a/lib/lp/soyuz/browser/tests/test_archive_packages.py b/lib/lp/soyuz/browser/tests/test_archive_packages.py
index e6062ba..c18024b 100644
--- a/lib/lp/soyuz/browser/tests/test_archive_packages.py
+++ b/lib/lp/soyuz/browser/tests/test_archive_packages.py
@@ -26,7 +26,7 @@ from zope.security.proxy import removeSecurityProxy
from lp.app.utilities.celebrities import ILaunchpadCelebrities
from lp.buildmaster.enums import BuildStatus
from lp.registry.interfaces.pocket import PackagePublishingPocket
-from lp.services.beautifulsoup import BeautifulSoup4 as BeautifulSoup
+from lp.services.beautifulsoup import BeautifulSoup
from lp.services.webapp import canonical_url
from lp.services.webapp.authentication import LaunchpadPrincipal
from lp.soyuz.browser.archive import ArchiveNavigationMenu
diff --git a/lib/lp/testing/pages.py b/lib/lp/testing/pages.py
index facbfc1..073f5b8 100644
--- a/lib/lp/testing/pages.py
+++ b/lib/lp/testing/pages.py
@@ -18,25 +18,16 @@ import re
import unittest
from urlparse import urljoin
-from BeautifulSoup import (
+from bs4.element import (
CData,
Comment,
Declaration,
+ Doctype,
NavigableString,
PageElement,
ProcessingInstruction,
Tag,
)
-from bs4.element import (
- CData as CData4,
- Comment as Comment4,
- Declaration as Declaration4,
- Doctype as Doctype4,
- NavigableString as NavigableString4,
- PageElement as PageElement4,
- ProcessingInstruction as ProcessingInstruction4,
- Tag as Tag4,
- )
from contrib.oauth import (
OAuthConsumer,
OAuthRequest,
@@ -75,8 +66,8 @@ from lp.app.interfaces.launchpad import ILaunchpadCelebrities
from lp.registry.errors import NameAlreadyTaken
from lp.registry.interfaces.teammembership import TeamMembershipStatus
from lp.services.beautifulsoup import (
- BeautifulSoup4 as BeautifulSoup,
- SoupStrainer4 as SoupStrainer,
+ BeautifulSoup,
+ SoupStrainer,
)
from lp.services.config import config
from lp.services.encoding import wsgi_native_string
@@ -247,8 +238,6 @@ class DuplicateIdError(Exception):
def find_tag_by_id(content, id):
"""Find and return the tag with the given ID"""
if isinstance(content, PageElement):
- elements_with_id = content.findAll(True, {'id': id})
- elif isinstance(content, PageElement4):
elements_with_id = content.find_all(True, {'id': id})
else:
elements_with_id = [
@@ -393,8 +382,7 @@ def strip_label(label):
IGNORED_ELEMENTS = [
- Comment, Declaration, ProcessingInstruction,
- Comment4, Declaration4, Doctype4, ProcessingInstruction4,
+ Comment, Declaration, Doctype, ProcessingInstruction,
]
ELEMENTS_INTRODUCING_NEWLINE = [
'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'ul', 'ol', 'pre', 'dl',
@@ -415,7 +403,7 @@ def extract_link_from_tag(tag, base=None):
A `tag` should contain a 'href' attribute, and `base` will commonly
be extracted from browser.url.
"""
- if not isinstance(tag, (PageElement, PageElement4)):
+ if not isinstance(tag, PageElement):
link = BeautifulSoup(tag)
else:
link = tag
@@ -437,7 +425,7 @@ def extract_text(content, extract_image_text=False, skip_tags=None,
"""
if skip_tags is None:
skip_tags = ['script']
- if not isinstance(content, (PageElement, PageElement4)):
+ if not isinstance(content, PageElement):
soup = BeautifulSoup(content)
else:
soup = content
@@ -449,36 +437,13 @@ def extract_text(content, extract_image_text=False, skip_tags=None,
if type(node) in IGNORED_ELEMENTS:
continue
elif isinstance(node, CData):
- # CData inherits from NavigableString which inherits from unicode,
- # but contains a __unicode__() method that calls __str__() that
- # wraps the contents in <![CDATA[...]]>. In Python 2.4, calling
- # unicode(cdata_instance) copies the data directly so the wrapping
- # does not happen. Python 2.5 changed the unicode() function (C
- # function PyObject_Unicode) to call its operand's __unicode__()
- # method, which ends up calling CData.__str__() and the wrapping
- # happens. We don't want our test output to have to deal with the
- # <![CDATA[...]]> wrapper.
- #
- # The CData class does not override slicing though, so by slicing
- # node first, we're effectively turning it into a concrete unicode
- # instance, which does not wrap the contents when its
- # __unicode__() is called of course. We could remove the
- # unicode() call here, but we keep it for consistency and clarity
- # purposes.
- result.append(unicode(node[:]))
- elif isinstance(node, CData4):
result.append(unicode(node))
elif isinstance(node, NavigableString):
- result.append(unicode(node))
- elif isinstance(node, NavigableString4):
result.append(node.format_string(node, formatter=formatter))
else:
- if isinstance(node, (Tag, Tag4)):
+ if isinstance(node, Tag):
# If the node has the class "sortkey" then it is invisible.
- if isinstance(node, Tag) and node.get('class') == 'sortkey':
- continue
- elif (isinstance(node, Tag4) and
- node.get('class') == ['sortkey']):
+ if node.get('class') == ['sortkey']:
continue
elif getattr(node, 'name', '') in skip_tags:
continue
@@ -688,12 +653,8 @@ def print_location_apps(contents):
else:
for tab in location_apps:
tab_text = extract_text(tab)
- if isinstance(tab['class'], list): # BeautifulSoup 4
- if 'active' in tab['class']:
- tab_text += ' (selected)'
- else: # BeautifulSoup 3
- if tab['class'].find('active') != -1:
- tab_text += ' (selected)'
+ if 'active' in tab['class']:
+ tab_text += ' (selected)'
if tab.a:
link = tab.a['href']
else:
@@ -741,9 +702,9 @@ class Browser(_Browser):
def _getText(self, element):
def get_strings(elem):
for descendant in elem.descendants:
- if isinstance(descendant, (NavigableString4, CData4)):
+ if isinstance(descendant, (NavigableString, CData)):
yield descendant
- elif isinstance(descendant, Tag4) and descendant.name == 'img':
+ elif isinstance(descendant, Tag) and descendant.name == 'img':
yield u'%s[%s]' % (
descendant.get('alt', u''), descendant.name.upper())
diff --git a/setup.py b/setup.py
index cdc2960..0ab1ce4 100644
--- a/setup.py
+++ b/setup.py
@@ -147,7 +147,6 @@ setup(
'auditorclient',
'auditorfixture',
'backports.lzma',
- 'BeautifulSoup',
'beautifulsoup4[lxml]',
'breezy',
'bzr',