← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~cjwatson/launchpad/beautifulsoup-wrapper into lp:launchpad

 

Colin Watson has proposed merging lp:~cjwatson/launchpad/beautifulsoup-wrapper into lp:launchpad.

Commit message:
Add an lp.services.beautifulsoup wrapper for BeautifulSoup, to make it easier to port to beautifulsoup4 in future.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/beautifulsoup-wrapper/+merge/332599

I recently had a look at what it would take to port to Beautiful Soup 4, since that newer version supports Python 3 and is the one that's currently maintained.  As well as changing which package needs to be imported, BS4 has some changes that mean we'll probably want to wrap it slightly for Launchpad anyway, for example so that we can pick a single preferred parser class throughout our codebase rather than having to select one every time we create a BeautifulSoup object.  This kills at least one-and-a-half birds with one stone by running most things through a common wrapper.

(I've left the instantiations of BeautifulStoneSoup as they are for the time being.  In BS4, those will change to `BeautifulSoup(markup, "xml")`.)
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/beautifulsoup-wrapper into lp:launchpad.
=== modified file 'lib/lp/answers/browser/tests/test_questiontarget.py'
--- lib/lp/answers/browser/tests/test_questiontarget.py	2015-01-29 16:28:30 +0000
+++ lib/lp/answers/browser/tests/test_questiontarget.py	2017-10-21 19:02:28 +0000
@@ -8,7 +8,6 @@
 import os
 from urllib import quote
 
-from BeautifulSoup import BeautifulSoup
 from lazr.restful.interfaces import (
     IJSONRequestCache,
     IWebServiceClientRequest,
@@ -22,6 +21,7 @@
 from lp.app.enums import ServiceUsage
 from lp.app.interfaces.launchpad import ILaunchpadCelebrities
 from lp.registry.interfaces.person import IPersonSet
+from lp.services.beautifulsoup import BeautifulSoup
 from lp.services.webapp import canonical_url
 from lp.services.worlddata.interfaces.language import ILanguageSet
 from lp.testing import (

=== modified file 'lib/lp/answers/tests/test_question_webservice.py'
--- lib/lp/answers/tests/test_question_webservice.py	2016-05-04 09:23:44 +0000
+++ lib/lp/answers/tests/test_question_webservice.py	2017-10-21 19:02:28 +0000
@@ -10,7 +10,6 @@
     timedelta,
     )
 
-from BeautifulSoup import BeautifulSoup
 from lazr.restfulclient.errors import HTTPError
 import pytz
 from simplejson import dumps
@@ -28,6 +27,7 @@
     NotQuestionOwnerError,
     QuestionTargetError,
     )
+from lp.services.beautifulsoup import BeautifulSoup
 from lp.services.webapp.interfaces import OAuthPermission
 from lp.testing import (
     admin_logged_in,

=== modified file 'lib/lp/app/browser/tests/test_base_layout.py'
--- lib/lp/app/browser/tests/test_base_layout.py	2014-11-28 05:40:54 +0000
+++ lib/lp/app/browser/tests/test_base_layout.py	2017-10-21 19:02:28 +0000
@@ -13,10 +13,10 @@
 
 __metaclass__ = type
 
-from BeautifulSoup import BeautifulSoup
 from z3c.ptcompat import ViewPageTemplateFile
 
 from lp.registry.interfaces.person import PersonVisibility
+from lp.services.beautifulsoup import BeautifulSoup
 from lp.services.webapp.publisher import LaunchpadView
 from lp.services.webapp.servers import LaunchpadTestRequest
 from lp.testing import (

=== modified file 'lib/lp/app/browser/tests/test_launchpadroot.py'
--- lib/lp/app/browser/tests/test_launchpadroot.py	2013-05-23 04:32:00 +0000
+++ lib/lp/app/browser/tests/test_launchpadroot.py	2017-10-21 19:02:28 +0000
@@ -6,16 +6,16 @@
 __metaclass__ = type
 
 
-from BeautifulSoup import (
-    BeautifulSoup,
-    SoupStrainer,
-    )
 from fixtures import FakeLogger
 from zope.component import getUtility
 from zope.security.checker import selectChecker
 
 from lp.app.interfaces.launchpad import ILaunchpadCelebrities
 from lp.registry.interfaces.person import IPersonSet
+from lp.services.beautifulsoup import (
+    BeautifulSoup,
+    SoupStrainer,
+    )
 from lp.services.config import config
 from lp.services.features.testing import FeatureFixture
 from lp.services.memcache.interfaces import IMemcacheClient

=== modified file 'lib/lp/app/doc/hierarchical-menu.txt'
--- lib/lp/app/doc/hierarchical-menu.txt	2015-08-04 11:17:11 +0000
+++ lib/lp/app/doc/hierarchical-menu.txt	2017-10-21 19:02:28 +0000
@@ -248,7 +248,7 @@
 The Hierarchy object is responsible for rendering the HTML for the
 location bar.
 
-    >>> from BeautifulSoup import BeautifulSoup
+    >>> from lp.services.beautifulsoup import BeautifulSoup
     >>> from lp.testing.pages import extract_text
 
     # Borrowed from lp.testing.pages.print_location()

=== modified file 'lib/lp/app/doc/launchpadform.txt'
--- lib/lp/app/doc/launchpadform.txt	2015-07-08 16:05:11 +0000
+++ lib/lp/app/doc/launchpadform.txt	2017-10-21 19:02:28 +0000
@@ -486,7 +486,7 @@
   >>> request = LaunchpadTestRequest()
   >>> view = TestWidgetVisibility(context, request)
 
-  >>> from BeautifulSoup import BeautifulSoup
+  >>> from lp.services.beautifulsoup import BeautifulSoup
   >>> soup = BeautifulSoup(view())
   >>> for input in soup.findAll('input'):
   ...     print input

=== modified file 'lib/lp/app/doc/tales.txt'
--- lib/lp/app/doc/tales.txt	2016-04-28 01:41:10 +0000
+++ lib/lp/app/doc/tales.txt	2017-10-21 19:02:28 +0000
@@ -666,7 +666,7 @@
     # First we define a helper function for printing the links together
     # with their titles.
 
-    >>> from BeautifulSoup import BeautifulSoup
+    >>> from lp.services.beautifulsoup import BeautifulSoup
     >>> def print_hrefs_with_titles(html):
     ...     soup = BeautifulSoup(html)
     ...     for link in soup.findAll('a'):

=== modified file 'lib/lp/app/widgets/doc/checkbox-matrix-widget.txt'
--- lib/lp/app/widgets/doc/checkbox-matrix-widget.txt	2012-05-24 21:26:57 +0000
+++ lib/lp/app/widgets/doc/checkbox-matrix-widget.txt	2017-10-21 19:02:28 +0000
@@ -8,7 +8,7 @@
 This widget is created to allow many options to be displayed in a single
 page.
 
-    >>> from BeautifulSoup import BeautifulSoup
+    >>> from lp.services.beautifulsoup import BeautifulSoup
     >>> from lp.services.webapp.servers import LaunchpadTestRequest
     >>> from lp.registry.interfaces.product import (
     ...     IProduct,

=== modified file 'lib/lp/app/widgets/doc/image-widget.txt'
--- lib/lp/app/widgets/doc/image-widget.txt	2016-01-26 15:47:37 +0000
+++ lib/lp/app/widgets/doc/image-widget.txt	2017-10-21 19:02:28 +0000
@@ -59,7 +59,7 @@
     >>> widget = ImageChangeWidget(
     ...     person_logo, LaunchpadTestRequest(), edit_style)
 
-    >>> from BeautifulSoup import BeautifulSoup
+    >>> from lp.services.beautifulsoup import BeautifulSoup
     >>> html = widget()
     >>> print BeautifulSoup(html).find('img').get('src')
     /@@/person-logo

=== modified file 'lib/lp/app/widgets/doc/launchpad-radio-widget.txt'
--- lib/lp/app/widgets/doc/launchpad-radio-widget.txt	2016-10-05 09:36:40 +0000
+++ lib/lp/app/widgets/doc/launchpad-radio-widget.txt	2017-10-21 19:02:28 +0000
@@ -10,7 +10,7 @@
 The LaunchpadRadioWidget is mostly used to display items from
 an enumerated type.
 
-    >>> from BeautifulSoup import BeautifulSoup
+    >>> from lp.services.beautifulsoup import BeautifulSoup
     >>> from lp.services.webapp.servers import LaunchpadTestRequest
     >>> from lp.code.interfaces.branch import IBranch
     >>> branch = factory.makeAnyBranch()

=== modified file 'lib/lp/app/widgets/tests/test_launchpadtarget.py'
--- lib/lp/app/widgets/tests/test_launchpadtarget.py	2016-07-27 17:19:20 +0000
+++ lib/lp/app/widgets/tests/test_launchpadtarget.py	2017-10-21 19:02:28 +0000
@@ -5,7 +5,6 @@
 
 import re
 
-from BeautifulSoup import BeautifulSoup
 from lazr.restful.fields import Reference
 from zope.formlib.interfaces import (
     IBrowserWidget,
@@ -24,6 +23,7 @@
     DistributionVocabulary,
     ProductVocabulary,
     )
+from lp.services.beautifulsoup import BeautifulSoup
 from lp.services.features.testing import FeatureFixture
 from lp.services.webapp.escaping import html_escape
 from lp.services.webapp.servers import LaunchpadTestRequest

=== modified file 'lib/lp/blueprints/browser/tests/test_specification.py'
--- lib/lp/blueprints/browser/tests/test_specification.py	2017-05-08 11:38:20 +0000
+++ lib/lp/blueprints/browser/tests/test_specification.py	2017-10-21 19:02:28 +0000
@@ -8,7 +8,6 @@
 import re
 import unittest
 
-from BeautifulSoup import BeautifulSoup
 from fixtures import FakeLogger
 from lazr.restful.interfaces import IJSONRequestCache
 import pytz
@@ -36,6 +35,7 @@
 from lp.registry.enums import SpecificationSharingPolicy
 from lp.registry.interfaces.person import PersonVisibility
 from lp.registry.interfaces.product import IProductSeries
+from lp.services.beautifulsoup import BeautifulSoup
 from lp.services.webapp.escaping import html_escape
 from lp.services.webapp.interaction import ANONYMOUS
 from lp.services.webapp.interfaces import BrowserNotificationLevel

=== modified file 'lib/lp/blueprints/browser/tests/test_specificationtarget.py'
--- lib/lp/blueprints/browser/tests/test_specificationtarget.py	2015-01-29 14:14:01 +0000
+++ lib/lp/blueprints/browser/tests/test_specificationtarget.py	2017-10-21 19:02:28 +0000
@@ -4,7 +4,6 @@
 __metaclass__ = type
 
 
-from BeautifulSoup import BeautifulSoup
 from fixtures import FakeLogger
 from zope.component import getUtility
 from zope.security.proxy import removeSecurityProxy
@@ -19,6 +18,7 @@
     IHasSpecifications,
     ISpecificationTarget,
     )
+from lp.services.beautifulsoup import BeautifulSoup
 from lp.testing import (
     BrowserTestCase,
     login_person,

=== modified file 'lib/lp/bugs/browser/tests/test_bug_views.py'
--- lib/lp/bugs/browser/tests/test_bug_views.py	2015-09-11 07:20:31 +0000
+++ lib/lp/bugs/browser/tests/test_bug_views.py	2017-10-21 19:02:28 +0000
@@ -11,7 +11,6 @@
     )
 import re
 
-from BeautifulSoup import BeautifulSoup
 import pytz
 import simplejson
 from soupmatchers import (
@@ -38,6 +37,7 @@
     IAccessPolicySource,
     )
 from lp.registry.interfaces.person import PersonVisibility
+from lp.services.beautifulsoup import BeautifulSoup
 from lp.services.webapp.interfaces import IOpenLaunchBag
 from lp.services.webapp.publisher import canonical_url
 from lp.services.webapp.servers import LaunchpadTestRequest

=== modified file 'lib/lp/bugs/browser/tests/test_bugtarget_filebug.py'
--- lib/lp/bugs/browser/tests/test_bugtarget_filebug.py	2016-07-30 01:10:27 +0000
+++ lib/lp/bugs/browser/tests/test_bugtarget_filebug.py	2017-10-21 19:02:28 +0000
@@ -5,7 +5,6 @@
 
 from textwrap import dedent
 
-from BeautifulSoup import BeautifulSoup
 from lazr.restful.interfaces import IJSONRequestCache
 from testscenarios import (
     load_tests_apply_scenarios,
@@ -36,6 +35,7 @@
     )
 from lp.registry.enums import BugSharingPolicy
 from lp.registry.interfaces.projectgroup import IProjectGroup
+from lp.services.beautifulsoup import BeautifulSoup
 from lp.services.features.testing import FeatureFixture
 from lp.services.temporaryblobstorage.interfaces import (
     ITemporaryStorageManager,

=== modified file 'lib/lp/bugs/browser/tests/test_bugtask.py'
--- lib/lp/bugs/browser/tests/test_bugtask.py	2017-04-12 15:34:45 +0000
+++ lib/lp/bugs/browser/tests/test_bugtask.py	2017-10-21 19:02:28 +0000
@@ -10,7 +10,6 @@
 import re
 import urllib
 
-from BeautifulSoup import BeautifulSoup
 from lazr.lifecycle.event import ObjectModifiedEvent
 from lazr.lifecycle.snapshot import Snapshot
 from lazr.restful.interfaces import IJSONRequestCache
@@ -65,6 +64,7 @@
     )
 from lp.registry.enums import BugSharingPolicy
 from lp.registry.interfaces.person import PersonVisibility
+from lp.services.beautifulsoup import BeautifulSoup
 from lp.services.config import config
 from lp.services.database.constants import UTC_NOW
 from lp.services.features.testing import FeatureFixture

=== modified file 'lib/lp/bugs/doc/bugtask-bugwatch-widget.txt'
--- lib/lp/bugs/doc/bugtask-bugwatch-widget.txt	2011-12-24 17:49:30 +0000
+++ lib/lp/bugs/doc/bugtask-bugwatch-widget.txt	2017-10-21 19:02:28 +0000
@@ -32,7 +32,7 @@
 also renders an option for creating a new bug watch. Let's
 define a helper function to make it easier to see what's going on.
 
-    >>> from BeautifulSoup import BeautifulSoup
+    >>> from lp.services.beautifulsoup import BeautifulSoup
     >>> def print_item(item):
     ...     soup = BeautifulSoup(item)
     ...     input_td, label_td = soup('td')

=== modified file 'lib/lp/bugs/externalbugtracker/mantis.py'
--- lib/lp/bugs/externalbugtracker/mantis.py	2015-09-28 17:38:45 +0000
+++ lib/lp/bugs/externalbugtracker/mantis.py	2017-10-21 19:02:28 +0000
@@ -13,11 +13,7 @@
 import urllib2
 from urlparse import urlunparse
 
-from BeautifulSoup import (
-    BeautifulSoup,
-    Comment,
-    SoupStrainer,
-    )
+from BeautifulSoup import Comment
 
 from lp.bugs.externalbugtracker import (
     BugNotFound,
@@ -34,6 +30,10 @@
     BugTaskStatus,
     )
 from lp.bugs.interfaces.externalbugtracker import UNKNOWN_REMOTE_IMPORTANCE
+from lp.services.beautifulsoup import (
+    BeautifulSoup,
+    SoupStrainer,
+    )
 from lp.services.propertycache import cachedproperty
 from lp.services.webapp.url import urlparse
 

=== modified file 'lib/lp/bugs/externalbugtracker/sourceforge.py'
--- lib/lp/bugs/externalbugtracker/sourceforge.py	2012-01-01 02:58:52 +0000
+++ lib/lp/bugs/externalbugtracker/sourceforge.py	2017-10-21 19:02:28 +0000
@@ -9,8 +9,6 @@
 import re
 import urllib
 
-from BeautifulSoup import BeautifulSoup
-
 from lp.bugs.externalbugtracker import (
     BugNotFound,
     ExternalBugTracker,
@@ -25,6 +23,7 @@
     BugTaskStatus,
     )
 from lp.bugs.interfaces.externalbugtracker import UNKNOWN_REMOTE_IMPORTANCE
+from lp.services.beautifulsoup import BeautifulSoup
 from lp.services.webapp import urlsplit
 
 

=== modified file 'lib/lp/bugs/scripts/bzremotecomponentfinder.py'
--- lib/lp/bugs/scripts/bzremotecomponentfinder.py	2013-06-20 05:50:00 +0000
+++ lib/lp/bugs/scripts/bzremotecomponentfinder.py	2017-10-21 19:02:28 +0000
@@ -15,7 +15,6 @@
     urlopen,
     )
 
-from BeautifulSoup import BeautifulSoup
 import transaction
 from zope.component import getUtility
 
@@ -24,6 +23,7 @@
     IBugTrackerSet,
     )
 from lp.bugs.model.bugtracker import BugTrackerComponent
+from lp.services.beautifulsoup import BeautifulSoup
 from lp.services.database import bulk
 from lp.services.database.interfaces import IStore
 from lp.services.scripts.logger import log as default_log

=== modified file 'lib/lp/bugs/scripts/sfremoteproductfinder.py'
--- lib/lp/bugs/scripts/sfremoteproductfinder.py	2012-06-29 08:40:05 +0000
+++ lib/lp/bugs/scripts/sfremoteproductfinder.py	2017-10-21 19:02:28 +0000
@@ -14,11 +14,11 @@
     urlopen,
     )
 
-from BeautifulSoup import BeautifulSoup
 from zope.component import getUtility
 
 from lp.app.interfaces.launchpad import ILaunchpadCelebrities
 from lp.registry.interfaces.product import IProductSet
+from lp.services.beautifulsoup import BeautifulSoup
 from lp.services.scripts.logger import log as default_log
 from lp.services.webapp import (
     urlappend,

=== modified file 'lib/lp/bugs/stories/feeds/xx-bug-html.txt'
--- lib/lp/bugs/stories/feeds/xx-bug-html.txt	2017-05-17 06:45:49 +0000
+++ lib/lp/bugs/stories/feeds/xx-bug-html.txt	2017-10-21 19:02:28 +0000
@@ -4,7 +4,7 @@
 The content of an HTML feed is very similar to an Atom feed, but is formatted
 as HTML instead of Atom.
 
-    >>> from BeautifulSoup import (
+    >>> from lp.services.beautifulsoup import (
     ...     BeautifulSoup,
     ...     SoupStrainer,
     ...     )

=== modified file 'lib/lp/bugs/tests/bug.py'
--- lib/lp/bugs/tests/bug.py	2012-10-02 06:36:44 +0000
+++ lib/lp/bugs/tests/bug.py	2017-10-21 19:02:28 +0000
@@ -11,7 +11,6 @@
 import re
 import textwrap
 
-from BeautifulSoup import BeautifulSoup
 from pytz import UTC
 from zope.component import getUtility
 from zope.security.proxy import removeSecurityProxy
@@ -29,6 +28,7 @@
 from lp.registry.interfaces.distribution import IDistributionSet
 from lp.registry.interfaces.person import IPersonSet
 from lp.registry.interfaces.product import IProductSet
+from lp.services.beautifulsoup import BeautifulSoup
 from lp.services.config import config
 from lp.testing.pages import (
     extract_text,

=== modified file 'lib/lp/bugs/tests/test_bugs_webservice.py'
--- lib/lp/bugs/tests/test_bugs_webservice.py	2015-09-29 01:38:34 +0000
+++ lib/lp/bugs/tests/test_bugs_webservice.py	2017-10-21 19:02:28 +0000
@@ -11,7 +11,6 @@
     )
 import re
 
-from BeautifulSoup import BeautifulSoup
 from lazr.lifecycle.interfaces import IDoNotSnapshot
 from lazr.restfulclient.errors import (
     BadRequest,
@@ -32,6 +31,7 @@
 from lp.bugs.interfaces.bug import IBug
 from lp.registry.enums import BugSharingPolicy
 from lp.registry.interfaces.product import License
+from lp.services.beautifulsoup import BeautifulSoup
 from lp.services.webapp import snapshot
 from lp.services.webapp.interfaces import OAuthPermission
 from lp.services.webapp.servers import LaunchpadTestRequest

=== modified file 'lib/lp/code/browser/codeimport.py'
--- lib/lp/code/browser/codeimport.py	2016-11-12 02:24:09 +0000
+++ lib/lp/code/browser/codeimport.py	2017-10-21 19:02:28 +0000
@@ -22,7 +22,6 @@
 from textwrap import dedent
 from urlparse import urlparse
 
-from BeautifulSoup import BeautifulSoup
 from lazr.restful.interface import (
     copy_field,
     use_template,
@@ -91,6 +90,7 @@
     )
 from lp.registry.interfaces.product import IProduct
 from lp.registry.interfaces.role import IPersonRoles
+from lp.services.beautifulsoup import BeautifulSoup
 from lp.services.fields import URIField
 from lp.services.propertycache import cachedproperty
 from lp.services.webapp import (

=== modified file 'lib/lp/code/browser/tests/test_branch.py'
--- lib/lp/code/browser/tests/test_branch.py	2017-10-04 01:16:22 +0000
+++ lib/lp/code/browser/tests/test_branch.py	2017-10-21 19:02:28 +0000
@@ -10,7 +10,6 @@
 from datetime import datetime
 from textwrap import dedent
 
-from BeautifulSoup import BeautifulSoup
 from fixtures import FakeLogger
 import pytz
 from storm.store import Store
@@ -36,6 +35,7 @@
 from lp.registry.enums import BranchSharingPolicy
 from lp.registry.interfaces.accesspolicy import IAccessPolicySource
 from lp.registry.interfaces.person import PersonVisibility
+from lp.services.beautifulsoup import BeautifulSoup
 from lp.services.config import config
 from lp.services.database.constants import UTC_NOW
 from lp.services.helpers import truncate_text

=== modified file 'lib/lp/code/browser/tests/test_gitlisting.py'
--- lib/lp/code/browser/tests/test_gitlisting.py	2017-10-04 01:16:22 +0000
+++ lib/lp/code/browser/tests/test_gitlisting.py	2017-10-21 19:02:28 +0000
@@ -7,7 +7,6 @@
 
 __metaclass__ = type
 
-from BeautifulSoup import BeautifulSoup
 from zope.component import getUtility
 
 from lp.app.enums import InformationType
@@ -16,6 +15,7 @@
     PersonDistributionSourcePackage,
     )
 from lp.registry.model.personproduct import PersonProduct
+from lp.services.beautifulsoup import BeautifulSoup
 from lp.testing import (
     admin_logged_in,
     anonymous_logged_in,

=== modified file 'lib/lp/code/browser/tests/test_gitref.py'
--- lib/lp/code/browser/tests/test_gitref.py	2017-10-04 01:16:22 +0000
+++ lib/lp/code/browser/tests/test_gitref.py	2017-10-21 19:02:28 +0000
@@ -11,7 +11,6 @@
 import hashlib
 import re
 
-from BeautifulSoup import BeautifulSoup
 import pytz
 import soupmatchers
 from storm.store import Store
@@ -22,6 +21,7 @@
 from lp.code.interfaces.gitjob import IGitRefScanJobSource
 from lp.code.interfaces.gitrepository import IGitRepositorySet
 from lp.code.tests.helpers import GitHostingFixture
+from lp.services.beautifulsoup import BeautifulSoup
 from lp.services.job.runner import JobRunner
 from lp.services.webapp.publisher import canonical_url
 from lp.testing import (

=== modified file 'lib/lp/code/browser/tests/test_gitrepository.py'
--- lib/lp/code/browser/tests/test_gitrepository.py	2017-10-04 01:16:22 +0000
+++ lib/lp/code/browser/tests/test_gitrepository.py	2017-10-21 19:02:28 +0000
@@ -11,7 +11,6 @@
 import doctest
 from textwrap import dedent
 
-from BeautifulSoup import BeautifulSoup
 from fixtures import FakeLogger
 import pytz
 from testtools.matchers import DocTestMatches
@@ -31,6 +30,7 @@
 from lp.registry.enums import BranchSharingPolicy
 from lp.registry.interfaces.accesspolicy import IAccessPolicySource
 from lp.registry.interfaces.person import PersonVisibility
+from lp.services.beautifulsoup import BeautifulSoup
 from lp.services.database.constants import UTC_NOW
 from lp.services.webapp.publisher import canonical_url
 from lp.services.webapp.servers import LaunchpadTestRequest

=== modified file 'lib/lp/code/browser/tests/test_sourcepackagerecipe.py'
--- lib/lp/code/browser/tests/test_sourcepackagerecipe.py	2017-10-04 01:16:22 +0000
+++ lib/lp/code/browser/tests/test_sourcepackagerecipe.py	2017-10-21 19:02:28 +0000
@@ -14,7 +14,6 @@
 import re
 from textwrap import dedent
 
-from BeautifulSoup import BeautifulSoup
 from fixtures import FakeLogger
 from mechanize import LinkNotFoundError
 from pytz import UTC
@@ -49,6 +48,7 @@
 from lp.registry.interfaces.pocket import PackagePublishingPocket
 from lp.registry.interfaces.series import SeriesStatus
 from lp.registry.interfaces.teammembership import TeamMembershipStatus
+from lp.services.beautifulsoup import BeautifulSoup
 from lp.services.database.constants import UTC_NOW
 from lp.services.propertycache import clear_property_cache
 from lp.services.webapp import canonical_url

=== modified file 'lib/lp/code/browser/widgets/tests/test_branchtargetwidget.py'
--- lib/lp/code/browser/widgets/tests/test_branchtargetwidget.py	2015-07-08 16:05:11 +0000
+++ lib/lp/code/browser/widgets/tests/test_branchtargetwidget.py	2017-10-21 19:02:28 +0000
@@ -5,7 +5,6 @@
 
 import re
 
-from BeautifulSoup import BeautifulSoup
 from lazr.restful.fields import Reference
 from zope.formlib.interfaces import (
     IBrowserWidget,
@@ -24,6 +23,7 @@
     ProductBranchTarget,
     )
 from lp.registry.vocabularies import ProductVocabulary
+from lp.services.beautifulsoup import BeautifulSoup
 from lp.services.webapp.escaping import html_escape
 from lp.services.webapp.servers import LaunchpadTestRequest
 from lp.testing import (

=== modified file 'lib/lp/code/browser/widgets/tests/test_gitrefwidget.py'
--- lib/lp/code/browser/widgets/tests/test_gitrefwidget.py	2016-12-02 12:04:11 +0000
+++ lib/lp/code/browser/widgets/tests/test_gitrefwidget.py	2017-10-21 19:02:28 +0000
@@ -3,7 +3,6 @@
 
 __metaclass__ = type
 
-from BeautifulSoup import BeautifulSoup
 from lazr.restful.fields import Reference
 from testscenarios import (
     load_tests_apply_scenarios,
@@ -22,6 +21,7 @@
 from lp.app.validators import LaunchpadValidationError
 from lp.code.browser.widgets.gitref import GitRefWidget
 from lp.code.vocabularies.gitrepository import GitRepositoryVocabulary
+from lp.services.beautifulsoup import BeautifulSoup
 from lp.services.webapp.escaping import html_escape
 from lp.services.webapp.servers import LaunchpadTestRequest
 from lp.testing import (

=== modified file 'lib/lp/code/browser/widgets/tests/test_gitrepositorytargetwidget.py'
--- lib/lp/code/browser/widgets/tests/test_gitrepositorytargetwidget.py	2016-09-08 02:56:36 +0000
+++ lib/lp/code/browser/widgets/tests/test_gitrepositorytargetwidget.py	2017-10-21 19:02:28 +0000
@@ -5,7 +5,6 @@
 
 import re
 
-from BeautifulSoup import BeautifulSoup
 from lazr.restful.fields import Reference
 from zope.formlib.interfaces import (
     IBrowserWidget,
@@ -28,6 +27,7 @@
     DistributionVocabulary,
     ProductVocabulary,
     )
+from lp.services.beautifulsoup import BeautifulSoup
 from lp.services.features.testing import FeatureFixture
 from lp.services.webapp.escaping import html_escape
 from lp.services.webapp.servers import LaunchpadTestRequest

=== modified file 'lib/lp/registry/browser/tests/test_distroseries.py'
--- lib/lp/registry/browser/tests/test_distroseries.py	2016-07-29 07:37:33 +0000
+++ lib/lp/registry/browser/tests/test_distroseries.py	2017-10-21 19:02:28 +0000
@@ -12,7 +12,6 @@
 from urllib import urlencode
 from urlparse import urlparse
 
-from BeautifulSoup import BeautifulSoup
 from fixtures import FakeLogger
 from lazr.restful.interfaces import IJSONRequestCache
 from lxml import html
@@ -51,6 +50,7 @@
 from lp.registry.interfaces.person import TeamMembershipPolicy
 from lp.registry.interfaces.pocket import PackagePublishingPocket
 from lp.registry.interfaces.series import SeriesStatus
+from lp.services.beautifulsoup import BeautifulSoup
 from lp.services.config import config
 from lp.services.database.constants import UTC_NOW
 from lp.services.database.interfaces import IStore

=== modified file 'lib/lp/registry/browser/tests/test_distroseriesdifference_views.py'
--- lib/lp/registry/browser/tests/test_distroseriesdifference_views.py	2016-12-22 16:32:38 +0000
+++ lib/lp/registry/browser/tests/test_distroseriesdifference_views.py	2017-10-21 19:02:28 +0000
@@ -7,7 +7,6 @@
 
 import re
 
-from BeautifulSoup import BeautifulSoup
 import soupmatchers
 from testtools.matchers import (
     MatchesAny,
@@ -27,6 +26,7 @@
 from lp.registry.interfaces.distroseriesdifference import (
     IDistroSeriesDifferenceSource,
     )
+from lp.services.beautifulsoup import BeautifulSoup
 from lp.services.comments.interfaces.conversation import (
     IComment,
     IConversation,

=== modified file 'lib/lp/registry/browser/tests/test_pillar_sharing.py'
--- lib/lp/registry/browser/tests/test_pillar_sharing.py	2015-11-26 13:31:45 +0000
+++ lib/lp/registry/browser/tests/test_pillar_sharing.py	2017-10-21 19:02:28 +0000
@@ -5,7 +5,6 @@
 
 __metaclass__ = type
 
-from BeautifulSoup import BeautifulSoup
 from fixtures import FakeLogger
 from lazr.restful.interfaces import IJSONRequestCache
 from lazr.restful.utils import get_current_web_service_request
@@ -27,6 +26,7 @@
     )
 from lp.registry.interfaces.accesspolicy import IAccessPolicyGrantFlatSource
 from lp.registry.model.pillar import PillarPerson
+from lp.services.beautifulsoup import BeautifulSoup
 from lp.services.config import config
 from lp.services.webapp.interfaces import StormRangeFactoryError
 from lp.services.webapp.publisher import canonical_url

=== modified file 'lib/lp/registry/browser/tests/test_subscription_links.py'
--- lib/lp/registry/browser/tests/test_subscription_links.py	2015-01-29 16:28:30 +0000
+++ lib/lp/registry/browser/tests/test_subscription_links.py	2017-10-21 19:02:28 +0000
@@ -7,7 +7,6 @@
 
 import unittest
 
-from BeautifulSoup import BeautifulSoup
 from fixtures import FakeLogger
 from zope.component import getUtility
 
@@ -16,6 +15,7 @@
     )
 from lp.registry.interfaces.person import IPersonSet
 from lp.registry.model.milestone import ProjectMilestone
+from lp.services.beautifulsoup import BeautifulSoup
 from lp.services.webapp.interaction import ANONYMOUS
 from lp.services.webapp.interfaces import ILaunchBag
 from lp.services.webapp.publisher import canonical_url

=== modified file 'lib/lp/registry/doc/product-widgets.txt'
--- lib/lp/registry/doc/product-widgets.txt	2015-01-29 18:43:52 +0000
+++ lib/lp/registry/doc/product-widgets.txt	2017-10-21 19:02:28 +0000
@@ -42,7 +42,8 @@
     >>> print firefox.projectgroup.bugtracker
     None
 
-    >>> from BeautifulSoup import BeautifulSoup, Tag
+    >>> from BeautifulSoup import Tag
+    >>> from lp.services.beautifulsoup import BeautifulSoup
     >>> from lp.testing.pages import extract_text
     >>> def print_items(html):
     ...     soup = BeautifulSoup(html)

=== modified file 'lib/lp/registry/scripts/productreleasefinder/walker.py'
--- lib/lp/registry/scripts/productreleasefinder/walker.py	2015-09-28 17:38:45 +0000
+++ lib/lp/registry/scripts/productreleasefinder/walker.py	2017-10-21 19:02:28 +0000
@@ -22,7 +22,6 @@
     urlsplit,
     )
 
-from BeautifulSoup import BeautifulSoup
 from cscvs.dircompare.path import (
     as_dir,
     subdir,
@@ -33,6 +32,7 @@
     )
 
 from lp.registry.scripts.productreleasefinder import log
+from lp.services.beautifulsoup import BeautifulSoup
 from lp.services.webapp.url import urlappend
 
 

=== modified file 'lib/lp/registry/stories/announcements/xx-announcements.txt'
--- lib/lp/registry/stories/announcements/xx-announcements.txt	2015-10-05 06:34:17 +0000
+++ lib/lp/registry/stories/announcements/xx-announcements.txt	2017-10-21 19:02:28 +0000
@@ -6,7 +6,7 @@
 dedicated batched page showing all announcements, and as an RSS/Atom
 news feed.
 
-    >>> from BeautifulSoup import (
+    >>> from lp.services.beautifulsoup import (
     ...     BeautifulSoup,
     ...     SoupStrainer,
     ...     )

=== modified file 'lib/lp/registry/stories/person/xx-person-rdf.txt'
--- lib/lp/registry/stories/person/xx-person-rdf.txt	2015-06-24 00:17:11 +0000
+++ lib/lp/registry/stories/person/xx-person-rdf.txt	2017-10-21 19:02:28 +0000
@@ -3,7 +3,10 @@
 
 We export FOAF RDF metadata from the /~Person.name/+index document.
 
-    >>> from BeautifulSoup import BeautifulSoup, SoupStrainer
+    >>> from lp.services.beautifulsoup import (
+    ...     BeautifulSoup,
+    ...     SoupStrainer,
+    ...     )
     >>> anon_browser.open("http://launchpad.dev/~name16")
     >>> strainer = SoupStrainer(['link'], {'type': ['application/rdf+xml']})
     >>> soup = BeautifulSoup(anon_browser.contents, parseOnlyThese=strainer)
@@ -98,7 +101,6 @@
 Note how ascii and non-ascii names are rendered properly:
 
     >>> anon_browser.open("http://launchpad.dev/~carlos/+rdf")
-    >>> from BeautifulSoup import BeautifulSoup, SoupStrainer
     >>> strainer = SoupStrainer(['foaf:name'])
     >>> soup = BeautifulSoup(anon_browser.contents, parseOnlyThese=strainer)
     >>> for tag in soup:

=== modified file 'lib/lp/registry/stories/team/xx-team-claim.txt'
--- lib/lp/registry/stories/team/xx-team-claim.txt	2016-01-26 15:47:37 +0000
+++ lib/lp/registry/stories/team/xx-team-claim.txt	2017-10-21 19:02:28 +0000
@@ -80,7 +80,7 @@
     Traceback (most recent call last):
     ...
     LookupError:...
-    >>> from BeautifulSoup import BeautifulSoup
+    >>> from lp.services.beautifulsoup import BeautifulSoup
     >>> soup = BeautifulSoup(user_browser.contents)
     >>> print extract_text(
     ...     soup.find(attrs={'for': 'field.teamowner'}).findPrevious('tr'))

=== modified file 'lib/lp/registry/stories/team/xx-team-contactemail.txt'
--- lib/lp/registry/stories/team/xx-team-contactemail.txt	2015-07-21 09:04:01 +0000
+++ lib/lp/registry/stories/team/xx-team-contactemail.txt	2017-10-21 19:02:28 +0000
@@ -24,7 +24,7 @@
 A warning is rendered about the privacy implications of using a mailing list or
 external contact address.
 
-    >>> from BeautifulSoup import BeautifulSoup
+    >>> from lp.services.beautifulsoup import BeautifulSoup
     >>> soup = BeautifulSoup(browser.contents)
     >>> soup.find(id='email-warning')
     <p ... Email sent to a mailing list or external contact address may ...

=== added file 'lib/lp/services/beautifulsoup.py'
--- lib/lp/services/beautifulsoup.py	1970-01-01 00:00:00 +0000
+++ lib/lp/services/beautifulsoup.py	2017-10-21 19:02:28 +0000
@@ -0,0 +1,30 @@
+# Copyright 2017 Canonical Ltd.  This software is licensed under the
+# GNU Affero General Public License version 3 (see the file LICENSE).
+
+"""Beautiful Soup wrapper for Launchpad.
+
+With Beautiful Soup 3, this is mostly for future migration convenience.
+With Beautiful Soup 4, it will do a little more work to avoid warnings.
+"""
+
+from __future__ import absolute_import, print_function, unicode_literals
+
+__metaclass__ = type
+__all__ = [
+    'BeautifulSoup',
+    'SoupStrainer',
+    ]
+
+
+from BeautifulSoup import (
+    BeautifulSoup as _BeautifulSoup,
+    SoupStrainer,
+    )
+
+
+class BeautifulSoup(_BeautifulSoup):
+
+    def __init__(self, markup="", **kwargs):
+        if not isinstance(markup, unicode) and "fromEncoding" not in kwargs:
+            kwargs["fromEncoding"] = "UTF-8"
+        super(BeautifulSoup, self).__init__(markup=markup, **kwargs)

=== modified file 'lib/lp/services/feeds/feed.py'
--- lib/lp/services/feeds/feed.py	2015-10-14 15:22:01 +0000
+++ lib/lp/services/feeds/feed.py	2017-10-21 19:02:28 +0000
@@ -22,12 +22,12 @@
 import time
 from urlparse import urljoin
 
-from BeautifulSoup import BeautifulSoup
 from z3c.ptcompat import ViewPageTemplateFile
 from zope.component import getUtility
 from zope.datetime import rfc1123_date
 from zope.interface import implementer
 
+from lp.services.beautifulsoup import BeautifulSoup
 from lp.services.config import config
 from lp.services.feeds.interfaces.feed import (
     IFeed,

=== modified file 'lib/lp/services/feeds/stories/xx-links.txt'
--- lib/lp/services/feeds/stories/xx-links.txt	2013-09-27 04:13:23 +0000
+++ lib/lp/services/feeds/stories/xx-links.txt	2017-10-21 19:02:28 +0000
@@ -11,7 +11,7 @@
 The root launchpad.dev url will have a link to the Atom feed which
 displays the most recent announcements for all the projects.
 
-    >>> from BeautifulSoup import BeautifulSoup
+    >>> from lp.services.beautifulsoup import BeautifulSoup
     >>> browser.open('http://launchpad.dev/')
     >>> soup = BeautifulSoup(browser.contents)
     >>> soup.head.findAll('link', type='application/atom+xml')

=== modified file 'lib/lp/services/oauth/doc/oauth-pages.txt'
--- lib/lp/services/oauth/doc/oauth-pages.txt	2016-02-02 14:36:58 +0000
+++ lib/lp/services/oauth/doc/oauth-pages.txt	2017-10-21 19:02:28 +0000
@@ -25,9 +25,13 @@
     ...     view.initialize()
     ...     return view, token
 
-    >>> from BeautifulSoup import BeautifulSoup, SoupStrainer
+    >>> from lp.services.beautifulsoup import (
+    ...     BeautifulSoup,
+    ...     SoupStrainer,
+    ...     )
     >>> def print_hidden_fields(html):
-    ...     soup = BeautifulSoup(html, SoupStrainer(attrs={'type': 'hidden'}))
+    ...     soup = BeautifulSoup(
+    ...         html, parseOnlyThese=SoupStrainer(attrs={'type': 'hidden'}))
     ...     for tag in soup.findAll(attrs={'type': 'hidden'}):
     ...         if tag.attrMap['value']:
     ...             print tag.attrMap['name'], tag.attrMap['value']

=== modified file 'lib/lp/snappy/browser/widgets/tests/test_snaparchivewidget.py'
--- lib/lp/snappy/browser/widgets/tests/test_snaparchivewidget.py	2016-07-13 08:47:42 +0000
+++ lib/lp/snappy/browser/widgets/tests/test_snaparchivewidget.py	2017-10-21 19:02:28 +0000
@@ -5,7 +5,6 @@
 
 import re
 
-from BeautifulSoup import BeautifulSoup
 from lazr.restful.fields import Reference
 from testscenarios import (
     load_tests_apply_scenarios,
@@ -20,6 +19,7 @@
 
 from lp.app.interfaces.launchpad import ILaunchpadCelebrities
 from lp.app.validators import LaunchpadValidationError
+from lp.services.beautifulsoup import BeautifulSoup
 from lp.services.webapp.escaping import html_escape
 from lp.services.webapp.servers import LaunchpadTestRequest
 from lp.snappy.browser.widgets.snaparchive import SnapArchiveWidget

=== modified file 'lib/lp/snappy/browser/widgets/tests/test_storechannelswidget.py'
--- lib/lp/snappy/browser/widgets/tests/test_storechannelswidget.py	2017-03-27 19:28:36 +0000
+++ lib/lp/snappy/browser/widgets/tests/test_storechannelswidget.py	2017-10-21 19:02:28 +0000
@@ -5,7 +5,6 @@
 
 import re
 
-from BeautifulSoup import BeautifulSoup
 from zope.formlib.interfaces import (
     IBrowserWidget,
     IInputWidget,
@@ -14,6 +13,7 @@
 from zope.schema import List
 
 from lp.app.validators import LaunchpadValidationError
+from lp.services.beautifulsoup import BeautifulSoup
 from lp.services.webapp.escaping import html_escape
 from lp.services.webapp.servers import LaunchpadTestRequest
 from lp.snappy.browser.widgets.storechannels import StoreChannelsWidget

=== modified file 'lib/lp/soyuz/browser/tests/test_archive_packages.py'
--- lib/lp/soyuz/browser/tests/test_archive_packages.py	2015-11-26 13:31:45 +0000
+++ lib/lp/soyuz/browser/tests/test_archive_packages.py	2017-10-21 19:02:28 +0000
@@ -11,7 +11,6 @@
 
 import re
 
-from BeautifulSoup import BeautifulSoup
 import soupmatchers
 from testtools.matchers import (
     Equals,
@@ -24,6 +23,7 @@
 
 from lp.app.utilities.celebrities import ILaunchpadCelebrities
 from lp.registry.interfaces.pocket import PackagePublishingPocket
+from lp.services.beautifulsoup import BeautifulSoup
 from lp.services.webapp import canonical_url
 from lp.services.webapp.authentication import LaunchpadPrincipal
 from lp.soyuz.browser.archive import ArchiveNavigationMenu

=== modified file 'lib/lp/soyuz/stories/ppa/xx-private-ppa-presentation.txt'
--- lib/lp/soyuz/stories/ppa/xx-private-ppa-presentation.txt	2012-08-09 11:17:06 +0000
+++ lib/lp/soyuz/stories/ppa/xx-private-ppa-presentation.txt	2017-10-21 19:02:28 +0000
@@ -8,7 +8,7 @@
 Public PPAs appear like any other launchpad page.
 
     >>> browser.open("http://launchpad.dev/~cprov/+archive")
-    >>> from BeautifulSoup import BeautifulSoup
+    >>> from lp.services.beautifulsoup import BeautifulSoup
     >>> body_el = BeautifulSoup(browser.contents).first('body')
     >>> 'private' in body_el['class']
     False

=== modified file 'lib/lp/testing/pages.py'
--- lib/lp/testing/pages.py	2016-01-26 15:14:01 +0000
+++ lib/lp/testing/pages.py	2017-10-21 19:02:28 +0000
@@ -17,14 +17,12 @@
 from urlparse import urljoin
 
 from BeautifulSoup import (
-    BeautifulSoup,
     CData,
     Comment,
     Declaration,
     NavigableString,
     PageElement,
     ProcessingInstruction,
-    SoupStrainer,
     Tag,
     )
 from contrib.oauth import (
@@ -48,6 +46,10 @@
 from lp.app.interfaces.launchpad import ILaunchpadCelebrities
 from lp.registry.errors import NameAlreadyTaken
 from lp.registry.interfaces.teammembership import TeamMembershipStatus
+from lp.services.beautifulsoup import (
+    BeautifulSoup,
+    SoupStrainer,
+    )
 from lp.services.config import config
 from lp.services.oauth.interfaces import (
     IOAuthConsumerSet,
@@ -195,8 +197,7 @@
     else:
         elements_with_id = [
             tag for tag in BeautifulSoup(
-                content, parseOnlyThese=SoupStrainer(id=id),
-                fromEncoding='utf-8')]
+                content, parseOnlyThese=SoupStrainer(id=id))]
     if len(elements_with_id) == 0:
         return None
     elif len(elements_with_id) == 1:
@@ -222,8 +223,7 @@
         classes = set(value.split())
         return match_classes.issubset(classes)
     soup = BeautifulSoup(
-        content, parseOnlyThese=SoupStrainer(attrs={'class': class_matcher}),
-        fromEncoding='utf-8')
+        content, parseOnlyThese=SoupStrainer(attrs={'class': class_matcher}))
     if only_first:
         find = BeautifulSoup.find
     else:
@@ -257,7 +257,7 @@
     if main_content is None:
         # Simple pages have neither of these, so as a last resort, we get
         # the page <body>.
-        main_content = BeautifulSoup(content, fromEncoding='utf-8').body
+        main_content = BeautifulSoup(content).body
     return main_content
 
 
@@ -267,8 +267,7 @@
                        'warning message']
     soup = BeautifulSoup(
         content,
-        parseOnlyThese=SoupStrainer(['div', 'p'], {'class': message_classes}),
-        fromEncoding='utf-8')
+        parseOnlyThese=SoupStrainer(['div', 'p'], {'class': message_classes}))
     return [extract_text(tag) for tag in soup]
 
 
@@ -327,7 +326,7 @@
     (*) A checked option
     ( ) An unchecked option
     """
-    main = BeautifulSoup(content, fromEncoding='utf-8')
+    main = BeautifulSoup(content)
     for field in get_radio_button_text_for_field(main, name):
         print field
 
@@ -379,7 +378,7 @@
     if skip_tags is None:
         skip_tags = ['script']
     if not isinstance(content, PageElement):
-        soup = BeautifulSoup(content, fromEncoding='utf-8')
+        soup = BeautifulSoup(content)
     else:
         soup = content
 
@@ -450,7 +449,7 @@
 
     See package-relationship-pages.txt and related.
     """
-    soup = BeautifulSoup(content, fromEncoding='utf-8')
+    soup = BeautifulSoup(content)
     section = soup.find('ul')
     whitespace_re = re.compile('\s+')
     if section is None:

=== modified file 'lib/lp/translations/browser/tests/test_noindex.py'
--- lib/lp/translations/browser/tests/test_noindex.py	2015-01-29 18:43:52 +0000
+++ lib/lp/translations/browser/tests/test_noindex.py	2017-10-21 19:02:28 +0000
@@ -4,10 +4,10 @@
 __metaclass__ = type
 
 
-from BeautifulSoup import BeautifulSoup
 from zope.security.proxy import removeSecurityProxy
 
 from lp.app.enums import ServiceUsage
+from lp.services.beautifulsoup import BeautifulSoup
 from lp.services.propertycache import cachedproperty
 from lp.services.webapp import canonical_url
 from lp.testing import (

=== modified file 'lib/lp/translations/stories/standalone/xx-translations-to-review.txt'
--- lib/lp/translations/stories/standalone/xx-translations-to-review.txt	2012-10-30 03:35:50 +0000
+++ lib/lp/translations/stories/standalone/xx-translations-to-review.txt	2017-10-21 19:02:28 +0000
@@ -4,11 +4,10 @@
 When a translations reviewer visits their own homepage, it shows a list
 of translations that they could or should be reviewing.
 
-    >>> from BeautifulSoup import BeautifulSoup
-
     >>> from zope.component import getUtility
     >>> from zope.security.proxy import removeSecurityProxy
 
+    >>> from lp.services.beautifulsoup import BeautifulSoup
     >>> from lp.services.worlddata.interfaces.language import ILanguageSet
     >>> from lp.translations.interfaces.translator import ITranslatorSet
 

=== modified file 'utilities/roundup-sniffer.py'
--- utilities/roundup-sniffer.py	2012-01-01 03:10:25 +0000
+++ utilities/roundup-sniffer.py	2017-10-21 19:02:28 +0000
@@ -49,7 +49,7 @@
 from urllib import urlencode
 import urllib2
 
-from BeautifulSoup import BeautifulSoup
+from lp.services.beautifulsoup import BeautifulSoup
 
 
 class RoundupSniffer:


Follow ups