← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~thumper/launchpad/fix-unicode-error-in-urlparse into lp:launchpad

 

Tim Penhey has proposed merging lp:~thumper/launchpad/fix-unicode-error-in-urlparse into lp:launchpad.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  Bug #35077 in Launchpad itself: "Inconsistency in validation functions and database constraints"
  https://bugs.launchpad.net/launchpad/+bug/35077
  Bug #325268 in Launchpad itself: "UnicodeEncodeError in urlparse when trying to change URL specification in a blueprint"
  https://bugs.launchpad.net/launchpad/+bug/325268

For more details, see:
https://code.launchpad.net/~thumper/launchpad/fix-unicode-error-in-urlparse/+merge/57103

I moved the validate_url and valid_webref functions from
canonical.launchpad.interfaces.validation into
lp.app.validators.url, thus satisfying bug 35077.

valid_branch_url wasn't being used anywhere, so I deleted it.

A few lint cleanup removals in that file too.

To keep with the existing validator tests, I put them in the
docstrings too.

-- 
https://code.launchpad.net/~thumper/launchpad/fix-unicode-error-in-urlparse/+merge/57103
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~thumper/launchpad/fix-unicode-error-in-urlparse into lp:launchpad.
=== modified file 'lib/canonical/launchpad/interfaces/validation.py'
--- lib/canonical/launchpad/interfaces/validation.py	2011-03-23 16:28:51 +0000
+++ lib/canonical/launchpad/interfaces/validation.py	2011-04-11 03:30:57 +0000
@@ -7,9 +7,6 @@
 
 __all__ = [
     'can_be_nominated_for_series',
-    'validate_url',
-    'valid_webref',
-    'valid_branch_url',
     'non_duplicate_branch',
     'valid_bug_number',
     'valid_cve_sequence',
@@ -24,7 +21,6 @@
 
 from cgi import escape
 from textwrap import dedent
-import urllib
 
 from zope.app.form.interfaces import WidgetsError
 from zope.component import getUtility
@@ -32,9 +28,7 @@
 from lazr.restful.error import expose
 
 from canonical.launchpad import _
-from canonical.launchpad.interfaces.account import IAccount
 from canonical.launchpad.interfaces.emailaddress import (
-    IEmailAddress,
     IEmailAddressSet,
     )
 from canonical.launchpad.interfaces.launchpad import ILaunchBag
@@ -44,7 +38,6 @@
 from lp.app.validators import LaunchpadValidationError
 from lp.app.validators.cve import valid_cve
 from lp.app.validators.email import valid_email
-from lp.app.validators.url import valid_absolute_url
 
 
 def can_be_nominated_for_series(series):
@@ -64,101 +57,6 @@
     return True
 
 
-# XXX matsubara 2006-03-15 bug=35077:
-# The validations functions that deals with URLs should be in
-# validators/ and we should have them as separete constraints in trusted.sql.
-def validate_url(url, valid_schemes):
-    """Returns a boolean stating whether 'url' is a valid URL.
-
-       A URL is valid if:
-           - its URL scheme is in the provided 'valid_schemes' list, and
-           - it has a non-empty host name.
-
-       None and an empty string are not valid URLs::
-
-           >>> validate_url(None, [])
-           False
-           >>> validate_url('', [])
-           False
-
-       The valid_schemes list is checked::
-
-           >>> validate_url('http://example.com', ['http'])
-           True
-           >>> validate_url('http://example.com', ['https', 'ftp'])
-           False
-
-       A URL without a host name is not valid:
-
-           >>> validate_url('http://', ['http'])
-           False
-
-      """
-    if not url:
-        return False
-    scheme, host = urllib.splittype(url)
-    if not scheme in valid_schemes:
-        return False
-    if not valid_absolute_url(url):
-        return False
-    return True
-
-
-def valid_webref(web_ref):
-    """Returns True if web_ref is a valid download URL, or raises a
-    LaunchpadValidationError.
-
-    >>> valid_webref('http://example.com')
-    True
-    >>> valid_webref('https://example.com/foo/bar')
-    True
-    >>> valid_webref('ftp://example.com/~ming')
-    True
-    >>> valid_webref('sftp://example.com//absolute/path/maybe')
-    True
-    >>> valid_webref('other://example.com/moo')
-    Traceback (most recent call last):
-    ...
-    LaunchpadValidationError: ...
-    """
-    if validate_url(web_ref, ['http', 'https', 'ftp', 'sftp']):
-        # Allow ftp so valid_webref can be used for download_url, and so
-        # it doesn't lock out weird projects where the site or
-        # screenshots are kept on ftp.
-        return True
-    else:
-        raise LaunchpadValidationError(_(dedent("""
-            Not a valid URL. Please enter the full URL, including the
-            scheme (for instance, http:// for a web URL), and ensure the
-            URL uses either http, https or ftp.""")))
-
-
-def valid_branch_url(branch_url):
-    """Returns True if web_ref is a valid download URL, or raises a
-    LaunchpadValidationError.
-
-    >>> valid_branch_url('http://example.com')
-    True
-    >>> valid_branch_url('https://example.com/foo/bar')
-    True
-    >>> valid_branch_url('ftp://example.com/~ming')
-    True
-    >>> valid_branch_url('sftp://example.com//absolute/path/maybe')
-    True
-    >>> valid_branch_url('other://example.com/moo')
-    Traceback (most recent call last):
-    ...
-    LaunchpadValidationError: ...
-    """
-    if validate_url(branch_url, ['http', 'https', 'ftp', 'sftp', 'bzr+ssh']):
-        return True
-    else:
-        raise LaunchpadValidationError(_(dedent("""
-            Not a valid URL. Please enter the full URL, including the
-            scheme (for instance, http:// for a web URL), and ensure the
-            URL uses http, https, ftp, sftp, or bzr+ssh.""")))
-
-
 def non_duplicate_branch(value):
     """Ensure that this branch hasn't already been linked to this bug."""
     current_bug = getUtility(ILaunchBag).bug
@@ -224,9 +122,6 @@
     """Check that the given email is valid and not registered to
     another launchpad account.
     """
-    from canonical.launchpad.webapp.publisher import canonical_url
-    from canonical.launchpad.interfaces.emailaddress import IEmailAddressSet
-
     _validate_email(email)
     _check_email_availability(email)
     return True

=== modified file 'lib/lp/app/validators/url.py'
--- lib/lp/app/validators/url.py	2011-02-23 20:26:53 +0000
+++ lib/lp/app/validators/url.py	2011-04-11 03:30:57 +0000
@@ -3,14 +3,24 @@
 
 __metaclass__ = type
 
+__all__ = [
+    'builder_url_validator',
+    'valid_absolute_url',
+    'valid_builder_url',
+    'valid_webref',
+    'validate_url',
+    ]
+
 from textwrap import dedent
+import urllib
 
 from canonical.launchpad import _
+from canonical.launchpad.webapp.url import urlparse
 from lp.app.validators import LaunchpadValidationError
 
 
 def valid_absolute_url(name):
-    """validate an absolute URL.
+    """Validate an absolute URL.
 
     It looks like this function has been deprecated by
     canonical.launchpad.interfaces.validation.
@@ -18,25 +28,27 @@
     We define this as something that can be parsed into a URL that has both
     a protocol and a network address.
 
-    >>> valid_absolute_url('sftp://chinstrap.ubuntu.com/foo/bar')
-    True
-    >>> valid_absolute_url('http://www.example.com')
-    True
-    >>> valid_absolute_url('whatever:/uxample.com/blah')
-    False
-
-    # XXX: 2010-04-26, Salgado, bug=570244: This test only works against
-    # python2.6 but we still need to run on python2.5, so we should uncomment
-    # it only when we no longer need to run on 2.5.
-    >>> #valid_absolute_url('whatever://example.com/blah')
-    True
+      >>> valid_absolute_url('sftp://chinstrap.ubuntu.com/foo/bar')
+      True
+      >>> valid_absolute_url('http://www.example.com')
+      True
+      >>> valid_absolute_url('whatever:/uxample.com/blah')
+      False
+      >>> valid_absolute_url('whatever://example.com/blah')
+      True
+
+    Unicode urls are ascii encoded, and a failure here means it isn't valid.
+
+      >>> valid_absolute_url(u'http://www.example.com/test...')
+      True
+      >>> valid_absolute_url(u'http://www.example.com/test\u2026')
+      False
+
     """
-    # Have to import urlparse locally since imports from helpers.py
-    # causes this module to be imported, and we can't import stuff from
-    # webapp at that point, since webapp imports stuff from helpers.py
-    # as well.
-    from canonical.launchpad.webapp.url import urlparse
-    (scheme, netloc, path, params, query, fragment) = urlparse(name)
+    try:
+        (scheme, netloc, path, params, query, fragment) = urlparse(name)
+    except UnicodeEncodeError:
+        return False
     # note that URL checking is also done inside the database, in
     # trusted.sql, the valid_absolute_url function, and that code uses
     # stdlib urlparse, not our customized version.
@@ -44,6 +56,7 @@
         return False
     return True
 
+
 def valid_builder_url(url):
     """validate a url for a builder.
 
@@ -56,13 +69,12 @@
     False
     >>> valid_builder_url('ftp://foo.com/')
     False
+
     """
-    # Have to import urlparse locally since imports from helpers.py
-    # causes this module to be imported, and we can't import stuff from
-    # webapp at that point, since webapp imports stuff from helpers.py
-    # as well.
-    from canonical.launchpad.webapp.url import urlparse
-    (scheme, netloc, path, params, query, fragment) = urlparse(url)
+    try:
+        (scheme, netloc, path, params, query, fragment) = urlparse(url)
+    except UnicodeEncodeError:
+        return False
     if scheme != 'http':
         return False
     if params or query or fragment:
@@ -71,6 +83,7 @@
         return False
     return True
 
+
 def builder_url_validator(url):
     """Return True if the url is valid, or raise a LaunchpadValidationError"""
     if not valid_builder_url(url):
@@ -79,3 +92,77 @@
             http://host/ or http://host:port/ only.
             """), mapping={'url': url}))
     return True
+
+
+def validate_url(url, valid_schemes):
+    """Returns a boolean stating whether 'url' is a valid URL.
+
+    A URL is valid if:
+      - its URL scheme is in the provided 'valid_schemes' list, and
+      - it has a non-empty host name.
+
+    None and an empty string are not valid URLs::
+
+      >>> validate_url(None, [])
+      False
+      >>> validate_url('', [])
+      False
+
+    The valid_schemes list is checked::
+
+      >>> validate_url('http://example.com', ['http'])
+      True
+      >>> validate_url('http://example.com', ['https', 'ftp'])
+      False
+
+    A URL without a host name is not valid:
+
+      >>> validate_url('http://', ['http'])
+      False
+
+    Unicode urls are converted to ascii for checking.  Failure to convert
+    results in failure.
+
+      >>> validate_url(u'http://example.com', ['http'])
+      True
+      >>> validate_url(u'http://example.com/test\u2026', ['http'])
+      False
+
+    """
+    if not url:
+        return False
+    scheme, host = urllib.splittype(url)
+    if not scheme in valid_schemes:
+        return False
+    if not valid_absolute_url(url):
+        return False
+    return True
+
+
+def valid_webref(web_ref):
+    """Returns True if web_ref is a valid download URL, or raises a
+    LaunchpadValidationError.
+
+    >>> valid_webref('http://example.com')
+    True
+    >>> valid_webref('https://example.com/foo/bar')
+    True
+    >>> valid_webref('ftp://example.com/~ming')
+    True
+    >>> valid_webref('sftp://example.com//absolute/path/maybe')
+    True
+    >>> valid_webref('other://example.com/moo')
+    Traceback (most recent call last):
+    ...
+    LaunchpadValidationError: ...
+    """
+    if validate_url(web_ref, ['http', 'https', 'ftp', 'sftp']):
+        # Allow ftp so valid_webref can be used for download_url, and so
+        # it doesn't lock out weird projects where the site or
+        # screenshots are kept on ftp.
+        return True
+    else:
+        raise LaunchpadValidationError(_(dedent("""
+            Not a valid URL. Please enter the full URL, including the
+            scheme (for instance, http:// for a web URL), and ensure the
+            URL uses either http, https or ftp.""")))

=== modified file 'lib/lp/blueprints/interfaces/specification.py'
--- lib/lp/blueprints/interfaces/specification.py	2011-03-28 20:54:50 +0000
+++ lib/lp/blueprints/interfaces/specification.py	2011-04-11 03:30:57 +0000
@@ -46,8 +46,8 @@
     )
 
 from canonical.launchpad import _
-from canonical.launchpad.interfaces.validation import valid_webref
 from lp.app.validators import LaunchpadValidationError
+from lp.app.validators.url import valid_webref
 from lp.blueprints.enums import (
     SpecificationDefinitionStatus,
     SpecificationGoalStatus,

=== modified file 'lib/lp/registry/browser/announcement.py'
--- lib/lp/registry/browser/announcement.py	2011-02-02 15:43:31 +0000
+++ lib/lp/registry/browser/announcement.py	2011-04-11 03:30:57 +0000
@@ -33,7 +33,6 @@
     FeedsMixin,
     RootAnnouncementsFeedLink,
     )
-from canonical.launchpad.interfaces.validation import valid_webref
 from canonical.launchpad.webapp.authorization import check_permission
 from canonical.launchpad.webapp.batching import BatchNavigator
 from canonical.launchpad.webapp.menu import (
@@ -50,6 +49,7 @@
     custom_widget,
     LaunchpadFormView,
     )
+from lp.app.validators.url import valid_webref
 from lp.app.widgets.announcementdate import AnnouncementDateWidget
 from lp.registry.interfaces.announcement import IAnnouncement
 from lp.services.fields import (

=== modified file 'lib/lp/registry/interfaces/productseries.py'
--- lib/lp/registry/interfaces/productseries.py	2011-03-28 20:54:50 +0000
+++ lib/lp/registry/interfaces/productseries.py	2011-04-11 03:30:57 +0000
@@ -46,12 +46,12 @@
 
 from canonical.launchpad import _
 from canonical.launchpad.interfaces.launchpad import IHasAppointedDriver
-from canonical.launchpad.interfaces.validation import validate_url
 from canonical.launchpad.webapp.url import urlparse
 from lp.app.errors import NameLookupFailed
 from lp.app.interfaces.launchpad import IServiceUsage
 from lp.app.validators import LaunchpadValidationError
 from lp.app.validators.name import name_validator
+from lp.app.validators.url import validate_url
 from lp.blueprints.interfaces.specificationtarget import ISpecificationGoal
 from lp.bugs.interfaces.bugtarget import (
     IBugTarget,