← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~wgrant/launchpad/improved-html_escape into lp:launchpad

 

William Grant has proposed merging lp:~wgrant/launchpad/improved-html_escape into lp:launchpad.

Commit message:
Rewrite html_escape() and structured() and make them escape quotes in preparation for their deployment everywhere.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~wgrant/launchpad/improved-html_escape/+merge/138902

Rewrite html_escape and structured to be a bit more sensible and shorter. html_escape will now also escape single and double quotes. I rewrote the tests as well.
-- 
https://code.launchpad.net/~wgrant/launchpad/improved-html_escape/+merge/138902
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~wgrant/launchpad/improved-html_escape into lp:launchpad.
=== modified file 'lib/lp/services/webapp/doc/menus.txt'
--- lib/lp/services/webapp/doc/menus.txt	2012-06-15 13:45:02 +0000
+++ lib/lp/services/webapp/doc/menus.txt	2012-12-10 04:10:25 +0000
@@ -318,26 +318,6 @@
     >>> IFacetLink(link).escapedtext
     u'some <b> --&gt; </b> text'
 
-We can test the 'structured' helper directly.
-
-    >>> structured('foo').escapedtext
-    u'foo'
-
-    >>> structured('%s', 'a < b > c & d " e').escapedtext
-    u'a &lt; b &gt; c &amp; d " e'
-
-    >>> structured('a < b > c & d " e').escapedtext
-    u'a < b > c & d " e'
-
-    >>> structured(
-    ...     'replacement %(foo)s, %(bar)s', foo='<foo>', bar='bar').escapedtext
-    u'replacement &lt;foo&gt;, bar'
-
-    >>> structured('whatever', 'foo', 'bar', baz='baz')
-    Traceback (most recent call last):
-    ...
-    TypeError:...
-
 Next, we return the link as HTML.
 
     # We need to use a real launchpad test request so the view adapter

=== modified file 'lib/lp/services/webapp/escaping.py'
--- lib/lp/services/webapp/escaping.py	2012-11-26 08:48:03 +0000
+++ lib/lp/services/webapp/escaping.py	2012-12-10 04:10:25 +0000
@@ -7,8 +7,6 @@
     'structured',
     ]
 
-import cgi
-
 from zope.i18n import (
     Message,
     translate,
@@ -38,9 +36,13 @@
         # It is possible that the message is wrapped in an
         # internationalized object, so we need to translate it
         # first. See bug #54987.
-        return cgi.escape(
-            unicode(
-                translate_if_i18n(message)))
+        raw = unicode(translate_if_i18n(message))
+        raw = raw.replace('&', '&amp;')
+        raw = raw.replace('<', '&lt;')
+        raw = raw.replace('>', '&gt;')
+        raw = raw.replace('"', '&quot;')
+        raw = raw.replace("'", '&#x27;')
+        return raw
 
 
 def translate_if_i18n(obj_or_msgid):
@@ -49,9 +51,7 @@
     Returns any other type of object untouched.
     """
     if isinstance(obj_or_msgid, Message):
-        return translate(
-            obj_or_msgid,
-            context=get_current_browser_request())
+        return translate(obj_or_msgid, context=get_current_browser_request())
     else:
         # Just text (or something unknown).
         return obj_or_msgid
@@ -61,31 +61,20 @@
 
     implements(IStructuredString)
 
-    def __init__(self, text, *replacements, **kwreplacements):
-        text = translate_if_i18n(text)
+    def __init__(self, text, *reps, **kwreps):
+        text = unicode(translate_if_i18n(text))
         self.text = text
-        if replacements and kwreplacements:
+        if reps and kwreps:
             raise TypeError(
                 "You must provide either positional arguments or keyword "
                 "arguments to structured(), not both.")
-        if replacements:
-            escaped = []
-            for replacement in replacements:
-                if isinstance(replacement, structured):
-                    escaped.append(unicode(replacement.escapedtext))
-                else:
-                    escaped.append(cgi.escape(unicode(replacement)))
-            self.escapedtext = text % tuple(escaped)
-        elif kwreplacements:
-            escaped = {}
-            for key, value in kwreplacements.iteritems():
-                if isinstance(value, structured):
-                    escaped[key] = unicode(value.escapedtext)
-                else:
-                    escaped[key] = cgi.escape(unicode(value))
-            self.escapedtext = text % escaped
+        if reps:
+            escaped = tuple(html_escape(rep) for rep in reps)
+        elif kwreps:
+            escaped = dict((k, html_escape(v)) for k, v in kwreps.iteritems())
         else:
-            self.escapedtext = unicode(text)
+            escaped = ()
+        self.escapedtext = text % escaped
 
     def __repr__(self):
         return "<structured-string '%s'>" % self.text

=== added file 'lib/lp/services/webapp/tests/test_escaping.py'
--- lib/lp/services/webapp/tests/test_escaping.py	1970-01-01 00:00:00 +0000
+++ lib/lp/services/webapp/tests/test_escaping.py	2012-12-10 04:10:25 +0000
@@ -0,0 +1,61 @@
+# Copyright 2012 Canonical Ltd.  This software is licensed under the
+# GNU Affero General Public License version 3 (see the file LICENSE).
+
+__metaclass__ = type
+
+from lp.services.webapp.escaping import (
+    html_escape,
+    structured,
+    )
+from lp.testing import TestCase
+
+
+class TestHtmlEscape(TestCase):
+
+    def test_escapes_special_characters(self):
+        # &, <, >, " and ' are transformed into equivalent entity
+        # references, as they hold special significance in HTML.
+        self.assertEqual('&amp;&lt;&gt;&quot;&#x27;', html_escape('&<>"\''))
+
+    def test_structured_passed_through(self):
+        # An IStructuredString escapes to the value of its .escapedtext.
+        struct = structured('<b>%s</b>', '<i>It works!</i>')
+        self.assertEqual(
+            '<b>&lt;i&gt;It works!&lt;/i&gt;</b>', html_escape(struct))
+
+
+class TestStructured(TestCase):
+
+    def test_escapes_args(self):
+        # Normal string arguments are escaped before they're inserted
+        # into the formatted string.
+        struct = structured(
+            '<b>%s</b> %s', 'I am <i>escaped</i>!', '"& I\'m too."')
+        self.assertEqual(
+            '<b>I am &lt;i&gt;escaped&lt;/i&gt;!</b> '
+            '&quot;&amp; I&#x27;m too.&quot;',
+            struct.escapedtext)
+
+    def test_structured_args_passed_through(self):
+        # If an IStructuredString is used as an argument, its
+        # .escapedtext is included verbatim. Other arguments are still
+        # escaped.
+        inner = structured('<b>%s</b>', '<i>some text</i>')
+        outer = structured('<li>%s: %s</li>', 'First & last', inner)
+        self.assertEqual(
+            '<li>First &amp; last: <b>&lt;i&gt;some text&lt;/i&gt;</b></li>',
+            outer.escapedtext)
+
+    def test_kwargs(self):
+        # Keyword args work too.
+        inner = structured('<b>%s</b>', '<i>some text</i>')
+        outer = structured(
+            '<li>%(capt)s: %(body)s</li>', capt='First & last', body=inner)
+        self.assertEqual(
+            '<li>First &amp; last: <b>&lt;i&gt;some text&lt;/i&gt;</b></li>',
+            outer.escapedtext)
+
+    def test_mixing_kwargs_is_illegal(self):
+        # Passing a combination of args and kwargs is illegal.
+        self.assertRaises(
+            TypeError, structured, '%s %(foo)s', 'bar', foo='foo')