← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~launchpad-orange-squad/launchpad/bug-740208-obfuscate-ws into lp:launchpad

 

Abel Deuring has proposed merging lp:~launchpad-orange-squad/launchpad/bug-740208-obfuscate-ws into lp:launchpad.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~launchpad-orange-squad/launchpad/bug-740208-obfuscate-ws/+merge/66583

This branch fixes bug 740208: Launchpad web service leaks email addresses

Henning and myself worked together on it.

As decribed in the bug, the HTML representation of weservice data
always obfuscates email addresses, even for logged in users, while
the JSON representation never does.

The branch introduces a new marshaller for text fields which
obfuscates email addresses for anonymous accesses only. This
marshaller is defined in lib/lp/app/webservice/marshallers.py;
the file zcml/override-includes/ws-marshaller-configure.zcml
registers it.

The marshaller needs the same "email address replacement function"
as the one used for regular web requests, so we moved
obfuscate_email() and a related regular expression from
lib/lp/app/browser/stringformatter.py to lib/lp/services/utils.py.

This ensures that email addresses are obfuscated for anonymous
webservice requests, but does not prevent the obfuscation of
email addresses when the response return an HTML representation
of the data. This additional, "too eager", obfuscation happened
in text_xhtml_representation(), defined in
lib/lp/app/browser/webservice.py; this function does no longer
return standard_text_html_representation (), but a lambda function
which implements the "non-obfuscation part" of
standard_text_html_representation ().

test: ./bin/test app -vvt lp.app.webservice.tests.test_marshallers

-- 
https://code.launchpad.net/~launchpad-orange-squad/launchpad/bug-740208-obfuscate-ws/+merge/66583
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~launchpad-orange-squad/launchpad/bug-740208-obfuscate-ws into lp:launchpad.
=== modified file 'lib/lp/app/browser/stringformatter.py'
--- lib/lp/app/browser/stringformatter.py	2011-06-24 10:51:48 +0000
+++ lib/lp/app/browser/stringformatter.py	2011-07-01 11:21:12 +0000
@@ -34,6 +34,10 @@
 from canonical.launchpad.webapp.interfaces import ILaunchBag
 from lp.answers.interfaces.faq import IFAQSet
 from lp.registry.interfaces.person import IPersonSet
+from lp.services.utils import (
+    re_email_address,
+    obfuscate_email,
+    )
 
 
 def escape(text, quote=True):
@@ -361,12 +365,13 @@
             # devaluing the return on effort for spammers that consider
             # using Launchpad.
             if not FormattersAPI._linkify_url_should_be_ignored(url):
-                link_string = ('<a rel="nofollow" '
-                               'href="%(url)s">%(linked_text)s</a>%(trailers)s' % {
-                                    'url': cgi.escape(url, quote=True),
-                                    'linked_text': add_word_breaks(cgi.escape(url)),
-                                    'trailers': cgi.escape(trailers)
-                                    })
+                link_string = (
+                    '<a rel="nofollow" '
+                    'href="%(url)s">%(linked_text)s</a>%(trailers)s' % {
+                        'url': cgi.escape(url, quote=True),
+                        'linked_text': add_word_breaks(cgi.escape(url)),
+                        'trailers': cgi.escape(trailers)
+                        })
                 return link_string
             else:
                 return full_url
@@ -776,25 +781,6 @@
             output.append(line)
         return '\n'.join(output)
 
-    # This is a regular expression that matches email address embedded in
-    # text. It is not RFC 2821 compliant, nor does it need to be. This
-    # expression strives to identify probable email addresses so that they
-    # can be obfuscated when viewed by unauthenticated users. See
-    # http://www.email-unlimited.com/stuff/email_address_validator.htm
-
-    # localnames do not have [&?%!@<>,;:`|{}()#*^~ ] in practice
-    # (regardless of RFC 2821) because they conflict with other systems.
-    # See https://lists.ubuntu.com
-    #     /mailman/private/launchpad-reviews/2007-June/006081.html
-
-    # This verson of the re is more than 5x faster that the orginal
-    # version used in ftest/test_tales.testObfuscateEmail.
-    _re_email = re.compile(r"""
-        \b[a-zA-Z0-9._/="'+-]{1,64}@  # The localname.
-        [a-zA-Z][a-zA-Z0-9-]{1,63}    # The hostname.
-        \.[a-zA-Z0-9.-]{1,251}\b      # Dot starts one or more domains.
-        """, re.VERBOSE)              # ' <- font-lock turd
-
     def obfuscate_email(self):
         """Obfuscate an email address if there's no authenticated user.
 
@@ -813,11 +799,7 @@
         """
         if getUtility(ILaunchBag).user is not None:
             return self._stringtoformat
-        text = self._re_email.sub(
-            r'<email address hidden>', self._stringtoformat)
-        text = text.replace(
-            "<<email address hidden>>", "<email address hidden>")
-        return text
+        return obfuscate_email(self._stringtoformat)
 
     def linkify_email(self, preloaded_person_data=None):
         """Linkify any email address recognised in Launchpad.
@@ -831,7 +813,7 @@
         """
         text = self._stringtoformat
 
-        matches = re.finditer(self._re_email, text)
+        matches = re.finditer(re_email_address, text)
         for match in matches:
             address = match.group()
             person = None

=== modified file 'lib/lp/app/browser/webservice.py'
--- lib/lp/app/browser/webservice.py	2011-02-21 23:04:17 +0000
+++ lib/lp/app/browser/webservice.py	2011-07-01 11:21:12 +0000
@@ -18,7 +18,7 @@
     )
 from zope.schema.interfaces import IText
 
-from lp.app.browser.lazrjs import standard_text_html_representation
+from lp.app.browser.stringformatter import FormattersAPI
 from lp.app.browser.tales import format_link
 
 
@@ -41,4 +41,6 @@
 @implementer(IFieldHTMLRenderer)
 def text_xhtml_representation(context, field, request):
     """Render text as XHTML using the webservice."""
-    return standard_text_html_representation
+    return lambda text: (
+        '' if text is None
+        else FormattersAPI(text).text_to_html(linkify_text=True))

=== added directory 'lib/lp/app/webservice'
=== added file 'lib/lp/app/webservice/__init__.py'
--- lib/lp/app/webservice/__init__.py	1970-01-01 00:00:00 +0000
+++ lib/lp/app/webservice/__init__.py	2011-07-01 11:21:12 +0000
@@ -0,0 +1,4 @@
+# Copyright 2011 Canonical Ltd.  This software is licensed under the
+# GNU Affero General Public License version 3 (see the file LICENSE).
+
+"""This package contains code for the Launchpad webservice."""

=== added file 'lib/lp/app/webservice/marshallers.py'
--- lib/lp/app/webservice/marshallers.py	1970-01-01 00:00:00 +0000
+++ lib/lp/app/webservice/marshallers.py	2011-07-01 11:21:12 +0000
@@ -0,0 +1,32 @@
+# Copyright 2011 Canonical Ltd.  This software is licensed under the
+# GNU Affero General Public License version 3 (see the file LICENSE).
+
+"""Launchpad-specific field marshallers for the web service."""
+
+__metaclass__ = type
+
+__all__ = [
+    'TextFieldMarshaller',
+    ]
+
+
+from lazr.restful.marshallers import (
+    TextFieldMarshaller as LazrTextFieldMarshaller,
+    )
+from zope.app.security.interfaces import IUnauthenticatedPrincipal
+
+from lp.services.utils import obfuscate_email
+
+
+class TextFieldMarshaller(LazrTextFieldMarshaller):
+    """Do not expose email addresses for anonymous users."""
+
+    def unmarshall(self, entry, value):
+        """See `IFieldMarshaller`.
+
+        Return the value as is.
+        """
+        if (value is not None and
+                IUnauthenticatedPrincipal.providedBy(self.request.principal)):
+            return obfuscate_email(value)
+        return value

=== added directory 'lib/lp/app/webservice/tests'
=== added file 'lib/lp/app/webservice/tests/__init__.py'
--- lib/lp/app/webservice/tests/__init__.py	1970-01-01 00:00:00 +0000
+++ lib/lp/app/webservice/tests/__init__.py	2011-07-01 11:21:12 +0000
@@ -0,0 +1,4 @@
+# Copyright 2011 Canonical Ltd.  This software is licensed under the
+# GNU Affero General Public License version 3 (see the file LICENSE).
+
+"""Tests for code for the Launchpad webservice."""

=== added file 'lib/lp/app/webservice/tests/test_marshallers.py'
--- lib/lp/app/webservice/tests/test_marshallers.py	1970-01-01 00:00:00 +0000
+++ lib/lp/app/webservice/tests/test_marshallers.py	2011-07-01 11:21:12 +0000
@@ -0,0 +1,134 @@
+# Copyright 2011 Canonical Ltd.  This software is licensed under the
+# GNU Affero General Public License version 3 (see the file LICENSE).
+
+"""Tests for the webservice marshallers."""
+
+__metaclass__ = type
+
+import transaction
+from zope.component import getUtility
+
+from canonical.launchpad.testing.pages import (
+    LaunchpadWebServiceCaller,
+    webservice_for_person,
+    )
+from canonical.launchpad.webapp.interfaces import IPlacelessAuthUtility
+from canonical.launchpad.webapp.servers import WebServiceTestRequest
+from canonical.testing.layers import DatabaseFunctionalLayer
+from lp.app.webservice.marshallers import TextFieldMarshaller
+from lp.testing import logout, TestCaseWithFactory
+
+
+def ws_url(bug):
+    url = "/bugs/%d" % bug.id
+    return url
+
+
+class TestTextFieldMarshaller(TestCaseWithFactory):
+
+    layer = DatabaseFunctionalLayer
+
+    def _makeRequest(self, is_anonymous):
+        """Create either an anonymous or authenticated request."""
+        request = WebServiceTestRequest()
+        if is_anonymous:
+            request.setPrincipal(
+                getUtility(IPlacelessAuthUtility).unauthenticatedPrincipal())
+        else:
+            request.setPrincipal(self.factory.makePerson())
+        return request
+
+    def test_unmarshall_obfuscated(self):
+        # Data is obfuccated if the request is anonynous.
+        request = self._makeRequest(is_anonymous=True)
+        marshaller = TextFieldMarshaller(None, request)
+        result = marshaller.unmarshall(None, u"foo@xxxxxxxxxxx")
+        self.assertEqual(u"<email address hidden>", result)
+
+    def test_unmarshall_not_obfuscated(self):
+        # Data is not obfuccated if the request is authenticated.
+        request = self._makeRequest(is_anonymous=False)
+        marshaller = TextFieldMarshaller(None, request)
+        result = marshaller.unmarshall(None, u"foo@xxxxxxxxxxx")
+        self.assertEqual(u"foo@xxxxxxxxxxx", result)
+
+
+class TestWebServiceObfuscation(TestCaseWithFactory):
+    """Integration test for obfuscation marshaller.
+
+    Not using WebServiceTestCase because that assumes too much about users
+    """
+
+    layer = DatabaseFunctionalLayer
+
+    email_address = "joe@xxxxxxxxxxx"
+    email_address_obfuscated = "<email address hidden>"
+    email_address_obfuscated_escaped = "&lt;email address hidden&gt;"
+    bug_title = "Title with address %s in it"
+    bug_description = "Description with address %s in it"
+
+    def _makeBug(self):
+        """Create a bug with an email address in title and description."""
+        bug = self.factory.makeBug(
+            title=self.bug_title % self.email_address,
+            description=self.bug_description % self.email_address)
+        transaction.commit()
+        return bug
+
+    def test_email_address_obfuscated(self):
+        # Email addresses are obfuscated for anonymous users.
+        bug = self._makeBug()
+        logout()
+        webservice = LaunchpadWebServiceCaller()
+        result = webservice(ws_url(bug)).jsonBody()
+        self.assertEqual(
+            self.bug_title % self.email_address_obfuscated,
+            result['title'])
+        self.assertEqual(
+            self.bug_description % self.email_address_obfuscated,
+            result['description'])
+
+    def test_email_address_not_obfuscated(self):
+        # Email addresses are not obfuscated for authenticated users.
+        bug = self._makeBug()
+        user = self.factory.makePerson()
+        webservice = webservice_for_person(user)
+        result = webservice(ws_url(bug)).jsonBody()
+        self.assertEqual(self.bug_title % self.email_address, result['title'])
+        self.assertEqual(
+            self.bug_description % self.email_address, result['description'])
+
+    def test_xhtml_email_address_not_obfuscated(self):
+        # Email addresses are not obfuscated for authenticated users.
+        bug = self._makeBug()
+        user = self.factory.makePerson()
+        webservice = webservice_for_person(user)
+        result = webservice(
+            ws_url(bug), headers={'Accept': 'application/xhtml+xml'})
+        self.assertIn(self.email_address, result.body)
+        self.assertNotIn(
+            self.email_address_obfuscated_escaped, result.body)
+
+    def test_xhtml_email_address_obfuscated(self):
+        # Email addresses are obfuscated in the XML representation for
+        # anonymous users.
+        bug = self._makeBug()
+        logout()
+        webservice = LaunchpadWebServiceCaller()
+        result = webservice(
+            ws_url(bug), headers={'Accept': 'application/xhtml+xml'})
+        self.assertNotIn(self.email_address, result.body)
+        self.assertIn(self.email_address_obfuscated_escaped, result.body)
+
+    def test_etags_differ_for_anon_and_non_anon_represetations(self):
+        # The etag header sent for anonmoues requests where email
+        # addresses are obfusacted differs from the etag header
+        # sent in non-obfuscated responses.
+        bug = self._makeBug()
+        user = self.factory.makePerson()
+        webservice = webservice_for_person(user)
+        etag_logged_in = webservice(ws_url(bug)).getheader('etag')
+        logout()
+        webservice = LaunchpadWebServiceCaller()
+        etag_logged_out = webservice(ws_url(bug)).getheader('etag')
+        self.assertNotEqual(etag_logged_in, etag_logged_out)

=== modified file 'lib/lp/services/utils.py'
--- lib/lp/services/utils.py	2011-06-28 23:43:37 +0000
+++ lib/lp/services/utils.py	2011-07-01 11:21:12 +0000
@@ -18,6 +18,12 @@
     'file_exists',
     'iter_list_chunks',
     'iter_split',
+<<<<<<< TREE
+=======
+    'RegisteredSubclass',
+    're_email_address',
+    'obfuscate_email',
+>>>>>>> MERGE-SOURCE
     'run_capturing_output',
     'synchronize',
     'text_delta',
@@ -27,6 +33,7 @@
 
 from itertools import tee
 import os
+import re
 from StringIO import StringIO
 import string
 import sys
@@ -285,3 +292,60 @@
     variables, and helps to avoid typos.
     """
     sys._getframe(1).f_locals["__traceback_info__"] = info
+<<<<<<< TREE
+=======
+
+
+class RegisteredSubclass(type):
+    """Metaclass for when subclasses should be registered."""
+
+    def __init__(cls, name, bases, dict_):
+        # _register_subclass must be a static method to permit upcalls.
+        #
+        # We cannot use super(Class, cls) to do the upcalls, because Class
+        # isn't fully defined yet.  (Remember, we're calling this from a
+        # metaclass.)
+        #
+        # Without using super, a classmethod that overrides another
+        # classmethod has no reasonable way to call the overridden version AND
+        # provide its class as first parameter (i.e. "cls").  Therefore, we
+        # must use a static method.
+        cls._register_subclass(cls)
+
+
+# This is a regular expression that matches email address embedded in
+# text. It is not RFC 2821 compliant, nor does it need to be. This
+# expression strives to identify probable email addresses so that they
+# can be obfuscated when viewed by unauthenticated users. See
+# http://www.email-unlimited.com/stuff/email_address_validator.htm
+
+# localnames do not have [&?%!@<>,;:`|{}()#*^~ ] in practice
+# (regardless of RFC 2821) because they conflict with other systems.
+# See https://lists.ubuntu.com
+#     /mailman/private/launchpad-reviews/2007-June/006081.html
+
+# This verson of the re is more than 5x faster that the orginal
+# version used in ftest/test_tales.testObfuscateEmail.
+re_email_address = re.compile(r"""
+    \b[a-zA-Z0-9._/="'+-]{1,64}@  # The localname.
+    [a-zA-Z][a-zA-Z0-9-]{1,63}    # The hostname.
+    \.[a-zA-Z0-9.-]{1,251}\b      # Dot starts one or more domains.
+    """, re.VERBOSE)              # ' <- font-lock turd
+
+
+def obfuscate_email(text_to_obfuscate):
+    """Obfuscate an email address.
+
+    The email address is obfuscated as <email address hidden>.
+
+    The pattern used to identify an email address is not 2822. It strives
+    to match any possible email address embedded in the text. For example,
+    mailto:person@xxxxxxxxxx and http://person:password@xxxxxxxxxx both
+    match, though the http match is in fact not an email address.
+    """
+    text = re_email_address.sub(
+        r'<email address hidden>', text_to_obfuscate)
+    text = text.replace(
+        "<<email address hidden>>", "<email address hidden>")
+    return text
+>>>>>>> MERGE-SOURCE

=== modified file 'lib/lp/testing/factory.py'
--- lib/lp/testing/factory.py	2011-06-23 12:59:13 +0000
+++ lib/lp/testing/factory.py	2011-07-01 11:21:12 +0000
@@ -4065,8 +4065,11 @@
 
         return getUtility(ITemporaryStorageManager).fetch(new_uuid)
 
-    def makeLaunchpadService(self, person=None, version="devel"):
-        if person is None:
+    def makeLaunchpadService(self, person=None, version="devel",
+                             anonymous=False):
+        assert not(person is not None and anonymous), (
+            "person needs to be None for anonymous service")
+        if person is None and not anonymous:
             person = self.makePerson()
         from canonical.testing import BaseLayer
         launchpad = launchpadlib_for(

=== added file 'zcml/override-includes/ws-marshaller-configure.zcml'
--- zcml/override-includes/ws-marshaller-configure.zcml	1970-01-01 00:00:00 +0000
+++ zcml/override-includes/ws-marshaller-configure.zcml	2011-07-01 11:21:12 +0000
@@ -0,0 +1,23 @@
+<!-- Copyright 2011 Canonical Ltd.  This software is licensed under the
+     GNU Affero General Public License version 3 (see the file LICENSE).
+-->
+
+<configure xmlns="http://namespaces.zope.org/zope";>
+    <!-- Override lazr-restful marshallers for test field to enable
+         automatic obfuscation of email addresses for anonymous users. -->
+
+     <adapter
+        for="zope.schema.interfaces.IASCII
+             zope.publisher.interfaces.http.IHTTPRequest"
+        provides="lazr.restful.interfaces.IFieldMarshaller"
+        factory="lp.app.webservice.marshallers.TextFieldMarshaller"
+        />
+
+    <adapter
+        for="zope.schema.interfaces.IText
+             zope.publisher.interfaces.http.IHTTPRequest"
+        provides="lazr.restful.interfaces.IFieldMarshaller"
+        factory="lp.app.webservice.marshallers.TextFieldMarshaller"
+        />
+
+</configure>
\ No newline at end of file