← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] ~cjwatson/launchpad:markdown-bleach into launchpad:master

 

Colin Watson has proposed merging ~cjwatson/launchpad:markdown-bleach into launchpad:master.

Commit message:
Upgrade Markdown and sanitize its output using bleach

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  Bug #391780 in Launchpad itself: "Support Markdown "stack overflow" style hyperlinks markup"
  https://bugs.launchpad.net/launchpad/+bug/391780

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/+git/launchpad/+merge/425067

This makes a start at reviving the old experiment to allow user-supplied Markdown.  `safe_mode` is deprecated, so use `bleach` to sanitize the resulting HTML instead (important to avoid trivial XSS attacks), and also add the `fenced_code` extension since that looks like something we're likely to want.

As per the linked bug, there are a number of things still to fix, and this is still behind a feature flag.

Dependencies MP: https://code.launchpad.net/~cjwatson/lp-source-dependencies/+git/lp-source-dependencies/+merge/425066
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/launchpad:markdown-bleach into launchpad:master.
diff --git a/lib/lp/app/browser/stringformatter.py b/lib/lp/app/browser/stringformatter.py
index f54f44a..dd75bbe 100644
--- a/lib/lp/app/browser/stringformatter.py
+++ b/lib/lp/app/browser/stringformatter.py
@@ -20,6 +20,8 @@ import sys
 from base64 import urlsafe_b64encode
 from itertools import zip_longest
 
+import bleach
+import bleach_allowlist
 import markdown
 import six
 from breezy.patches import hunk_from_header
@@ -1222,14 +1224,30 @@ class FormattersAPI:
             raise TraversalError(name)
 
 
+# Allow some additional tags used by the "tables" extension.
+markdown_tags = bleach_allowlist.markdown_tags + [
+    "table",
+    "tbody",
+    "td",
+    "tfoot",
+    "th",
+    "thead",
+    "tr",
+]
+
+
 def format_markdown(text):
     """Return html form of marked-up text."""
     # This returns whole paragraphs (in p tags), similarly to text_to_html.
-    md = markdown.Markdown(
-        safe_mode="escape",
-        extensions=[
-            "tables",
-            "nl2br",
-        ],
+    return bleach.clean(
+        markdown.markdown(
+            text,
+            extensions=[
+                "fenced_code",
+                "nl2br",
+                "tables",
+            ],
+        ),
+        markdown_tags,
+        bleach_allowlist.markdown_attrs,
     )
-    return md.convert(text)  # How easy was that?
diff --git a/lib/lp/app/browser/tests/test_stringformatter.py b/lib/lp/app/browser/tests/test_stringformatter.py
index 5490b46..bbb9db4 100644
--- a/lib/lp/app/browser/tests/test_stringformatter.py
+++ b/lib/lp/app/browser/tests/test_stringformatter.py
@@ -16,11 +16,11 @@ from lp.app.browser.stringformatter import (
     parse_diff,
 )
 from lp.services.config import config
-from lp.services.features.testing import FeatureFixture
+from lp.services.features.testing import MemoryFeatureFixture
 from lp.services.webapp.interfaces import ILaunchBag
 from lp.services.webapp.publisher import canonical_url
 from lp.testing import TestCase, TestCaseWithFactory
-from lp.testing.layers import DatabaseFunctionalLayer, ZopelessLayer
+from lp.testing.layers import BaseLayer, DatabaseFunctionalLayer, ZopelessLayer
 from lp.testing.pages import find_tags_by_class
 
 
@@ -830,11 +830,11 @@ class MarksDownAs(Matcher):
 class TestMarkdownDisabled(TestCase):
     """Feature flag can turn Markdown stuff off."""
 
-    layer = DatabaseFunctionalLayer  # Fixtures need the database for now
+    layer = BaseLayer
 
     def setUp(self):
         super().setUp()
-        self.useFixture(FeatureFixture({"markdown.enabled": None}))
+        self.useFixture(MemoryFeatureFixture({"markdown.enabled": None}))
 
     def test_plain_text(self):
         self.assertThat(
@@ -850,15 +850,57 @@ class TestMarkdown(TestCase):
     configuration.
     """
 
-    layer = DatabaseFunctionalLayer  # Fixtures need the database for now
+    layer = BaseLayer
 
     def setUp(self):
         super().setUp()
-        self.useFixture(FeatureFixture({"markdown.enabled": "on"}))
+        self.useFixture(MemoryFeatureFixture({"markdown.enabled": "on"}))
 
     def test_plain_text(self):
         self.assertThat("hello world", MarksDownAs("<p>hello world</p>"))
 
+    def test_fenced_code(self):
+        self.assertThat(
+            "```\nfoo = 1\n```\n",
+            MarksDownAs("<pre><code>foo = 1\n</code></pre>"),
+        )
+
+    def test_nl2br(self):
+        self.assertThat(
+            "hello\nworld\n\nanother paragraph\n",
+            MarksDownAs("<p>hello<br>\nworld</p>\n<p>another paragraph</p>"),
+        )
+
+    def test_tables(self):
+        self.assertThat(
+            dedent(
+                """\
+                Column 1 | Column 2
+                -------- | --------
+                Cell 1   | Cell 2
+                """
+            ),
+            MarksDownAs(
+                dedent(
+                    """\
+                    <table>
+                    <thead>
+                    <tr>
+                    <th>Column 1</th>
+                    <th>Column 2</th>
+                    </tr>
+                    </thead>
+                    <tbody>
+                    <tr>
+                    <td>Cell 1</td>
+                    <td>Cell 2</td>
+                    </tr>
+                    </tbody>
+                    </table>"""
+                )
+            ),
+        )
+
 
 def test_suite():
     suite = unittest.TestSuite()
diff --git a/requirements/launchpad.txt b/requirements/launchpad.txt
index e66e855..7657381 100644
--- a/requirements/launchpad.txt
+++ b/requirements/launchpad.txt
@@ -20,6 +20,7 @@ bcrypt==3.1.7
 beautifulsoup4==4.7.1
 billiard==3.6.4.0
 bleach==3.1.0
+bleach-allowlist==1.0.3
 breezy==3.1.0
 bson==0.5.9
 boto3==1.16.63
@@ -79,7 +80,7 @@ lazr.sshserver==0.1.13
 lazr.uri==1.0.6
 lpjsmin==0.6
 m2r==0.1.13
-Markdown==2.3.1
+Markdown==3.2.2
 martian==1.3.post1
 maxminddb==1.5.1
 meliae==0.5.1
diff --git a/setup.cfg b/setup.cfg
index b0225dc..ade4428 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -18,6 +18,8 @@ packages = find:
 install_requires =
     ampoule
     beautifulsoup4[lxml]
+    bleach
+    bleach-allowlist
     boto3
     breezy
     celery
@@ -157,7 +159,6 @@ install_requires =
     zstandard
     # Loggerhead dependencies. These should be removed once bug 383360 is
     # fixed and we include it as a source dist.
-    bleach
     Paste
     PasteDeploy
     SimpleTAL

References