launchpad-reviewers team mailing list archive
  
  - 
     launchpad-reviewers team launchpad-reviewers team
- 
    Mailing list archive
  
- 
    Message #23746
  
 [Merge] lp:~cjwatson/launchpad/defusedxml	into lp:launchpad
  
Colin Watson has proposed merging lp:~cjwatson/launchpad/defusedxml into lp:launchpad.
Commit message:
Use defusedxml to parse untrusted XML.
Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/defusedxml/+merge/368991
Python's standard library documentation recommends this (https://docs.python.org/2/library/xml.html#xml-vulnerabilities), and it seems like a good idea to harden Launchpad against something like a malicious external bug tracker.
The monkey-patching requirement for XML-RPC is a bit unfortunate, but this is mostly just for Bugzilla and Trac so it's tolerable.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/defusedxml into lp:launchpad.
=== modified file 'constraints.txt'
--- constraints.txt	2019-06-14 14:31:08 +0000
+++ constraints.txt	2019-06-18 17:05:22 +0000
@@ -246,6 +246,7 @@
 cssselect==0.9.1
 cssutils==0.9.10
 d2to1==0.2.12
+defusedxml==0.6.0
 Django==1.4
 dkimpy==0.5.4
 # Required by dkimpy
=== modified file 'lib/lp/bugs/externalbugtracker/bugzilla.py'
--- lib/lp/bugs/externalbugtracker/bugzilla.py	2018-11-12 10:59:20 +0000
+++ lib/lp/bugs/externalbugtracker/bugzilla.py	2019-06-18 17:05:22 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2018 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2019 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """Bugzilla ExternalBugTracker utility."""
@@ -15,10 +15,10 @@
 from httplib import BadStatusLine
 import re
 import string
-from xml.dom import minidom
 import xml.parsers.expat
 import xmlrpclib
 
+from defusedxml import minidom
 import pytz
 import requests
 import six
@@ -192,6 +192,8 @@
             bad_chars = bad_chars.replace(char, '')
         trans_map = string.maketrans(bad_chars, ' ' * len(bad_chars))
         contents = contents.translate(trans_map)
+        # Don't use forbid_dtd=True here; Bugzilla XML responses seem to
+        # include DOCTYPE declarations.
         return minidom.parseString(contents)
 
     def _probe_version(self):
=== modified file 'lib/lp/bugs/externalbugtracker/xmlrpc.py'
--- lib/lp/bugs/externalbugtracker/xmlrpc.py	2018-12-10 13:54:34 +0000
+++ lib/lp/bugs/externalbugtracker/xmlrpc.py	2019-06-18 17:05:22 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2018 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2019 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """An XMLRPC transport which uses requests."""
@@ -19,6 +19,7 @@
     Transport,
     )
 
+from defusedxml.xmlrpc import monkey_patch
 import requests
 from requests.cookies import RequestsCookieJar
 import six
@@ -31,6 +32,9 @@
     )
 from lp.services.utils import traceback_info
 
+# Protect against various XML parsing vulnerabilities.
+monkey_patch()
+
 
 class RequestsTransport(Transport):
     """An XML-RPC transport which uses requests.
=== modified file 'lib/lp/bugs/scripts/bugimport.py'
--- lib/lp/bugs/scripts/bugimport.py	2016-09-19 12:33:59 +0000
+++ lib/lp/bugs/scripts/bugimport.py	2019-06-18 17:05:22 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2012 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2019 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """An XML bug importer
@@ -20,8 +20,8 @@
 import logging
 import os
 import time
-from xml.etree import cElementTree
 
+from defusedxml import cElementTree
 import pytz
 from storm.store import Store
 from zope.component import getUtility
@@ -237,7 +237,7 @@
 
     def importBugs(self, ztm):
         """Import bugs from a file."""
-        tree = cElementTree.parse(self.bugs_filename)
+        tree = cElementTree.parse(self.bugs_filename, forbid_dtd=True)
         root = tree.getroot()
         assert root.tag == '{%s}launchpad-bugs' % BUGS_XMLNS, (
             "Root element is wrong: %s" % root.tag)
=== modified file 'lib/lp/bugs/scripts/cveimport.py'
--- lib/lp/bugs/scripts/cveimport.py	2018-06-05 16:48:05 +0000
+++ lib/lp/bugs/scripts/cveimport.py	2019-06-18 17:05:22 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2018 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2019 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """A set of functions related to the ability to parse the XML CVE database,
@@ -10,8 +10,8 @@
 import gzip
 import io
 import time
-import xml.etree.cElementTree as cElementTree
 
+import defusedxml.cElementTree as cElementTree
 import requests
 from zope.component import getUtility
 from zope.event import notify
@@ -248,7 +248,7 @@
 
         :param cve_xml: The CVE XML as a string.
         """
-        dom = cElementTree.fromstring(cve_xml)
+        dom = cElementTree.fromstring(cve_xml, forbid_dtd=True)
         items = dom.findall(CVEDB_NS + 'item')
         if len(items) == 0:
             raise LaunchpadScriptFailure("No CVEs found in XML file.")
=== modified file 'lib/lp/bugs/scripts/tests/test_bugimport.py'
--- lib/lp/bugs/scripts/tests/test_bugimport.py	2016-09-19 12:33:59 +0000
+++ lib/lp/bugs/scripts/tests/test_bugimport.py	2019-06-18 17:05:22 +0000
@@ -1,12 +1,12 @@
-# Copyright 2009-2012 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2019 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 __metaclass__ = type
 
 import os
 import re
-import xml.etree.cElementTree as ET
 
+import defusedxml.cElementTree as ET
 import pytz
 from testtools.content import text_content
 import transaction
=== modified file 'lib/lp/hardwaredb/scripts/hwdbsubmissions.py'
--- lib/lp/hardwaredb/scripts/hwdbsubmissions.py	2019-04-16 17:16:08 +0000
+++ lib/lp/hardwaredb/scripts/hwdbsubmissions.py	2019-06-18 17:05:22 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2018 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2019 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """Parse Hardware Database submissions.
@@ -25,8 +25,8 @@
 import os
 import re
 import sys
-import xml.etree.cElementTree as etree
 
+import defusedxml.cElementTree as etree
 import pytz
 from zope.component import getUtility
 from zope.interface import implementer
@@ -133,7 +133,6 @@
         if logger is None:
             logger = getLogger()
         self.logger = logger
-        self.doc_parser = etree.XMLParser()
         self._logged_warnings = set()
 
         self.validator = {}
@@ -216,7 +215,7 @@
         """
         submission = self.fixFrequentErrors(submission)
         try:
-            tree = etree.parse(io.BytesIO(submission), parser=self.doc_parser)
+            tree = etree.parse(io.BytesIO(submission), forbid_dtd=True)
         except SyntaxError as error_value:
             self._logError(error_value, submission_key)
             return None
=== modified file 'lib/lp/hardwaredb/scripts/tests/test_hwdb_submission_parser.py'
--- lib/lp/hardwaredb/scripts/tests/test_hwdb_submission_parser.py	2019-04-16 17:16:08 +0000
+++ lib/lp/hardwaredb/scripts/tests/test_hwdb_submission_parser.py	2019-06-18 17:05:22 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2011 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2019 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """Tests of the HWDB submissions parser."""
@@ -10,8 +10,9 @@
 import logging
 import os
 from textwrap import dedent
-import xml.etree.cElementTree as etree
+from xml.etree.cElementTree import Element
 
+import defusedxml.cElementTree as etree
 import pytz
 from zope.testing.loghandler import Handler
 
@@ -155,7 +156,7 @@
 
     def getTimestampETreeNode(self, time_string):
         """Return an Elementtree node for an XML tag with a timestamp."""
-        return etree.Element('date_created', value=time_string)
+        return Element('date_created', value=time_string)
 
     def testTimeConversion(self):
         """Test of the conversion of a "time string" into datetime object."""
=== modified file 'lib/lp/services/xmlrpc.py'
--- lib/lp/services/xmlrpc.py	2014-08-29 01:41:14 +0000
+++ lib/lp/services/xmlrpc.py	2019-06-18 17:05:22 +0000
@@ -1,4 +1,4 @@
-# Copyright 2010 Canonical Ltd.  This software is licensed under the
+# Copyright 2010-2019 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """Generic code for XML-RPC in Launchpad."""
@@ -12,6 +12,11 @@
 import socket
 import xmlrpclib
 
+from defusedxml.xmlrpc import monkey_patch
+
+# Protect against various XML parsing vulnerabilities.
+monkey_patch()
+
 
 class LaunchpadFault(xmlrpclib.Fault):
     """Base class for a Launchpad XMLRPC fault.
=== modified file 'lib/lp/translations/utilities/xpi_header.py'
--- lib/lp/translations/utilities/xpi_header.py	2015-10-14 16:23:18 +0000
+++ lib/lp/translations/utilities/xpi_header.py	2019-06-18 17:05:22 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2019 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 __metaclass__ = type
@@ -7,13 +7,10 @@
     'XpiHeader',
     ]
 
-try:
-    import xml.etree.cElementTree as cElementTree
-except ImportError:
-    import cElementTree
 from email.utils import parseaddr
 from StringIO import StringIO
 
+import defusedxml.cElementTree as cElementTree
 from zope.interface import implementer
 
 from lp.translations.interfaces.translationcommonformat import (
@@ -68,7 +65,8 @@
         # Both cElementTree and elementtree fail when trying to parse
         # proper unicode strings.  Use our raw input instead.
         try:
-            parse = cElementTree.iterparse(StringIO(self._raw_content))
+            parse = cElementTree.iterparse(
+                StringIO(self._raw_content), forbid_dtd=True)
             for event, elem in parse:
                 if elem.tag == contributor_tag:
                     # An XPI header can list multiple contributors, but
=== modified file 'setup.py'
--- setup.py	2019-04-24 16:13:34 +0000
+++ setup.py	2019-06-18 17:05:22 +0000
@@ -153,6 +153,7 @@
         'celery',
         'cssselect',
         'cssutils',
+        'defusedxml',
         'dkimpy',
         # Required for dkimpy
         'dnspython',
Follow ups