launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #23746
[Merge] lp:~cjwatson/launchpad/defusedxml into lp:launchpad
Colin Watson has proposed merging lp:~cjwatson/launchpad/defusedxml into lp:launchpad.
Commit message:
Use defusedxml to parse untrusted XML.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/defusedxml/+merge/368991
Python's standard library documentation recommends this (https://docs.python.org/2/library/xml.html#xml-vulnerabilities), and it seems like a good idea to harden Launchpad against something like a malicious external bug tracker.
The monkey-patching requirement for XML-RPC is a bit unfortunate, but this is mostly just for Bugzilla and Trac so it's tolerable.
--
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/defusedxml into lp:launchpad.
=== modified file 'constraints.txt'
--- constraints.txt 2019-06-14 14:31:08 +0000
+++ constraints.txt 2019-06-18 17:05:22 +0000
@@ -246,6 +246,7 @@
cssselect==0.9.1
cssutils==0.9.10
d2to1==0.2.12
+defusedxml==0.6.0
Django==1.4
dkimpy==0.5.4
# Required by dkimpy
=== modified file 'lib/lp/bugs/externalbugtracker/bugzilla.py'
--- lib/lp/bugs/externalbugtracker/bugzilla.py 2018-11-12 10:59:20 +0000
+++ lib/lp/bugs/externalbugtracker/bugzilla.py 2019-06-18 17:05:22 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2018 Canonical Ltd. This software is licensed under the
+# Copyright 2009-2019 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""Bugzilla ExternalBugTracker utility."""
@@ -15,10 +15,10 @@
from httplib import BadStatusLine
import re
import string
-from xml.dom import minidom
import xml.parsers.expat
import xmlrpclib
+from defusedxml import minidom
import pytz
import requests
import six
@@ -192,6 +192,8 @@
bad_chars = bad_chars.replace(char, '')
trans_map = string.maketrans(bad_chars, ' ' * len(bad_chars))
contents = contents.translate(trans_map)
+ # Don't use forbid_dtd=True here; Bugzilla XML responses seem to
+ # include DOCTYPE declarations.
return minidom.parseString(contents)
def _probe_version(self):
=== modified file 'lib/lp/bugs/externalbugtracker/xmlrpc.py'
--- lib/lp/bugs/externalbugtracker/xmlrpc.py 2018-12-10 13:54:34 +0000
+++ lib/lp/bugs/externalbugtracker/xmlrpc.py 2019-06-18 17:05:22 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2018 Canonical Ltd. This software is licensed under the
+# Copyright 2009-2019 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""An XMLRPC transport which uses requests."""
@@ -19,6 +19,7 @@
Transport,
)
+from defusedxml.xmlrpc import monkey_patch
import requests
from requests.cookies import RequestsCookieJar
import six
@@ -31,6 +32,9 @@
)
from lp.services.utils import traceback_info
+# Protect against various XML parsing vulnerabilities.
+monkey_patch()
+
class RequestsTransport(Transport):
"""An XML-RPC transport which uses requests.
=== modified file 'lib/lp/bugs/scripts/bugimport.py'
--- lib/lp/bugs/scripts/bugimport.py 2016-09-19 12:33:59 +0000
+++ lib/lp/bugs/scripts/bugimport.py 2019-06-18 17:05:22 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2012 Canonical Ltd. This software is licensed under the
+# Copyright 2009-2019 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""An XML bug importer
@@ -20,8 +20,8 @@
import logging
import os
import time
-from xml.etree import cElementTree
+from defusedxml import cElementTree
import pytz
from storm.store import Store
from zope.component import getUtility
@@ -237,7 +237,7 @@
def importBugs(self, ztm):
"""Import bugs from a file."""
- tree = cElementTree.parse(self.bugs_filename)
+ tree = cElementTree.parse(self.bugs_filename, forbid_dtd=True)
root = tree.getroot()
assert root.tag == '{%s}launchpad-bugs' % BUGS_XMLNS, (
"Root element is wrong: %s" % root.tag)
=== modified file 'lib/lp/bugs/scripts/cveimport.py'
--- lib/lp/bugs/scripts/cveimport.py 2018-06-05 16:48:05 +0000
+++ lib/lp/bugs/scripts/cveimport.py 2019-06-18 17:05:22 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2018 Canonical Ltd. This software is licensed under the
+# Copyright 2009-2019 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""A set of functions related to the ability to parse the XML CVE database,
@@ -10,8 +10,8 @@
import gzip
import io
import time
-import xml.etree.cElementTree as cElementTree
+import defusedxml.cElementTree as cElementTree
import requests
from zope.component import getUtility
from zope.event import notify
@@ -248,7 +248,7 @@
:param cve_xml: The CVE XML as a string.
"""
- dom = cElementTree.fromstring(cve_xml)
+ dom = cElementTree.fromstring(cve_xml, forbid_dtd=True)
items = dom.findall(CVEDB_NS + 'item')
if len(items) == 0:
raise LaunchpadScriptFailure("No CVEs found in XML file.")
=== modified file 'lib/lp/bugs/scripts/tests/test_bugimport.py'
--- lib/lp/bugs/scripts/tests/test_bugimport.py 2016-09-19 12:33:59 +0000
+++ lib/lp/bugs/scripts/tests/test_bugimport.py 2019-06-18 17:05:22 +0000
@@ -1,12 +1,12 @@
-# Copyright 2009-2012 Canonical Ltd. This software is licensed under the
+# Copyright 2009-2019 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
__metaclass__ = type
import os
import re
-import xml.etree.cElementTree as ET
+import defusedxml.cElementTree as ET
import pytz
from testtools.content import text_content
import transaction
=== modified file 'lib/lp/hardwaredb/scripts/hwdbsubmissions.py'
--- lib/lp/hardwaredb/scripts/hwdbsubmissions.py 2019-04-16 17:16:08 +0000
+++ lib/lp/hardwaredb/scripts/hwdbsubmissions.py 2019-06-18 17:05:22 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2018 Canonical Ltd. This software is licensed under the
+# Copyright 2009-2019 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""Parse Hardware Database submissions.
@@ -25,8 +25,8 @@
import os
import re
import sys
-import xml.etree.cElementTree as etree
+import defusedxml.cElementTree as etree
import pytz
from zope.component import getUtility
from zope.interface import implementer
@@ -133,7 +133,6 @@
if logger is None:
logger = getLogger()
self.logger = logger
- self.doc_parser = etree.XMLParser()
self._logged_warnings = set()
self.validator = {}
@@ -216,7 +215,7 @@
"""
submission = self.fixFrequentErrors(submission)
try:
- tree = etree.parse(io.BytesIO(submission), parser=self.doc_parser)
+ tree = etree.parse(io.BytesIO(submission), forbid_dtd=True)
except SyntaxError as error_value:
self._logError(error_value, submission_key)
return None
=== modified file 'lib/lp/hardwaredb/scripts/tests/test_hwdb_submission_parser.py'
--- lib/lp/hardwaredb/scripts/tests/test_hwdb_submission_parser.py 2019-04-16 17:16:08 +0000
+++ lib/lp/hardwaredb/scripts/tests/test_hwdb_submission_parser.py 2019-06-18 17:05:22 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2011 Canonical Ltd. This software is licensed under the
+# Copyright 2009-2019 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""Tests of the HWDB submissions parser."""
@@ -10,8 +10,9 @@
import logging
import os
from textwrap import dedent
-import xml.etree.cElementTree as etree
+from xml.etree.cElementTree import Element
+import defusedxml.cElementTree as etree
import pytz
from zope.testing.loghandler import Handler
@@ -155,7 +156,7 @@
def getTimestampETreeNode(self, time_string):
"""Return an Elementtree node for an XML tag with a timestamp."""
- return etree.Element('date_created', value=time_string)
+ return Element('date_created', value=time_string)
def testTimeConversion(self):
"""Test of the conversion of a "time string" into datetime object."""
=== modified file 'lib/lp/services/xmlrpc.py'
--- lib/lp/services/xmlrpc.py 2014-08-29 01:41:14 +0000
+++ lib/lp/services/xmlrpc.py 2019-06-18 17:05:22 +0000
@@ -1,4 +1,4 @@
-# Copyright 2010 Canonical Ltd. This software is licensed under the
+# Copyright 2010-2019 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""Generic code for XML-RPC in Launchpad."""
@@ -12,6 +12,11 @@
import socket
import xmlrpclib
+from defusedxml.xmlrpc import monkey_patch
+
+# Protect against various XML parsing vulnerabilities.
+monkey_patch()
+
class LaunchpadFault(xmlrpclib.Fault):
"""Base class for a Launchpad XMLRPC fault.
=== modified file 'lib/lp/translations/utilities/xpi_header.py'
--- lib/lp/translations/utilities/xpi_header.py 2015-10-14 16:23:18 +0000
+++ lib/lp/translations/utilities/xpi_header.py 2019-06-18 17:05:22 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009 Canonical Ltd. This software is licensed under the
+# Copyright 2009-2019 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
__metaclass__ = type
@@ -7,13 +7,10 @@
'XpiHeader',
]
-try:
- import xml.etree.cElementTree as cElementTree
-except ImportError:
- import cElementTree
from email.utils import parseaddr
from StringIO import StringIO
+import defusedxml.cElementTree as cElementTree
from zope.interface import implementer
from lp.translations.interfaces.translationcommonformat import (
@@ -68,7 +65,8 @@
# Both cElementTree and elementtree fail when trying to parse
# proper unicode strings. Use our raw input instead.
try:
- parse = cElementTree.iterparse(StringIO(self._raw_content))
+ parse = cElementTree.iterparse(
+ StringIO(self._raw_content), forbid_dtd=True)
for event, elem in parse:
if elem.tag == contributor_tag:
# An XPI header can list multiple contributors, but
=== modified file 'setup.py'
--- setup.py 2019-04-24 16:13:34 +0000
+++ setup.py 2019-06-18 17:05:22 +0000
@@ -153,6 +153,7 @@
'celery',
'cssselect',
'cssutils',
+ 'defusedxml',
'dkimpy',
# Required for dkimpy
'dnspython',
Follow ups