launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #01718
[Merge] lp:~allenap/launchpad/bug-import-munging into lp:launchpad/devel
Gavin Panella has proposed merging lp:~allenap/launchpad/bug-import-munging into lp:launchpad/devel.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
This adds a script, utilities/massage-bug-import-xml, that attempts to repair bug import XML <https://help.launchpad.net/Bugs/ImportFormat> that is not quite right, or needs some extra work before it will import cleanly. See the docstring of massage() for more information on what that involves.
I wrote this script to help import one particular project's bugs, so it is untested other than by use. I have tried to make it a bit more general, but I can imagine that developers may need to customize it for one-off imports.
I don't really have time to do any more work on it, but I want it to go somewhere where people can find it rather than sitting only on my machine.
--
https://code.launchpad.net/~allenap/launchpad/bug-import-munging/+merge/39505
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~allenap/launchpad/bug-import-munging into lp:launchpad/devel.
=== added file 'utilities/massage-bug-import-xml'
--- utilities/massage-bug-import-xml 1970-01-01 00:00:00 +0000
+++ utilities/massage-bug-import-xml 2010-10-28 10:26:25 +0000
@@ -0,0 +1,221 @@
+#!/usr/bin/env python2.6
+# -*- mode: python -*-
+
+from base64 import standard_b64encode
+from optparse import OptionParser
+import sys
+
+from lxml import etree
+
+
# XML namespace used to qualify every element name looked up or created
# in the bug import tree (e.g. '{%s}bug' % NS).
NS = "https://launchpad.net/xmlns/2006/bugs"
+
+
def norm_text(elem):
    """Normalize the text of `elem` in place.

    A missing element (None) is ignored. Text that is None becomes the
    empty string; any other text has its surrounding whitespace stripped.
    """
    if elem is None:
        return
    current = elem.text
    elem.text = u"" if current is None else current.strip()
+
+
def truncate(text, message=None, max_lines=30, max_chars=50000):
    """Shorten `text` so that it fits a line budget and a character budget.

    Text that has at most `max_lines` lines and at most `max_chars`
    characters is returned unchanged. Otherwise the first `max_lines`
    lines are kept, cut further if needed so that the result (including
    the trailing notice) does not exceed `max_chars`, and a notice is
    appended: "[Truncated]", or "[Truncated; <message>]" when `message`
    is given.

    Previously only the line count was considered, so a very long text
    with few lines was returned as-is, and a text of exactly `max_lines`
    lines was marked as truncated although nothing had been removed.

    If `max_chars` is smaller than the notice itself, the result is just
    the notice.
    """
    lines = text.splitlines()
    if len(lines) <= max_lines and len(text) <= max_chars:
        return text

    if message is None:
        notice = "[Truncated]"
    else:
        notice = "[Truncated; %s]" % message
    suffix = u"...\n\n%s" % notice

    head = "\n".join(lines[:max_lines])
    # Leave room for the suffix so the final text honours max_chars.
    room = max(max_chars - len(suffix), 0)
    return u"%s%s" % (head[:room].strip(), suffix)
+
+
def problem(message):
    """Report a problem: write `message` and a newline to stderr."""
    line = "{0}\n".format(message)
    sys.stderr.write(line)
+
+
def problem_detail(message):
    """Write an indented detail line about the current problem to stderr."""
    line = " {0}\n".format(message)
    sys.stderr.write(line)
+
+
def problem_resolution(message):
    """Write the action taken for the current problem to stderr."""
    line = " --> {0}\n".format(message)
    sys.stderr.write(line)
+
+
def problem_resolved():
    """End the current problem report with a blank line on stderr."""
    stream = sys.stderr
    stream.write("\n")
+
+
def _fix_nickname(bug, tags, project_name, fix_nickname, tag_nickname):
    """Ensure `bug` has a nickname, optionally tagging and rewriting it."""
    nickname = bug.find('{%s}nickname' % NS)
    if nickname is None:
        # Add an empty nickname to be filled in below.
        nickname = etree.SubElement(bug, '{%s}nickname' % NS)
    elif tag_nickname and nickname.text:
        # Add the original nickname as a tag. An empty nickname element
        # is skipped so that no empty tag is produced.
        etree.SubElement(tags, '{%s}tag' % NS).text = nickname.text

    # Change the nickname.
    if nickname.text is None or fix_nickname:
        nickname.text = u"%s-%s" % (project_name, bug.get('id'))


def _fix_description(bug, description, first_comment_text, max_length):
    """Repair an empty or overlong (already normalized) description."""
    bug_id = bug.get('id')
    if not description.text:
        problem("Bug %s has no description." % bug_id)
        # Try and get the description from the first comment.
        if first_comment_text is None:
            problem_detail("No comments!")
            problem_resolution("Setting description to '-'.")
            description.text = u'-'
        elif not first_comment_text.text:
            problem_detail("First comment has no text!")
            problem_resolution("Setting description to '-'.")
            description.text = u'-'
        else:
            problem_detail("First comment has text.")
            problem_resolution("Removing description.")
            # NOTE(review): the original comment here contradicted
            # itself; presumably the spec requires a description while
            # the importer treats it as optional -- confirm against the
            # import format documentation.
            bug.remove(description)
        problem_resolved()
    elif len(description.text) > max_length:
        problem(
            "Bug %s's description is too long (%d chars)." % (
                bug_id, len(description.text)))
        # Compare the description to the first comment. If it's the
        # same, we don't need the full description.
        if first_comment_text is None:
            problem_detail("No comments!")
            problem_resolution("Adding comment.")
            raise NotImplementedError("Add a comment.")
        elif description.text == first_comment_text.text:
            problem_detail('Description is same as first comment.')
            problem_resolution('Trimming description.')
            # It's safe to point the user to an attachment here, even
            # though it has not yet been created. It will be created
            # later because the first comment is also too long.
            description.text = truncate(
                description.text, 'see "Full description" attachment')
        else:
            problem_resolution("Truncating description.")
            raise NotImplementedError("Fix overlong description.")
        problem_resolved()


def _fix_first_comment(bug, first_comment, first_comment_text,
                       project_name, max_length):
    """Repair empty or overlong (already normalized) first comment text."""
    bug_id = bug.get('id')
    if not first_comment_text.text:
        problem("Bug %s's first comment has no text." % bug_id)
        problem_resolution("Setting comment text to '-'.")
        first_comment_text.text = u'-'
        problem_resolved()
    elif len(first_comment_text.text) > max_length:
        problem(
            "Bug %s's first comment is too long (%d chars)." % (
                bug_id, len(first_comment_text.text)))
        # Save the original text as an attachment.
        problem_resolution('Adding attachment.')
        attachment = etree.SubElement(
            first_comment, '{%s}attachment' % NS)
        etree.SubElement(attachment, '{%s}filename' % NS).text = (
            u"%s-bug-%s-full-description.txt" % (project_name, bug_id))
        etree.SubElement(attachment, '{%s}title' % NS).text = (
            u"Full description (text/plain, utf-8)")
        etree.SubElement(attachment, '{%s}mimetype' % NS).text = (
            u"text/plain")
        # Base64 output is pure ASCII; decode it so the element text is
        # a text string rather than bytes on every Python version.
        etree.SubElement(attachment, '{%s}contents' % NS).text = (
            standard_b64encode(
                first_comment_text.text.encode('utf-8')).decode('ascii'))
        # Trim the comment text.
        problem_resolution('Trimming comment text.')
        first_comment_text.text = truncate(
            first_comment_text.text,
            'see "Full description" attachment')
        problem_resolved()


def massage(root, project_name, fix_nickname, tag_nickname,
            max_length=50000):
    """Fix problems in the bug import XML tree.

    This includes:

    - Adding a tags element if one does not exist,

    - Fixing up the bug nickname, adding the existing nickname as a tag,

    - Fixing up the description, including truncating it if it's too long,

    - Fixing up the first comment, including truncating it if it's too long,

    - Normalizing whitespace.

    The tree under `root` is modified in place and progress is reported
    on stderr. `project_name` is used to build nicknames and attachment
    file names. `fix_nickname` forces every nickname to
    "<project_name>-<bug id>"; `tag_nickname` adds an existing nickname
    as a tag. `max_length` is the character count above which a
    description or first comment counts as too long.

    Bugs without a description element or without a comment are
    tolerated; the corresponding checks are skipped for them.

    Raises NotImplementedError for an overlong description that has no
    first comment text to compare with, or that differs from the first
    comment.
    """
    # Scan the tree, fixing up issues.
    for bug in root.findall('{%s}bug' % NS):
        # Get or create the tags element.
        tags = bug.find('{%s}tags' % NS)
        if tags is None:
            tags = etree.SubElement(bug, '{%s}tags' % NS)

        _fix_nickname(bug, tags, project_name, fix_nickname, tag_nickname)

        # Get the first comment and its text. A bug may have no comment
        # at all, in which case there is no text element either.
        first_comment = bug.find('{%s}comment' % NS)
        if first_comment is None:
            first_comment_text = None
        else:
            first_comment_text = first_comment.find('{%s}text' % NS)
        norm_text(first_comment_text)

        # Check the description. It must be checked before the first
        # comment is trimmed, because the two texts are compared.
        description = bug.find('{%s}description' % NS)
        if description is not None:
            norm_text(description)
            _fix_description(
                bug, description, first_comment_text, max_length)

        # Check first comment text.
        if first_comment_text is not None:
            _fix_first_comment(
                bug, first_comment, first_comment_text,
                project_name, max_length)
+
+
def main(arguments):
    """Filter bug import XML from stdin to stdout, repairing it on the way.

    `arguments` is the command line without the program name. Invalid
    usage is reported through `parser.error()`. Returns 0 once the
    massaged tree has been written.
    """
    # optparse.OptionParser lower-cases its default usage and help text,
    # so the usage string and the -h/--help option are spelled out here.
    blurb = """
 This acts as a filter: pipe bug import XML into stdin and capture
 stdout. By default it will ensure that bug descriptions and the first
 comment are correct. If either exceeds 50,000 characters it is
 truncated and an attachment is created to hold the original.
 """
    parser = OptionParser(
        usage="Usage: %prog [options]",
        description=blurb.strip(),
        add_help_option=False)
    parser.add_option(
        "-p", "--project", dest="project_name", metavar="NAME",
        default=None,
        help="The project to which this import data refers.")
    parser.add_option(
        "--fix-nickname", action="store_true", dest="fix_nickname",
        default=False,
        help="Normalize the nickname to ${project_name}-${bug-id}.")
    parser.add_option(
        "--tag-nickname", action="store_true", dest="tag_nickname",
        default=False,
        help="Add the original bug nickname as a tag.")
    parser.add_option(
        "-h", "--help", action="help",
        help="Show this help message and exit.")

    opts, positional = parser.parse_args(arguments)
    if positional:
        parser.error("Positional arguments are not recognized.")
    if opts.project_name is None:
        parser.error("A project name must be specified.")

    document = etree.parse(sys.stdin)
    massage(
        root=document.getroot(),
        project_name=opts.project_name,
        fix_nickname=opts.fix_nickname,
        tag_nickname=opts.tag_nickname)
    document.write(
        sys.stdout, encoding='utf-8',
        pretty_print=True, xml_declaration=True)

    return 0
+
+
# Script entry point: pass the command-line arguments (without the
# program name) to main() and use its return value as the exit status.
if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))