← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] ~cjwatson/launchpad:py3-diff-bytes into launchpad:master

 

Colin Watson has proposed merging ~cjwatson/launchpad:py3-diff-bytes into launchpad:master.

Commit message:
Handle diff content as bytes

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/+git/launchpad/+merge/391405

It's ultimately uploaded to the librarian.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/launchpad:py3-diff-bytes into launchpad:master.
diff --git a/lib/lp/code/interfaces/branchmergeproposal.py b/lib/lp/code/interfaces/branchmergeproposal.py
index 2a6626d..bf481c9 100644
--- a/lib/lp/code/interfaces/branchmergeproposal.py
+++ b/lib/lp/code/interfaces/branchmergeproposal.py
@@ -532,8 +532,8 @@ class IBranchMergeProposalEdit(Interface):
 
         If there is not an existing preview diff, one will be created.
 
-        :param diff_content: The raw bytes of the diff content to be put in
-            the librarian.
+        :param diff_content: The raw bytes (or text, which will be encoded
+            using UTF-8) of the diff content to be put in the librarian.
         :param diff_stat: Text describing the files added, remove or modified.
         :param source_revision_id: The revision id that was used from the
             source branch.
diff --git a/lib/lp/code/model/diff.py b/lib/lp/code/model/diff.py
index d74c103..9a36a13 100644
--- a/lib/lp/code/model/diff.py
+++ b/lib/lp/code/model/diff.py
@@ -14,7 +14,7 @@ try:
     from contextlib import ExitStack
 except ImportError:
     from contextlib2 import ExitStack
-from cStringIO import StringIO
+import io
 from operator import attrgetter
 import sys
 from uuid import uuid1
@@ -29,6 +29,7 @@ from breezy.patches import (
 from breezy.plugins.difftacular.generate_diff import diff_ignore_branches
 from lazr.delegates import delegate_to
 import simplejson
+import six
 from sqlobject import (
     ForeignKey,
     IntCol,
@@ -211,7 +212,7 @@ class Diff(SQLBase):
         :from_tree: The old tree in the diff.
         :to_tree: The new tree in the diff.
         """
-        diff_content = StringIO()
+        diff_content = io.BytesIO()
         show_diff_trees(from_tree, to_tree, diff_content, old_label='',
                         new_label='')
         return klass.fromFileAtEnd(diff_content, filename)
@@ -228,7 +229,7 @@ class Diff(SQLBase):
                  strip_prefix_segments=0):
         """Create a Diff from a textual diff.
 
-        :diff_content: The diff text
+        :diff_content: The diff text, as `bytes`.
         :size: The number of bytes in the diff text.
         :filename: The filename to store the content with.  Randomly generated
             if not supplied.
@@ -244,7 +245,7 @@ class Diff(SQLBase):
                 filename, size, diff_content, 'text/x-diff', restricted=True)
             diff_content.seek(0)
             diff_content_bytes = diff_content.read(size)
-            diff_lines_count = len(diff_content_bytes.strip().split('\n'))
+            diff_lines_count = len(diff_content_bytes.strip().split(b'\n'))
         try:
             diffstat = cls.generateDiffstat(
                 diff_content_bytes,
@@ -302,7 +303,7 @@ class Diff(SQLBase):
         :param ignore_brances: A collection of branches to ignore merges from.
         :return: a `Diff`.
         """
-        diff_content = StringIO()
+        diff_content = io.BytesIO()
         with ExitStack() as stack:
             for branch in [source_branch] + ignore_branches:
                 stack.enter_context(read_locked(branch))
@@ -462,7 +463,8 @@ class PreviewDiff(Storm):
         """Create a PreviewDiff with specified values.
 
         :param bmp: The `BranchMergeProposal` this diff references.
-        :param diff_content: The text of the diff, as bytes.
+        :param diff_content: The text of the diff, as bytes (or text, which
+            will be encoded using UTF-8).
         :param source_revision_id: The revision_id of the source branch.
         :param target_revision_id: The revision_id of the target branch.
         :param prerequisite_revision_id: The revision_id of the prerequisite
@@ -470,10 +472,11 @@ class PreviewDiff(Storm):
         :param conflicts: The conflicts, as text.
         :return: A `PreviewDiff` with specified values.
         """
+        diff_content = six.ensure_binary(diff_content)
         filename = str(uuid1()) + '.txt'
         size = len(diff_content)
         diff = Diff.fromFile(
-            StringIO(diff_content), size, filename,
+            io.BytesIO(diff_content), size, filename,
             strip_prefix_segments=strip_prefix_segments)
 
         preview = cls()
diff --git a/lib/lp/code/model/tests/test_diff.py b/lib/lp/code/model/tests/test_diff.py
index 8eaeeab..d6ed025 100644
--- a/lib/lp/code/model/tests/test_diff.py
+++ b/lib/lp/code/model/tests/test_diff.py
@@ -7,8 +7,8 @@ from __future__ import absolute_import, print_function, unicode_literals
 
 __metaclass__ = type
 
-from cStringIO import StringIO
 from difflib import unified_diff
+from io import BytesIO
 import logging
 from textwrap import dedent
 
@@ -192,11 +192,11 @@ class TestDiff(DiffTestCase):
 
     def _create_diff(self, content):
         # Create a Diff object with the content specified.
-        sio = StringIO()
-        sio.write(content)
-        size = sio.tell()
-        sio.seek(0)
-        diff = Diff.fromFile(sio, size)
+        buf = BytesIO()
+        buf.write(content)
+        size = buf.tell()
+        buf.seek(0)
+        diff = Diff.fromFile(buf, size)
         # Commit to make the alias available for reading.
         transaction.commit()
         return diff
@@ -204,14 +204,14 @@ class TestDiff(DiffTestCase):
     def test_text_reads_librarian_content(self):
         # IDiff.text will read at most config.diff.max_read_size bytes from
         # the librarian.
-        content = ''.join(unified_diff('', "1234567890" * 10))
+        content = b''.join(unified_diff(b'', b"1234567890" * 10))
         diff = self._create_diff(content)
         self.assertEqual(content, diff.text)
         self.assertTrue(diff.diff_text.restricted)
 
     def test_oversized_normal(self):
         # A diff smaller than config.diff.max_read_size is not oversized.
-        content = ''.join(unified_diff('', "1234567890" * 10))
+        content = b''.join(unified_diff(b'', b"1234567890" * 10))
         diff = self._create_diff(content)
         self.assertFalse(diff.oversized)
 
@@ -219,14 +219,14 @@ class TestDiff(DiffTestCase):
         # IDiff.text will read at most config.diff.max_read_size bytes from
         # the librarian.
         self.pushConfig("diff", max_read_size=25)
-        content = ''.join(unified_diff('', "1234567890" * 10))
+        content = b''.join(unified_diff(b'', b"1234567890" * 10))
         diff = self._create_diff(content)
         self.assertEqual(content[:25], diff.text)
 
     def test_oversized_for_big_diff(self):
         # A diff larger than config.diff.max_read_size is oversized.
         self.pushConfig("diff", max_read_size=25)
-        content = ''.join(unified_diff('', "1234567890" * 10))
+        content = b''.join(unified_diff(b'', b"1234567890" * 10))
         diff = self._create_diff(content)
         self.assertTrue(diff.oversized)
 
@@ -364,19 +364,19 @@ class TestDiffInScripts(DiffTestCase):
         self.assertEqual({'foo': (0, 0)}, Diff.generateDiffstat(diff_bytes))
 
     def test_fromFileSetsDiffstat(self):
-        diff = Diff.fromFile(StringIO(self.diff_bytes), len(self.diff_bytes))
+        diff = Diff.fromFile(BytesIO(self.diff_bytes), len(self.diff_bytes))
         self.assertEqual(
             {'bar': (0, 3), 'baz': (2, 0), 'foo': (2, 1)}, diff.diffstat)
 
     def test_fromFileAcceptsBinary(self):
         diff_bytes = b"Binary files a\t and b\t differ\n"
-        diff = Diff.fromFile(StringIO(diff_bytes), len(diff_bytes))
+        diff = Diff.fromFile(BytesIO(diff_bytes), len(diff_bytes))
         self.assertEqual({}, diff.diffstat)
 
     def test_fromFileSets_added_removed(self):
         """fromFile sets added_lines_count, removed_lines_count."""
         diff = Diff.fromFile(
-            StringIO(self.diff_bytes_2), len(self.diff_bytes_2))
+            BytesIO(self.diff_bytes_2), len(self.diff_bytes_2))
         self.assertEqual(5, diff.added_lines_count)
         self.assertEqual(4, diff.removed_lines_count)
 
@@ -384,7 +384,7 @@ class TestDiffInScripts(DiffTestCase):
         # If the diff is formatted such that generating the diffstat fails, we
         # want to record an oops but continue.
         diff_bytes = b"not a real diff"
-        diff = Diff.fromFile(StringIO(diff_bytes), len(diff_bytes))
+        diff = Diff.fromFile(BytesIO(diff_bytes), len(diff_bytes))
         oops = self.oopses[0]
         self.assertEqual('MalformedPatchHeader', oops['type'])
         self.assertIs(None, diff.diffstat)
diff --git a/lib/lp/testing/factory.py b/lib/lp/testing/factory.py
index 597785a..d0314ec 100644
--- a/lib/lp/testing/factory.py
+++ b/lib/lp/testing/factory.py
@@ -33,6 +33,7 @@ from email.utils import (
     make_msgid,
     )
 import hashlib
+from io import BytesIO
 from itertools import count
 import os
 from StringIO import StringIO
@@ -1646,10 +1647,10 @@ class BareLaunchpadObjectFactory(ObjectFactory):
     def makeDiff(self, size='small'):
         diff_path = os.path.join(os.path.dirname(__file__),
                                  'data/{}.diff'.format(size))
-        with open(os.path.join(diff_path), 'r') as diff:
+        with open(os.path.join(diff_path), 'rb') as diff:
             diff_text = diff.read()
             return ProxyFactory(
-                Diff.fromFile(StringIO(diff_text), len(diff_text)))
+                Diff.fromFile(BytesIO(diff_text), len(diff_text)))
 
     def makePreviewDiff(self, conflicts=u'', merge_proposal=None,
                         date_created=None, size='small', git=False):