launchpad-reviewers team mailing list archive

Thread
Date

[Merge] ~cjwatson/launchpad:py3-exported-translation-files-bytes into launchpad:master

To: mp+393770@xxxxxxxxxxxxxxxxxx
From: Colin Watson <cjwatson@xxxxxxxxxxxxx>
Date: Fri, 13 Nov 2020 23:30:41 -0000
Reply-to: mp+393770@xxxxxxxxxxxxxxxxxx
Sender: bounces@xxxxxxxxxxxxx

Colin Watson has proposed merging ~cjwatson/launchpad:py3-exported-translation-files-bytes into launchpad:master.

Commit message:
Fix exported translation files to use bytes

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/+git/launchpad/+merge/393770
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/launchpad:py3-exported-translation-files-bytes into launchpad:master.

diff --git a/lib/lp/translations/tests/test_exportresult.py b/lib/lp/translations/tests/test_exportresult.py
index 4d26a86..de3f17b 100644
--- a/lib/lp/translations/tests/test_exportresult.py
+++ b/lib/lp/translations/tests/test_exportresult.py
@@ -6,8 +6,8 @@
 __metaclass__ = type
 
 import hashlib
+import io
 import os.path
-from StringIO import StringIO
 
 from lp.services.librarianserver.testing.fake import FakeLibrarian
 from lp.services.log.logger import DevNullLogger
@@ -25,7 +25,7 @@ class FakeExportedTranslationFile:
         self.file_extension = ext
         self.size = len(content)
         self.content = content
-        self.file = StringIO(content)
+        self.file = io.BytesIO(content)
         self.content_type = content_type
 
     def read(self, *args, **kwargs):
@@ -50,8 +50,8 @@ class TestExportResult(TestCaseWithFactory):
         return ExportResult(requester, request, logger)
 
     def makeExportedTranslationFile(self):
-        filename = self.factory.getUniqueString()
-        content = self.factory.getUniqueString()
+        filename = self.factory.getUniqueUnicode()
+        content = self.factory.getUniqueBytes()
         mime_type = "text/plain"
         return FakeExportedTranslationFile(filename, content, mime_type)
 
diff --git a/lib/lp/translations/utilities/doc/gettext_mo_exporter.txt b/lib/lp/translations/utilities/doc/gettext_mo_exporter.txt
index a556fb8..ceb32d0 100644
--- a/lib/lp/translations/utilities/doc/gettext_mo_exporter.txt
+++ b/lib/lp/translations/utilities/doc/gettext_mo_exporter.txt
@@ -47,7 +47,7 @@ To get a TranslationFileData, we use the GettextPOImporter class.
     ...
     ...     msgid "foo"
     ...     msgstr "bar"
-    ...     '''
+    ...     '''.encode('UTF-8')
     >>> translation_file_es = parser.parse(file_content)
     >>> translation_file_es.path = 'foo/es.po'
     >>> translation_file_es.language_code = 'es'
@@ -62,7 +62,7 @@ When we export a single file, we get a .mo file as output.
     >>> storage = ExportFileStorage()
     >>> exporter.exportTranslationFile(translation_file_es, storage)
     >>> exported_file = storage.export()
-    >>> print exported_file.content_type
+    >>> print(exported_file.content_type)
     application/x-gmo
     >>> is_valid_mofile(exported_file.read())
     True
@@ -80,7 +80,7 @@ When we export more than one file, we get a tarball.
     >>> exported_file = translation_exporter.exportTranslationFiles(
     ...     [translation_file_es, translation_file_sr],
     ...     target_format=TranslationFileFormat.MO)
-    >>> print exported_file.content_type
+    >>> print(exported_file.content_type)
     application/x-gtar
     >>> file_content = exported_file.read()
     >>> is_valid_mofile(file_content)
@@ -94,7 +94,7 @@ When we export more than one file, we get a tarball.
     ...         desc = 'dir'
     ...     else:
     ...         desc = 'unknown'
-    ...     print "| %9s | %s" % (desc, member.name)
+    ...     print("| %9s | %s" % (desc, member.name))
     | dir       | es
     | dir       | es/LC_MESSAGES
     | non-empty | es/LC_MESSAGES/foo.mo
@@ -116,7 +116,7 @@ the binary form.
 POCompiler.compile gets a string with .po file content and gives you
 a .mo file binary stream.
 
-    >>> mofile_content = compiler.compile('''
+    >>> mofile_content = compiler.compile(b'''
     ... msgid "foo"
     ... msgstr "bar"
     ... ''')
@@ -126,7 +126,7 @@ a .mo file binary stream.
 Though, if we provide it with something that is not a .po file content, we
 get an export error exception:
 
-    >>> mofile = compiler.compile('''
+    >>> mofile = compiler.compile(b'''
     ... blah
     ... ''')
     Traceback (most recent call last):
diff --git a/lib/lp/translations/utilities/gettext_mo_exporter.py b/lib/lp/translations/utilities/gettext_mo_exporter.py
index 8f3d627..a97f507 100644
--- a/lib/lp/translations/utilities/gettext_mo_exporter.py
+++ b/lib/lp/translations/utilities/gettext_mo_exporter.py
@@ -34,6 +34,10 @@ class POCompiler:
 
     def compile(self, gettext_po_file):
         """Return a MO version of the given PO file."""
+        if not isinstance(gettext_po_file, bytes):
+            raise TypeError(
+                "gettext_po_file must be bytes, not %s" %
+                type(gettext_po_file))
 
         msgfmt = subprocess.Popen(
             args=[POCompiler.MSGFMT, '-v', '-o', '-', '-'],
@@ -43,8 +47,13 @@ class POCompiler:
         stdout, stderr = msgfmt.communicate(gettext_po_file)
 
         if msgfmt.returncode != 0:
+            # XXX 2020-06-18 cjwatson: Decoding to UTF-8 isn't quite right
+            # here, but we don't currently have access to the file's
+            # encoding here.  With any luck it won't matter too often.
             raise UnknownTranslationExporterError(
-                'Error compiling PO file: %s\n%s' % (gettext_po_file, stderr))
+                'Error compiling PO file: %s\n%s' % (
+                    gettext_po_file.decode('UTF-8', 'replace'),
+                    stderr.decode('UTF-8', 'replace')))
 
         return stdout
 
diff --git a/lib/lp/translations/utilities/gettext_po_parser.py b/lib/lp/translations/utilities/gettext_po_parser.py
index d4bdafc..b961d0e 100644
--- a/lib/lp/translations/utilities/gettext_po_parser.py
+++ b/lib/lp/translations/utilities/gettext_po_parser.py
@@ -78,8 +78,9 @@ def parse_charset(string_to_parse, is_escaped=True):
     # Default to UTF-8 if the header still has the default value or
     # is unknown.
     charset = default_charset
-    match = re.search(pattern, string_to_parse)
-    if match is not None and match.group(1) != 'CHARSET':
+    match = re.search(
+        pattern, six.ensure_text(string_to_parse, 'UTF-8', 'replace'))
+    if match is not None and match.group(1) != b'CHARSET':
         charset = match.group(1).strip()
         try:
             codecs.getencoder(charset)
@@ -507,19 +508,22 @@ class POParser(object):
         # We don't know what charset the data is in, so we parse it one line
         # at a time until we have the header, and then we'll know how to
         # treat the rest of the data.
-        parts = re.split(r'\n|\r\n|\r', self._pending_chars, 1)
+        parts = re.split(br'\n|\r\n|\r', self._pending_chars, 1)
         if len(parts) == 1:
             # only one line
             return None
         line, self._pending_chars = parts
         return line.strip()
 
-    def parse(self, content_text):
+    def parse(self, content_bytes):
         """Parse string as a PO file."""
+        if not isinstance(content_bytes, bytes):
+            raise TypeError(
+                "context_bytes must be bytes, not %s" % type(content_bytes))
         # Initialize the parser.
         self._translation_file = TranslationFileData()
         self._messageids = set()
-        self._pending_chars = content_text
+        self._pending_chars = content_bytes
         self._pending_unichars = u''
         self._lineno = 0
         # Message specific variables.
@@ -529,8 +533,8 @@ class POParser(object):
         self._plural_case = None
         self._parsed_content = u''
 
-        # First thing to do is to get the charset used in the content_text.
-        charset = parse_charset(content_text)
+        # First thing to do is to get the charset used in the content_bytes.
+        charset = parse_charset(content_bytes)
 
         # Now, parse the header, inefficiently. It ought to be short, so
         # this isn't disastrous.
diff --git a/lib/lp/translations/utilities/tests/helpers.py b/lib/lp/translations/utilities/tests/helpers.py
index 20565d6..9a40746 100644
--- a/lib/lp/translations/utilities/tests/helpers.py
+++ b/lib/lp/translations/utilities/tests/helpers.py
@@ -72,10 +72,10 @@ def import_pofile_or_potemplate(file_contents, person,
 
 
 def is_valid_mofile(mofile):
-    """Test whether a string is a valid MO file."""
+    """Test whether a byte string is a valid MO file."""
     # There are different magics for big- and little-endianness, so we
     # test for both.
-    be_magic = '\x95\x04\x12\xde'
-    le_magic = '\xde\x12\x04\x95'
+    be_magic = b'\x95\x04\x12\xde'
+    le_magic = b'\xde\x12\x04\x95'
 
     return mofile[:len(be_magic)] in (be_magic, le_magic)
diff --git a/lib/lp/translations/utilities/tests/test_translation_exporter.py b/lib/lp/translations/utilities/tests/test_translation_exporter.py
index 6f24b99..674dd30 100644
--- a/lib/lp/translations/utilities/tests/test_translation_exporter.py
+++ b/lib/lp/translations/utilities/tests/test_translation_exporter.py
@@ -5,7 +5,7 @@
 
 __metaclass__ = type
 
-from cStringIO import StringIO
+import io
 from operator import attrgetter
 import unittest
 
@@ -40,7 +40,7 @@ class TranslationExporterTestCase(unittest.TestCase):
         self.assertTrue(
             verifyObject(
                 IExportedTranslationFile,
-                ExportedTranslationFile(StringIO())),
+                ExportedTranslationFile(io.BytesIO())),
             "ExportedTranslationFile doesn't follow the interface")
 
     def testGetTranslationFormatExporterByFileFormat(self):