launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #25662
[Merge] ~cjwatson/launchpad:py3-exported-translation-files-bytes into launchpad:master
Colin Watson has proposed merging ~cjwatson/launchpad:py3-exported-translation-files-bytes into launchpad:master.
Commit message:
Fix exported translation files to use bytes
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/+git/launchpad/+merge/393770
--
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/launchpad:py3-exported-translation-files-bytes into launchpad:master.
diff --git a/lib/lp/translations/tests/test_exportresult.py b/lib/lp/translations/tests/test_exportresult.py
index 4d26a86..de3f17b 100644
--- a/lib/lp/translations/tests/test_exportresult.py
+++ b/lib/lp/translations/tests/test_exportresult.py
@@ -6,8 +6,8 @@
__metaclass__ = type
import hashlib
+import io
import os.path
-from StringIO import StringIO
from lp.services.librarianserver.testing.fake import FakeLibrarian
from lp.services.log.logger import DevNullLogger
@@ -25,7 +25,7 @@ class FakeExportedTranslationFile:
self.file_extension = ext
self.size = len(content)
self.content = content
- self.file = StringIO(content)
+ self.file = io.BytesIO(content)
self.content_type = content_type
def read(self, *args, **kwargs):
@@ -50,8 +50,8 @@ class TestExportResult(TestCaseWithFactory):
return ExportResult(requester, request, logger)
def makeExportedTranslationFile(self):
- filename = self.factory.getUniqueString()
- content = self.factory.getUniqueString()
+ filename = self.factory.getUniqueUnicode()
+ content = self.factory.getUniqueBytes()
mime_type = "text/plain"
return FakeExportedTranslationFile(filename, content, mime_type)
diff --git a/lib/lp/translations/utilities/doc/gettext_mo_exporter.txt b/lib/lp/translations/utilities/doc/gettext_mo_exporter.txt
index a556fb8..ceb32d0 100644
--- a/lib/lp/translations/utilities/doc/gettext_mo_exporter.txt
+++ b/lib/lp/translations/utilities/doc/gettext_mo_exporter.txt
@@ -47,7 +47,7 @@ To get a TranslationFileData, we use the GettextPOImporter class.
...
... msgid "foo"
... msgstr "bar"
- ... '''
+ ... '''.encode('UTF-8')
>>> translation_file_es = parser.parse(file_content)
>>> translation_file_es.path = 'foo/es.po'
>>> translation_file_es.language_code = 'es'
@@ -62,7 +62,7 @@ When we export a single file, we get a .mo file as output.
>>> storage = ExportFileStorage()
>>> exporter.exportTranslationFile(translation_file_es, storage)
>>> exported_file = storage.export()
- >>> print exported_file.content_type
+ >>> print(exported_file.content_type)
application/x-gmo
>>> is_valid_mofile(exported_file.read())
True
@@ -80,7 +80,7 @@ When we export more than one file, we get a tarball.
>>> exported_file = translation_exporter.exportTranslationFiles(
... [translation_file_es, translation_file_sr],
... target_format=TranslationFileFormat.MO)
- >>> print exported_file.content_type
+ >>> print(exported_file.content_type)
application/x-gtar
>>> file_content = exported_file.read()
>>> is_valid_mofile(file_content)
@@ -94,7 +94,7 @@ When we export more than one file, we get a tarball.
... desc = 'dir'
... else:
... desc = 'unknown'
- ... print "| %9s | %s" % (desc, member.name)
+ ... print("| %9s | %s" % (desc, member.name))
| dir | es
| dir | es/LC_MESSAGES
| non-empty | es/LC_MESSAGES/foo.mo
@@ -116,7 +116,7 @@ the binary form.
POCompiler.compile gets a string with .po file content and gives you
a .mo file binary stream.
- >>> mofile_content = compiler.compile('''
+ >>> mofile_content = compiler.compile(b'''
... msgid "foo"
... msgstr "bar"
... ''')
@@ -126,7 +126,7 @@ a .mo file binary stream.
Though, if we provide it with something that is not a .po file content, we
get an export error exception:
- >>> mofile = compiler.compile('''
+ >>> mofile = compiler.compile(b'''
... blah
... ''')
Traceback (most recent call last):
diff --git a/lib/lp/translations/utilities/gettext_mo_exporter.py b/lib/lp/translations/utilities/gettext_mo_exporter.py
index 8f3d627..a97f507 100644
--- a/lib/lp/translations/utilities/gettext_mo_exporter.py
+++ b/lib/lp/translations/utilities/gettext_mo_exporter.py
@@ -34,6 +34,10 @@ class POCompiler:
def compile(self, gettext_po_file):
"""Return a MO version of the given PO file."""
+ if not isinstance(gettext_po_file, bytes):
+ raise TypeError(
+ "gettext_po_file must be bytes, not %s" %
+ type(gettext_po_file))
msgfmt = subprocess.Popen(
args=[POCompiler.MSGFMT, '-v', '-o', '-', '-'],
@@ -43,8 +47,13 @@ class POCompiler:
stdout, stderr = msgfmt.communicate(gettext_po_file)
if msgfmt.returncode != 0:
+ # XXX 2020-06-18 cjwatson: Decoding to UTF-8 isn't quite right
+ # here, but we don't currently have access to the file's
+ # encoding here. With any luck it won't matter too often.
raise UnknownTranslationExporterError(
- 'Error compiling PO file: %s\n%s' % (gettext_po_file, stderr))
+ 'Error compiling PO file: %s\n%s' % (
+ gettext_po_file.decode('UTF-8', 'replace'),
+ stderr.decode('UTF-8', 'replace')))
return stdout
diff --git a/lib/lp/translations/utilities/gettext_po_parser.py b/lib/lp/translations/utilities/gettext_po_parser.py
index d4bdafc..b961d0e 100644
--- a/lib/lp/translations/utilities/gettext_po_parser.py
+++ b/lib/lp/translations/utilities/gettext_po_parser.py
@@ -78,8 +78,9 @@ def parse_charset(string_to_parse, is_escaped=True):
# Default to UTF-8 if the header still has the default value or
# is unknown.
charset = default_charset
- match = re.search(pattern, string_to_parse)
- if match is not None and match.group(1) != 'CHARSET':
+ match = re.search(
+ pattern, six.ensure_text(string_to_parse, 'UTF-8', 'replace'))
+ if match is not None and match.group(1) != b'CHARSET':
charset = match.group(1).strip()
try:
codecs.getencoder(charset)
@@ -507,19 +508,22 @@ class POParser(object):
# We don't know what charset the data is in, so we parse it one line
# at a time until we have the header, and then we'll know how to
# treat the rest of the data.
- parts = re.split(r'\n|\r\n|\r', self._pending_chars, 1)
+ parts = re.split(br'\n|\r\n|\r', self._pending_chars, 1)
if len(parts) == 1:
# only one line
return None
line, self._pending_chars = parts
return line.strip()
- def parse(self, content_text):
+ def parse(self, content_bytes):
"""Parse string as a PO file."""
+ if not isinstance(content_bytes, bytes):
+ raise TypeError(
+ "context_bytes must be bytes, not %s" % type(content_bytes))
# Initialize the parser.
self._translation_file = TranslationFileData()
self._messageids = set()
- self._pending_chars = content_text
+ self._pending_chars = content_bytes
self._pending_unichars = u''
self._lineno = 0
# Message specific variables.
@@ -529,8 +533,8 @@ class POParser(object):
self._plural_case = None
self._parsed_content = u''
- # First thing to do is to get the charset used in the content_text.
- charset = parse_charset(content_text)
+ # First thing to do is to get the charset used in the content_bytes.
+ charset = parse_charset(content_bytes)
# Now, parse the header, inefficiently. It ought to be short, so
# this isn't disastrous.
diff --git a/lib/lp/translations/utilities/tests/helpers.py b/lib/lp/translations/utilities/tests/helpers.py
index 20565d6..9a40746 100644
--- a/lib/lp/translations/utilities/tests/helpers.py
+++ b/lib/lp/translations/utilities/tests/helpers.py
@@ -72,10 +72,10 @@ def import_pofile_or_potemplate(file_contents, person,
def is_valid_mofile(mofile):
- """Test whether a string is a valid MO file."""
+ """Test whether a byte string is a valid MO file."""
# There are different magics for big- and little-endianness, so we
# test for both.
- be_magic = '\x95\x04\x12\xde'
- le_magic = '\xde\x12\x04\x95'
+ be_magic = b'\x95\x04\x12\xde'
+ le_magic = b'\xde\x12\x04\x95'
return mofile[:len(be_magic)] in (be_magic, le_magic)
diff --git a/lib/lp/translations/utilities/tests/test_translation_exporter.py b/lib/lp/translations/utilities/tests/test_translation_exporter.py
index 6f24b99..674dd30 100644
--- a/lib/lp/translations/utilities/tests/test_translation_exporter.py
+++ b/lib/lp/translations/utilities/tests/test_translation_exporter.py
@@ -5,7 +5,7 @@
__metaclass__ = type
-from cStringIO import StringIO
+import io
from operator import attrgetter
import unittest
@@ -40,7 +40,7 @@ class TranslationExporterTestCase(unittest.TestCase):
self.assertTrue(
verifyObject(
IExportedTranslationFile,
- ExportedTranslationFile(StringIO())),
+ ExportedTranslationFile(io.BytesIO())),
"ExportedTranslationFile doesn't follow the interface")
def testGetTranslationFormatExporterByFileFormat(self):