launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #04970
[Merge] lp:~benji/launchpad/bug-742662-2 into lp:launchpad
Benji York has proposed merging lp:~benji/launchpad/bug-742662-2 into lp:launchpad.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
Related bugs:
Bug #742662 in Launchpad itself: "Mixed new line markers causing OOPS while submitting translations"
https://bugs.launchpad.net/launchpad/+bug/742662
For more details, see:
https://code.launchpad.net/~benji/launchpad/bug-742662-2/+merge/75248
Bug 742662 describes a scenario in which very old imported translations
can contain mixed newlines (\r as well as \n). The fix (discovered by
doing an intra-implementation discussion with Henning) is to ignore the
mixed newlines in already-imported translations while preserving the
current behavior of disallowing any new imports with mixed newlines.
Since there was already two import sanitization functions
(sanitize_translations_from_import and sanitize_translations_from_webui)
which were just references to one-another, it was easy to provide new
definitions that behave as desired.
The only semi-invasive thing I had to do was to rework the place at
which the MixedNewlineMarkersError was raised. That's what caused the
introduction of the new mixed_style constant in the Sanitizer class and
the new handling thereof.
The tests had to be slightly modified to account for the new behavior.
Tests: bin/test -c -m lp.translations.utilities.tests
Lint: several pre-existing lint errors were fixed
QA: I would say to use the reproduction instructions on bug 742662 on
qastaging, but I can't reproduce the error now, so I'll have to figure
something else out.
--
https://code.launchpad.net/~benji/launchpad/bug-742662-2/+merge/75248
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~benji/launchpad/bug-742662-2 into lp:launchpad.
=== modified file 'lib/lp/translations/utilities/sanitize.py'
--- lib/lp/translations/utilities/sanitize.py 2011-09-07 11:34:06 +0000
+++ lib/lp/translations/utilities/sanitize.py 2011-09-13 19:59:35 +0000
@@ -24,6 +24,7 @@
windows_style = u'\r\n'
mac_style = u'\r'
unix_style = u'\n'
+ mixed_style = object()
dot_char = u'\u2022'
@@ -44,28 +45,25 @@
self.prefix = ''
self.postfix = ''
# Get the newline style that is used in the English Singular.
- self.newline_style = self._getNewlineStyle(
- english_singular, 'Original')
+ self.newline_style = self._getNewlineStyle(english_singular)
- def _getNewlineStyle(self, text, text_name):
+ @classmethod
+ def _getNewlineStyle(cls, text):
"""Find out which newline style is used in text."""
- error_message = (
- "%s text (%r) mixes different newline markers." % (
- text_name, text))
style = None
# To avoid confusing the single-character newline styles for mac and
# unix with the two-character windows one, remove the windows-style
# newlines from the text and use that text to search for the other
# two.
- stripped_text = text.replace(self.windows_style, u'')
+ stripped_text = text.replace(cls.windows_style, u'')
if text != stripped_text:
# Text contains windows style new lines.
- style = self.windows_style
+ style = cls.windows_style
- for one_char_style in (self.mac_style, self.unix_style):
+ for one_char_style in (cls.mac_style, cls.unix_style):
if one_char_style in stripped_text:
if style is not None:
- raise MixedNewlineMarkersError(error_message)
+ return cls.mixed_style
style = one_char_style
return style
@@ -135,25 +133,41 @@
def normalizeNewlines(self, translation_text):
"""Return 'translation_text' with newlines sync with english_singular.
+
+ Raises an exception if the text has mixed newline styles.
"""
if self.newline_style is None:
# No newlines in the English singular, so we have nothing to do.
return translation_text
- # Get the style that uses the given text.
- translation_newline_style = self._getNewlineStyle(
- translation_text, 'Translations')
+ # Get the style that is used in the given text.
+ translation_newline_style = self._getNewlineStyle(translation_text)
+
+ if translation_newline_style == self.mixed_style:
+ # The translation has mixed newlines in it; that is not allowed.
+ raise MixedNewlineMarkersError(
+ "Translations text (%r) mixes different newline markers." %
+ translation_text)
if translation_newline_style is None:
- # We don't need to do anything, the text is not changed.
+ # The translation text doesn't contain any newlines, so there is
+ # nothing for us to do.
return translation_text
- # Fix the newline chars.
- return translation_text.replace(
- translation_newline_style, self.newline_style)
-
-
-def sanitize_translations_from_webui(
+ if self.newline_style is self.mixed_style:
+ # The original has mixed newlines (some very old data are like
+ # this, new data with mixed newlines are rejected), so we're just
+ # going to punt and normalize to unix style.
+ return translation_text.replace(
+ translation_newline_style, self.unix_style)
+ else:
+ # Otherwise the translation text should be normalized to use the
+ # same newline style as the original.
+ return translation_text.replace(
+ translation_newline_style, self.newline_style)
+
+
+def sanitize_translations(
english_singular, translations, pluralforms):
"""Sanitize `translations` using sanitize_translation.
@@ -189,6 +203,18 @@
return sanitized_translations
-# There will be a different function for translation coming from imports but
-# for now it is identical to the one used in browser code.
-sanitize_translations_from_import = sanitize_translations_from_webui
+
+def sanitize_translations_from_import(
+ english_singular, translations, pluralforms):
+ # At import time we want to ensure that the english_singular does not
+ # contain mixed newline styles.
+ if Sanitizer._getNewlineStyle(english_singular) is Sanitizer.mixed_style:
+ raise MixedNewlineMarkersError(
+ "Original text (%r) mixes different newline markers." %
+ english_singular)
+ return sanitize_translations(english_singular, translations, pluralforms)
+
+
+def sanitize_translations_from_webui(
+ english_singular, translations, pluralforms):
+ return sanitize_translations(english_singular, translations, pluralforms)
=== modified file 'lib/lp/translations/utilities/tests/test_file_importer.py'
--- lib/lp/translations/utilities/tests/test_file_importer.py 2011-09-07 11:34:06 +0000
+++ lib/lp/translations/utilities/tests/test_file_importer.py 2011-09-13 19:59:35 +0000
@@ -246,7 +246,7 @@
message.addTranslation(0, u'')
potmsgset = self.factory.makePOTMsgSet(
- potemplate = importer.potemplate, sequence=50)
+ potemplate=importer.potemplate, sequence=50)
translation = importer.storeTranslationsInDatabase(
message, potmsgset)
# No TranslationMessage is created.
@@ -566,7 +566,7 @@
def test_not_raises_OutdatedTranslationError_on_upstream_uploads(self):
queue_entry = self._make_queue_entry(True)
try:
- importer = POFileImporter(queue_entry, GettextPOImporter(), None)
+ POFileImporter(queue_entry, GettextPOImporter(), None)
except OutdatedTranslationError:
self.fail("OutdatedTranslationError raised.")
@@ -574,7 +574,7 @@
queue_entry = self._make_queue_entry(True)
pofile = queue_entry.pofile
old_raw_header = pofile.header
- importer = POFileImporter(queue_entry, GettextPOImporter(), None)
+ POFileImporter(queue_entry, GettextPOImporter(), None)
self.assertEqual(old_raw_header, pofile.header)
@@ -762,4 +762,3 @@
importer = POFileImporter(
entry, importers[TranslationFileFormat.PO], None)
self.assertTrue(importer.is_upstream_import_on_sourcepackage)
-
=== modified file 'lib/lp/translations/utilities/tests/test_sanitize.py'
--- lib/lp/translations/utilities/tests/test_sanitize.py 2011-09-07 11:34:06 +0000
+++ lib/lp/translations/utilities/tests/test_sanitize.py 2011-09-13 19:59:35 +0000
@@ -133,7 +133,7 @@
english_newline, translation_text, sanitized))
def test_normalizeNewlines_mixed_newlines_english(self):
- # Mixed newlines in the English text will raise an exception.
+ # Mixed newlines in the English text will not raise an exception.
english_template = u"Text with%smixed%snewlines."
for english_newline_1 in self.newline_styles:
other_newlines = self.newline_styles[:]
@@ -141,8 +141,7 @@
for english_newline_2 in other_newlines:
english_text = english_template % (
english_newline_1, english_newline_2)
- self.assertRaises(
- MixedNewlineMarkersError, Sanitizer, english_text)
+ Sanitizer(english_text)
def test_normalizeNewlines_mixed_newlines_translation(self):
# Mixed newlines in the translation text will raise an exception.