widelands-dev team mailing list archive

Thread
Date
Re: [Merge] lp:~widelands-dev/widelands/codecheck_translations into lp:widelands

To: mp+333579@xxxxxxxxxxxxxxxxxx
From: SirVer <SirVer@xxxxxx>
Date: Fri, 24 Nov 2017 08:44:30 -0000
In-reply-to: <151041159446.4961.16382824710211035680.launchpad@ackee.canonical.com>
Reply-to: mp+333579@xxxxxxxxxxxxxxxxxx
Sender: bounces@xxxxxxxxxxxxx
Review: Approve

just nits. otherwise lgtm.

+1 for more automatic flagging!

Diff comments:

> 
> === added file 'cmake/codecheck/rules/translations_printf'
> --- cmake/codecheck/rules/translations_printf	1970-01-01 00:00:00 +0000
> +++ cmake/codecheck/rules/translations_printf	2017-11-15 09:07:16 +0000
> @@ -0,0 +1,235 @@
> +#!/usr/bin/python -tt
> +
> +"""Checks that all translatable strings that have multiple printf placeholders
> +have defined those as reversible.
> +
> +Checks that unordered, ordered and boost placeholders aren't mixed up in the same string.
> +
> +Checks that ngettext singular and plural strings have the same placeholders.
> +
> +Checks that placeholders are numbered in ascending order.
> +
> +"""
> +
> +import re
> +
> +# Regex to find placeholders
> +find_unordered = re.compile(r"(\%[0-9#*]*\.*[0-9#]*[a-zA-Z]{1,2})")

module constants are conventionally written in ALL_CAPS

> +find_ordered = re.compile(r"\%[|]{0,1}(\d\$[0-9#*]*\.*[0-9#]*[a-zA-Z]{1,2})")
> +find_boost = re.compile(r"\%(\d)\%")
> +clean_boost_for_unordered = re.compile(r"(\%\d\%)")
> +
> +
> +def find_unordered_placeholders(sanitized_entry):
> +    """We need to remove boost-style matches first, because we have cases like.
> +
> +    %1%m that match both regex expressions.
> +

remove empty line

> +    """
> +    for entry in clean_boost_for_unordered.findall(sanitized_entry):
> +        sanitized_entry = sanitized_entry.replace(entry, '')
> +    return find_unordered.findall(sanitized_entry)
> +
> +
> +def check_placeholders(entry):
> +    """Make sure that a string satisfies our placeholder policy."""
> +    sanitized_entry = entry.replace('%%', '')
> +    unordered = find_unordered_placeholders(sanitized_entry)
> +    if len(unordered) > 1:
> +        return 'Translatable string has multiple sprintf placeholders that are not ordered:'
> +    else:
> +        ordered = find_ordered.findall(sanitized_entry)
> +        boost = find_boost.findall(sanitized_entry)
> +        if len(unordered) > 0:
> +            if len(ordered) > 0:
> +                return 'Translatable string is mixing unordered sprintf placeholders with ordered placeholders:'
> +            if len(boost) > 0:
> +                return 'Translatable string is mixing unordered sprintf placeholders with boost-style placeholders:'
> +        if len(ordered) > 0 and len(boost) > 0:
> +            return 'Translatable string is mixing ordered sprintf placeholders with boost-style placeholders:'
> +        if len(ordered) > 0:
> +            for entryno, placeholder in enumerate(ordered, 1):
> +                if str(entryno) != placeholder[:placeholder.find('$')]:
> +                    return 'Translatable string has an ordered sprintf placeholder "' + placeholder + '" in position ' + str(entryno) + " - the numbers don't match:"
> +        if len(boost) > 0:
> +            for entryno, placeholder in enumerate(boost, 1):
> +                if str(entryno) != placeholder:
> +                    return 'Translatable string has an ordered boost-style placeholder "' + placeholder + '" in position ' + str(entryno) + " - the numbers don't match:"
> +    return ''
> +
> +
> +def compare_placeholders(entry1, entry2):
> +    """An Ngettext string must have the same placeholders in its singular and
> +    plural strings."""
> +    sanitized_entry1 = entry1.replace('%%', '')
> +    sanitized_entry2 = entry2.replace('%%', '')
> +
> +    # There is interaction between boost and unordered, so boost has to come
> +    # first.
> +    placeholders1 = find_boost.findall(sanitized_entry1)
> +    placeholders2 = find_boost.findall(sanitized_entry2)
> +    if len(placeholders1) == 0 and len(placeholders2) == 0:
> +        placeholders1 = find_ordered.findall(sanitized_entry1)
> +        placeholders2 = find_ordered.findall(sanitized_entry2)
> +        if len(placeholders1) == 0 and len(placeholders2) == 0:
> +            placeholders1 = find_unordered.findall(sanitized_entry1)
> +            placeholders2 = find_unordered.findall(sanitized_entry2)
> +
> +    if len(placeholders1) != len(placeholders2):
> +        return 'Ngettext string has mismatching number of placeholders. Singular has ' + str(len(placeholders1)) + ' placeholder(s) and plural has ' + str(len(placeholders2)) + ' placeholder(s):'
> +
> +    for entryno, placeholder in enumerate(placeholders1, 0):
> +        if placeholder != placeholders2[entryno]:
> +            return 'Ngettext string has mismatching placeholders "' + placeholder + '" and "' + placeholders2[entryno] + '" in position ' + str(entryno + 1) + ':'
> +
> +    return ''
> +
> +
> +def find_line(entry, lines):
> +    """Find the line for the error entry.
> +
> +    Grabs the first match, so this is imprecise for multiple matches
> +

remove empty line?

> +    """
> +    checkme = entry.split('\n')
> +    for lineno, line in enumerate(lines, 0):
> +        if checkme[0] in line:
> +            is_match = True
> +            for rowno, row in enumerate(checkme, 0):
> +                if (lineno + rowno) < len(lines):
> +                    if not checkme[rowno] in lines[lineno + rowno]:
> +                        is_match = False
> +                        break
> +            if is_match:
> +                return lineno + 1
> +    return 0
> +
> +
> +def evaluate_matches(lines, fn):
> +    """Main check."""
> +    result = []
> +
> +    searchme = ''.join(lines).strip()
> +
> +    single_strings = []
> +    string_pairs = []
> +
> +    # Simple gettext
> +    # DOTALL makes . match newlines
> +    matches = re.findall(r"_\(\"(.*?)\"\)", searchme, re.DOTALL)
> +    for match in matches:
> +        if '%' in match:
> +            single_strings.append(match)
> +
> +    matches = re.findall(r"\Wgettext\(\"(.*?)\"\)", searchme, re.DOTALL)
> +    for match in matches:
> +        if '%' in match:
> +            single_strings.append(match)
> +
> +    # pgettext
> +    matches = re.findall(
> +        r"\Wpgettext\(\".*?\",.+?\"(.*?)\"\)", searchme, re.DOTALL)
> +    for match in matches:
> +        if '%' in match:
> +            single_strings.append(match)
> +
> +    # ngettext
> +    matches = re.findall(
> +        r"ngettext\(\"(.*?)\",.+?\"(.*?)\".+?\w+?\)", searchme, re.DOTALL)
> +    for match in matches:
> +        if '%' in match[0] or '%' in match[1]:
> +            string_pairs.append(match)
> +
> +    # npgettext
> +    matches = re.findall(
> +        r"npgettext\(\".*?\",.+?(.*?)\",.+?\"(.*?)\".+?\w+?\)", searchme, re.DOTALL)
> +    for match in matches:
> +        if '%' in match[0] or '%' in match[1]:
> +            string_pairs.append(match)
> +
> +    for entry in single_strings:
> +        check_result = check_placeholders(entry)
> +        if len(check_result) > 0:
> +            result.append((fn, find_line(entry, lines),
> +                           check_result + '\n' + entry))
> +
> +    for checkme in string_pairs:
> +        for entry in checkme:
> +            check_result = check_placeholders(entry)
> +            if len(check_result) > 0:
> +                result.append((fn, find_line(
> +                    checkme[0], lines), check_result + '\n' + checkme[0] + ' - ' + checkme[1]))
> +
> +        check_result = compare_placeholders(checkme[0], checkme[1])
> +        if len(check_result) > 0:
> +            result.append((fn, find_line(
> +                checkme[0], lines), check_result + '\n' + checkme[0] + ' - ' + checkme[1]))
> +
> +    return result
> +
> +# File this is called on is always called testdir/test.h
> +forbidden = [
> +    # Unordered with multiple
> +    '_("One %d and another %s")',
> +
> +    # Mixed Boost + Ordered
> +    '_("One %1$i and another %2%")',
> +
> +    # Mixed Boost + Unordered
> +    '_("One %i and another %2%")',
> +
> +    # Mixed Ordered + Unordered
> +    '_("One %i and another %2$d")',
> +
> +    # Wrong order
> +    '_("One %2% and another %1%")',
> +    '_("One %2$lu and another %1$lu")',
> +]
> +
> +allowed = [
> +    # Boost followed by letter
> +    'npgettext("foo", "%1%d", "%1%d", value)',
> +    'ngettext("%1%m", "%1%m", value)',
> +    'gettext("%1%s")',
> +    '_("%1%s")',
> +    # Boost
> +    'npgettext("foo", "%1%", "%1%", value)',
> +    'ngettext("%1%", "%1%", value)',
> +    'gettext("%1%")',
> +    '_("%1%")',
> +    # Boost with percent
> +    'npgettext("foo", "%1%%%", "%1%%%", value)',
> +    'ngettext("%1%%%", "%1%%%", value)',
> +    'gettext("%1%%%")',
> +    '_("%1%%%")',
> +    # Boost with multiple
> +    'npgettext("foo", "One %1% and another %2%", "Many %1% and another %2%", value,)',
> +    'ngettext("One %1% and another %2%", "Many %1% and another %2%", value)',
> +    'gettext("One %1% and another %2%")',
> +    '_("One %1% and another %2%")',
> +    # Ordered
> +    'npgettext("foo", "%1$d", "%1$d", value)',
> +    'ngettext("%1$d", "%1$d", value)',
> +    'gettext("%1%s")',
> +    '_("%1$s")',
> +    # Ordered with percent
> +    'npgettext("foo", "%1$d%%", "%1$d%%", value)',
> +    'ngettext("%1$d%%", "%1$d%%", value)',
> +    'gettext("%1$s%%")',
> +    '_("%1%s%%")',
> +    # Ordered with multiple
> +    'npgettext("foo", "One %1$d and another %2$s", "Many %1$d and another %2$s", value,)',
> +    'ngettext("One %1$lu and another %2$s", "Many %1$lu and another %2$s", value)',
> +    'gettext("One %1$d and another %2$s")',
> +    '_("One %1$d and another %2$s")',
> +    # Unordered
> +    'npgettext("foo", "%d", "%d", value)',
> +    'ngettext("%d", "%d", value)',
> +    'gettext("%s")',
> +    '_("%1$s")',
> +    # Unordered with percent
> +    'npgettext("foo", "%d%%", "%d%%", value)',
> +    'ngettext("%lu%%", "%lu%%", value)',
> +    'gettext("%s%%")',
> +    '_("%s%%")',
> +]
> 
> === added file 'cmake/codecheck/rules/translators_comments_above_translations'
> --- cmake/codecheck/rules/translators_comments_above_translations	1970-01-01 00:00:00 +0000
> +++ cmake/codecheck/rules/translators_comments_above_translations	2017-11-15 09:07:16 +0000
> @@ -0,0 +1,62 @@
> +#!/usr/bin/python -tt
> +
> +"""Checks that all TRANSLATORS: comments are immediately above the string to be
> +translated.
> +
> +Also checks for malformed translators' comment tag. Will catch
> +permutation typos like "TRASNLATORS".
> +

remove empty line?

> +"""
> +
> +import re
> +
> +
> +def evaluate_matches(lines, fn):
> +    result = []
> +    translators_comment_started = False
> +    translators_comment_ended = False
> +    last_translators_comment = ''
> +    last_translators_comment_line = 0
> +
> +    translator_tag = re.compile('.*[*/]\s+[TRANSLATOR]{10}.*')
> +
> +    for lineno, line in enumerate(lines, 1):
> +        # Find start of translators' comment
> +        if translator_tag.match(line):
> +            translators_comment_started = True
> +            # Create error for malformed tag - we want consistency here
> +            if not '/** TRANSLATORS: ' in line:
> +                result.append((fn, lineno, 'Translators\' comment starting with "' +
> +                               line.strip() + '" does not start with: "/** TRANSLATORS:"'))
> +        # Comments can go over multiple lines, so we check for their end
> +        # separately
> +        if translators_comment_started:
> +            last_translators_comment = last_translators_comment + ' ' + line
> +            if '*/' in line:
> +                translators_comment_ended = True
> +                last_translators_comment_line = lineno
> +
> +        # Check for gettext. This can fail if we have another function name ending with _.
> +        # Should hopefully fail rarely though, since that's violating our code
> +        # style.
> +        if '_("' in line or 'gettext' in line:
> +            # We have a complete comment, so there should be a gettext call
> +            # immediately afterwards. Otherwise, xgettext won't find the
> +            # translators' comment.
> +            if translators_comment_ended:
> +                if lineno != last_translators_comment_line + 1:
> +                    result.append(
> +                        (fn, lineno, 'Translators\' comment is not directly above its translation:\n' + last_translators_comment.strip()))
> +            # Reset if there was no complete comment
> +            translators_comment_started = False
> +            translators_comment_ended = False
> +            last_translators_comment = ''
> +            last_translators_comment_line = 0
> +    return result
> +
> +# File this is called on is always called testdir/test.h
> +forbidden = [

no tests?

> +]
> +
> +allowed = [
> +]


-- 
https://code.launchpad.net/~widelands-dev/widelands/codecheck_translations/+merge/333579
Your team Widelands Developers is subscribed to branch lp:~widelands-dev/widelands/codecheck_translations.
References

[Merge] lp:~widelands-dev/widelands/codecheck_translations into lp:widelands
From: GunChleoc, 2017-11-11