← Back to team overview

openlp-core team mailing list archive

[Merge] lp:~m2j/openlp/bug-1094342 into lp:openlp

 

Meinert Jordan has proposed merging lp:~m2j/openlp/bug-1094342 into lp:openlp.

Requested reviews:
  Andreas Preikschat (googol)
Related bugs:
  Bug #1094342 in OpenLP: "Natural sorting broken on windows"
  https://bugs.launchpad.net/openlp/+bug/1094342

For more details, see:
https://code.launchpad.net/~m2j/openlp/bug-1094342/+merge/156655

This branch replaces the "cmp=" argument when sorting a list of strings. This is necessary for Python3 transition.

Additionally it uses ICU for locale aware string sorting. For Windows this is mandatory as Microsoft provides a broken locale.strcoll method. For Python 2 we can't use Pythons locale aware sorting, as a bug breaks the key generation for unicode.

I replaced the natural compare algorithm in openlp/plugins/songs/lib/__init__.py and placed it in openlp/core/utils/__init__.py as there might be wider use. Additional I precompute the full sorting key for song sorting. This should improve the song sorting performance significantly.

The code in lines 113 to 114 is commented out as it is not needed in Python 2. The expected performance penalty should be low, but still not necessary.
-- 
https://code.launchpad.net/~m2j/openlp/bug-1094342/+merge/156655
Your team OpenLP Core is subscribed to branch lp:openlp.
=== modified file 'openlp/core/ui/exceptionform.py'
--- openlp/core/ui/exceptionform.py	2013-03-14 10:46:19 +0000
+++ openlp/core/ui/exceptionform.py	2013-04-02 17:55:26 +0000
@@ -70,6 +70,11 @@
 except ImportError:
     MAKO_VERSION = u'-'
 try:
+    import icu
+    ICU_VERSION = u'OK'
+except ImportError:
+    ICU_VERSION = u'-'
+try:
     import uno
     arg = uno.createUnoStruct(u'com.sun.star.beans.PropertyValue')
     arg.Name = u'nodepath'
@@ -143,6 +148,7 @@
             u'PyEnchant: %s\n' % ENCHANT_VERSION + \
             u'PySQLite: %s\n' % SQLITE_VERSION + \
             u'Mako: %s\n' % MAKO_VERSION + \
+            u'pyICU: %s\n' % ICU_VERSION + \
             u'pyUNO bridge: %s\n' % UNO_VERSION + \
             u'VLC: %s\n' % VLC_VERSION
         if platform.system() == u'Linux':

=== modified file 'openlp/core/ui/thememanager.py'
--- openlp/core/ui/thememanager.py	2013-03-19 19:43:22 +0000
+++ openlp/core/ui/thememanager.py	2013-04-02 17:55:26 +0000
@@ -44,7 +44,7 @@
 from openlp.core.lib.ui import critical_error_message_box, create_widget_action
 from openlp.core.theme import Theme
 from openlp.core.ui import FileRenameForm, ThemeForm
-from openlp.core.utils import AppLocation, delete_file, locale_compare, get_filesystem_encoding
+from openlp.core.utils import AppLocation, delete_file, get_local_key, get_filesystem_encoding
 
 log = logging.getLogger(__name__)
 
@@ -418,7 +418,7 @@
         self.theme_list_widget.clear()
         files = AppLocation.get_files(self.settings_section, u'.png')
         # Sort the themes by its name considering language specific
-        files.sort(key=lambda file_name: unicode(file_name), cmp=locale_compare)
+        files.sort(key=lambda file_name: get_local_key(unicode(file_name)))
         # now process the file list of png files
         for name in files:
             # check to see file is in theme root directory

=== modified file 'openlp/core/utils/__init__.py'
--- openlp/core/utils/__init__.py	2013-03-04 10:55:02 +0000
+++ openlp/core/utils/__init__.py	2013-04-02 17:55:26 +0000
@@ -38,6 +38,7 @@
 from subprocess import Popen, PIPE
 import sys
 import urllib2
+import icu
 
 from PyQt4 import QtGui, QtCore
 
@@ -56,10 +57,12 @@
 log = logging.getLogger(__name__)
 APPLICATION_VERSION = {}
 IMAGES_FILTER = None
+ICU_COLLATOR = None
 UNO_CONNECTION_TYPE = u'pipe'
 #UNO_CONNECTION_TYPE = u'socket'
 CONTROL_CHARS = re.compile(r'[\x00-\x1F\x7F-\x9F]', re.UNICODE)
 INVALID_FILE_CHARS = re.compile(r'[\\/:\*\?"<>\|\+\[\]%]', re.UNICODE)
+DIGITS_OR_NONDIGITS = re.compile(r'\d+|\D+', re.UNICODE)
 
 
 class VersionThread(QtCore.QThread):
@@ -379,21 +382,32 @@
     return re.sub('\%[a-zA-Z]', match_formatting, text)
 
 
-def locale_compare(string1, string2):
-    """
-    Compares two strings according to the current locale settings.
-
-    As any other compare function, returns a negative, or a positive value,
-    or 0, depending on whether string1 collates before or after string2 or
-    is equal to it. Comparison is case insensitive.
-    """
-    # Function locale.strcoll() from standard Python library does not work properly on Windows.
-    return locale.strcoll(string1.lower(), string2.lower())
-
-
-# For performance reasons provide direct reference to compare function without wrapping it in another function making
-# the string lowercase. This is needed for sorting songs.
-locale_direct_compare = locale.strcoll
+def get_local_key(string):
+    """
+    Creates a key for case insensitive, locale aware string sorting.
+    """
+    string = string.lower()
+    # For Python 3 on platforms other than Windows ICU is not necessary. In those cases locale.strxfrm(str) can be used.
+    global ICU_COLLATOR
+    if ICU_COLLATOR is None:
+        from languagemanager import LanguageManager
+        locale = LanguageManager.get_language()
+        icu_locale = icu.Locale(locale)
+        ICU_COLLATOR = icu.Collator.createInstance(icu_locale)
+    return ICU_COLLATOR.getSortKey(string)
+
+
+def get_natural_key(string):
+    """
+    Generate a key for locale aware natural string sorting.
+    Returns a list of string compare keys and integers.
+    """
+    key = DIGITS_OR_NONDIGITS.findall(string)
+    key = [int(part) if part.isdigit() else get_local_key(part) for part in key]
+    # Python 3 does not support comparision of different types anymore. So make sure, that we do not compare str and int.
+    #if string[0].isdigit():
+    #    return [''] + key 
+    return key
 
 
 from applocation import AppLocation
@@ -403,4 +417,4 @@
 
 __all__ = [u'AppLocation', u'ActionList', u'LanguageManager', u'get_application_version', u'check_latest_version',
     u'add_actions', u'get_filesystem_encoding', u'get_web_page', u'get_uno_command', u'get_uno_instance',
-    u'delete_file', u'clean_filename', u'format_time', u'locale_compare', u'locale_direct_compare']
+    u'delete_file', u'clean_filename', u'format_time', u'get_local_key', u'get_natural_key']

=== modified file 'openlp/plugins/bibles/forms/bibleimportform.py'
--- openlp/plugins/bibles/forms/bibleimportform.py	2013-03-19 19:43:22 +0000
+++ openlp/plugins/bibles/forms/bibleimportform.py	2013-04-02 17:55:26 +0000
@@ -38,7 +38,7 @@
 from openlp.core.lib.db import delete_database
 from openlp.core.lib.ui import critical_error_message_box
 from openlp.core.ui.wizard import OpenLPWizard, WizardStrings
-from openlp.core.utils import AppLocation, locale_compare
+from openlp.core.utils import AppLocation, get_local_key
 from openlp.plugins.bibles.lib.manager import BibleFormat
 from openlp.plugins.bibles.lib.db import BiblesResourcesDB, clean_filename
 
@@ -455,7 +455,7 @@
         """
         self.webTranslationComboBox.clear()
         bibles = self.web_bible_list[index].keys()
-        bibles.sort(cmp=locale_compare)
+        bibles.sort(key=get_local_key)
         self.webTranslationComboBox.addItems(bibles)
 
     def onOsisBrowseButtonClicked(self):

=== modified file 'openlp/plugins/bibles/lib/mediaitem.py'
--- openlp/plugins/bibles/lib/mediaitem.py	2013-03-28 20:08:07 +0000
+++ openlp/plugins/bibles/lib/mediaitem.py	2013-04-02 17:55:26 +0000
@@ -36,7 +36,7 @@
 from openlp.core.lib.searchedit import SearchEdit
 from openlp.core.lib.ui import set_case_insensitive_completer, create_horizontal_adjusting_combo_box, \
     critical_error_message_box, find_and_set_in_combo_box, build_icon
-from openlp.core.utils import locale_compare
+from openlp.core.utils import get_local_key
 from openlp.plugins.bibles.forms import BibleImportForm, EditBibleForm
 from openlp.plugins.bibles.lib import LayoutStyle, DisplayStyle, VerseReferenceList, get_reference_separator, \
     LanguageSelection, BibleStrings
@@ -325,7 +325,7 @@
         # Get all bibles and sort the list.
         bibles = self.plugin.manager.get_bibles().keys()
         bibles = filter(None, bibles)
-        bibles.sort(cmp=locale_compare)
+        bibles.sort(key=get_local_key)
         # Load the bibles into the combo boxes.
         self.quickVersionComboBox.addItems(bibles)
         self.quickSecondComboBox.addItems(bibles)
@@ -461,7 +461,7 @@
                     for book in book_data:
                         data = BiblesResourcesDB.get_book_by_id(book.book_reference_id)
                         books.append(data[u'name'] + u' ')
-                books.sort(cmp=locale_compare)
+                books.sort(key=get_local_key)
         set_case_insensitive_completer(books, self.quickSearchEdit)
 
     def on_import_click(self):

=== modified file 'openlp/plugins/custom/lib/db.py'
--- openlp/plugins/custom/lib/db.py	2013-01-05 22:17:30 +0000
+++ openlp/plugins/custom/lib/db.py	2013-04-02 17:55:26 +0000
@@ -35,7 +35,7 @@
 from sqlalchemy.orm import mapper
 
 from openlp.core.lib.db import BaseModel, init_db
-from openlp.core.utils import locale_compare
+from openlp.core.utils import get_local_key
 
 class CustomSlide(BaseModel):
     """
@@ -44,11 +44,10 @@
     # By default sort the customs by its title considering language specific
     # characters.
     def __lt__(self, other):
-        r = locale_compare(self.title, other.title)
-        return True if r < 0 else False
+        return get_local_key(self.title) < get_local_key(other.title)
 
     def __eq__(self, other):
-        return 0 == locale_compare(self.title, other.title)
+        return get_local_key(self.title) == get_local_key(other.title)
 
 
 def init_schema(url):

=== modified file 'openlp/plugins/images/lib/mediaitem.py'
--- openlp/plugins/images/lib/mediaitem.py	2013-03-23 07:07:06 +0000
+++ openlp/plugins/images/lib/mediaitem.py	2013-04-02 17:55:26 +0000
@@ -36,7 +36,7 @@
     StringContent, TreeWidgetWithDnD, UiStrings, build_icon, check_directory_exists, check_item_selected, \
     create_thumb, translate, validate_thumb
 from openlp.core.lib.ui import create_widget_action, critical_error_message_box
-from openlp.core.utils import AppLocation, delete_file, locale_compare, get_images_filter
+from openlp.core.utils import AppLocation, delete_file, get_local_key, get_images_filter
 from openlp.plugins.images.forms import AddGroupForm, ChooseGroupForm
 from openlp.plugins.images.lib.db import ImageFilenames, ImageGroups
 
@@ -255,7 +255,7 @@
             The ID of the group that will be added recursively
         """
         image_groups = self.manager.get_all_objects(ImageGroups, ImageGroups.parent_id == parent_group_id)
-        image_groups.sort(cmp=locale_compare, key=lambda group_object: group_object.group_name)
+        image_groups.sort(key=lambda group_object: get_local_key(group_object.group_name))
         folder_icon = build_icon(u':/images/image_group.png')
         for image_group in image_groups:
             group = QtGui.QTreeWidgetItem()
@@ -286,7 +286,7 @@
             combobox.clear()
             combobox.top_level_group_added = False
         image_groups = self.manager.get_all_objects(ImageGroups, ImageGroups.parent_id == parent_group_id)
-        image_groups.sort(cmp=locale_compare, key=lambda group_object: group_object.group_name)
+        image_groups.sort(key=lambda group_object: get_local_key(group_object.group_name))
         for image_group in image_groups:
             combobox.addItem(prefix + image_group.group_name, image_group.id)
             self.fill_groups_combobox(combobox, image_group.id, prefix + '   ')
@@ -338,7 +338,7 @@
             self.expand_group(open_group.id)
         # Sort the images by its filename considering language specific
         # characters.
-        images.sort(cmp=locale_compare, key=lambda image_object: os.path.split(unicode(image_object.filename))[1])
+        images.sort(key=lambda image_object: get_local_key(os.path.split(unicode(image_object.filename))[1]))
         for imageFile in images:
             log.debug(u'Loading image: %s', imageFile.filename)
             filename = os.path.split(imageFile.filename)[1]
@@ -525,9 +525,9 @@
                 group_items.append(item)
             if isinstance(item.data(0, QtCore.Qt.UserRole), ImageFilenames):
                 image_items.append(item)
-        group_items.sort(cmp=locale_compare, key=lambda item: item.text(0))
+        group_items.sort(key=lambda item: get_local_key(item.text(0)))
         target_group.addChildren(group_items)
-        image_items.sort(cmp=locale_compare, key=lambda item: item.text(0))
+        image_items.sort(key=lambda item: get_local_key(item.text(0)))
         target_group.addChildren(image_items)
 
     def generate_slide_data(self, service_item, item=None, xmlVersion=False,

=== modified file 'openlp/plugins/media/lib/mediaitem.py'
--- openlp/plugins/media/lib/mediaitem.py	2013-03-23 06:46:41 +0000
+++ openlp/plugins/media/lib/mediaitem.py	2013-04-02 17:55:26 +0000
@@ -37,7 +37,7 @@
 from openlp.core.lib.ui import critical_error_message_box, create_horizontal_adjusting_combo_box
 from openlp.core.ui import DisplayController, Display, DisplayControllerType
 from openlp.core.ui.media import get_media_players, set_media_players
-from openlp.core.utils import AppLocation, locale_compare
+from openlp.core.utils import AppLocation, get_local_key
 
 log = logging.getLogger(__name__)
 
@@ -261,7 +261,7 @@
     def load_list(self, media, target_group=None):
         # Sort the media by its filename considering language specific
         # characters.
-        media.sort(cmp=locale_compare, key=lambda filename: os.path.split(unicode(filename))[1])
+        media.sort(key=lambda filename: get_local_key(os.path.split(unicode(filename))[1]))
         for track in media:
             track_info = QtCore.QFileInfo(track)
             if not os.path.exists(track):
@@ -287,7 +287,7 @@
 
     def getList(self, type=MediaType.Audio):
         media = Settings().value(self.settings_section + u'/media files')
-        media.sort(cmp=locale_compare, key=lambda filename: os.path.split(unicode(filename))[1])
+        media.sort(key=lambda filename: get_local_key(os.path.split(unicode(filename))[1]))
         ext = []
         if type == MediaType.Audio:
             ext = self.media_controller.audio_extensions_list

=== modified file 'openlp/plugins/presentations/lib/mediaitem.py'
--- openlp/plugins/presentations/lib/mediaitem.py	2013-03-23 06:46:41 +0000
+++ openlp/plugins/presentations/lib/mediaitem.py	2013-04-02 17:55:26 +0000
@@ -35,7 +35,7 @@
 from openlp.core.lib import MediaManagerItem, Registry, ItemCapabilities, ServiceItemContext, Settings, UiStrings, \
     build_icon, check_item_selected, create_thumb, translate, validate_thumb
 from openlp.core.lib.ui import critical_error_message_box, create_horizontal_adjusting_combo_box
-from openlp.core.utils import locale_compare
+from openlp.core.utils import get_local_key
 from openlp.plugins.presentations.lib import MessageListener
 
 log = logging.getLogger(__name__)
@@ -153,8 +153,7 @@
         if not initialLoad:
             self.main_window.display_progress_bar(len(files))
         # Sort the presentations by its filename considering language specific characters.
-        files.sort(cmp=locale_compare,
-            key=lambda filename: os.path.split(unicode(filename))[1])
+        files.sort(key=lambda filename: get_local_key(os.path.split(unicode(filename))[1]))
         for file in files:
             if not initialLoad:
                 self.main_window.increment_progress_bar()

=== modified file 'openlp/plugins/songs/forms/songexportform.py'
--- openlp/plugins/songs/forms/songexportform.py	2013-03-14 22:22:18 +0000
+++ openlp/plugins/songs/forms/songexportform.py	2013-04-02 17:55:26 +0000
@@ -37,7 +37,6 @@
 from openlp.core.lib import Registry, UiStrings, create_separated_list, build_icon, translate
 from openlp.core.lib.ui import critical_error_message_box
 from openlp.core.ui.wizard import OpenLPWizard, WizardStrings
-from openlp.plugins.songs.lib import natcmp
 from openlp.plugins.songs.lib.db import Song
 from openlp.plugins.songs.lib.openlyricsexport import OpenLyricsExport
 
@@ -222,7 +221,7 @@
         # Load the list of songs.
         self.application.set_busy_cursor()
         songs = self.plugin.manager.get_all_objects(Song)
-        songs.sort(cmp=natcmp, key=lambda song: song.sort_key)
+        songs.sort(key=lambda song: song.sort_key)
         for song in songs:
             # No need to export temporary songs.
             if song.temporary:

=== modified file 'openlp/plugins/songs/lib/__init__.py'
--- openlp/plugins/songs/lib/__init__.py	2013-03-11 21:10:29 +0000
+++ openlp/plugins/songs/lib/__init__.py	2013-04-02 17:55:26 +0000
@@ -34,7 +34,7 @@
 from PyQt4 import QtGui
 
 from openlp.core.lib import translate
-from openlp.core.utils import CONTROL_CHARS, locale_direct_compare
+from openlp.core.utils import CONTROL_CHARS
 from db import Author
 from ui import SongStrings
 
@@ -592,37 +592,3 @@
     text = u''.join(out)
     return text, default_encoding
 
-
-def natcmp(a, b):
-    """
-    Natural string comparison which mimics the behaviour of Python's internal cmp function.
-    """
-    if len(a) <= len(b):
-        for i, key in enumerate(a):
-            if isinstance(key, int) and isinstance(b[i], int):
-                result = cmp(key, b[i])
-            elif isinstance(key, int) and not isinstance(b[i], int):
-                result = locale_direct_compare(str(key), b[i])
-            elif not isinstance(key, int) and isinstance(b[i], int):
-                result = locale_direct_compare(key, str(b[i]))
-            else:
-                result = locale_direct_compare(key, b[i])
-            if result != 0:
-                return result
-        if len(a) == len(b):
-            return 0
-        else:
-            return -1
-    else:
-        for i, key in enumerate(b):
-            if isinstance(a[i], int) and isinstance(key, int):
-                result = cmp(a[i], key)
-            elif isinstance(a[i], int) and not isinstance(key, int):
-                result = locale_direct_compare(str(a[i]), key)
-            elif not isinstance(a[i], int) and isinstance(key, int):
-                result = locale_direct_compare(a[i], str(key))
-            else:
-                result = locale_direct_compare(a[i], key)
-            if result != 0:
-                return result
-        return 1

=== modified file 'openlp/plugins/songs/lib/db.py'
--- openlp/plugins/songs/lib/db.py	2013-01-18 23:31:02 +0000
+++ openlp/plugins/songs/lib/db.py	2013-04-02 17:55:26 +0000
@@ -38,6 +38,7 @@
 from sqlalchemy.sql.expression import func
 
 from openlp.core.lib.db import BaseModel, init_db
+from openlp.core.utils import get_natural_key
 
 
 class Author(BaseModel):
@@ -69,36 +70,15 @@
     def __init__(self):
         self.sort_key = ()
 
-    def _try_int(self, s):
-        """
-        Convert to integer if possible.
-        """
-        try:
-            return int(s)
-        except:
-            return s.lower()
-
-    def _natsort_key(self, s):
-        """
-        Used internally to get a tuple by which s is sorted.
-        """
-        return map(self._try_int, re.findall(r'(\d+|\D+)', s))
-
-    # This decorator tells sqlalchemy to call this method everytime
-    # any data on this object is updated.
-
     @reconstructor
     def init_on_load(self):
         """
-        Precompute a tuple to be used for sorting.
+        Precompute a natural sorting, locale aware sorting key.
 
         Song sorting is performance sensitive operation.
-        To get maximum speed lets precompute the string
-        used for comparison.
+        To get maximum speed lets precompute the sorting key.
         """
-        # Avoid the overhead of converting string to lowercase and to QString
-        # with every call to sort().
-        self.sort_key = self._natsort_key(self.title)
+        self.sort_key = get_natural_key(self.title)
 
 
 class Topic(BaseModel):

=== modified file 'openlp/plugins/songs/lib/mediaitem.py'
--- openlp/plugins/songs/lib/mediaitem.py	2013-03-23 06:46:41 +0000
+++ openlp/plugins/songs/lib/mediaitem.py	2013-04-02 17:55:26 +0000
@@ -43,7 +43,7 @@
 from openlp.plugins.songs.forms.songmaintenanceform import SongMaintenanceForm
 from openlp.plugins.songs.forms.songimportform import SongImportForm
 from openlp.plugins.songs.forms.songexportform import SongExportForm
-from openlp.plugins.songs.lib import VerseType, clean_string, natcmp
+from openlp.plugins.songs.lib import VerseType, clean_string
 from openlp.plugins.songs.lib.db import Author, Song, Book, MediaFile
 from openlp.plugins.songs.lib.ui import SongStrings
 from openlp.plugins.songs.lib.xml import OpenLyrics, SongXML
@@ -225,7 +225,7 @@
         log.debug(u'display results Song')
         self.save_auto_select_id()
         self.list_view.clear()
-        searchresults.sort(cmp=natcmp, key=lambda song: song.sort_key)
+        searchresults.sort(key=lambda song: song.sort_key)
         for song in searchresults:
             # Do not display temporary songs
             if song.temporary:

=== modified file 'scripts/check_dependencies.py'
--- scripts/check_dependencies.py	2013-03-14 10:51:49 +0000
+++ scripts/check_dependencies.py	2013-04-02 17:55:26 +0000
@@ -83,6 +83,7 @@
     'mako',
     'migrate',
     'uno',
+    'icu',
 ]
 
 

=== modified file 'tests/functional/openlp_core_utils/test_utils.py'
--- tests/functional/openlp_core_utils/test_utils.py	2012-12-07 21:38:02 +0000
+++ tests/functional/openlp_core_utils/test_utils.py	2013-04-02 17:55:26 +0000
@@ -5,7 +5,7 @@
 
 from mock import patch
 
-from openlp.core.utils import get_filesystem_encoding, _get_frozen_path
+from openlp.core.utils import get_filesystem_encoding, _get_frozen_path, get_local_key, get_natural_key
 
 class TestUtils(TestCase):
     """
@@ -56,3 +56,28 @@
             # THEN: The frozen parameter is returned
             assert _get_frozen_path(u'frozen', u'not frozen') == u'frozen', u'Should return "frozen"'
 
+    def get_local_key_test(self):
+        """
+        Test the get_local_key(string) function
+        """
+        with patch(u'openlp.core.utils.languagemanager.LanguageManager.get_language') as mocked_get_language:
+            # GIVEN: The language is German
+            # 0x00C3 (A with diaresis) should be sorted as "A". 0x00DF (sharp s) should be sorted as "ss".
+            mocked_get_language.return_value = u'de'
+            unsorted_list = [u'Auszug', u'Aushang', u'\u00C4u\u00DFerung']
+            # WHEN: We sort the list and use get_locale_key() to generate the sorting keys
+            # THEN: We get a properly sorted list
+            assert sorted(unsorted_list, key=get_local_key) == [u'Aushang', u'\u00C4u\u00DFerung', u'Auszug'], u'Strings should be sorted properly'
+
+    def get_natural_key_test(self):
+        """
+        Test the get_natural_key(string) function
+        """
+        with patch(u'openlp.core.utils.languagemanager.LanguageManager.get_language') as mocked_get_language:
+            # GIVEN: The language is English (a language, which sorts digits before letters)
+            mocked_get_language.return_value = u'en'
+            unsorted_list = [u'item 10a', u'item 3b', u'1st item']
+            # WHEN: We sort the list and use get_natural_key() to generate the sorting keys
+            # THEN: We get a properly sorted list
+            assert sorted(unsorted_list, key=get_natural_key) == [u'1st item', u'item 3b', u'item 10a'], u'Numbers should be sorted naturally'
+


Follow ups