← Back to team overview

openlp-core team mailing list archive

[Merge] lp:~googol/openlp/duplicate-speed into lp:openlp

 

Andreas Preikschat has proposed merging lp:~googol/openlp/duplicate-speed into lp:openlp.

Requested reviews:
  OpenLP Core (openlp-core)

For more details, see:
https://code.launchpad.net/~googol/openlp/duplicate-speed/+merge/215552

Hello,

- use a worker pool to compare songs in the duplicate finder

NEEDS to be tested on windows, and single core systems!

lp:~googol/openlp/duplicate-speed (revision 2349)
[FAILURE] http://ci.openlp.org/job/Branch-01-Pull/288/
[SUCCESS] http://ci.openlp.org/job/Branch-02-Functional-Tests/248/
[SUCCESS] http://ci.openlp.org/job/Branch-03-Interface-Tests/196/
[SUCCESS] http://ci.openlp.org/job/Branch-04-Windows_Tests/157/
[FAILURE] http://ci.openlp.org/job/Branch-05-Code-Analysis/114/
-- 
https://code.launchpad.net/~googol/openlp/duplicate-speed/+merge/215552
Your team OpenLP Core is requested to review the proposed merge of lp:~googol/openlp/duplicate-speed into lp:openlp.
=== modified file 'openlp/plugins/songs/forms/duplicatesongremovalform.py'
--- openlp/plugins/songs/forms/duplicatesongremovalform.py	2014-03-21 21:38:08 +0000
+++ openlp/plugins/songs/forms/duplicatesongremovalform.py	2014-04-12 17:05:56 +0000
@@ -31,6 +31,7 @@
 """
 
 import logging
+import multiprocessing
 import os
 
 from PyQt4 import QtCore, QtGui
@@ -45,6 +46,20 @@
 log = logging.getLogger(__name__)
 
 
+class SongIterator(object):
+    """
+    This class implements an iterator for the song duplicate finder. The iterator returns a tuple of two songs. When
+    completely iterated then all songs have once been returned combined with any other songs.
+    """
+    def __init__(self, songs):
+        self.songs = songs
+
+    def __iter__(self):
+        for outer_song_counter in range(len(self.songs) - 1):
+            for inner_song_counter in range(outer_song_counter + 1, len(self.songs)):
+                yield (self.songs[outer_song_counter], self.songs[inner_song_counter])
+
+
 class DuplicateSongRemovalForm(OpenLPWizard, RegistryProperties):
     """
     This is the Duplicate Song Removal Wizard. It provides functionality to search for and remove duplicate songs
@@ -167,24 +182,31 @@
                 max_progress_count = max_songs * (max_songs - 1) // 2
                 self.duplicate_search_progress_bar.setMaximum(max_progress_count)
                 songs = self.plugin.manager.get_all_objects(Song)
-                for outer_song_counter in range(max_songs - 1):
-                    for inner_song_counter in range(outer_song_counter + 1, max_songs):
-                        if songs_probably_equal(songs[outer_song_counter], songs[inner_song_counter]):
-                            duplicate_added = self.add_duplicates_to_song_list(
-                                songs[outer_song_counter], songs[inner_song_counter])
-                            if duplicate_added:
-                                self.found_duplicates_edit.appendPlainText(
-                                    songs[outer_song_counter].title + "  =  " + songs[inner_song_counter].title)
-                        self.duplicate_search_progress_bar.setValue(self.duplicate_search_progress_bar.value() + 1)
-                        # The call to process_events() will keep the GUI responsive.
-                        self.application.process_events()
-                        if self.break_search:
-                            return
-                self.review_total_count = len(self.duplicate_song_list)
-                if self.review_total_count == 0:
-                    self.notify_no_duplicates()
+                # Create a worker/process pool to check the songs.
+                process_number = max(1, multiprocessing.cpu_count() - 1)
+                pool = multiprocessing.Pool(process_number)
+                song_list = SongIterator(songs)
+                result = pool.imap_unordered(songs_probably_equal, song_list, 30)
+                # Do not accept any further tasks. Also this closes the processes if all tasks are done.
+                pool.close()
+                # While the processes are still working, start to look at the results.
+                for song_tuple in result:
+                    self.duplicate_search_progress_bar.setValue(self.duplicate_search_progress_bar.value() + 1)
+                    # The call to process_events() will keep the GUI responsive.
+                    self.application.process_events()
+                    if self.break_search:
+                        pool.terminate()
+                        return
+                    if song_tuple is None:
+                        continue
+                    song1, song2 = song_tuple
+                    duplicate_added = self.add_duplicates_to_song_list(song1, song2)
+                    if duplicate_added:
+                        self.found_duplicates_edit.appendPlainText(song1.title + "  =  " + song2.title)
+                if self.duplicate_song_list:
+                    self.button(QtGui.QWizard.NextButton).show()
                 else:
-                    self.button(QtGui.QWizard.NextButton).show()
+                    self.notify_no_duplicates()
             finally:
                 self.application.set_normal_cursor()
         elif page_id == self.review_page_id:

=== modified file 'openlp/plugins/songs/lib/songcompare.py'
--- openlp/plugins/songs/lib/songcompare.py	2014-03-21 21:38:08 +0000
+++ openlp/plugins/songs/lib/songcompare.py	2014-04-12 17:05:56 +0000
@@ -52,13 +52,13 @@
 MAX_TYPO_SIZE = 3
 
 
-def songs_probably_equal(song1, song2):
+def songs_probably_equal(song_tupel):
     """
     Calculate and return whether two songs are probably equal.
 
-    :param song1: The first song to compare.
-    :param song2:  The second song to compare.
+    :param song_tupel: A tuple of two songs to compare.
     """
+    song1, song2 = song_tupel
     if len(song1.search_lyrics) < len(song2.search_lyrics):
         small = song1.search_lyrics
         large = song2.search_lyrics
@@ -75,18 +75,19 @@
     for element in diff_no_typos:
         if element[0] == "equal" and _op_length(element) >= MIN_BLOCK_SIZE:
             length_of_equal_blocks += _op_length(element)
+
     if length_of_equal_blocks >= MIN_BLOCK_SIZE:
-        return True
+        return song1, song2
     # Check 2: Similarity based on the relative length of the longest equal block.
     # Calculate the length of the largest equal block of the diff set.
     length_of_longest_equal_block = 0
     for element in diff_no_typos:
         if element[0] == "equal" and _op_length(element) > length_of_longest_equal_block:
             length_of_longest_equal_block = _op_length(element)
-    if length_of_equal_blocks >= MIN_BLOCK_SIZE or length_of_longest_equal_block > len(small) * 2 // 3:
-        return True
+    if length_of_longest_equal_block > len(small) * 2 // 3:
+        return song1, song2
     # Both checks failed. We assume the songs are not equal.
-    return False
+    return None
 
 
 def _op_length(opcode):

=== modified file 'tests/functional/openlp_core_lib/test_renderer.py'
--- tests/functional/openlp_core_lib/test_renderer.py	2014-04-02 18:51:21 +0000
+++ tests/functional/openlp_core_lib/test_renderer.py	2014-04-12 17:05:56 +0000
@@ -49,7 +49,7 @@
 
     def setUp(self):
         """
-        Set up the components need for all tests.
+        Set up the components need for all tests
         """
         # Mocked out desktop object
         self.desktop = MagicMock()
@@ -67,7 +67,7 @@
 
     def initial_renderer_test(self):
         """
-        Test the initial renderer state .
+        Test the initial renderer state
         """
         # GIVEN: A new renderer instance.
         renderer = Renderer()
@@ -77,7 +77,7 @@
 
     def default_screen_layout_test(self):
         """
-        Test the default layout calculations.
+        Test the default layout calculations
         """
         # GIVEN: A new renderer instance.
         renderer = Renderer()
@@ -87,3 +87,35 @@
         self.assertEqual(renderer.height, 768, 'The base renderer should be a live controller')
         self.assertEqual(renderer.screen_ratio, 0.75, 'The base renderer should be a live controller')
         self.assertEqual(renderer.footer_start, 691, 'The base renderer should be a live controller')
+
+    def _get_start_tags_test(self):
+        """
+        Test the _get_start_tags() method
+        """
+        # GIVEN: A new renderer instance. Broken raw_text (missing closing tags).
+        renderer = Renderer()
+        given_raw_text = '{st}{r}Text text text'
+        expected_tuple = ('{st}{r}Text text text{/r}{/st}', '{st}{r}',
+                          '<strong><span style="-webkit-text-fill-color:red">')
+
+        # WHEN:
+        result = renderer._get_start_tags(given_raw_text)
+
+        # THEN: Check if the correct tuple is returned.
+        self.assertEqual(result, expected_tuple), 'A tuple should be returned '
+        '(fixed-text, opening tags, html opening tags).'
+
+    def _word_split_test(self):
+        """
+        Test the _word_split() method
+        """
+        # GIVEN: A line of text
+        renderer = Renderer()
+        given_line = 'beginning asdf \n end asdf'
+        expected_words = ['beginning', 'asdf', 'end', 'asdf']
+
+        # WHEN: Split the line
+        result_words = renderer._words_split(given_line)
+
+        # THEN: The word lists should be the same.
+        self.assertListEqual(result_words, expected_words)

=== modified file 'tests/functional/openlp_plugins/songs/test_lib.py'
--- tests/functional/openlp_plugins/songs/test_lib.py	2014-04-02 19:35:09 +0000
+++ tests/functional/openlp_plugins/songs/test_lib.py	2014-04-12 17:05:56 +0000
@@ -96,10 +96,10 @@
         self.song2.search_lyrics = self.full_lyrics
 
         # WHEN: We compare those songs for equality.
-        result = songs_probably_equal(self.song1, self.song2)
+        result = songs_probably_equal((self.song1, self.song2))
 
-        # THEN: The result should be True.
-        assert result is True, 'The result should be True'
+        # THEN: The result should be a tuple..
+        assert result == (self.song1, self.song2), 'The result should be the tuble of songs'
 
     def songs_probably_equal_short_song_test(self):
         """
@@ -110,10 +110,10 @@
         self.song2.search_lyrics = self.short_lyrics
 
         # WHEN: We compare those songs for equality.
-        result = songs_probably_equal(self.song1, self.song2)
+        result = songs_probably_equal((self.song1, self.song2))
 
-        # THEN: The result should be True.
-        assert result is True, 'The result should be True'
+        # THEN: The result should be a tuple..
+        assert result == (self.song1, self.song2), 'The result should be the tuble of songs'
 
     def songs_probably_equal_error_song_test(self):
         """
@@ -124,10 +124,11 @@
         self.song2.search_lyrics = self.error_lyrics
 
         # WHEN: We compare those songs for equality.
-        result = songs_probably_equal(self.song1, self.song2)
-
-        # THEN: The result should be True.
-        assert result is True, 'The result should be True'
+        result = songs_probably_equal((self.song1, self.song2))
+
+        # THEN: The result should be a tuple of songs..
+        assert result == (self.song1, self.song2), 'The result should be the tuble of songs'
+
 
     def songs_probably_equal_different_song_test(self):
         """
@@ -138,10 +139,10 @@
         self.song2.search_lyrics = self.different_lyrics
 
         # WHEN: We compare those songs for equality.
-        result = songs_probably_equal(self.song1, self.song2)
+        result = songs_probably_equal((self.song1, self.song2))
 
-        # THEN: The result should be False.
-        assert result is False, 'The result should be False'
+        # THEN: The result should be Nonw.
+        assert result is None, 'The result should be None'
 
     def remove_typos_beginning_test(self):
         """


References