openlp-core team mailing list archive
-
openlp-core team
-
Mailing list archive
-
Message #23057
[Merge] lp:~googol/openlp/duplicate-speed into lp:openlp
Andreas Preikschat has proposed merging lp:~googol/openlp/duplicate-speed into lp:openlp.
Requested reviews:
OpenLP Core (openlp-core)
For more details, see:
https://code.launchpad.net/~googol/openlp/duplicate-speed/+merge/215552
Hello,
- use a worker pool to compare songs in the duplicate finder
NEEDS to be tested on windows, and single core systems!
lp:~googol/openlp/duplicate-speed (revision 2349)
[FAILURE] http://ci.openlp.org/job/Branch-01-Pull/288/
[SUCCESS] http://ci.openlp.org/job/Branch-02-Functional-Tests/248/
[SUCCESS] http://ci.openlp.org/job/Branch-03-Interface-Tests/196/
[SUCCESS] http://ci.openlp.org/job/Branch-04-Windows_Tests/157/
[FAILURE] http://ci.openlp.org/job/Branch-05-Code-Analysis/114/
--
https://code.launchpad.net/~googol/openlp/duplicate-speed/+merge/215552
Your team OpenLP Core is requested to review the proposed merge of lp:~googol/openlp/duplicate-speed into lp:openlp.
=== modified file 'openlp/plugins/songs/forms/duplicatesongremovalform.py'
--- openlp/plugins/songs/forms/duplicatesongremovalform.py 2014-03-21 21:38:08 +0000
+++ openlp/plugins/songs/forms/duplicatesongremovalform.py 2014-04-12 17:05:56 +0000
@@ -31,6 +31,7 @@
"""
import logging
+import multiprocessing
import os
from PyQt4 import QtCore, QtGui
@@ -45,6 +46,20 @@
log = logging.getLogger(__name__)
+class SongIterator(object):
+ """
+ This class implements an iterator for the song duplicate finder. The iterator returns a tuple of two songs. When
+ completely iterated then all songs have once been returned combined with any other songs.
+ """
+ def __init__(self, songs):
+ self.songs = songs
+
+ def __iter__(self):
+ for outer_song_counter in range(len(self.songs) - 1):
+ for inner_song_counter in range(outer_song_counter + 1, len(self.songs)):
+ yield (self.songs[outer_song_counter], self.songs[inner_song_counter])
+
+
class DuplicateSongRemovalForm(OpenLPWizard, RegistryProperties):
"""
This is the Duplicate Song Removal Wizard. It provides functionality to search for and remove duplicate songs
@@ -167,24 +182,31 @@
max_progress_count = max_songs * (max_songs - 1) // 2
self.duplicate_search_progress_bar.setMaximum(max_progress_count)
songs = self.plugin.manager.get_all_objects(Song)
- for outer_song_counter in range(max_songs - 1):
- for inner_song_counter in range(outer_song_counter + 1, max_songs):
- if songs_probably_equal(songs[outer_song_counter], songs[inner_song_counter]):
- duplicate_added = self.add_duplicates_to_song_list(
- songs[outer_song_counter], songs[inner_song_counter])
- if duplicate_added:
- self.found_duplicates_edit.appendPlainText(
- songs[outer_song_counter].title + " = " + songs[inner_song_counter].title)
- self.duplicate_search_progress_bar.setValue(self.duplicate_search_progress_bar.value() + 1)
- # The call to process_events() will keep the GUI responsive.
- self.application.process_events()
- if self.break_search:
- return
- self.review_total_count = len(self.duplicate_song_list)
- if self.review_total_count == 0:
- self.notify_no_duplicates()
+ # Create a worker/process pool to check the songs.
+ process_number = max(1, multiprocessing.cpu_count() - 1)
+ pool = multiprocessing.Pool(process_number)
+ song_list = SongIterator(songs)
+ result = pool.imap_unordered(songs_probably_equal, song_list, 30)
+ # Do not accept any further tasks. Also this closes the processes if all tasks are done.
+ pool.close()
+ # While the processes are still working, start to look at the results.
+ for song_tuple in result:
+ self.duplicate_search_progress_bar.setValue(self.duplicate_search_progress_bar.value() + 1)
+ # The call to process_events() will keep the GUI responsive.
+ self.application.process_events()
+ if self.break_search:
+ pool.terminate()
+ return
+ if song_tuple is None:
+ continue
+ song1, song2 = song_tuple
+ duplicate_added = self.add_duplicates_to_song_list(song1, song2)
+ if duplicate_added:
+ self.found_duplicates_edit.appendPlainText(song1.title + " = " + song2.title)
+ if self.duplicate_song_list:
+ self.button(QtGui.QWizard.NextButton).show()
else:
- self.button(QtGui.QWizard.NextButton).show()
+ self.notify_no_duplicates()
finally:
self.application.set_normal_cursor()
elif page_id == self.review_page_id:
=== modified file 'openlp/plugins/songs/lib/songcompare.py'
--- openlp/plugins/songs/lib/songcompare.py 2014-03-21 21:38:08 +0000
+++ openlp/plugins/songs/lib/songcompare.py 2014-04-12 17:05:56 +0000
@@ -52,13 +52,13 @@
MAX_TYPO_SIZE = 3
-def songs_probably_equal(song1, song2):
+def songs_probably_equal(song_tupel):
"""
Calculate and return whether two songs are probably equal.
- :param song1: The first song to compare.
- :param song2: The second song to compare.
+ :param song_tupel: A tuple of two songs to compare.
"""
+ song1, song2 = song_tupel
if len(song1.search_lyrics) < len(song2.search_lyrics):
small = song1.search_lyrics
large = song2.search_lyrics
@@ -75,18 +75,19 @@
for element in diff_no_typos:
if element[0] == "equal" and _op_length(element) >= MIN_BLOCK_SIZE:
length_of_equal_blocks += _op_length(element)
+
if length_of_equal_blocks >= MIN_BLOCK_SIZE:
- return True
+ return song1, song2
# Check 2: Similarity based on the relative length of the longest equal block.
# Calculate the length of the largest equal block of the diff set.
length_of_longest_equal_block = 0
for element in diff_no_typos:
if element[0] == "equal" and _op_length(element) > length_of_longest_equal_block:
length_of_longest_equal_block = _op_length(element)
- if length_of_equal_blocks >= MIN_BLOCK_SIZE or length_of_longest_equal_block > len(small) * 2 // 3:
- return True
+ if length_of_longest_equal_block > len(small) * 2 // 3:
+ return song1, song2
# Both checks failed. We assume the songs are not equal.
- return False
+ return None
def _op_length(opcode):
=== modified file 'tests/functional/openlp_core_lib/test_renderer.py'
--- tests/functional/openlp_core_lib/test_renderer.py 2014-04-02 18:51:21 +0000
+++ tests/functional/openlp_core_lib/test_renderer.py 2014-04-12 17:05:56 +0000
@@ -49,7 +49,7 @@
def setUp(self):
"""
- Set up the components need for all tests.
+ Set up the components need for all tests
"""
# Mocked out desktop object
self.desktop = MagicMock()
@@ -67,7 +67,7 @@
def initial_renderer_test(self):
"""
- Test the initial renderer state .
+ Test the initial renderer state
"""
# GIVEN: A new renderer instance.
renderer = Renderer()
@@ -77,7 +77,7 @@
def default_screen_layout_test(self):
"""
- Test the default layout calculations.
+ Test the default layout calculations
"""
# GIVEN: A new renderer instance.
renderer = Renderer()
@@ -87,3 +87,35 @@
self.assertEqual(renderer.height, 768, 'The base renderer should be a live controller')
self.assertEqual(renderer.screen_ratio, 0.75, 'The base renderer should be a live controller')
self.assertEqual(renderer.footer_start, 691, 'The base renderer should be a live controller')
+
+ def _get_start_tags_test(self):
+ """
+ Test the _get_start_tags() method
+ """
+ # GIVEN: A new renderer instance. Broken raw_text (missing closing tags).
+ renderer = Renderer()
+ given_raw_text = '{st}{r}Text text text'
+ expected_tuple = ('{st}{r}Text text text{/r}{/st}', '{st}{r}',
+ '<strong><span style="-webkit-text-fill-color:red">')
+
+ # WHEN:
+ result = renderer._get_start_tags(given_raw_text)
+
+ # THEN: Check if the correct tuple is returned.
+ self.assertEqual(result, expected_tuple), 'A tuple should be returned '
+ '(fixed-text, opening tags, html opening tags).'
+
+ def _word_split_test(self):
+ """
+ Test the _word_split() method
+ """
+ # GIVEN: A line of text
+ renderer = Renderer()
+ given_line = 'beginning asdf \n end asdf'
+ expected_words = ['beginning', 'asdf', 'end', 'asdf']
+
+ # WHEN: Split the line
+ result_words = renderer._words_split(given_line)
+
+ # THEN: The word lists should be the same.
+ self.assertListEqual(result_words, expected_words)
=== modified file 'tests/functional/openlp_plugins/songs/test_lib.py'
--- tests/functional/openlp_plugins/songs/test_lib.py 2014-04-02 19:35:09 +0000
+++ tests/functional/openlp_plugins/songs/test_lib.py 2014-04-12 17:05:56 +0000
@@ -96,10 +96,10 @@
self.song2.search_lyrics = self.full_lyrics
# WHEN: We compare those songs for equality.
- result = songs_probably_equal(self.song1, self.song2)
+ result = songs_probably_equal((self.song1, self.song2))
- # THEN: The result should be True.
- assert result is True, 'The result should be True'
+ # THEN: The result should be a tuple..
+ assert result == (self.song1, self.song2), 'The result should be the tuble of songs'
def songs_probably_equal_short_song_test(self):
"""
@@ -110,10 +110,10 @@
self.song2.search_lyrics = self.short_lyrics
# WHEN: We compare those songs for equality.
- result = songs_probably_equal(self.song1, self.song2)
+ result = songs_probably_equal((self.song1, self.song2))
- # THEN: The result should be True.
- assert result is True, 'The result should be True'
+ # THEN: The result should be a tuple..
+ assert result == (self.song1, self.song2), 'The result should be the tuble of songs'
def songs_probably_equal_error_song_test(self):
"""
@@ -124,10 +124,11 @@
self.song2.search_lyrics = self.error_lyrics
# WHEN: We compare those songs for equality.
- result = songs_probably_equal(self.song1, self.song2)
-
- # THEN: The result should be True.
- assert result is True, 'The result should be True'
+ result = songs_probably_equal((self.song1, self.song2))
+
+ # THEN: The result should be a tuple of songs..
+ assert result == (self.song1, self.song2), 'The result should be the tuble of songs'
+
def songs_probably_equal_different_song_test(self):
"""
@@ -138,10 +139,10 @@
self.song2.search_lyrics = self.different_lyrics
# WHEN: We compare those songs for equality.
- result = songs_probably_equal(self.song1, self.song2)
+ result = songs_probably_equal((self.song1, self.song2))
- # THEN: The result should be False.
- assert result is False, 'The result should be False'
+ # THEN: The result should be Nonw.
+ assert result is None, 'The result should be None'
def remove_typos_beginning_test(self):
"""
References