← Back to team overview

openlp-core team mailing list archive

[Merge] lp:~patrick-zakweb/openlp/duplicate-removal-review into lp:openlp

 

mohij has proposed merging lp:~patrick-zakweb/openlp/duplicate-removal-review into lp:openlp.

Requested reviews:
  OpenLP Core (openlp-core)

For more details, see:
https://code.launchpad.net/~patrick-zakweb/openlp/duplicate-removal-review/+merge/148345

This is not a real merge request; I would just be grateful for some feedback on how to continue.

This branch contains the logic and a GUI to find, review and remove duplicate songs from the song database.
The GUI can be reached via "Tools->Find Duplicate Songs". As far as I have tested, everything works.

There are several pain points in the code, however:
- No tests for the wizard. I have no real idea which parts would be best to test (I think "everything" is not a good answer to this question :-). Probably some refactoring of the code is necessary to make it testable.
- No good MVC separation for the song review widgets. Some feedback on how to improve this (hopefully without having to write a full QItemModel subclass) would be appreciated.
- I changed the OpenLPWizard class in wizard.py to allow *not* adding a final progress page. I am not sure whether this is OK.

------------------------------
- Corrected many whitespace issues
- Made the test more standards-compliant
-- 
https://code.launchpad.net/~patrick-zakweb/openlp/duplicate-removal-review/+merge/148345
Your team OpenLP Core is requested to review the proposed merge of lp:~patrick-zakweb/openlp/duplicate-removal-review into lp:openlp.
=== modified file 'openlp/core/lib/settings.py'
--- openlp/core/lib/settings.py	2013-02-10 16:05:52 +0000
+++ openlp/core/lib/settings.py	2013-02-13 23:24:27 +0000
@@ -204,6 +204,7 @@
         u'shortcuts/songImportItem': [],
         u'shortcuts/themeScreen': [QtGui.QKeySequence(u'T')],
         u'shortcuts/toolsReindexItem': [],
+        u'shortcuts/toolsFindDuplicates': [],
         u'shortcuts/toolsAlertItem': [u'F7'],
         u'shortcuts/toolsFirstTimeWizard': [],
         u'shortcuts/toolsOpenDataFolder': [],

=== modified file 'openlp/core/ui/wizard.py'
--- openlp/core/ui/wizard.py	2013-02-07 08:42:17 +0000
+++ openlp/core/ui/wizard.py	2013-02-13 23:24:27 +0000
@@ -79,13 +79,30 @@
     """
     Generic OpenLP wizard to provide generic functionality and a unified look
     and feel.
+
+    ``parent``
+        The QWidget-derived parent of the wizard.
+
+    ``plugin``
+        Plugin this wizard is part of. The plugin will be saved in the "plugin" variable.
+        The plugin will also be used as basis for the file dialog methods this class provides.
+
+    ``name``
+        The object name this wizard should have.
+
+    ``image``
+        The image to display on the "welcome" page of the wizard. Should be 163x350.
+
+    ``addProgressPage``
+        Whether to add a progress page with a progressbar at the end of the wizard.
     """
-    def __init__(self, parent, plugin, name, image):
+    def __init__(self, parent, plugin, name, image, addProgressPage=True):
         """
         Constructor
         """
         QtGui.QWizard.__init__(self, parent)
         self.plugin = plugin
+        self.withProgressPage = addProgressPage
         self.setObjectName(name)
         self.openIcon = build_icon(u':/general/general_open.png')
         self.deleteIcon = build_icon(u':/general/general_delete.png')
@@ -96,8 +113,9 @@
         self.customInit()
         self.customSignals()
         QtCore.QObject.connect(self, QtCore.SIGNAL(u'currentIdChanged(int)'), self.onCurrentIdChanged)
-        QtCore.QObject.connect(self.errorCopyToButton, QtCore.SIGNAL(u'clicked()'), self.onErrorCopyToButtonClicked)
-        QtCore.QObject.connect(self.errorSaveToButton, QtCore.SIGNAL(u'clicked()'), self.onErrorSaveToButtonClicked)
+        if self.withProgressPage:
+            QtCore.QObject.connect(self.errorCopyToButton, QtCore.SIGNAL(u'clicked()'), self.onErrorCopyToButtonClicked)
+            QtCore.QObject.connect(self.errorSaveToButton, QtCore.SIGNAL(u'clicked()'), self.onErrorSaveToButtonClicked)
 
     def setupUi(self, image):
         """
@@ -110,7 +128,8 @@
             QtGui.QWizard.NoBackButtonOnLastPage)
         add_welcome_page(self, image)
         self.addCustomPages()
-        self.addProgressPage()
+        if self.withProgressPage:
+            self.addProgressPage()
         self.retranslateUi()
 
     def registerFields(self):
@@ -172,15 +191,20 @@
         Stop the wizard on cancel button, close button or ESC key.
         """
         log.debug(u'Wizard cancelled by user.')
+<<<<<<< TREE
         if self.currentPage() == self.progressPage:
             Registry().execute(u'openlp_stop_wizard')
+=======
+        if self.withProgressPage and self.currentPage() == self.progressPage:
+            Receiver.send_message(u'openlp_stop_wizard')
+>>>>>>> MERGE-SOURCE
         self.done(QtGui.QDialog.Rejected)
 
     def onCurrentIdChanged(self, pageId):
         """
         Perform necessary functions depending on which wizard page is active.
         """
-        if self.page(pageId) == self.progressPage:
+        if self.withProgressPage and self.page(pageId) == self.progressPage:
             self.preWizard()
             self.performWizard()
             self.postWizard()

=== added file 'openlp/plugins/songs/forms/duplicatesongremovalform.py'
--- openlp/plugins/songs/forms/duplicatesongremovalform.py	1970-01-01 00:00:00 +0000
+++ openlp/plugins/songs/forms/duplicatesongremovalform.py	2013-02-13 23:24:27 +0000
@@ -0,0 +1,492 @@
+# -*- coding: utf-8 -*-
+# vim: autoindent shiftwidth=4 expandtab textwidth=120 tabstop=4 softtabstop=4
+
+###############################################################################
+# OpenLP - Open Source Lyrics Projection                                      #
+# --------------------------------------------------------------------------- #
+# Copyright (c) 2008-2013 Raoul Snyman                                        #
+# Portions copyright (c) 2008-2013 Tim Bentley, Gerald Britton, Jonathan      #
+# Corwin, Samuel Findlay, Michael Gorven, Scott Guerrieri, Matthias Hub,      #
+# Meinert Jordan, Armin Köhler, Erik Lundin, Edwin Lunando, Brian T. Meyer.   #
+# Joshua Miller, Stevan Pettit, Andreas Preikschat, Mattias Põldaru,          #
+# Christian Richter, Philip Ridout, Simon Scudder, Jeffrey Smith,             #
+# Maikel Stuivenberg, Martin Thompson, Jon Tibble, Dave Warnock,              #
+# Frode Woldsund, Martin Zibricky, Patrick Zimmermann                         #
+# --------------------------------------------------------------------------- #
+# This program is free software; you can redistribute it and/or modify it     #
+# under the terms of the GNU General Public License as published by the Free  #
+# Software Foundation; version 2 of the License.                              #
+#                                                                             #
+# This program is distributed in the hope that it will be useful, but WITHOUT #
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or       #
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for    #
+# more details.                                                               #
+#                                                                             #
+# You should have received a copy of the GNU General Public License along     #
+# with this program; if not, write to the Free Software Foundation, Inc., 59  #
+# Temple Place, Suite 330, Boston, MA 02111-1307 USA                          #
+###############################################################################
+"""
+The duplicate song removal logic for OpenLP.
+"""
+import codecs
+import logging
+import os
+
+from PyQt4 import QtCore, QtGui
+
+from openlp.core.lib import translate, build_icon
+from openlp.core.lib.db import Manager
+from openlp.core.lib.ui import UiStrings, critical_error_message_box
+from openlp.core.ui.wizard import OpenLPWizard, WizardStrings
+from openlp.core.utils import AppLocation
+from openlp.plugins.songs.lib.db import Song, MediaFile
+from openlp.plugins.songs.lib.xml import SongXML
+from openlp.plugins.songs.lib.duplicatesongfinder import DuplicateSongFinder
+
+log = logging.getLogger(__name__)
+
+class DuplicateSongRemovalForm(OpenLPWizard):
+    """
+    This is the Duplicate Song Removal Wizard. It provides functionality to
+    search for and remove duplicate songs in the database.
+    """
+    log.info(u'DuplicateSongRemovalForm loaded')
+
+    def __init__(self, parent, plugin):
+        """
+        Instantiate the wizard, and run any extra setup we need to.
+
+        ``parent``
+            The QWidget-derived parent of the wizard.
+
+        ``plugin``
+            The songs plugin.
+        """
+        self.duplicateSongList = []
+        self.reviewCurrentCount = 0
+        self.reviewTotalCount = 0
+        OpenLPWizard.__init__(self, parent, plugin, u'duplicateSongRemovalWizard',
+            u':/wizards/wizard_duplicateremoval.bmp', False)
+
+    def customInit(self):
+        """
+        Song wizard specific initialisation.
+        """
+        pass
+
+    def customSignals(self):
+        """
+        Song wizard specific signals.
+        """
+        QtCore.QObject.connect(self.finishButton, QtCore.SIGNAL(u'clicked()'), self.onWizardExit)
+        QtCore.QObject.connect(self.cancelButton, QtCore.SIGNAL(u'clicked()'), self.onWizardExit)
+
+    def addCustomPages(self):
+        """
+        Add song wizard specific pages.
+        """
+        #add custom pages
+        self.searchingPage = QtGui.QWizardPage()
+        self.searchingPage.setObjectName(u'searchingPage')
+        self.searchingVerticalLayout = QtGui.QVBoxLayout(self.searchingPage)
+        self.searchingVerticalLayout.setObjectName(u'searchingVerticalLayout')
+        self.duplicateSearchProgressBar = QtGui.QProgressBar(self.searchingPage)
+        self.duplicateSearchProgressBar.setObjectName(u'duplicateSearchProgressBar')
+        self.duplicateSearchProgressBar.setFormat(WizardStrings.PercentSymbolFormat)
+        self.searchingVerticalLayout.addWidget(self.duplicateSearchProgressBar)
+        self.foundDuplicatesEdit = QtGui.QPlainTextEdit(self.searchingPage)
+        self.foundDuplicatesEdit.setUndoRedoEnabled(False)
+        self.foundDuplicatesEdit.setReadOnly(True)
+        self.foundDuplicatesEdit.setObjectName(u'foundDuplicatesEdit')
+        self.searchingVerticalLayout.addWidget(self.foundDuplicatesEdit)
+        self.searchingPageId = self.addPage(self.searchingPage)
+        self.reviewPage = QtGui.QWizardPage()
+        self.reviewPage.setObjectName(u'reviewPage')
+        self.reviewLayout = QtGui.QVBoxLayout(self.reviewPage)
+        self.reviewLayout.setObjectName(u'reviewLayout')
+        self.songsHorizontalScrollArea = QtGui.QScrollArea(self.reviewPage)
+        self.songsHorizontalScrollArea.setObjectName(u'songsHorizontalScrollArea')
+        self.songsHorizontalScrollArea.setHorizontalScrollBarPolicy(QtCore.Qt.ScrollBarAsNeeded)
+        self.songsHorizontalScrollArea.setVerticalScrollBarPolicy(QtCore.Qt.ScrollBarAsNeeded)
+        self.songsHorizontalScrollArea.setFrameStyle(QtGui.QFrame.NoFrame)
+        self.songsHorizontalScrollArea.setWidgetResizable(True)
+        self.songsHorizontalScrollArea.setStyleSheet(u'QScrollArea#songsHorizontalScrollArea {background-color:transparent;}')
+        self.songsHorizontalSongsWidget = QtGui.QWidget(self.songsHorizontalScrollArea)
+        self.songsHorizontalSongsWidget.setObjectName(u'songsHorizontalSongsWidget')
+        self.songsHorizontalSongsWidget.setStyleSheet(u'QWidget#songsHorizontalSongsWidget {background-color:transparent;}')
+        self.songsHorizontalLayout = QtGui.QHBoxLayout(self.songsHorizontalSongsWidget)
+        self.songsHorizontalLayout.setObjectName(u'songsHorizontalLayout')
+        self.songsHorizontalLayout.setSizeConstraint(QtGui.QLayout.SetMinAndMaxSize)
+        self.songsHorizontalScrollArea.setWidget(self.songsHorizontalSongsWidget)
+        self.reviewLayout.addWidget(self.songsHorizontalScrollArea)
+        self.reviewPageId = self.addPage(self.reviewPage)
+        #add a dummy page to the end, to prevent the finish button to appear and the next button do disappear on the
+        #review page
+        self.dummyPage = QtGui.QWizardPage()
+        self.dummyPageId = self.addPage(self.dummyPage)
+
+    def retranslateUi(self):
+        """
+        Song wizard localisation.
+        """
+        self.setWindowTitle(translate(u'Wizard', u'Wizard'))
+        self.titleLabel.setText(WizardStrings.HeaderStyle % translate(u'OpenLP.Ui',
+            u'Welcome to the Duplicate Song Removal Wizard'))
+        self.informationLabel.setText(translate("Wizard",
+            u'This wizard will help you to remove duplicate songs from the song database. You will have a chance to '
+            u'review every potential duplicate song before it is deleted. So no songs will be deleted without your '
+            u'explicit approval.'))
+        self.searchingPage.setTitle(translate(u'Wizard', u'Searching for duplicate songs.'))
+        self.searchingPage.setSubTitle(translate(u'Wizard', u'The song database is searched for double songs.'))
+        self.updateReviewCounterText()
+        self.reviewPage.setSubTitle(translate(u'Wizard',
+            u'Here you can decide which songs to remove and which ones to keep.'))
+
+    def updateReviewCounterText(self):
+        """
+        Set the wizard review page header text.
+        """
+        self.reviewPage.setTitle(translate(u'Wizard', u'Review duplicate songs (%s/%s)') % \
+                (self.reviewCurrentCount, self.reviewTotalCount))
+
+    def customPageChanged(self, pageId):
+        """
+        Called when changing the wizard page.
+
+        ``pageId``
+            ID of the page the wizard changed to.
+        """
+        #hide back button
+        self.button(QtGui.QWizard.BackButton).hide()
+        if pageId == self.searchingPageId:
+            #search duplicate songs
+            maxSongs = self.plugin.manager.get_object_count(Song)
+            if maxSongs == 0 or maxSongs == 1:
+                self.duplicateSearchProgressBar.setMaximum(1)
+                self.duplicateSearchProgressBar.setValue(1)
+                self.notifyNoDuplicates()
+                return
+            # with x songs we have x*(x - 1) / 2 comparisons
+            maxProgressCount = maxSongs * (maxSongs - 1) / 2
+            self.duplicateSearchProgressBar.setMaximum(maxProgressCount)
+            songs = self.plugin.manager.get_all_objects(Song)
+            for outerSongCounter in range(maxSongs - 1):
+                for innerSongCounter in range(outerSongCounter + 1, maxSongs):
+                    doubleFinder = DuplicateSongFinder()
+                    if doubleFinder.songsProbablyEqual(songs[outerSongCounter], songs[innerSongCounter]):
+                        duplicateAdded = self.addDuplicatesToSongList(songs[outerSongCounter], songs[innerSongCounter])
+                        if duplicateAdded:
+                            self.foundDuplicatesEdit.appendPlainText(songs[outerSongCounter].title + "  =  " +
+                                songs[innerSongCounter].title)
+                    self.duplicateSearchProgressBar.setValue(self.duplicateSearchProgressBar.value() + 1)
+            self.reviewTotalCount = len(self.duplicateSongList)
+            if self.reviewTotalCount == 0:
+                self.notifyNoDuplicates()
+        elif pageId == self.reviewPageId:
+            self.processCurrentDuplicateEntry()
+
+    def notifyNoDuplicates(self):
+        """
+        Notifies the user, that there were no duplicates found in the database.
+        """
+        self.button(QtGui.QWizard.FinishButton).show()
+        self.button(QtGui.QWizard.FinishButton).setEnabled(True)
+        self.button(QtGui.QWizard.NextButton).hide()
+        QtGui.QMessageBox.information(self, translate(u'Wizard', u'Information'),
+            translate(u'Wizard', u'No duplicate songs have been found in the database.'),
+            QtGui.QMessageBox.StandardButtons(QtGui.QMessageBox.Ok))
+
+
+    def addDuplicatesToSongList(self, searchSong, duplicateSong):
+        """
+        Inserts a song duplicate (two similar songs) to the duplicate song list.
+        If one of the two songs is already part of the duplicate song list,
+        don't add another duplicate group but add the other song to that group.
+        Returns True if at least one of the songs was added, False if both were already
+        member of a group.
+
+        ``searchSong``
+            The song we searched the duplicate for.
+
+        ``duplicateSong``
+            The duplicate song.
+        """
+        duplicateGroupFound = False
+        duplicateAdded = False
+        for duplicateGroup in self.duplicateSongList:
+            #skip the first song in the duplicate lists, since the first one has to be an earlier song
+            if searchSong in duplicateGroup and not duplicateSong in duplicateGroup:
+                duplicateGroup.append(duplicateSong)
+                duplicateGroupFound = True
+                duplicateAdded = True
+                break
+            elif not searchSong in duplicateGroup and duplicateSong in duplicateGroup:
+                duplicateGroup.append(searchSong)
+                duplicateGroupFound = True
+                duplicateAdded = True
+                break
+            elif searchSong in duplicateGroup and duplicateSong in duplicateGroup:
+                duplicateGroupFound = True
+                duplicateAdded = False
+                break
+        if not duplicateGroupFound:
+            self.duplicateSongList.append([searchSong, duplicateSong])
+            duplicateAdded = True
+        return duplicateAdded
+
+    def onWizardExit(self):
+        """
+        Once the wizard is finished, refresh the song list,
+        since we potentially removed songs from it.
+        """
+        self.plugin.mediaItem.onSearchTextButtonClicked()
+
+    def setDefaults(self):
+        """
+        Set default form values for the song import wizard.
+        """
+        self.restart()
+        self.duplicateSearchProgressBar.setValue(0)
+        self.foundDuplicatesEdit.clear()
+
+    def validateCurrentPage(self):
+        """
+        Controls whether we should switch to the next wizard page. This method loops
+        on the review page as long as there are more song duplicates to review.
+        """
+        if self.currentId() == self.reviewPageId:
+            #as long as it's not the last duplicate list entry we revisit the review page
+            if len(self.duplicateSongList) == 1:
+                return True
+            else:
+                self.proceedToNextReview()
+                return False
+        return OpenLPWizard.validateCurrentPage(self)
+
+    def removeButtonClicked(self, songReviewWidget):
+        """
+        Removes a song from the database, removes the GUI element representing the
+        song on the review page, and disable the remove button if only one duplicate
+        is left.
+
+        ``songReviewWidget``
+            The SongReviewWidget whose song we should delete.
+        """
+        #remove song from duplicate song list
+        self.duplicateSongList[-1].remove(songReviewWidget.song)
+        #remove song
+        item_id = songReviewWidget.song.id
+        media_files = self.plugin.manager.get_all_objects(MediaFile,
+            MediaFile.song_id == item_id)
+        for media_file in media_files:
+            try:
+                os.remove(media_file.file_name)
+            except:
+                log.exception(u'Could not remove file: %s',
+                    media_file.file_name)
+        try:
+            save_path = os.path.join(AppLocation.get_section_data_path(
+                self.plugin.name), u'audio', str(item_id))
+            if os.path.exists(save_path):
+                os.rmdir(save_path)
+        except OSError:
+            log.exception(u'Could not remove directory: %s', save_path)
+        self.plugin.manager.delete_object(Song, item_id)
+        # remove GUI elements
+        self.songsHorizontalLayout.removeWidget(songReviewWidget)
+        songReviewWidget.setParent(None)
+        # check if we only have one duplicate left
+        # 4 stretches + 1 SongReviewWidget = 5
+        # the SongReviewWidget is then at position 2
+        if len(self.duplicateSongList[-1]) == 1:
+            self.songsHorizontalLayout.itemAt(2).widget().songRemoveButton.setEnabled(False)
+
+    def proceedToNextReview(self):
+        """
+        Removes the previous review UI elements and calls processCurrentDuplicateEntry.
+        """
+        #remove last duplicate group
+        self.duplicateSongList.pop()
+        # remove all previous elements
+        for i in reversed(range(self.songsHorizontalLayout.count())): 
+            item = self.songsHorizontalLayout.itemAt(i)
+            if isinstance(item, QtGui.QWidgetItem):
+                # the order is important here, if the .setParent(None) call is done before the .removeItem() call, a
+                # segfault occurs
+                widget = item.widget()
+                self.songsHorizontalLayout.removeItem(item) 
+                widget.setParent(None)
+            else:
+                self.songsHorizontalLayout.removeItem(item)
+        #process next set of duplicates
+        self.processCurrentDuplicateEntry()
+    
+    def processCurrentDuplicateEntry(self):
+        """
+        Update the review counter in the wizard header, add song widgets for
+        the current duplicate group to review, if it's the last
+        duplicate song group, hide the "next" button and show the "finish" button.
+        """
+        # update counter
+        self.reviewCurrentCount = self.reviewTotalCount - (len(self.duplicateSongList) - 1)
+        self.updateReviewCounterText()
+        # add song elements to the UI
+        if len(self.duplicateSongList) > 0:
+            # a stretch doesn't seem to stretch endlessly, so I add two to get enough stetch for 1400x1050
+            self.songsHorizontalLayout.addStretch()
+            self.songsHorizontalLayout.addStretch()
+            for duplicate in self.duplicateSongList[-1]:
+                songReviewWidget = SongReviewWidget(self.reviewPage, duplicate)
+                QtCore.QObject.connect(songReviewWidget,
+                        QtCore.SIGNAL(u'songRemoveButtonClicked(PyQt_PyObject)'),
+                        self.removeButtonClicked)
+                self.songsHorizontalLayout.addWidget(songReviewWidget)
+            self.songsHorizontalLayout.addStretch()
+            self.songsHorizontalLayout.addStretch()
+        #change next button to finish button on last review
+        if len(self.duplicateSongList) == 1:
+            self.button(QtGui.QWizard.FinishButton).show()
+            self.button(QtGui.QWizard.FinishButton).setEnabled(True)
+            self.button(QtGui.QWizard.NextButton).hide()
+
+class SongReviewWidget(QtGui.QWidget):
+    """
+    A widget representing a song on the duplicate song review page.
+    It displays most of the information a song contains and
+    provides a "remove" button to remove the song from the database.
+    The remove logic is not implemented here, but a signal is provided
+    when the remove button is clicked.
+    """
+    def __init__(self, parent, song):
+        """
+        ``parent``
+            The QWidget-derived parent of the wizard.
+
+        ``song``
+            The Song which this SongReviewWidget should represent.
+        """
+        QtGui.QWidget.__init__(self, parent)
+        self.song = song
+        self.setupUi()
+        self.retranslateUi()
+        QtCore.QObject.connect(self.songRemoveButton, QtCore.SIGNAL(u'clicked()'), self.onRemoveButtonClicked)
+
+    def setupUi(self):
+        self.songVerticalLayout = QtGui.QVBoxLayout(self)
+        self.songVerticalLayout.setObjectName(u'songVerticalLayout')
+        self.songGroupBox = QtGui.QGroupBox(self)
+        self.songGroupBox.setObjectName(u'songGroupBox')
+        self.songGroupBox.setMinimumWidth(300)
+        self.songGroupBox.setMaximumWidth(300)
+        self.songGroupBoxLayout = QtGui.QVBoxLayout(self.songGroupBox)
+        self.songGroupBoxLayout.setObjectName(u'songGroupBoxLayout')
+        self.songInfoFormLayout = QtGui.QFormLayout()
+        self.songInfoFormLayout.setObjectName(u'songInfoFormLayout')
+        #title
+        self.songTitleLabel = QtGui.QLabel(self)
+        self.songTitleLabel.setObjectName(u'songTitleLabel')
+        self.songInfoFormLayout.setWidget(0, QtGui.QFormLayout.LabelRole, self.songTitleLabel)
+        self.songTitleContent = QtGui.QLabel(self)
+        self.songTitleContent.setObjectName(u'songTitleContent')
+        self.songTitleContent.setText(self.song.title)
+        self.songTitleContent.setWordWrap(True)
+        self.songInfoFormLayout.setWidget(0, QtGui.QFormLayout.FieldRole, self.songTitleContent)
+        #alternate title
+        self.songAlternateTitleLabel = QtGui.QLabel(self)
+        self.songAlternateTitleLabel.setObjectName(u'songAlternateTitleLabel')
+        self.songInfoFormLayout.setWidget(1, QtGui.QFormLayout.LabelRole, self.songAlternateTitleLabel)
+        self.songAlternateTitleContent = QtGui.QLabel(self)
+        self.songAlternateTitleContent.setObjectName(u'songAlternateTitleContent')
+        self.songAlternateTitleContent.setText(self.song.alternate_title)
+        self.songAlternateTitleContent.setWordWrap(True)
+        self.songInfoFormLayout.setWidget(1, QtGui.QFormLayout.FieldRole, self.songAlternateTitleContent)
+        #CCLI number
+        self.songCCLINumberLabel = QtGui.QLabel(self)
+        self.songCCLINumberLabel.setObjectName(u'songCCLINumberLabel')
+        self.songInfoFormLayout.setWidget(2, QtGui.QFormLayout.LabelRole, self.songCCLINumberLabel)
+        self.songCCLINumberContent = QtGui.QLabel(self)
+        self.songCCLINumberContent.setObjectName(u'songCCLINumberContent')
+        self.songCCLINumberContent.setText(self.song.ccli_number)
+        self.songCCLINumberContent.setWordWrap(True)
+        self.songInfoFormLayout.setWidget(2, QtGui.QFormLayout.FieldRole, self.songCCLINumberContent)
+        #copyright
+        self.songCopyrightLabel = QtGui.QLabel(self)
+        self.songCopyrightLabel.setObjectName(u'songCopyrightLabel')
+        self.songInfoFormLayout.setWidget(3, QtGui.QFormLayout.LabelRole, self.songCopyrightLabel)
+        self.songCopyrightContent = QtGui.QLabel(self)
+        self.songCopyrightContent.setObjectName(u'songCopyrightContent')
+        self.songCopyrightContent.setWordWrap(True)
+        self.songCopyrightContent.setText(self.song.copyright)
+        self.songInfoFormLayout.setWidget(3, QtGui.QFormLayout.FieldRole, self.songCopyrightContent)
+        #comments
+        self.songCommentsLabel = QtGui.QLabel(self)
+        self.songCommentsLabel.setObjectName(u'songCommentsLabel')
+        self.songInfoFormLayout.setWidget(4, QtGui.QFormLayout.LabelRole, self.songCommentsLabel)
+        self.songCommentsContent = QtGui.QLabel(self)
+        self.songCommentsContent.setObjectName(u'songCommentsContent')
+        self.songCommentsContent.setText(self.song.comments)
+        self.songCommentsContent.setWordWrap(True)
+        self.songInfoFormLayout.setWidget(4, QtGui.QFormLayout.FieldRole, self.songCommentsContent)
+        #authors
+        self.songAuthorsLabel = QtGui.QLabel(self)
+        self.songAuthorsLabel.setObjectName(u'songAuthorsLabel')
+        self.songInfoFormLayout.setWidget(5, QtGui.QFormLayout.LabelRole, self.songAuthorsLabel)
+        self.songAuthorsContent = QtGui.QLabel(self)
+        self.songAuthorsContent.setObjectName(u'songAuthorsContent')
+        self.songAuthorsContent.setWordWrap(True)
+        authorsText = u''
+        for author in self.song.authors:
+            authorsText += author.display_name + ', '
+        if authorsText:
+            authorsText = authorsText[:-2]
+        self.songAuthorsContent.setText(authorsText)
+        self.songInfoFormLayout.setWidget(5, QtGui.QFormLayout.FieldRole, self.songAuthorsContent)
+        #verse order
+        self.songVerseOrderLabel = QtGui.QLabel(self)
+        self.songVerseOrderLabel.setObjectName(u'songVerseOrderLabel')
+        self.songInfoFormLayout.setWidget(6, QtGui.QFormLayout.LabelRole, self.songVerseOrderLabel)
+        self.songVerseOrderContent = QtGui.QLabel(self)
+        self.songVerseOrderContent.setObjectName(u'songVerseOrderContent')
+        self.songVerseOrderContent.setText(self.song.verse_order)
+        self.songVerseOrderContent.setWordWrap(True)
+        self.songInfoFormLayout.setWidget(6, QtGui.QFormLayout.FieldRole, self.songVerseOrderContent)
+        #verses
+        self.songGroupBoxLayout.addLayout(self.songInfoFormLayout)
+        self.songInfoVerseGroupBox = QtGui.QGroupBox(self.songGroupBox)
+        self.songInfoVerseGroupBox.setObjectName(u'songInfoVerseGroupBox')
+        self.songInfoVerseGroupBoxLayout = QtGui.QFormLayout(self.songInfoVerseGroupBox)
+        songXml = SongXML()
+        verses = songXml.get_verses(self.song.lyrics)
+        for verse in verses:
+            verseMarker = verse[0]['type'] + verse[0]['label']
+            verseLabel = QtGui.QLabel(self.songInfoVerseGroupBox)
+            verseLabel.setText(verse[1])
+            verseLabel.setWordWrap(True)
+            self.songInfoVerseGroupBoxLayout.addRow(verseMarker, verseLabel)
+        self.songGroupBoxLayout.addWidget(self.songInfoVerseGroupBox)
+        self.songGroupBoxLayout.addStretch()
+        self.songVerticalLayout.addWidget(self.songGroupBox)
+        self.songRemoveButton = QtGui.QPushButton(self)
+        self.songRemoveButton.setObjectName(u'songRemoveButton')
+        self.songRemoveButton.setIcon(build_icon(u':/songs/song_delete.png'))
+        self.songRemoveButton.setSizePolicy(QtGui.QSizePolicy.Fixed, QtGui.QSizePolicy.Fixed)
+        self.songVerticalLayout.addWidget(self.songRemoveButton, alignment = QtCore.Qt.AlignHCenter)
+
+    def retranslateUi(self):
+        self.songRemoveButton.setText(u'Remove')
+        self.songTitleLabel.setText(u'Title:')
+        self.songAlternateTitleLabel.setText(u'Alternate Title:')
+        self.songCCLINumberLabel.setText(u'CCLI Number:')
+        self.songVerseOrderLabel.setText(u'Verse Order:')
+        self.songCopyrightLabel.setText(u'Copyright:')
+        self.songCommentsLabel.setText(u'Comments:')
+        self.songAuthorsLabel.setText(u'Authors:')
+        self.songInfoVerseGroupBox.setTitle(u'Verses')
+
+    def onRemoveButtonClicked(self):
+        """
+        Signal emitted when the "remove" button is clicked.
+        """
+        self.emit(QtCore.SIGNAL(u'songRemoveButtonClicked(PyQt_PyObject)'), self)
+

=== added file 'openlp/plugins/songs/lib/duplicatesongfinder.py'
--- openlp/plugins/songs/lib/duplicatesongfinder.py	1970-01-01 00:00:00 +0000
+++ openlp/plugins/songs/lib/duplicatesongfinder.py	2013-02-13 23:24:27 +0000
@@ -0,0 +1,170 @@
+# -*- coding: utf-8 -*-
+# vim: autoindent shiftwidth=4 expandtab textwidth=120 tabstop=4 softtabstop=4
+
+###############################################################################
+# OpenLP - Open Source Lyrics Projection                                      #
+# --------------------------------------------------------------------------- #
+# Copyright (c) 2008-2013 Raoul Snyman                                        #
+# Portions copyright (c) 2008-2013 Tim Bentley, Gerald Britton, Jonathan      #
+# Corwin, Samuel Findlay, Michael Gorven, Scott Guerrieri, Matthias Hub,      #
+# Meinert Jordan, Armin Köhler, Erik Lundin, Edwin Lunando, Brian T. Meyer.   #
+# Joshua Miller, Stevan Pettit, Andreas Preikschat, Mattias Põldaru,          #
+# Christian Richter, Philip Ridout, Simon Scudder, Jeffrey Smith,             #
+# Maikel Stuivenberg, Martin Thompson, Jon Tibble, Dave Warnock,              #
+# Frode Woldsund, Martin Zibricky, Patrick Zimmermann                         #
+# --------------------------------------------------------------------------- #
+# This program is free software; you can redistribute it and/or modify it     #
+# under the terms of the GNU General Public License as published by the Free  #
+# Software Foundation; version 2 of the License.                              #
+#                                                                             #
+# This program is distributed in the hope that it will be useful, but WITHOUT #
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or       #
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for    #
+# more details.                                                               #
+#                                                                             #
+# You should have received a copy of the GNU General Public License along     #
+# with this program; if not, write to the Free Software Foundation, Inc., 59  #
+# Temple Place, Suite 330, Boston, MA 02111-1307 USA                          #
+###############################################################################
+import difflib
+
+from openlp.plugins.songs.lib.db import Song
+
+
+class DuplicateSongFinder(object):
+    """
+    The :class:`DuplicateSongFinder` class provides functionality to search for
+    duplicate songs.
+
+    The algorithm is based on the diff algorithm.
+    First a diffset is calculated for two songs.
+    To compensate for typos all differences that are small enough
+    (<= maxTypoSize) and are surrounded by larger blocks
+    (>= minFragmentSize) are removed and the surrounding equal parts are merged.
+    Finally two conditions can qualify a song tuple to be a duplicate:
+    1. There is a block of equal content that is at least minBlockSize large.
+       This condition should hit for all larger songs that have a long enough
+       equal part. Even if only one verse is equal this condition should still hit.
+    2. The longest block of equal content is larger than two thirds of the smaller song.
+       This condition should hit if one of the two songs (or both) is small (smaller
+       than the minBlockSize), but most of the song is contained in the other song.
+    """
+
+    def __init__(self, minFragmentSize=5, minBlockSize=70, maxTypoSize=3):
+        """
+        Set up the thresholds used by the comparison. All sizes are counted in characters of the compared
+        lyrics.
+
+        ``minFragmentSize``
+            Minimum length of the passages around a difference for that difference to count as a typo.
+
+        ``minBlockSize``
+            Minimum length of a single equal block that marks two songs as duplicates.
+
+        ``maxTypoSize``
+            Maximum length of a difference that is still treated as a typo.
+        """
+        self.minFragmentSize = minFragmentSize
+        self.minBlockSize = minBlockSize
+        self.maxTypoSize = maxTypoSize
+
+    def songsProbablyEqual(self, song1, song2):
+        """
+        Calculate and return whether two songs are probably equal.
+
+        ``song1``
+            The first song to compare.
+
+        ``song2``
+            The second song to compare.
+
+        Returns ``True`` if the ``search_lyrics`` of the two songs fulfil one of the two duplicate
+        conditions listed in the class description, ``False`` otherwise.
+        """
+        if len(song1.search_lyrics) < len(song2.search_lyrics):
+            small = song1.search_lyrics
+            large = song2.search_lyrics
+        else:
+            small = song2.search_lyrics
+            large = song1.search_lyrics
+        # NOTE(review): difflib's automatic junk heuristic treats frequent items of sequences with 200 or
+        # more items as junk, which may hide matches in long lyrics. Check whether it has to be disabled.
+        differ = difflib.SequenceMatcher(a=large, b=small)
+        diff_no_typos = self.__removeTypos(differ.get_opcodes())
+        if self.__lengthOfEqualBlocks(diff_no_typos) >= self.minBlockSize:
+            return True
+        # Integer form of "longest block > two thirds of the smaller song"; avoids the division, whose result
+        # type depends on the Python version.
+        return self.__lengthOfLongestEqualBlock(diff_no_typos) * 3 > len(small) * 2
+
+    def __opLength(self, opcode):
+        """
+        Return the length of a given difference. This is the larger of the two spans the opcode covers
+        in the compared sequences.
+
+        ``opcode``
+            The difference, a difflib opcode tuple ``(tag, i1, i2, j1, j2)``.
+        """
+        return max(opcode[2] - opcode[1], opcode[4] - opcode[3])
+
+    def __isTypo(self, opcode):
+        """
+        Return whether an opcode is a difference of at most maxTypoSize characters.
+        """
+        return opcode[0] != "equal" and self.__opLength(opcode) <= self.maxTypoSize
+
+    def __isFragment(self, opcode):
+        """
+        Return whether an opcode spans at least minFragmentSize characters.
+        """
+        return self.__opLength(opcode) >= self.minFragmentSize
+
+    def __removeTypos(self, diff):
+        """
+        Remove typos from a diff set and merge the equal passages that become adjacent. A typo is a small
+        difference (<= maxTypoSize) surrounded by larger passages (>= minFragmentSize).
+
+        ``diff``
+            The diff set to remove the typos from. The list is modified in place and returned.
+        """
+        # Remove a typo at the beginning of the string.
+        if len(diff) >= 2 and self.__isTypo(diff[0]) and self.__isFragment(diff[1]):
+            del diff[0]
+        # Remove typos in the middle of the string. Walking backwards keeps the indexes that are still to
+        # be visited valid while elements are deleted.
+        for index in range(len(diff) - 3, -1, -1):
+            if self.__isFragment(diff[index]) and self.__isTypo(diff[index + 1]) and \
+                    self.__isFragment(diff[index + 2]):
+                del diff[index + 1]
+        # Remove a typo at the end of the string.
+        if len(diff) >= 2 and self.__isFragment(diff[-2]) and self.__isTypo(diff[-1]):
+            del diff[-1]
+        # Merge neighbouring equal fragments.
+        for index in range(len(diff) - 2, -1, -1):
+            first, second = diff[index], diff[index + 1]
+            if first[0] == "equal" and self.__isFragment(first) and \
+                    second[0] == "equal" and self.__isFragment(second):
+                diff[index] = ("equal", first[1], second[2], first[3], second[4])
+                del diff[index + 1]
+        return diff
+
+    def __lengthOfEqualBlocks(self, diff):
+        """
+        Return the total length of all equal blocks in a diff set.
+        Blocks smaller than minBlockSize are not counted.
+
+        ``diff``
+            The diff set to return the length for.
+        """
+        return sum(self.__opLength(element) for element in diff
+            if element[0] == "equal" and self.__opLength(element) >= self.minBlockSize)
+
+    def __lengthOfLongestEqualBlock(self, diff):
+        """
+        Return the length of the largest equal block in a diff set, or 0 if there is none.
+
+        ``diff``
+            The diff set to return the length for.
+        """
+        lengths = [self.__opLength(element) for element in diff if element[0] == "equal"]
+        return max(lengths) if lengths else 0

=== modified file 'openlp/plugins/songs/songsplugin.py'
--- openlp/plugins/songs/songsplugin.py	2013-02-05 08:05:28 +0000
+++ openlp/plugins/songs/songsplugin.py	2013-02-13 23:24:27 +0000
@@ -48,6 +48,8 @@
 from openlp.plugins.songs.lib.mediaitem import SongSearch
 from openlp.plugins.songs.lib.importer import SongFormat
 from openlp.plugins.songs.lib.olpimport import OpenLPSongImport
+from openlp.plugins.songs.forms.duplicatesongremovalform import \
+    DuplicateSongRemovalForm
 
 log = logging.getLogger(__name__)
 __default_settings__ = {
@@ -92,10 +94,12 @@
         self.songImportItem.setVisible(True)
         self.songExportItem.setVisible(True)
         self.toolsReindexItem.setVisible(True)
+        self.toolsFindDuplicates.setVisible(True)
         action_list = ActionList.get_instance()
         action_list.add_action(self.songImportItem, UiStrings().Import)
         action_list.add_action(self.songExportItem, UiStrings().Export)
         action_list.add_action(self.toolsReindexItem, UiStrings().Tools)
+        action_list.add_action(self.toolsFindDuplicates, UiStrings().Tools)
 
     def addImportMenuItem(self, import_menu):
         """
@@ -131,7 +135,7 @@
 
     def addToolsMenuItem(self, tools_menu):
         """
-        Give the alerts plugin the opportunity to add items to the
+        Give the Songs plugin the opportunity to add items to the
         **Tools** menu.
 
         ``tools_menu``
@@ -145,6 +149,12 @@
             statustip=translate('SongsPlugin', 'Re-index the songs database to improve searching and ordering.'),
             visible=False, triggers=self.onToolsReindexItemTriggered)
         tools_menu.addAction(self.toolsReindexItem)
+        self.toolsFindDuplicates = create_action(tools_menu, u'toolsFindDuplicates',
+            text=translate('SongsPlugin', 'Find &Duplicate Songs'),
+            statustip=translate('SongsPlugin',
+            'Find and remove duplicate songs in the song database.'),
+            visible=False, triggers=self.onToolsFindDuplicatesTriggered)
+        tools_menu.addAction(self.toolsFindDuplicates)
 
     def onToolsReindexItemTriggered(self):
         """
@@ -164,6 +174,12 @@
         self.manager.save_objects(songs)
         self.mediaItem.onSearchTextButtonClicked()
 
+    def onToolsFindDuplicatesTriggered(self):
+        """
+        Create the duplicate song removal form and run it via ``exec_()`` to search the song database for duplicates.
+        """
+        DuplicateSongRemovalForm(self.main_window, self).exec_()
+
     def onSongImportItemClicked(self):
         if self.mediaItem:
             self.mediaItem.onImportClick()
@@ -284,10 +300,12 @@
         self.songImportItem.setVisible(False)
         self.songExportItem.setVisible(False)
         self.toolsReindexItem.setVisible(False)
+        self.toolsFindDuplicates.setVisible(False)
         action_list = ActionList.get_instance()
         action_list.remove_action(self.songImportItem, UiStrings().Import)
         action_list.remove_action(self.songExportItem, UiStrings().Export)
         action_list.remove_action(self.toolsReindexItem, UiStrings().Tools)
+        action_list.remove_action(self.toolsFindDuplicates, UiStrings().Tools)
         Plugin.finalise(self)
 
     def new_service_created(self):

=== modified file 'resources/images/openlp-2.qrc'
--- resources/images/openlp-2.qrc	2012-12-06 19:26:50 +0000
+++ resources/images/openlp-2.qrc	2013-02-13 23:24:27 +0000
@@ -20,6 +20,7 @@
     <file>song_author_edit.png</file>
     <file>song_topic_edit.png</file>
     <file>song_book_edit.png</file>
+    <file>song_delete.png</file>
   </qresource>
   <qresource prefix="bibles">
     <file>bibles_search_text.png</file>
@@ -98,6 +99,7 @@
     <file>wizard_importbible.bmp</file>
     <file>wizard_firsttime.bmp</file>
     <file>wizard_createtheme.bmp</file>
+    <file>wizard_duplicateremoval.bmp</file>
   </qresource>
   <qresource prefix="services">
     <file>service_collapse_all.png</file>

=== added file 'resources/images/wizard_duplicateremoval.bmp'
Binary files resources/images/wizard_duplicateremoval.bmp	1970-01-01 00:00:00 +0000 and resources/images/wizard_duplicateremoval.bmp	2013-02-13 23:24:27 +0000 differ
=== added directory 'tests/functional/openlp_plugins'
=== added directory 'tests/functional/openlp_plugins/songs'
=== added file 'tests/functional/openlp_plugins/songs/__init__.py'
--- tests/functional/openlp_plugins/songs/__init__.py	1970-01-01 00:00:00 +0000
+++ tests/functional/openlp_plugins/songs/__init__.py	2013-02-13 23:24:27 +0000
@@ -0,0 +1,7 @@
+import sip
+
+# Request version 2 of the sip API for each of the listed names.
+API_NAMES = (u'QDate', u'QDateTime', u'QString', u'QTextStream', u'QTime', u'QUrl', u'QVariant')
+
+for api_name in API_NAMES:
+    sip.setapi(api_name, 2)

=== added file 'tests/functional/openlp_plugins/songs/test_lib.py'
--- tests/functional/openlp_plugins/songs/test_lib.py	1970-01-01 00:00:00 +0000
+++ tests/functional/openlp_plugins/songs/test_lib.py	2013-02-13 23:24:27 +0000
@@ -0,0 +1,121 @@
+# -*- coding: utf-8 -*-
+# vim: autoindent shiftwidth=4 expandtab textwidth=120 tabstop=4 softtabstop=4
+
+###############################################################################
+# OpenLP - Open Source Lyrics Projection                                      #
+# --------------------------------------------------------------------------- #
+# Copyright (c) 2008-2013 Raoul Snyman                                        #
+# Portions copyright (c) 2008-2013 Tim Bentley, Gerald Britton, Jonathan      #
+# Corwin, Samuel Findlay, Michael Gorven, Scott Guerrieri, Matthias Hub,      #
+# Meinert Jordan, Armin Köhler, Erik Lundin, Edwin Lunando, Brian T. Meyer.   #
+# Joshua Miller, Stevan Pettit, Andreas Preikschat, Mattias Põldaru,          #
+# Christian Richter, Philip Ridout, Simon Scudder, Jeffrey Smith,             #
+# Maikel Stuivenberg, Martin Thompson, Jon Tibble, Dave Warnock,              #
+# Frode Woldsund, Martin Zibricky, Patrick Zimmermann                         #
+# --------------------------------------------------------------------------- #
+# This program is free software; you can redistribute it and/or modify it     #
+# under the terms of the GNU General Public License as published by the Free  #
+# Software Foundation; version 2 of the License.                              #
+#                                                                             #
+# This program is distributed in the hope that it will be useful, but WITHOUT #
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or       #
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for    #
+# more details.                                                               #
+#                                                                             #
+# You should have received a copy of the GNU General Public License along     #
+# with this program; if not, write to the Free Software Foundation, Inc., 59  #
+# Temple Place, Suite 330, Boston, MA 02111-1307 USA                          #
+###############################################################################
+
+from unittest import TestCase
+
+from mock import MagicMock
+
+from openlp.plugins.songs.lib.duplicatesongfinder import DuplicateSongFinder
+
+
+class TestLib(TestCase):
+    """
+    Test the duplicate song detection of the songs plugin, one scenario per test method.
+    """
+    # NOTE: the line breaks and the indentation of the continuation lines are part of the lyrics strings.
+    full_lyrics = u'''amazing grace how sweet the sound that saved a wretch like me i once was lost but now am found was
+        blind but now i see  twas grace that taught my heart to fear and grace my fears relieved how precious did that grace
+        appear the hour i first believed  through many dangers toils and snares i have already come tis grace that brought
+        me safe thus far and grace will lead me home  the lord has promised good to me his word my hope secures he will my
+        shield and portion be as long as life endures  yea when this flesh and heart shall fail and mortal life shall cease
+        i shall possess within the veil a life of joy and peace  when weve been here ten thousand years bright shining as
+        the sun weve no less days to sing gods praise than when weve first begun'''
+    short_lyrics = u'''twas grace that taught my heart to fear and grace my fears relieved how precious did that grace
+        appear the hour i first believed'''
+    error_lyrics = u'''amazing grace how sweet the sound that saved a wretch like me i once was lost but now am found was
+        blind but now i see  twas grace that taught my heart to fear and grace my fears relieved how precious did that grace
+        appear the hour i first believedxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx snares i have already come tis grace that brought
+        me safe thus far and grace will lead me home  the lord has promised good to me his word my hope secures he will my
+        shield andwhen this flcsh and heart shall fail and mortal life shall cease
+        i shall possess within the veila lifeofjoy and peace  when weve been here ten thousand years bright shining as
+        the sun weve no less days to sing gods praise than when weve first begun'''
+    different_lyrics = u'''on a hill far away stood an old rugged cross the emblem of suffering and shame and i love that
+        old cross where the dearest and best for a world of lost sinners was slain  so ill cherish the old rugged cross till
+        my trophies at last i lay down i will cling to the old rugged cross and exchange it some day for a crown'''
+
+    def setUp(self):
+        """
+        Create the finder and two mocked songs; the first song always carries the full lyrics.
+        """
+        self.dsf = DuplicateSongFinder()
+        self.song1 = MagicMock()
+        self.song2 = MagicMock()
+        self.song1.search_lyrics = self.full_lyrics
+
+    def songs_probably_equal_same_song_test(self):
+        """
+        Test the DuplicateSongFinder.songsProbablyEqual function with two equal songs.
+        """
+        # GIVEN: Two equal songs
+        self.song2.search_lyrics = self.full_lyrics
+
+        # WHEN: We compare those songs for equality
+        result = self.dsf.songsProbablyEqual(self.song1, self.song2)
+
+        # THEN: The result should be True
+        assert result is True, u'The result should be True'
+
+    def songs_probably_equal_short_song_test(self):
+        """
+        Test the DuplicateSongFinder.songsProbablyEqual function with a song and a short version of it.
+        """
+        # GIVEN: A song and a short version of the same song
+        self.song2.search_lyrics = self.short_lyrics
+
+        # WHEN: We compare those songs for equality
+        result = self.dsf.songsProbablyEqual(self.song1, self.song2)
+
+        # THEN: The result should be True
+        assert result is True, u'The result should be True'
+
+    def songs_probably_equal_error_song_test(self):
+        """
+        Test the DuplicateSongFinder.songsProbablyEqual function with a song and an error-ridden copy of it.
+        """
+        # GIVEN: A song and the same song with lots of errors
+        self.song2.search_lyrics = self.error_lyrics
+
+        # WHEN: We compare those songs for equality
+        result = self.dsf.songsProbablyEqual(self.song1, self.song2)
+
+        # THEN: The result should be True
+        assert result is True, u'The result should be True'
+
+    def songs_probably_equal_different_song_test(self):
+        """
+        Test the DuplicateSongFinder.songsProbablyEqual function with two different songs.
+        """
+        # GIVEN: Two different songs
+        self.song2.search_lyrics = self.different_lyrics
+
+        # WHEN: We compare those songs for equality
+        result = self.dsf.songsProbablyEqual(self.song1, self.song2)
+
+        # THEN: The result should be False
+        assert result is False, u'The result should be False'


Follow ups