← Back to team overview

openlp-core team mailing list archive

[Merge] lp:~trb143/openlp/duphash into lp:openlp

 

Tim Bentley has proposed merging lp:~trb143/openlp/duphash into lp:openlp.

Requested reviews:
  OpenLP Core (openlp-core)

For more details, see:
https://code.launchpad.net/~trb143/openlp/duphash/+merge/328736

THIS WILL EDIT YOUR SONG DATABASE, so take a backup first.

Proof-of-concept (POC) code to speed up finding duplicate songs.
-- 
Your team OpenLP Core is requested to review the proposed merge of lp:~trb143/openlp/duphash into lp:openlp.
=== modified file 'openlp/core/lib/db.py'
--- openlp/core/lib/db.py	2017-08-01 20:59:41 +0000
+++ openlp/core/lib/db.py	2017-08-08 19:40:26 +0000
@@ -208,8 +208,8 @@
     :param upgrade: The python module that contains the upgrade instructions.
     """
     if not database_exists(url):
-        log.warn("Database {db} doesn't exist - skipping upgrade checks".format(db=url))
-        return (0, 0)
+        log.warning("Database {db} doesn't exist - skipping upgrade checks".format(db=url))
+        return 0, 0
 
     log.debug('Checking upgrades for DB {db}'.format(db=url))
 

=== modified file 'openlp/plugins/songs/forms/duplicatesongremovalform.py'
--- openlp/plugins/songs/forms/duplicatesongremovalform.py	2017-06-09 06:06:49 +0000
+++ openlp/plugins/songs/forms/duplicatesongremovalform.py	2017-08-08 19:40:26 +0000
@@ -25,14 +25,14 @@
 
 import logging
 import multiprocessing
-import os
 
 from PyQt5 import QtCore, QtWidgets
+from sqlalchemy.sql.expression import func
 
 from openlp.core.common import Registry, RegistryProperties, translate
 from openlp.core.ui.lib.wizard import OpenLPWizard, WizardStrings
 from openlp.plugins.songs.lib import delete_song
-from openlp.plugins.songs.lib.db import Song, MediaFile
+from openlp.plugins.songs.lib.db import Song
 from openlp.plugins.songs.forms.songreviewwidget import SongReviewWidget
 from openlp.plugins.songs.lib.songcompare import songs_probably_equal
 
@@ -174,6 +174,17 @@
                     self.duplicate_search_progress_bar.setValue(1)
                     self.notify_no_duplicates()
                     return
+
+                search_results = \
+                    self.plugin.manager.session.query(func.count(Song.id), Song).group_by(Song.song_hash).all()
+                for y, x in search_results:
+                    hash = x.song_hash
+                    if y > 1:
+                        dupls = self.plugin.manager.session.query(Song).filter(Song.song_hash==hash).all()
+                        for d in dupls:
+                            print(d.title)
+                        print("-------")
+
                 # With x songs we have x*(x - 1) / 2 comparisons.
                 max_progress_count = max_songs * (max_songs - 1) // 2
                 self.duplicate_search_progress_bar.setMaximum(max_progress_count)

=== modified file 'openlp/plugins/songs/lib/__init__.py'
--- openlp/plugins/songs/lib/__init__.py	2017-08-01 20:59:41 +0000
+++ openlp/plugins/songs/lib/__init__.py	2017-08-08 19:40:26 +0000
@@ -390,6 +390,11 @@
         song.add_author(author)
     if song.copyright:
         song.copyright = CONTROL_CHARS.sub('', song.copyright).strip()
+    import hashlib
+    m = hashlib.md5()
+    m.update(song.search_lyrics.encode('utf-8'))
+    print("{a} {b}".format(a=str(m.digest_size), b=str(m.hexdigest())))
+    song.song_hash = str(m.hexdigest())
 
 
 def get_encoding(font, font_table, default_encoding, failed=False):
@@ -546,12 +551,12 @@
     song_plugin.manager.delete_object(Song, song_id)
 
 
-def transpose_lyrics(lyrics, transepose_value):
+def transpose_lyrics(lyrics, transpose_value):
     """
-    Transepose lyrics
+    Transpose lyrics
 
-    :param lyrcs: The lyrics to be transposed
-    :param transepose_value: The value to transpose the lyrics with
+    :param lyrics: The lyrics to be transposed
+    :param transpose_value: The value to transpose the lyrics with
     :return: The transposed lyrics
     """
     # Split text by verse delimiter - both normal and optional
@@ -562,16 +567,17 @@
         if verse.startswith('---[') or verse == '[---]':
             transposed_lyrics += verse
         else:
-            transposed_lyrics += transpose_verse(verse, transepose_value, notation)
+            transposed_lyrics += transpose_verse(verse, transpose_value, notation)
     return transposed_lyrics
 
 
-def transpose_verse(verse_text, transepose_value, notation):
+def transpose_verse(verse_text, transpose_value, notation):
     """
-    Transepose lyrics
+    Transpose lyrics
 
-    :param lyrcs: The lyrics to be transposed
-    :param transepose_value: The value to transpose the lyrics with
+    :param verse_text: The lyrics to be transposed
+    :param transpose_value: The value to transpose the lyrics with
+    :param notation: what notation will we use
     :return: The transposed lyrics
     """
     if '[' not in verse_text:
@@ -593,7 +599,7 @@
                 transposed_lyrics += word
             else:
                 # This MUST be a chord
-                transposed_lyrics += transpose_chord(word, transepose_value, notation)
+                transposed_lyrics += transpose_chord(word, transpose_value, notation)
     # If still inside a chord tag something is wrong!
     if in_tag:
         return verse_text

=== modified file 'openlp/plugins/songs/lib/db.py'
--- openlp/plugins/songs/lib/db.py	2016-12-31 11:01:36 +0000
+++ openlp/plugins/songs/lib/db.py	2017-08-08 19:40:26 +0000
@@ -334,7 +334,8 @@
         Column('search_lyrics', types.UnicodeText, nullable=False),
         Column('create_date', types.DateTime(), default=func.now()),
         Column('last_modified', types.DateTime(), default=func.now(), onupdate=func.now()),
-        Column('temporary', types.Boolean(), default=False)
+        Column('temporary', types.Boolean(), default=False),
+        Column('song_hash', types.Unicode(128), default="")
     )
 
     # Definition of the "topics" table

=== modified file 'openlp/plugins/songs/lib/upgrade.py'
--- openlp/plugins/songs/lib/upgrade.py	2017-06-10 05:57:00 +0000
+++ openlp/plugins/songs/lib/upgrade.py	2017-08-08 19:40:26 +0000
@@ -32,7 +32,7 @@
 from openlp.core.lib.db import get_upgrade_op
 
 log = logging.getLogger(__name__)
-__version__ = 6
+__version__ = 7
 
 
 # TODO: When removing an upgrade path the ftw-data needs updating to the minimum supported version
@@ -162,3 +162,17 @@
             op.drop_column('songs', 'song_number')
     # Finally, clean up our mess in people's databases
     op.execute('DELETE FROM songs_songbooks WHERE songbook_id = 0')
+
+
+def upgrade_7(session, metadata):
+    """
+    Version 7 upgrade.
+
+    This upgrade adds a hash field to the database
+    """
+    op = get_upgrade_op(session)
+    songs_table = Table('songs', metadata, autoload=True)
+    if 'song_hash' not in [col.name for col in songs_table.c.values()]:
+        op.add_column('songs', Column('song_hash', types.Unicode(128), default=""))
+    else:
+        log.warning('Skipping upgrade_7 step of upgrading the song db')


Follow ups