← Back to team overview

openlp-core team mailing list archive

[Merge] lp:~m2j/openlp/work into lp:openlp

 

m2j has proposed merging lp:~m2j/openlp/work into lp:openlp.

Requested reviews:
  Jon Tibble (meths)


- The openlp.org 1.x importer detects the character encoding of the database and shows an input dialog for correcting the guess.
- prefer UTF-8 encoding for CCLI import
- split the song order on any run of whitespace (previously, song orders containing two consecutive spaces were rejected)
-- 
https://code.launchpad.net/~m2j/openlp/work/+merge/40782
Your team OpenLP Core is subscribed to branch lp:openlp.
=== modified file 'openlp/plugins/songs/forms/editsongform.py'
--- openlp/plugins/songs/forms/editsongform.py	2010-11-03 18:18:44 +0000
+++ openlp/plugins/songs/forms/editsongform.py	2010-11-13 08:07:09 +0000
@@ -525,7 +525,7 @@
                 return False
         if self.song.verse_order:
             order = []
-            order_names = self.song.verse_order.split(u' ')
+            order_names = self.song.verse_order.split(None)
             for item in order_names:
                 if len(item) == 1:
                     order.append(item.lower() + u'1')

=== modified file 'openlp/plugins/songs/lib/cclifileimport.py' (properties changed: +x to -x)
--- openlp/plugins/songs/lib/cclifileimport.py	2010-11-03 17:19:44 +0000
+++ openlp/plugins/songs/lib/cclifileimport.py	2010-11-13 08:07:09 +0000
@@ -76,7 +76,12 @@
             lines = []
             if os.path.isfile(filename):
                 detect_file = open(filename, u'r')
-                details = chardet.detect(detect_file.read(2048))
+                detect_content = detect_file.read(2048)
+                try:
+                    unicode(detect_content, u'utf-8')
+                    details = {'confidence': 1, 'encoding': 'utf-8'}
+                except UnicodeDecodeError:
+                    details = chardet.detect(detect_content)
                 detect_file.close()
                 infile = codecs.open(filename, u'r', details['encoding'])
                 lines = infile.readlines()

=== modified file 'openlp/plugins/songs/lib/mediaitem.py'
--- openlp/plugins/songs/lib/mediaitem.py	2010-11-03 17:19:44 +0000
+++ openlp/plugins/songs/lib/mediaitem.py	2010-11-13 08:07:09 +0000
@@ -358,7 +358,7 @@
                         verse[1][:30], unicode(verse[1]), verseTag)
             else:
                 #Loop through the verse list and expand the song accordingly.
-                for order in song.verse_order.upper().split(u' '):
+                for order in song.verse_order.upper().split(None):
                     if len(order) == 0:
                         break
                     for verse in verseList:

=== modified file 'openlp/plugins/songs/lib/olp1import.py'
--- openlp/plugins/songs/lib/olp1import.py	2010-11-03 18:03:28 +0000
+++ openlp/plugins/songs/lib/olp1import.py	2010-11-13 08:07:09 +0000
@@ -27,8 +27,11 @@
 The :mod:`olp1import` module provides the functionality for importing
 openlp.org 1.x song databases into the current installation database.
 """
+
+from PyQt4 import QtGui
+
 import logging
-import chardet
+from chardet.universaldetector import UniversalDetector
 import sqlite
 
 from openlp.core.lib import translate
@@ -56,60 +59,38 @@
         SongImport.__init__(self, manager)
         self.import_source = kwargs[u'filename']
 
-    def decode_string(self, raw, guess):
-        """
-        Use chardet to detect the encoding of the raw string, and convert it
-        to unicode.
-
-        ``raw``
-            The raw bytestring to decode.
-        ``guess``
-            What chardet guessed the encoding to be.
-        """
-        if guess[u'confidence'] < 0.8:
-            codec = u'windows-1252'
-        else:
-            codec = guess[u'encoding']
-        try:
-            decoded = unicode(raw, codec)
-            self.last_encoding = codec
-        except UnicodeDecodeError:
-            log.exception(
-                u'Error in detecting openlp.org 1.x database encoding.')
-            try:
-                decoded = unicode(raw, self.last_encoding)
-            except UnicodeDecodeError:
-                # possibly show an error form
-                #self.import_wizard.showError(u'There was a problem '
-                #    u'detecting the encoding of a string')
-                decoded = raw
-        return decoded
-
     def do_import(self):
         """
         Run the import for an openlp.org 1.x song database.
         """
         # Connect to the database
-        connection = sqlite.connect(self.import_source)
+        encoding = self.get_encoding()
+        if not encoding:
+            return False
+        connection = sqlite.connect(self.import_source, mode=0444,
+            encoding=(encoding, 'replace'))
         cursor = connection.cursor()
         # Determine if we're using a new or an old DB
         cursor.execute(u'SELECT name FROM sqlite_master '
             u'WHERE type = \'table\' AND name = \'tracks\'')
-        table_list = cursor.fetchall()
-        new_db = len(table_list) > 0
+        new_db = len(cursor.fetchall()) > 0
         # Count the number of records we need to import, for the progress bar
+        cursor.execute(u'-- types int')
         cursor.execute(u'SELECT COUNT(songid) FROM songs')
-        count = int(cursor.fetchone()[0])
+        count = cursor.fetchone()[0]
         success = True
         self.import_wizard.importProgressBar.setMaximum(count)
         # "cache" our list of authors
+        cursor.execute(u'-- types int, unicode')
         cursor.execute(u'SELECT authorid, authorname FROM authors')
         authors = cursor.fetchall()
         if new_db:
             # "cache" our list of tracks
+            cursor.execute(u'-- types int, unicode')
             cursor.execute(u'SELECT trackid, fulltrackname FROM tracks')
             tracks = cursor.fetchall()
         # Import the songs
+        cursor.execute(u'-- types int, unicode, unicode, unicode')
         cursor.execute(u'SELECT songid, songtitle, lyrics || \'\' AS lyrics, '
             u'copyrightinfo FROM songs')
         songs = cursor.fetchall()
@@ -119,16 +100,19 @@
                 success = False
                 break
             song_id = song[0]
-            guess = chardet.detect(song[2])
-            title = self.decode_string(song[1], guess)
-            lyrics = self.decode_string(song[2], guess).replace(u'\r', u'')
-            copyright = self.decode_string(song[3], guess)
+            title = song[1]
+            lyrics = song[2].replace(u'\r\n', u'\n')
+            copyright = song[3]
             self.import_wizard.incrementProgressBar(
                 unicode(translate('SongsPlugin.ImportWizardForm',
                     'Importing "%s"...')) % title)
             self.title = title
-            self.process_song_text(lyrics)
+            verses = lyrics.split(u'\n\n')
+            for verse in verses:
+                if verse.strip() != u'':
+                    self.add_verse(verse.strip())
             self.add_copyright(copyright)
+            cursor.execute(u'-- types int')
             cursor.execute(u'SELECT authorid FROM songauthors '
                 u'WHERE songid = %s' % song_id)
             author_ids = cursor.fetchall()
@@ -138,12 +122,13 @@
                     break
                 for author in authors:
                     if author[0] == author_id[0]:
-                        self.parse_author(self.decode_string(author[1], guess))
+                        self.parse_author(author[1])
                         break
             if self.stop_import_flag:
                 success = False
                 break
             if new_db:
+                cursor.execute(u'-- types int')
                 cursor.execute(u'SELECT trackid FROM songtracks '
                     u'WHERE songid = %s ORDER BY listindex' % song_id)
                 track_ids = cursor.fetchall()
@@ -153,8 +138,7 @@
                         break
                     for track in tracks:
                         if track[0] == track_id[0]:
-                            self.add_media_file(self.decode_string(track[1],
-                                guess))
+                            self.add_media_file(track[1])
                             break
             if self.stop_import_flag:
                 success = False
@@ -162,3 +146,107 @@
             self.finish()
         return success
 
+    def get_encoding(self):
+        """
+        Detect character encoding of an openlp.org 1.x song database.
+        """
+        # Connect to the database
+        connection = sqlite.connect(self.import_source, mode=0444)
+        cursor = connection.cursor()
+
+        detector = UniversalDetector()
+        # detect charset by authors
+        cursor.execute(u'SELECT authorname FROM authors')
+        authors = cursor.fetchall()
+        for author in authors:
+            detector.feed(author[0])
+            if detector.done:
+                detector.close()
+                return detector.result[u'encoding']
+        # detect charset by songs
+        cursor.execute(u'SELECT songtitle, copyrightinfo, '
+            u'lyrics || \'\' AS lyrics FROM songs')
+        songs = cursor.fetchall()
+        for index in [0, 1, 2]:
+            for song in songs:
+                detector.feed(song[index])
+                if detector.done:
+                    detector.close()
+                    return detector.result[u'encoding']
+        # detect charset by tracks
+        cursor.execute(u'SELECT name FROM sqlite_master '
+            u'WHERE type = \'table\' AND name = \'tracks\'')
+        if len(cursor.fetchall()) > 0:
+            cursor.execute(u'SELECT fulltrackname FROM tracks')
+            tracks = cursor.fetchall()
+            for track in tracks:
+                detector.feed(track[0])
+                if detector.done:
+                    detector.close()
+                    return detector.result[u'encoding']
+        detector.close()
+        guess = detector.result[u'encoding']
+
+        # map chardet result to compatible windows standard code page
+        codepage_mapping = {'IBM866': u'cp866', 'TIS-620': u'cp874',
+            'SHIFT_JIS': u'cp932', 'GB2312': u'cp936', 'HZ-GB-2312': u'cp936',
+            'EUC-KR': u'cp949', 'Big5': u'cp950', 'ISO-8859-2': u'cp1250',
+            'windows-1250': u'cp1250', 'windows-1251': u'cp1251',
+            'windows-1252': u'cp1252', 'ISO-8859-7': u'cp1253',
+            'windows-1253': u'cp1253', 'ISO-8859-8': u'cp1255',
+            'windows-1255': u'cp1255'}
+        if guess in codepage_mapping:
+            guess = codepage_mapping[guess]
+        else:
+            guess = u'cp1252'
+
+        # Show dialog for encoding selection
+        encodings = [[u'cp874', u'cp932', u'cp936', u'cp949', u'cp950',
+                u'cp1250', u'cp1251', u'cp1252', u'cp1253', u'cp1254',
+                u'cp1255', u'cp1256', u'cp1257', u'cp1258'],
+            [translate('SongsPlugin.OpenLP1SongImport',
+                    'CP-874 (Thai)'),
+                translate('SongsPlugin.OpenLP1SongImport',
+                    'CP-932 (Japanese)'),
+                translate('SongsPlugin.OpenLP1SongImport',
+                    'CP-936 (Simplified Chinese)'),
+                translate('SongsPlugin.OpenLP1SongImport',
+                    'CP-949 (Korean)'),
+                translate('SongsPlugin.OpenLP1SongImport',
+                    'CP-950 (Traditional Chinese)'),
+                translate('SongsPlugin.OpenLP1SongImport',
+                    'CP-1250 (Central European)'),
+                translate('SongsPlugin.OpenLP1SongImport',
+                    'CP-1251 (Cyrillic)'),
+                translate('SongsPlugin.OpenLP1SongImport',
+                    'CP-1252 (Western European)'),
+                translate('SongsPlugin.OpenLP1SongImport',
+                    'CP-1253 (Greek)'),
+                translate('SongsPlugin.OpenLP1SongImport',
+                    'CP-1254 (Turkish)'),
+                translate('SongsPlugin.OpenLP1SongImport',
+                    'CP-1255 (Hebrew)'),
+                translate('SongsPlugin.OpenLP1SongImport',
+                    'CP-1256 (Arabic)'),
+                translate('SongsPlugin.OpenLP1SongImport',
+                    'CP-1257 (Baltic)'),
+                translate('SongsPlugin.OpenLP1SongImport',
+                    'CP-1258 (Vietnam)')]]
+        encoding_index = 0
+        for index in range(len(encodings[0])):
+            if guess == encodings[0][index]:
+                encoding_index = index
+                break
+        chosen_encoding = QtGui.QInputDialog.getItem(None,
+            translate('SongsPlugin.OpenLP1SongImport',
+                'Database Character Encoding'),
+            translate('SongsPlugin.OpenLP1SongImport',
+                'The codepage setting is responsible\n'
+                'for the correct character representation.\n'
+                'Usually you are fine with the preselected choise.'),
+            encodings[1], encoding_index, False)
+        if not chosen_encoding[1]:
+            return None
+        for index in range(len(encodings[1])):
+            if chosen_encoding[0] == encodings[1][index]:
+                return encodings[0][index]


Follow ups