openlp-core team mailing list archive
-
openlp-core team
-
Mailing list archive
-
Message #04470
[Merge] lp:~m2j/openlp/work into lp:openlp
m2j has proposed merging lp:~m2j/openlp/work into lp:openlp.
Requested reviews:
Raoul Snyman (raoul-snyman)
Jon Tibble (meths)
- The openlp.org 1.x importer now detects the character encoding of the database and shows an input dialog so the user can correct the guess.
- prefer UTF-8 encoding for CCLI import
- split the song order on runs of consecutive whitespace (previously, song orders containing two consecutive spaces were rejected)
--
https://code.launchpad.net/~m2j/openlp/work/+merge/40796
Your team OpenLP Core is subscribed to branch lp:openlp.
=== modified file 'openlp/plugins/songs/forms/editsongform.py'
--- openlp/plugins/songs/forms/editsongform.py 2010-11-03 18:18:44 +0000
+++ openlp/plugins/songs/forms/editsongform.py 2010-11-13 22:06:34 +0000
@@ -525,7 +525,7 @@
return False
if self.song.verse_order:
order = []
- order_names = self.song.verse_order.split(u' ')
+ order_names = self.song.verse_order.split(None)
for item in order_names:
if len(item) == 1:
order.append(item.lower() + u'1')
=== modified file 'openlp/plugins/songs/lib/cclifileimport.py' (properties changed: +x to -x)
--- openlp/plugins/songs/lib/cclifileimport.py 2010-11-03 17:19:44 +0000
+++ openlp/plugins/songs/lib/cclifileimport.py 2010-11-13 22:06:34 +0000
@@ -76,7 +76,12 @@
lines = []
if os.path.isfile(filename):
detect_file = open(filename, u'r')
- details = chardet.detect(detect_file.read(2048))
+ detect_content = detect_file.read(2048)
+ try:
+ unicode(detect_content, u'utf-8')
+ details = {'confidence': 1, 'encoding': 'utf-8'}
+ except UnicodeDecodeError:
+ details = chardet.detect(detect_content)
detect_file.close()
infile = codecs.open(filename, u'r', details['encoding'])
lines = infile.readlines()
=== modified file 'openlp/plugins/songs/lib/mediaitem.py'
--- openlp/plugins/songs/lib/mediaitem.py 2010-11-03 17:19:44 +0000
+++ openlp/plugins/songs/lib/mediaitem.py 2010-11-13 22:06:34 +0000
@@ -358,7 +358,7 @@
verse[1][:30], unicode(verse[1]), verseTag)
else:
#Loop through the verse list and expand the song accordingly.
- for order in song.verse_order.upper().split(u' '):
+ for order in song.verse_order.upper().split(None):
if len(order) == 0:
break
for verse in verseList:
=== modified file 'openlp/plugins/songs/lib/olp1import.py'
--- openlp/plugins/songs/lib/olp1import.py 2010-11-03 18:03:28 +0000
+++ openlp/plugins/songs/lib/olp1import.py 2010-11-13 22:06:34 +0000
@@ -27,8 +27,11 @@
The :mod:`olp1import` module provides the functionality for importing
openlp.org 1.x song databases into the current installation database.
"""
+
+from PyQt4 import QtGui
+
import logging
-import chardet
+from chardet.universaldetector import UniversalDetector
import sqlite
from openlp.core.lib import translate
@@ -56,60 +59,38 @@
SongImport.__init__(self, manager)
self.import_source = kwargs[u'filename']
- def decode_string(self, raw, guess):
- """
- Use chardet to detect the encoding of the raw string, and convert it
- to unicode.
-
- ``raw``
- The raw bytestring to decode.
- ``guess``
- What chardet guessed the encoding to be.
- """
- if guess[u'confidence'] < 0.8:
- codec = u'windows-1252'
- else:
- codec = guess[u'encoding']
- try:
- decoded = unicode(raw, codec)
- self.last_encoding = codec
- except UnicodeDecodeError:
- log.exception(
- u'Error in detecting openlp.org 1.x database encoding.')
- try:
- decoded = unicode(raw, self.last_encoding)
- except UnicodeDecodeError:
- # possibly show an error form
- #self.import_wizard.showError(u'There was a problem '
- # u'detecting the encoding of a string')
- decoded = raw
- return decoded
-
def do_import(self):
"""
Run the import for an openlp.org 1.x song database.
"""
# Connect to the database
- connection = sqlite.connect(self.import_source)
+ encoding = self.get_encoding()
+ if not encoding:
+ return False
+ connection = sqlite.connect(self.import_source, mode=0444,
+ encoding=(encoding, 'replace'))
cursor = connection.cursor()
# Determine if we're using a new or an old DB
cursor.execute(u'SELECT name FROM sqlite_master '
u'WHERE type = \'table\' AND name = \'tracks\'')
- table_list = cursor.fetchall()
- new_db = len(table_list) > 0
+ new_db = len(cursor.fetchall()) > 0
# Count the number of records we need to import, for the progress bar
+ cursor.execute(u'-- types int')
cursor.execute(u'SELECT COUNT(songid) FROM songs')
- count = int(cursor.fetchone()[0])
+ count = cursor.fetchone()[0]
success = True
self.import_wizard.importProgressBar.setMaximum(count)
# "cache" our list of authors
+ cursor.execute(u'-- types int, unicode')
cursor.execute(u'SELECT authorid, authorname FROM authors')
authors = cursor.fetchall()
if new_db:
# "cache" our list of tracks
+ cursor.execute(u'-- types int, unicode')
cursor.execute(u'SELECT trackid, fulltrackname FROM tracks')
tracks = cursor.fetchall()
# Import the songs
+ cursor.execute(u'-- types int, unicode, unicode, unicode')
cursor.execute(u'SELECT songid, songtitle, lyrics || \'\' AS lyrics, '
u'copyrightinfo FROM songs')
songs = cursor.fetchall()
@@ -119,16 +100,19 @@
success = False
break
song_id = song[0]
- guess = chardet.detect(song[2])
- title = self.decode_string(song[1], guess)
- lyrics = self.decode_string(song[2], guess).replace(u'\r', u'')
- copyright = self.decode_string(song[3], guess)
+ title = song[1]
+ lyrics = song[2].replace(u'\r\n', u'\n')
+ copyright = song[3]
self.import_wizard.incrementProgressBar(
unicode(translate('SongsPlugin.ImportWizardForm',
'Importing "%s"...')) % title)
self.title = title
- self.process_song_text(lyrics)
+ verses = lyrics.split(u'\n\n')
+ for verse in verses:
+ if verse.strip() != u'':
+ self.add_verse(verse.strip())
self.add_copyright(copyright)
+ cursor.execute(u'-- types int')
cursor.execute(u'SELECT authorid FROM songauthors '
u'WHERE songid = %s' % song_id)
author_ids = cursor.fetchall()
@@ -138,12 +122,13 @@
break
for author in authors:
if author[0] == author_id[0]:
- self.parse_author(self.decode_string(author[1], guess))
+ self.parse_author(author[1])
break
if self.stop_import_flag:
success = False
break
if new_db:
+ cursor.execute(u'-- types int')
cursor.execute(u'SELECT trackid FROM songtracks '
u'WHERE songid = %s ORDER BY listindex' % song_id)
track_ids = cursor.fetchall()
@@ -153,8 +138,7 @@
break
for track in tracks:
if track[0] == track_id[0]:
- self.add_media_file(self.decode_string(track[1],
- guess))
+ self.add_media_file(track[1])
break
if self.stop_import_flag:
success = False
@@ -162,3 +146,103 @@
self.finish()
return success
+ def get_encoding(self):
+ """
+ Detect character encoding of an openlp.org 1.x song database.
+ """
+ # Connect to the database
+ connection = sqlite.connect(self.import_source, mode=0444)
+ cursor = connection.cursor()
+
+ detector = UniversalDetector()
+ # detect charset by authors
+ cursor.execute(u'SELECT authorname FROM authors')
+ authors = cursor.fetchall()
+ for author in authors:
+ detector.feed(author[0])
+ if detector.done:
+ detector.close()
+ return detector.result[u'encoding']
+ # detect charset by songs
+ cursor.execute(u'SELECT songtitle, copyrightinfo, '
+ u'lyrics || \'\' AS lyrics FROM songs')
+ songs = cursor.fetchall()
+ for index in [0, 1, 2]:
+ for song in songs:
+ detector.feed(song[index])
+ if detector.done:
+ detector.close()
+ return detector.result[u'encoding']
+ # detect charset by songs
+ cursor.execute(u'SELECT name FROM sqlite_master '
+ u'WHERE type = \'table\' AND name = \'tracks\'')
+ if len(cursor.fetchall()) > 0:
+ cursor.execute(u'SELECT fulltrackname FROM tracks')
+ tracks = cursor.fetchall()
+ for track in tracks:
+ detector.feed(track[0])
+ if detector.done:
+ detector.close()
+ return detector.result[u'encoding']
+ detector.close()
+ guess = detector.result[u'encoding']
+
+ # map chardet result to compatible windows standard code page
+ codepage_mapping = {'IBM866': u'cp866', 'TIS-620': u'cp874',
+ 'SHIFT_JIS': u'cp932', 'GB2312': u'cp936', 'HZ-GB-2312': u'cp936',
+ 'EUC-KR': u'cp949', 'Big5': u'cp950', 'ISO-8859-2': u'cp1250',
+ 'windows-1250': u'cp1250', 'windows-1251': u'cp1251',
+ 'windows-1252': u'cp1252', 'ISO-8859-7': u'cp1253',
+ 'windows-1253': u'cp1253', 'ISO-8859-8': u'cp1255',
+ 'windows-1255': u'cp1255'}
+ if guess in codepage_mapping:
+ guess = codepage_mapping[guess]
+ else:
+ guess = u'cp1252'
+
+ # Show dialog for encoding selection
+ encodings = [(u'cp874', translate('SongsPlugin.OpenLP1SongImport',
+ 'CP-874 (Thai)')),
+ (u'cp932', translate('SongsPlugin.OpenLP1SongImport',
+ 'CP-932 (Japanese)')),
+ (u'cp936', translate('SongsPlugin.OpenLP1SongImport',
+ 'CP-936 (Simplified Chinese)')),
+ (u'cp949', translate('SongsPlugin.OpenLP1SongImport',
+ 'CP-949 (Korean)')),
+ (u'cp950', translate('SongsPlugin.OpenLP1SongImport',
+ 'CP-950 (Traditional Chinese)')),
+ (u'cp1250', translate('SongsPlugin.OpenLP1SongImport',
+ 'CP-1250 (Central European)')),
+ (u'cp1251', translate('SongsPlugin.OpenLP1SongImport',
+ 'CP-1251 (Cyrillic)')),
+ (u'cp1252', translate('SongsPlugin.OpenLP1SongImport',
+ 'CP-1252 (Western European)')),
+ (u'cp1253', translate('SongsPlugin.OpenLP1SongImport',
+ 'CP-1253 (Greek)')),
+ (u'cp1254', translate('SongsPlugin.OpenLP1SongImport',
+ 'CP-1254 (Turkish)')),
+ (u'cp1255', translate('SongsPlugin.OpenLP1SongImport',
+ 'CP-1255 (Hebrew)')),
+ (u'cp1256', translate('SongsPlugin.OpenLP1SongImport',
+ 'CP-1256 (Arabic)')),
+ (u'cp1257', translate('SongsPlugin.OpenLP1SongImport',
+ 'CP-1257 (Baltic)')),
+ (u'cp1258', translate('SongsPlugin.OpenLP1SongImport',
+ 'CP-1258 (Vietnam)'))]
+ encoding_index = 0
+ for index in range(len(encodings)):
+ if guess == encodings[index][0]:
+ encoding_index = index
+ break
+ chosen_encoding = QtGui.QInputDialog.getItem(None,
+ translate('SongsPlugin.OpenLP1SongImport',
+ 'Database Character Encoding'),
+ translate('SongsPlugin.OpenLP1SongImport',
+ 'The codepage setting is responsible\n'
+ 'for the correct character representation.\n'
+ 'Usually you are fine with the preselected choise.'),
+ [pair[1] for pair in encodings], encoding_index, False)
+ if not chosen_encoding[1]:
+ return None
+ return filter(lambda item: item[1] == chosen_encoding[0],
+ encodings)[0][0]
Follow ups