openlp-core team mailing list archive
-
openlp-core team
-
Mailing list archive
-
Message #04191
[Merge] lp:~whydoubt/openlp/easyworship into lp:openlp
Jeffrey Smith has proposed merging lp:~whydoubt/openlp/easyworship into lp:openlp.
Requested reviews:
OpenLP Core (openlp-core)
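This is a fix for bug #659673: the EasyWorship importer now decodes text using the encoding indicated by the RTF \fcharset control word (falling back to the code page in the Paradox table header) instead of always assuming Windows-1252.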
--
https://code.launchpad.net/~whydoubt/openlp/easyworship/+merge/38445
Your team OpenLP Core is requested to review the proposed merge of lp:~whydoubt/openlp/easyworship into lp:openlp.
=== modified file 'openlp/plugins/songs/lib/ewimport.py'
--- openlp/plugins/songs/lib/ewimport.py 2010-09-25 12:13:42 +0000
+++ openlp/plugins/songs/lib/ewimport.py 2010-10-14 18:23:54 +0000
@@ -35,7 +35,7 @@
from openlp.core.lib import translate
from songimport import SongImport
-def strip_rtf(blob):
+def strip_rtf(blob, encoding):
depth = 0
control = False
clear_text = []
@@ -69,12 +69,42 @@
if control_str == 'par' or control_str == 'line':
clear_text.append(u'\n')
elif control_str == 'tab':
- clear_text.append(u'\n')
+ clear_text.append(u'\t')
+ # Prefer the encoding specified by the RTF data to that
+ # specified by the Paradox table header
+ # West European encoding
+ elif control_str == 'fcharset0':
+ encoding = u'cp1252'
+ # Greek encoding
+ elif control_str == 'fcharset161':
+ encoding = u'cp1253'
+ # Turkish encoding
+ elif control_str == 'fcharset162':
+ encoding = u'cp1254'
+ # Vietnamese encoding
+ elif control_str == 'fcharset163':
+ encoding = u'cp1258'
+ # Hebrew encoding
+ elif control_str == 'fcharset177':
+ encoding = u'cp1255'
+ # Arabic encoding
+ elif control_str == 'fcharset178':
+ encoding = u'cp1256'
+ # Baltic encoding
+ elif control_str == 'fcharset186':
+ encoding = u'cp1257'
+ # Cyrillic encoding
+ elif control_str == 'fcharset204':
+ encoding = u'cp1251'
+ # Thai encoding
+ elif control_str == 'fcharset222':
+ encoding = u'cp874'
+ # Central+East European encoding
+ elif control_str == 'fcharset238':
+ encoding = u'cp1250'
elif control_str[0] == '\'':
- # Really should take RTF character set into account but
- # for now assume ANSI (Windows-1252) and call it good
s = chr(int(control_str[1:3], 16))
- clear_text.append(s.decode(u'windows-1252'))
+ clear_text.append(s.decode(encoding))
del control_word[:]
if c == '\\' and new_control:
control = True
@@ -126,6 +156,30 @@
db_file.close()
self.memo_file.close()
return False
+ # Take a stab at how text is encoded
+ self.encoding = u'cp1252'
+ db_file.seek(106)
+ code_page, = struct.unpack('<h', db_file.read(2))
+ if code_page == 852:
+ self.encoding = u'cp1250'
+ # The following code page to encoding mappings are guesses that have not
+ # been observed in real files. Actual example files are needed to confirm them.
+ #if code_page == 737:
+ # self.encoding = u'cp1253'
+ #if code_page == 775:
+ # self.encoding = u'cp1257'
+ #if code_page == 855:
+ # self.encoding = u'cp1251'
+ #if code_page == 857:
+ # self.encoding = u'cp1254'
+ #if code_page == 866:
+ # self.encoding = u'cp1251'
+ #if code_page == 869:
+ # self.encoding = u'cp1253'
+ #if code_page == 862:
+ # self.encoding = u'cp1255'
+ #if code_page == 874:
+ # self.encoding = u'cp874'
# There does not appear to be a _reliable_ way of getting the number
# of songs/records, so let's use file blocks for measuring progress.
total_blocks = (db_size - header_size) / (block_size * 1024)
@@ -204,7 +258,7 @@
self.add_author(author_name.strip())
if words:
# Format the lyrics
- words = strip_rtf(words)
+ words = strip_rtf(words, self.encoding)
for verse in words.split(u'\n\n'):
self.add_verse(verse.strip(), u'V')
if self.stop_import_flag:
@@ -263,7 +317,7 @@
# Format the field depending on the field type
if field_desc.type == 1:
# string
- return field.rstrip('\0').decode(u'windows-1252')
+ return field.rstrip('\0').decode(self.encoding)
elif field_desc.type == 3:
# 16-bit int
return field ^ 0x8000
Follow ups