openlp-core team mailing list archive

Thread
Date

[Merge] lp:~whydoubt/openlp/easyworship into lp:openlp

To: mp+38445@xxxxxxxxxxxxxxxxxx
From: Jeffrey Smith <whydoubt@xxxxxxxxx>
Date: Thu, 14 Oct 2010 18:23:58 -0000
Reply-to: mp+38445@xxxxxxxxxxxxxxxxxx
Sender: bounces@xxxxxxxxxxxxx

Jeffrey Smith has proposed merging lp:~whydoubt/openlp/easyworship into lp:openlp.

Requested reviews:
  OpenLP Core (openlp-core)


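This is a fix for bug #659673: the EasyWorship importer no longer assumes that all text is Windows-1252. It now decodes using the character set declared in the RTF data (\fcharsetN) or, failing that, the code page from the Paradox table header, and it converts the RTF \tab control word to a tab rather than a newline.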
-- 
https://code.launchpad.net/~whydoubt/openlp/easyworship/+merge/38445
Your team OpenLP Core is requested to review the proposed merge of lp:~whydoubt/openlp/easyworship into lp:openlp.

=== modified file 'openlp/plugins/songs/lib/ewimport.py'
--- openlp/plugins/songs/lib/ewimport.py	2010-09-25 12:13:42 +0000
+++ openlp/plugins/songs/lib/ewimport.py	2010-10-14 18:23:54 +0000
@@ -35,7 +35,7 @@
 from openlp.core.lib import translate
 from songimport import SongImport
 
-def strip_rtf(blob):
+def strip_rtf(blob, encoding):
     depth = 0
     control = False
     clear_text = []
@@ -69,12 +69,42 @@
                     if control_str == 'par' or control_str == 'line':
                         clear_text.append(u'\n')
                     elif control_str == 'tab':
-                        clear_text.append(u'\n')
+                        clear_text.append(u'\t')
+                    # Prefer the encoding specified by the RTF data to that
+                    #  specified by the Paradox table header
+                    # West European encoding
+                    elif control_str == 'fcharset0':
+                        encoding = u'cp1252'
+                    # Greek encoding
+                    elif control_str == 'fcharset161':
+                        encoding = u'cp1253'
+                    # Turkish encoding
+                    elif control_str == 'fcharset162':
+                        encoding = u'cp1254'
+                    # Vietnamese encoding
+                    elif control_str == 'fcharset163':
+                        encoding = u'cp1258'
+                    # Hebrew encoding
+                    elif control_str == 'fcharset177':
+                        encoding = u'cp1255'
+                    # Arabic encoding
+                    elif control_str == 'fcharset178':
+                        encoding = u'cp1256'
+                    # Baltic encoding
+                    elif control_str == 'fcharset186':
+                        encoding = u'cp1257'
+                    # Cyrillic encoding
+                    elif control_str == 'fcharset204':
+                        encoding = u'cp1251'
+                    # Thai encoding
+                    elif control_str == 'fcharset222':
+                        encoding = u'cp874'
+                    # Central+East European encoding
+                    elif control_str == 'fcharset238':
+                        encoding = u'cp1250'
                     elif control_str[0] == '\'':
-                        # Really should take RTF character set into account but
-                        # for now assume ANSI (Windows-1252) and call it good
                         s = chr(int(control_str[1:3], 16))
-                        clear_text.append(s.decode(u'windows-1252'))
+                        clear_text.append(s.decode(encoding))
                     del control_word[:]
             if c == '\\' and new_control:
                 control = True
@@ -126,6 +156,30 @@
             db_file.close()
             self.memo_file.close()
             return False
+        # Take a stab at how text is encoded
+        self.encoding = u'cp1252'
+        db_file.seek(106)
+        code_page, = struct.unpack('<h', db_file.read(2))
+        if code_page == 852:
+            self.encoding = u'cp1250'
+        # The following codepage to actual encoding mappings have not been
+        #  observed, but merely guessed.  Actual example files are needed.
+        #if code_page == 737:
+        #    self.encoding = u'cp1253'
+        #if code_page == 775:
+        #    self.encoding = u'cp1257'
+        #if code_page == 855:
+        #    self.encoding = u'cp1251'
+        #if code_page == 857:
+        #    self.encoding = u'cp1254'
+        #if code_page == 866:
+        #    self.encoding = u'cp1251'
+        #if code_page == 869:
+        #    self.encoding = u'cp1253'
+        #if code_page == 862:
+        #    self.encoding = u'cp1255'
+        #if code_page == 874:
+        #    self.encoding = u'cp874'
         # There does not appear to be a _reliable_ way of getting the number
         # of songs/records, so let's use file blocks for measuring progress.
         total_blocks = (db_size - header_size) / (block_size * 1024)
@@ -204,7 +258,7 @@
                         self.add_author(author_name.strip())
                 if words:
                     # Format the lyrics
-                    words = strip_rtf(words)
+                    words = strip_rtf(words, self.encoding)
                     for verse in words.split(u'\n\n'):
                         self.add_verse(verse.strip(), u'V')
                 if self.stop_import_flag:
@@ -263,7 +317,7 @@
         # Format the field depending on the field type
         if field_desc.type == 1:
             # string
-            return field.rstrip('\0').decode(u'windows-1252')
+            return field.rstrip('\0').decode(self.encoding)
         elif field_desc.type == 3:
             # 16-bit int
             return field ^ 0x8000

Follow ups

useful information
From: Jeffrey Smith, 2017-04-10
what a pleasant surprise
From: Jeffrey Smith, 2017-03-17
[Merge] lp:~whydoubt/openlp/easyworship into lp:openlp
From: noreply, 2010-10-14
Re: [Merge] lp:~whydoubt/openlp/easyworship into lp:openlp
From: Raoul Snyman, 2010-10-14
Re: [Merge] lp:~whydoubt/openlp/easyworship into lp:openlp
From: Tim Bentley, 2010-10-14