openlp-core team mailing list archive

Thread
Date

[Merge] lp:~mahfiaz/openlp/bug-933706 into lp:openlp

To: mp+111791@xxxxxxxxxxxxxxxxxx, Raoul Snyman <raoul.snyman@xxxxxxxxxxxxxxxxxxxxxxxx>
From: mahfiaz <mahfiaz@xxxxxxxxx>
Date: Mon, 25 Jun 2012 08:12:26 -0000
Reply-to: mp+111791@xxxxxxxxxxxxxxxxxx
Sender: bounces@xxxxxxxxxxxxx

mahfiaz has proposed merging lp:~mahfiaz/openlp/bug-933706 into lp:openlp.

Requested reviews:
  Tim Bentley (trb143)
  Raoul Snyman (raoul-snyman)

For more details, see:
https://code.launchpad.net/~mahfiaz/openlp/bug-933706/+merge/111791

Adds SundayPlus importer with new StripRtf class.
-- 
https://code.launchpad.net/~mahfiaz/openlp/bug-933706/+merge/111791
Your team OpenLP Core is subscribed to branch lp:openlp.

=== modified file 'openlp/plugins/songs/lib/__init__.py'
--- openlp/plugins/songs/lib/__init__.py	2012-06-22 14:14:53 +0000
+++ openlp/plugins/songs/lib/__init__.py	2012-06-25 08:11:25 +0000
@@ -25,6 +25,7 @@
 # with this program; if not, write to the Free Software Foundation, Inc., 59  #
 # Temple Place, Suite 330, Boston, MA 02111-1307 USA                          #
 ###############################################################################
+import logging
 import re
 
 from PyQt4 import QtGui
@@ -34,6 +35,8 @@
 from db import Author
 from ui import SongStrings
 
+log = logging.getLogger(__name__)
+
 WHITESPACE = re.compile(r'[\W_]+', re.UNICODE)
 APOSTROPHE = re.compile(u'[\'`’ʻ′]', re.UNICODE)
 
@@ -195,7 +198,7 @@
         return verse_index
 
 
-def retrieve_windows_encoding(recommendation=None):
+def retrieve_windows_encoding(recommendation=None, example_text=None):
     """
     Determines which encoding to use on an information source. The process uses
     both automated detection, which is passed to this method as a
@@ -204,6 +207,9 @@
     ``recommendation``
         A recommended encoding discovered programmatically for the user to
         confirm.
+
+    ``example_text``
+        Still not decoded text to show to users to help them decide.
     """
     # map chardet result to compatible windows standard code page
     codepage_mapping = {'IBM866': u'cp866', 'TIS-620': u'cp874',
@@ -366,6 +372,235 @@
     if song.copyright:
         song.copyright = CONTROL_CHARS.sub(u'', song.copyright).strip()
 
+class StripRtf():
+    """
+    This class strips RTF control structures and returns an unicode string.
+
+    Thanks to Markus Jarderot (MizardX) for this code, used by permission.
+    http://stackoverflow.com/questions/188545
+    """
+    PATTERN = re.compile(r"\\([a-z]{1,32})(-?\d{1,10})?[ ]?|\\'"
+        r"([0-9a-f]{2})|\\([^a-z])|([{}])|[\r\n]+|(.)", re.I)
+    # Control words which specify a "destination" to be ignored.
+    DESTINATIONS = frozenset((
+        u'aftncn', u'aftnsep', u'aftnsepc', u'annotation', u'atnauthor', 
+        u'atndate', u'atnicn', u'atnid', u'atnparent', u'atnref', u'atntime',
+        u'atrfend', u'atrfstart', u'author', u'background', u'bkmkend',
+        u'bkmkstart', u'blipuid', u'buptim', u'category',
+        u'colorschememapping', u'colortbl', u'comment', u'company', u'creatim',
+        u'datafield', u'datastore', u'defchp', u'defpap', u'do', u'doccomm',
+        u'docvar', u'dptxbxtext', u'ebcend', u'ebcstart', u'factoidname',
+        u'falt', u'fchars', u'ffdeftext', u'ffentrymcr', u'ffexitmcr',
+        u'ffformat', u'ffhelptext', u'ffl', u'ffname', u'ffstattext', u'field',
+        u'file', u'filetbl', u'fldinst', u'fldrslt', u'fldtype', u'fname',
+        u'fontemb', u'fontfile', u'footer', u'footerf', u'footerl', u'footerr',
+        u'footnote', u'formfield', u'ftncn', u'ftnsep', u'ftnsepc', u'g',
+        u'generator', u'gridtbl', u'header', u'headerf', u'headerl',
+        u'headerr', u'hl', u'hlfr', u'hlinkbase', u'hlloc', u'hlsrc', u'hsv',
+        u'htmltag', u'info', u'keycode', u'keywords', u'latentstyles',
+        u'lchars', u'levelnumbers', u'leveltext', u'lfolevel', u'linkval',
+        u'list', u'listlevel', u'listname', u'listoverride',
+        u'listoverridetable', u'listpicture', u'liststylename', u'listtable',
+        u'listtext', u'lsdlockedexcept', u'macc', u'maccPr', u'mailmerge',
+        u'maln', u'malnScr', u'manager', u'margPr', u'mbar', u'mbarPr',
+        u'mbaseJc', u'mbegChr', u'mborderBox', u'mborderBoxPr', u'mbox',
+        u'mboxPr', u'mchr', u'mcount', u'mctrlPr', u'md', u'mdeg', u'mdegHide',
+        u'mden', u'mdiff', u'mdPr', u'me', u'mendChr', u'meqArr', u'meqArrPr',
+        u'mf', u'mfName', u'mfPr', u'mfunc', u'mfuncPr', u'mgroupChr',
+        u'mgroupChrPr', u'mgrow', u'mhideBot', u'mhideLeft', u'mhideRight',
+        u'mhideTop', u'mhtmltag', u'mlim', u'mlimloc', u'mlimlow',
+        u'mlimlowPr', u'mlimupp', u'mlimuppPr', u'mm', u'mmaddfieldname',
+        u'mmath', u'mmathPict', u'mmathPr', u'mmaxdist', u'mmc', u'mmcJc',
+        u'mmconnectstr', u'mmconnectstrdata', u'mmcPr', u'mmcs',
+        u'mmdatasource', u'mmheadersource', u'mmmailsubject', u'mmodso',
+        u'mmodsofilter', u'mmodsofldmpdata', u'mmodsomappedname',
+        u'mmodsoname', u'mmodsorecipdata', u'mmodsosort', u'mmodsosrc',
+        u'mmodsotable', u'mmodsoudl', u'mmodsoudldata', u'mmodsouniquetag',
+        u'mmPr', u'mmquery', u'mmr', u'mnary', u'mnaryPr', u'mnoBreak',
+        u'mnum', u'mobjDist', u'moMath', u'moMathPara', u'moMathParaPr',
+        u'mopEmu', u'mphant', u'mphantPr', u'mplcHide', u'mpos', u'mr',
+        u'mrad', u'mradPr', u'mrPr', u'msepChr', u'mshow', u'mshp', u'msPre',
+        u'msPrePr', u'msSub', u'msSubPr', u'msSubSup', u'msSubSupPr', u'msSup',
+        u'msSupPr', u'mstrikeBLTR', u'mstrikeH', u'mstrikeTLBR', u'mstrikeV',
+        u'msub', u'msubHide', u'msup', u'msupHide', u'mtransp', u'mtype',
+        u'mvertJc', u'mvfmf', u'mvfml', u'mvtof', u'mvtol', u'mzeroAsc',
+        u'mzeroDesc', u'mzeroWid', u'nesttableprops', u'nextfile',
+        u'nonesttables', u'objalias', u'objclass', u'objdata', u'object',
+        u'objname', u'objsect', u'objtime', u'oldcprops', u'oldpprops',
+        u'oldsprops', u'oldtprops', u'oleclsid', u'operator', u'panose',
+        u'password', u'passwordhash', u'pgp', u'pgptbl', u'picprop', u'pict',
+        u'pn', u'pnseclvl', u'pntext', u'pntxta', u'pntxtb', u'printim',
+        u'private', u'propname', u'protend', u'protstart', u'protusertbl',
+        u'pxe', u'result', u'revtbl', u'revtim', u'rsidtbl', u'rxe', u'shp',
+        u'shpgrp', u'shpinst', u'shppict', u'shprslt', u'shptxt', u'sn', u'sp',
+        u'staticval', u'stylesheet', u'subject', u'sv', u'svb', u'tc',
+        u'template', u'themedata', u'title', u'txe', u'ud', u'upr',
+        u'userprops', u'wgrffmtfilter', u'windowcaption', u'writereservation',
+        u'writereservhash', u'xe', u'xform', u'xmlattrname', u'xmlattrvalue',
+        u'xmlclose', u'xmlname', u'xmlnstbl', u'xmlopen'))
+    # Translation of some special characters.
+    SPECIALCHARS = {
+        u'par': u'\n',
+        u'sect': u'\n\n',
+        # Required page and column break.
+        # Would be good if we could split verse into subverses here.
+        u'page': u'\n\n',
+        u'column': u'\n\n',
+        # Soft breaks.
+        u'softpage': u'[---]',
+        u'softcol': u'[---]',
+        u'line': u'\n',
+        u'tab': u'\t',
+        u'emdash': u'\u2014',
+        u'endash': u'\u2013',
+        u'emspace': u'\u2003',
+        u'enspace': u'\u2002',
+        u'qmspace': u'\u2005',
+        u'bullet': u'\u2022',
+        u'lquote': u'\u2018',
+        u'rquote': u'\u2019',
+        u'ldblquote': u'\u201C',
+        u'rdblquote': u'\u201D',
+        u'ltrmark': u'\u200E',
+        u'rtlmark': u'\u200F',
+        u'zwj': u'\u200D',
+        u'zwnj': u'\u200C'}
+    CHARSET_MAPPING = {
+        u'fcharset0': u'cp1252',
+        u'fcharset1': None,
+        u'fcharset2': None,
+        u'fcharset77': None,
+        u'fcharset128': None,
+        u'fcharset129': None,
+        u'fcharset130': None,
+        u'fcharset134': None,
+        u'fcharset136': None,
+        u'fcharset161': u'cp1253',
+        u'fcharset162': u'cp1254',
+        u'fcharset163': u'cp1258',
+        u'fcharset177': u'cp1255',
+        u'fcharset178': u'cp1256',
+        u'fcharset186': u'cp1257',
+        u'fcharset204': u'cp1251',
+        u'fcharset222': u'cp874',
+        u'fcharset238': u'cp1250'}
+    # Once the user has been asked for an encoding, it is reused from then on.
+    user_encoding = []
+
+    def strip_rtf(self, text, default_encoding=None):
+        """
+        Strip the RTF markup from ``text`` and return it as a unicode string.
+
+        ``text``
+            The RTF data to convert.
+
+        ``default_encoding``
+            Codec tried for hex escaped bytes whose font names no code page.
+        """
+        self.default_encoding = default_encoding
+        # Current font is the font tag we last met.
+        font = u''
+        # Encodings are declared per font, e.g. {u'0': u'cp1252'}.
+        font_table = {u'': default_encoding}
+        # Stack of things to keep track of when entering/leaving groups.
+        stack = []
+        # Whether this group (and all inside it) are "ignorable".
+        ignorable = False
+        # Number of ASCII characters to skip after a unicode character.
+        ucskip = 1
+        # Number of ASCII characters left to skip.
+        curskip = 0
+        out = []
+        # Escaped symbols which stand for a single character.
+        escapes = {u'~': u'\xA0', u'-': u'\u00AD', u'_': u'\u2011'}
+        for match in self.PATTERN.finditer(text):
+            word, arg, hex, char, brace, tchar = match.groups()
+            if brace:
+                curskip = 0
+                # Save the group state on '{' and restore it on '}'.
+                if brace == u'{':
+                    stack.append((ucskip, ignorable, font))
+                else:
+                    ucskip, ignorable, font = stack.pop()
+            # \x (not a letter)
+            elif char:
+                curskip = 0
+                if char == u'*':
+                    ignorable = True
+                elif not ignorable and char in u'{}\\':
+                    out.append(char)
+                elif not ignorable and char in escapes:
+                    out.append(escapes[char])
+            # \command
+            elif word:
+                curskip = 0
+                if word in self.DESTINATIONS:
+                    ignorable = True
+                elif not ignorable and word in self.SPECIALCHARS:
+                    out.append(self.SPECIALCHARS[word])
+                elif word == u'uc':
+                    ucskip = int(arg)
+                elif word == u'u':
+                    c = int(arg)
+                    if c < 0:
+                        c += 0x10000
+                    if not ignorable:
+                        out.append(unichr(c))
+                    curskip = ucskip
+                elif word == u'fonttbl':
+                    ignorable = True
+                elif word == u'f':
+                    font = arg
+                elif word == u'ansicpg':
+                    font_table[font] = 'cp' + arg
+                elif word == u'fcharset':
+                    charset_reference = word + arg
+                    charset = self.CHARSET_MAPPING.get(charset_reference)
+                    if charset_reference not in self.CHARSET_MAPPING:
+                        log.error(u"Charset '%s' not in CHARSET_MAPPING "
+                            u"dictionary in "
+                            u"openlp/plugins/songs/lib/__init__.py"
+                            % charset_reference)
+                    # This makes ansicpg always override fcharset if present.
+                    font_table.setdefault(font, charset)
+            # \'xx
+            elif hex:
+                if curskip > 0:
+                    curskip -= 1
+                elif not ignorable:
+                    byte = chr(int(hex, 16))
+                    encoding = self.get_encoding(font, font_table)
+                    while True:
+                        try:
+                            out.append(byte.decode(encoding))
+                            break
+                        except UnicodeDecodeError:
+                            encoding = self.get_encoding(font, font_table,
+                                failed=True)
+            elif tchar:
+                if curskip > 0:
+                    curskip -= 1
+                elif not ignorable:
+                    out.append(tchar)
+        return u''.join(out)
+
+    def get_encoding(self, font, font_table, failed=False):
+        """
+        Pick the codec for ``font``; ``failed`` rejects the one picked before.
+        """
+        encoding = font_table.get(font)
+        chosen = self.user_encoding[-1] if self.user_encoding else None
+        if failed:
+            encoding = chosen if encoding != chosen else None
+        else:
+            encoding = encoding or chosen or self.default_encoding
+        if not encoding:
+            encoding = retrieve_windows_encoding(self.default_encoding)
+            if encoding not in self.user_encoding:
+                self.user_encoding.append(encoding)
+        font_table[font] = encoding
+        return encoding
+
 from xml import OpenLyrics, SongXML
 from songstab import SongsTab
 from mediaitem import SongMediaItem

=== modified file 'openlp/plugins/songs/lib/ewimport.py'
--- openlp/plugins/songs/lib/ewimport.py	2012-06-22 14:14:53 +0000
+++ openlp/plugins/songs/lib/ewimport.py	2012-06-25 08:11:25 +0000
@@ -36,7 +36,7 @@
 
 from openlp.core.lib import translate
 from openlp.plugins.songs.lib import VerseType
-from openlp.plugins.songs.lib import retrieve_windows_encoding
+from openlp.plugins.songs.lib import retrieve_windows_encoding, StripRtf
 from songimport import SongImport
 
 RTF_STRIPPING_REGEX = re.compile(r'\{\\tx[^}]*\}')
@@ -45,101 +45,6 @@
 NUMBER_REGEX = re.compile(r'[0-9]+')
 NOTE_REGEX = re.compile(r'\(.*?\)')
 
-def strip_rtf(blob, encoding):
-    depth = 0
-    control = False
-    clear_text = []
-    control_word = []
-
-    # workaround for \tx bug: remove one pair of curly braces
-    # if \tx is encountered
-    match = RTF_STRIPPING_REGEX.search(blob)
-    if match:
-        # start and end indices of match are curly braces - filter them out
-        blob = ''.join([blob[i] for i in xrange(len(blob))
-            if i != match.start() and i !=match.end()])
-
-    for c in blob:
-        if control:
-            # for delimiters, set control to False
-            if c == '{':
-                if control_word:
-                    depth += 1
-                control = False
-            elif c == '}':
-                if control_word:
-                    depth -= 1
-                control = False
-            elif c == '\\':
-                new_control = bool(control_word)
-                control = False
-            elif c.isspace():
-                control = False
-            else:
-                control_word.append(c)
-                if len(control_word) == 3 and control_word[0] == '\'':
-                    control = False
-            if not control:
-                if not control_word:
-                    if c == '{' or c == '}' or c == '\\':
-                        clear_text.append(c)
-                else:
-                    control_str = ''.join(control_word)
-                    if control_str == 'par' or control_str == 'line':
-                        clear_text.append(u'\n')
-                    elif control_str == 'tab':
-                        clear_text.append(u'\t')
-                    # Prefer the encoding specified by the RTF data to that
-                    # specified by the Paradox table header
-                    # West European encoding
-                    elif control_str == 'fcharset0':
-                        encoding = u'cp1252'
-                    # Greek encoding
-                    elif control_str == 'fcharset161':
-                        encoding = u'cp1253'
-                    # Turkish encoding
-                    elif control_str == 'fcharset162':
-                        encoding = u'cp1254'
-                    # Vietnamese encoding
-                    elif control_str == 'fcharset163':
-                        encoding = u'cp1258'
-                    # Hebrew encoding
-                    elif control_str == 'fcharset177':
-                        encoding = u'cp1255'
-                    # Arabic encoding
-                    elif control_str == 'fcharset178':
-                        encoding = u'cp1256'
-                    # Baltic encoding
-                    elif control_str == 'fcharset186':
-                        encoding = u'cp1257'
-                    # Cyrillic encoding
-                    elif control_str == 'fcharset204':
-                        encoding = u'cp1251'
-                    # Thai encoding
-                    elif control_str == 'fcharset222':
-                        encoding = u'cp874'
-                    # Central+East European encoding
-                    elif control_str == 'fcharset238':
-                        encoding = u'cp1250'
-                    elif control_str[0] == '\'':
-                        s = chr(int(control_str[1:3], 16))
-                        clear_text.append(s.decode(encoding))
-                    del control_word[:]
-            if c == '\\' and new_control:
-                control = True
-        elif c == '{':
-            depth += 1
-        elif c == '}':
-            depth -= 1
-        elif depth > 2:
-            continue
-        elif c == '\n' or c == '\r':
-            continue
-        elif c == '\\':
-            control = True
-        else:
-            clear_text.append(c)
-    return u''.join(clear_text)
 
 class FieldDescEntry:
     def __init__(self, name, type, size):
@@ -155,6 +60,7 @@
     """
     def __init__(self, manager, **kwargs):
         SongImport.__init__(self, manager, **kwargs)
+        self.rtf = StripRtf()
 
     def doImport(self):
         # Open the DB and MB files if they exist
@@ -274,7 +180,7 @@
                         self.addAuthor(author_name.strip())
                 if words:
                     # Format the lyrics
-                    words = strip_rtf(words, self.encoding)
+                    words = self.rtf.strip_rtf(words, self.encoding)
                     verse_type = VerseType.Tags[VerseType.Verse]
                     for verse in SLIDE_BREAK_REGEX.split(words):
                         verse = verse.strip()

=== modified file 'openlp/plugins/songs/lib/importer.py'
--- openlp/plugins/songs/lib/importer.py	2012-06-22 14:14:53 +0000
+++ openlp/plugins/songs/lib/importer.py	2012-06-25 08:11:25 +0000
@@ -44,6 +44,7 @@
 from ewimport import EasyWorshipSongImport
 from songbeamerimport import SongBeamerImport
 from songshowplusimport import SongShowPlusImport
+from sundayplusimport import SundayPlusImport
 from foilpresenterimport import FoilPresenterImport
 from zionworximport import ZionWorxImport
 # Imports that might fail
@@ -145,9 +146,10 @@
     SongBeamer = 11
     SongShowPlus = 12
     SongsOfFellowship = 13
-    WordsOfWorship = 14
-    ZionWorx = 15
-    #CSV = 16
+    SundayPlus = 14
+    WordsOfWorship = 15
+    ZionWorx = 16
+    #CSV = 17
 
     # Set optional attribute defaults
     __defaults__ = {
@@ -275,6 +277,13 @@
                 'The Songs of Fellowship importer has been disabled because '
                 'OpenLP cannot access OpenOffice or LibreOffice.')
         },
+        SundayPlus: {
+            u'class': SundayPlusImport,
+            u'name': u'SundayPlus',
+            u'prefix': u'sundayPlus',
+            u'filter': u'%s (*.ptf)' % translate(
+                'SongsPlugin.ImportWizardForm', 'SundayPlus Song Files')
+        },
         WordsOfWorship: {
             u'class': WowImport,
             u'name': u'Words of Worship',
@@ -322,6 +331,7 @@
             SongFormat.SongBeamer,
             SongFormat.SongShowPlus,
             SongFormat.SongsOfFellowship,
+            SongFormat.SundayPlus,
             SongFormat.WordsOfWorship,
             SongFormat.ZionWorx
         ]

=== added file 'openlp/plugins/songs/lib/sundayplusimport.py'
--- openlp/plugins/songs/lib/sundayplusimport.py	1970-01-01 00:00:00 +0000
+++ openlp/plugins/songs/lib/sundayplusimport.py	2012-06-25 08:11:25 +0000
@@ -0,0 +1,199 @@
+# -*- coding: utf-8 -*-
+# vim: autoindent shiftwidth=4 expandtab textwidth=80 tabstop=4 softtabstop=4
+
+###############################################################################
+# OpenLP - Open Source Lyrics Projection                                      #
+# --------------------------------------------------------------------------- #
+# Copyright (c) 2008-2012 Raoul Snyman                                        #
+# Portions copyright (c) 2008-2012 Tim Bentley, Gerald Britton, Jonathan      #
+# Corwin, Michael Gorven, Scott Guerrieri, Matthias Hub, Meinert Jordan,      #
+# Armin Köhler, Joshua Miller, Stevan Pettit, Andreas Preikschat, Mattias     #
+# Põldaru, Christian Richter, Philip Ridout, Simon Scudder, Jeffrey Smith,    #
+# Maikel Stuivenberg, Martin Thompson, Jon Tibble, Frode Woldsund             #
+# --------------------------------------------------------------------------- #
+# This program is free software; you can redistribute it and/or modify it     #
+# under the terms of the GNU General Public License as published by the Free  #
+# Software Foundation; version 2 of the License.                              #
+#                                                                             #
+# This program is distributed in the hope that it will be useful, but WITHOUT #
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or       #
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for    #
+# more details.                                                               #
+#                                                                             #
+# You should have received a copy of the GNU General Public License along     #
+# with this program; if not, write to the Free Software Foundation, Inc., 59  #
+# Temple Place, Suite 330, Boston, MA 02111-1307 USA                          #
+###############################################################################
+
+import logging
+import os
+import re
+
+from openlp.plugins.songs.lib import VerseType, retrieve_windows_encoding
+from openlp.plugins.songs.lib import StripRtf
+from openlp.plugins.songs.lib.songimport import SongImport
+
+log = logging.getLogger(__name__)
+
+class SundayPlusImport(SongImport):
+    """
+    Import Sunday Plus songs
+
+    The format examples can be found attached to bug report at
+    <http://support.openlp.org/issues/395>
+    """
+    HOTKEYTOVERSETYPE = {
+        u'1': u'v1',
+        u'2': u'v2',
+        u'3': u'v3',
+        u'4': u'v4',
+        u'5': u'v5',
+        u'6': u'v6',
+        u'7': u'v7',
+        u'8': u'v8',
+        u'9': u'v9',
+        u'C': u'c',
+        u'+': u'b',
+        u'Z': u'o'}
+
+    def __init__(self, manager, **kwargs):
+        """
+        Initialise the base importer and create the RTF stripper instance.
+        """
+        SongImport.__init__(self, manager, **kwargs)
+        self.rtf = StripRtf()
+
+    def doImport(self):
+        """Import every selected file until done or asked to stop."""
+        self.importWizard.progressBar.setMaximum(len(self.importSource))
+        self.encoding = 'us-ascii'
+        for filename in self.importSource:
+            if self.stopImportFlag:
+                return
+            with open(filename) as song_file:
+                self.doImportFile(song_file)
+
+    def doImportFile(self, file):
+        """
+        Import one song from an opened Sunday Plus file object.
+
+        The file name is logged as an error if parsing or ``finish()`` fails.
+        """
+        self.setDefaults()
+        parsed = self.parse(file.read())
+        if parsed and self.title == '':
+            self.title = self.titleFromFilename(file.name)
+        if not (parsed and self.finish()):
+            self.logError(file.name)
+
+    def parse(self, data, cell = False):
+        """
+        Parse ``data``, a group of ``#name: value`` pairs enclosed in ``[]``.
+        ``cell`` is False for the main group, else the verse cell's name.
+        Returns False if ``data`` is not such a group, True otherwise.
+        """
+        if len(data) == 0 or data[0:1] != '[' or data[-1] != ']':
+            self.logError(u'File is malformed')
+            return False
+        i = 1
+        verse_type = VerseType.Tags[VerseType.Verse]
+        while i < len(data):
+            # Data is held as #name: value pairs inside groups marked as [].
+            if data[i:i+1] == '#':
+                name_end = data.find(':', i+1)
+                if name_end == -1:
+                    break
+                name = data[i+1:name_end]
+                i = name_end + 1
+                while data[i:i+1] == ' ':
+                    i += 1
+                if data[i:i+1] == '"':
+                    end = data.find('"', i+1)
+                    value = data[i+1:end]
+                elif data[i:i+1] == '[':
+                    end = -1
+                    inside_quotes = False
+                    for j in xrange(i, len(data)):
+                        if data[j] == '"':
+                            inside_quotes = not inside_quotes
+                        elif data[j] == ']' and not inside_quotes:
+                            end = j + 1
+                            break
+                    value = data[i:end]
+                else:
+                    end = data.find(',', i+1)
+                    if data.find('(', i, end) != -1:
+                        end = data.find(')', i) + 1
+                    value = data[i:end]
+                # If we are in the main group.
+                if cell == False:
+                    if name == 'title':
+                        self.title = self.decode(self.unescape(value))
+                    elif name == 'Author':
+                        author = self.decode(self.unescape(value))
+                        if len(author):
+                            self.addAuthor(author)
+                    elif name == 'Copyright':
+                        self.copyright = self.decode(self.unescape(value))
+                    elif name[0:4] == 'CELL':
+                        self.parse(value, cell = name[4:])
+                # We are in a verse group.
+                else:
+                    if name == 'MARKER_NAME':
+                        value = value.strip()
+                        if len(value):
+                            verse_type = VerseType.Tags[
+                                VerseType.from_loose_input(value[0])]
+                            if len(value) >= 2 and value[-1] in '0123456789':
+                                verse_type = "%s%s" % (verse_type, value[-1])
+                    elif name == 'Hotkey':
+                        # Hotkey follows MARKER_NAME and so overrides it.
+                        if value in self.HOTKEYTOVERSETYPE:
+                            verse_type = self.HOTKEYTOVERSETYPE[value]
+                    if name == 'rtf':
+                        value = self.unescape(value)
+                        verse = self.rtf.strip_rtf(value, self.encoding)
+                        # Lines starting with CCLI or consisting only of
+                        # Public Domain are song data, not lyrics.
+                        lyrics = []
+                        for line in verse.strip().split('\n'):
+                            line = line.strip()
+                            if line[:4].lower() == u'ccli':
+                                m = re.search(r'[0-9]+', line)
+                                if m:
+                                    self.ccliNumber = int(m.group(0))
+                                    continue
+                            elif line.lower() == u'public domain':
+                                self.copyright = u'Public Domain'
+                                continue
+                            lyrics.append(line)
+                        self.addVerse('\n'.join(lyrics).strip(), verse_type)
+                if end == -1:
+                    break
+                i = end + 1
+            i += 1
+        return True
+
+    def titleFromFilename(self, filename):
+        """Derive a song title from the base name of ``filename``."""
+        title = os.path.basename(filename)
+        # For some strange reason all example file names ended with 1-7.
+        for suffix in (u'.ptf', '1-7'):
+            if title.endswith(suffix):
+                title = title[:-len(suffix)]
+        return title.replace(u'_', u' ')
+
+    def decode(self, blob):
+        """Return ``blob`` as unicode, asking for an encoding if needed."""
+        while True:
+            try:
+                return unicode(blob, self.encoding)
+            except (UnicodeError, LookupError, TypeError):
+                # Ask again whenever the previously chosen encoding fails.
+                self.encoding = retrieve_windows_encoding()
+
+    def unescape(self, text):
+        """Turn the ^^ and ^ escapes into double and single quotes."""
+        unquoted = text.replace('^^', '"').replace('^', '\'')
+        return unquoted.strip()
+

Follow ups

Re: [Merge] lp:~mahfiaz/openlp/bug-933706 into lp:openlp
From: Samuel Findlay, 2012-06-25