openlp-core team mailing list archive
-
openlp-core team
-
Mailing list archive
-
Message #01224
[Merge] lp:~raoul-snyman/openlp/biblefixes into lp:openlp
Raoul Snyman has proposed merging lp:~raoul-snyman/openlp/biblefixes into lp:openlp.
Requested reviews:
OpenLP Core (openlp-core)
Fix HTML entities in Web Downloads.
--
https://code.launchpad.net/~raoul-snyman/openlp/biblefixes/+merge/21800
Your team OpenLP Core is subscribed to branch lp:openlp.
=== modified file 'openlp/plugins/bibles/forms/importwizardform.py'
--- openlp/plugins/bibles/forms/importwizardform.py 2010-03-19 22:08:06 +0000
+++ openlp/plugins/bibles/forms/importwizardform.py 2010-03-21 14:44:19 +0000
@@ -341,17 +341,17 @@
download_location = self.field(u'web_location').toInt()[0]
if download_location == DownloadLocation.Crosswalk:
bible = self.web_bible_list[DownloadLocation.Crosswalk][
- unicode(self.BibleComboBox.currentText())]
+ unicode(self.BibleComboBox.currentText(), u'utf8')]
elif download_location == DownloadLocation.BibleGateway:
bible = self.web_bible_list[DownloadLocation.BibleGateway][
- unicode(self.BibleComboBox.currentText())]
+ self.BibleComboBox.currentText()]
importer = self.manager.import_bible(BibleFormat.WebDownload,
- name=unicode(self.field(u'license_version').toString()),
+ name=unicode(self.field(u'license_version').toString(), u'utf8'),
download_source=unicode(DownloadLocation.get_name(download_location)),
- download_name=unicode(bible),
- proxy_server=unicode(self.field(u'proxy_server').toString()),
- proxy_username=unicode(self.field(u'proxy_username').toString()),
- proxy_password=unicode(self.field(u'proxy_password').toString())
+ download_name=unicode(bible, u'utf8'),
+ proxy_server=unicode(self.field(u'proxy_server').toString(), u'utf8'),
+ proxy_username=unicode(self.field(u'proxy_username').toString(), u'utf8'),
+ proxy_password=unicode(self.field(u'proxy_password').toString(), u'utf8')
)
success = importer.do_import()
if success:
=== modified file 'openlp/plugins/bibles/lib/common.py'
--- openlp/plugins/bibles/lib/common.py 2010-03-01 18:37:10 +0000
+++ openlp/plugins/bibles/lib/common.py 2010-03-21 14:44:19 +0000
@@ -27,6 +27,7 @@
import logging
import re
import chardet
+import htmlentitydefs
only_verses = re.compile(r'([\w .]+)[ ]+([0-9]+)[ ]*[:|v|V][ ]*([0-9]+)'
r'(?:[ ]*-[ ]*([0-9]+|end))?(?:[ ]*,[ ]*([0-9]+)(?:[ ]*-[ ]*([0-9]+|end))?)?',
@@ -115,7 +116,6 @@
log.debug(reference_list)
return reference_list
-
class SearchResults(object):
"""
Encapsulate a set of search results. This is Bible-type independant.
@@ -247,3 +247,33 @@
start_tag = text.find(u'<')
text = text.replace(u'>', u'')
return text.rstrip().lstrip()
+
+
+def unescape(text):
+ """
+ Removes HTML or XML character references and entities from a text string.
+ Courtesy of Fredrik Lundh, http://effbot.org/zone/re-sub.htm#unescape-html
+
+ @param text The HTML (or XML) source text.
+ @return The plain text, as a Unicode string, if necessary.
+ """
+ def fixup(m):
+ text = m.group(0)
+ if text[:2] == u'&#':
+ # character reference
+ try:
+ if text[:3] == u'&#x':
+ return unichr(int(text[3:-1], 16))
+ else:
+ return unichr(int(text[2:-1]))
+ except ValueError:
+ pass
+ else:
+ # named entity
+ try:
+ text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
+ except KeyError:
+ pass
+ return text # leave as is
+ return re.sub(u'&#?\w+;', fixup, text)
+
=== modified file 'openlp/plugins/bibles/lib/http.py'
--- openlp/plugins/bibles/lib/http.py 2010-03-19 07:00:41 +0000
+++ openlp/plugins/bibles/lib/http.py 2010-03-21 14:44:19 +0000
@@ -32,7 +32,7 @@
from openlp.core.lib import Receiver
from openlp.core.utils import AppLocation
-from common import BibleCommon, SearchResults
+from common import BibleCommon, SearchResults, unescape
from db import BibleDB
from openlp.plugins.bibles.lib.models import Book
@@ -196,7 +196,8 @@
verse_list[verse_number] = u''
continue
if isinstance(verse, NavigableString):
- verse_list[verse_number] = verse_list[verse_number] + verse.replace(u'&nbsp;', u' ')
+ verse_list[verse_number] = verse_list[verse_number] + \
+ unescape(unicode(verse, u'utf-8').replace(u'&nbsp;', u' '))
# Delete the "0" element, since we don't need it, it's just there for
# some stupid initial whitespace, courtesy of Bible Gateway.
del verse_list[0]
Follow ups