openlp-core team mailing list archive
-
openlp-core team
-
Mailing list archive
-
Message #17106
[Merge] lp:~raoul-snyman/openlp/bug-1049977 into lp:openlp
Raoul Snyman has proposed merging lp:~raoul-snyman/openlp/bug-1049977 into lp:openlp.
Requested reviews:
OpenLP Core (openlp-core)
Related bugs:
Bug #1049977 in OpenLP: "Biblegateway bible does not download new verses after upgrade to 1.9.11"
https://bugs.launchpad.net/openlp/+bug/1049977
For more details, see:
https://code.launchpad.net/~raoul-snyman/openlp/bug-1049977/+merge/124290
Fixed bug #1049977 where some Bibles from Bible Gateway still used the old HTML format.
--
https://code.launchpad.net/~raoul-snyman/openlp/bug-1049977/+merge/124290
Your team OpenLP Core is requested to review the proposed merge of lp:~raoul-snyman/openlp/bug-1049977 into lp:openlp.
=== modified file 'openlp/plugins/bibles/lib/http.py'
--- openlp/plugins/bibles/lib/http.py 2012-06-22 14:14:53 +0000
+++ openlp/plugins/bibles/lib/http.py 2012-09-13 20:05:25 +0000
@@ -124,6 +124,8 @@
self._remove_elements(tag, 'div', 'footnotes')
self._remove_elements(tag, 'div', 'crossrefs')
self._remove_elements(tag, 'h3')
+ self._remove_elements(tag, 'h4')
+ self._remove_elements(tag, 'h5')
def _extract_verses(self, tags):
"""
@@ -161,6 +163,46 @@
verse_list[verse] = text
return verse_list
+ def _extract_verses_old(self, div):
+ """
+ Use the old style of parsing for those Bibles on Bible Gateway that
+ mysteriously have not been migrated to the new (still broken) HTML.
+
+ ``div``
+ The parent div.
+ """
+ verse_list = {}
+ # Cater for inconsistent mark up in the first verse of a chapter.
+ first_verse = div.find(u'versenum')
+ if first_verse and first_verse.contents:
+ verse_list[1] = unicode(first_verse.contents[0])
+ for verse in div(u'sup', u'versenum'):
+ raw_verse_num = verse.next
+ clean_verse_num = 0
+ # Not all verses exist in all translations and may or may not be
+ # represented by a verse number. If they are not, fine; if they are,
+ # it will probably be in a format that breaks int(). We will then
+ # have no idea what garbage may be sucked in to the verse text, so
+ # if we do not get a clean int() then ignore the verse completely.
+ try:
+ clean_verse_num = int(str(raw_verse_num))
+ except ValueError:
+ log.warn(u'Illegal verse number: %s', unicode(raw_verse_num))
+ if clean_verse_num:
+ verse_text = raw_verse_num.next
+ part = raw_verse_num.next.next
+ while not (isinstance(part, Tag) and
+ part.get(u'class') == u'versenum'):
+ # While we are still in the same verse grab all the text.
+ if isinstance(part, NavigableString):
+ verse_text += part
+ if isinstance(part.next, Tag) and part.next.name == u'div':
+ # Ran out of verses, so stop.
+ break
+ part = part.next
+ verse_list[clean_verse_num] = unicode(verse_text)
+ return verse_list
+
def get_bible_chapter(self, version, book_name, chapter):
"""
Access and decode Bibles via the BibleGateway website.
@@ -189,7 +231,13 @@
Receiver.send_message(u'openlp_process_events')
div = soup.find('div', 'result-text-style-normal')
self._clean_soup(div)
- verse_list = self._extract_verses(div.findAll('span', 'text'))
+ span_list = div.findAll('span', 'text')
+ log.debug('Span list: %s', span_list)
+ if not span_list:
+ # If we don't get any spans then we must have the old HTML format
+ verse_list = self._extract_verses_old(div)
+ else:
+ verse_list = self._extract_verses(span_list)
if not verse_list:
log.debug(u'No content found in the BibleGateway response.')
send_error_message(u'parse')
Follow ups