openlp-core team mailing list archive
Message #00151
[Merge] lp:~raoul-snyman/openlp/biblefixes into lp:openlp
Raoul Snyman has proposed merging lp:~raoul-snyman/openlp/biblefixes into lp:openlp.
Requested reviews:
openlp.org Core (openlp-core)
Fixes issues that some of us were having with the Bibles plugin: downloaded pages are now decoded with the encoding detected by chardet, the proxy handling and logging in _get_web_text() are tidied up, and _clean_text() is documented and refactored.
--
https://code.launchpad.net/~raoul-snyman/openlp/biblefixes/+merge/8345
Your team openlp.org Core is subscribed to branch lp:openlp.
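The key functional fix is in _get_web_text(): the downloaded page is now decoded with whatever encoding chardet detects, rather than being passed straight to unicode() and hoping for ASCII. A minimal standalone sketch of that approach (Python 2, as in the branch; the helper name and URL are illustrative only):

import urllib2
import chardet

def fetch_unicode(url):
    # Fetch the raw bytes of the page.
    request = urllib2.Request(url)
    request.add_header('User-Agent', 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)')
    html = urllib2.urlopen(request).read()
    # Let chardet guess the encoding from the bytes, then decode with it.
    details = chardet.detect(html)
    return unicode(html, details['encoding'])

print fetch_unicode('http://www.example.com/')

chardet.detect() returns a dict with 'encoding' and 'confidence' keys; the branch uses the detected encoding directly.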
=== modified file 'openlp/plugins/bibles/lib/common.py'
--- openlp/plugins/bibles/lib/common.py 2009-06-20 19:11:17 +0000
+++ openlp/plugins/bibles/lib/common.py 2009-07-07 20:18:36 +0000
@@ -19,93 +19,140 @@
import os.path
import sys
import urllib2
-
+import chardet
import logging
class SearchResults:
+ """
+ Encapsulate a set of search results. This is Bible-type independent.
+ """
def __init__(self, book, chapter, verselist):
+ """
+ Create the search result object.
+
+ ``book``
+ The book of the Bible.
+
+ ``chapter``
+ The chapter of the book.
+
+ ``verselist``
+ The list of verses for this reading.
+ """
self.book = book
self.chapter = chapter
self.verselist = verselist
+
def get_verselist(self):
+ """
+ Returns the list of verses.
+ """
return self.verselist
+
def get_book(self):
+ """
+ Returns the book of the Bible.
+ """
return self.book
+
def get_chapter(self):
+ """
+ Returns the chapter of the book.
+ """
return self.chapter
+
def has_verselist(self):
- if self.verselist == {}:
- return False
- else:
- return True
-
-class BibleCommon:
+ """
+ Returns whether or not the verse list contains verses.
+ """
+ return len(self.verselist) > 0
+
+
+class BibleCommon(object):
+ """
+ A common ancestor for Bible download sites.
+ """
global log
log = logging.getLogger(u'BibleCommon')
log.info(u'BibleCommon')
+
def __init__(self):
"""
+ An empty constructor... not sure why I'm here.
"""
+ pass
+
def _get_web_text(self, urlstring, proxyurl):
+ """
+ Get the HTML from the web page.
+
+ ``urlstring``
+ The URL of the page to open.
+
+ ``proxyurl``
+ The URL of a proxy server used to access the Internet.
+ """
log.debug(u'get_web_text %s %s', proxyurl, urlstring)
- if not proxyurl == None:
- proxy_support = urllib2.ProxyHandler({'http': self.proxyurl})
+ if proxyurl is not None:
+ proxy_support = urllib2.ProxyHandler({'http': proxyurl})
http_support = urllib2.HTTPHandler()
- opener= urllib2.build_opener(proxy_support, http_support)
+ opener = urllib2.build_opener(proxy_support, http_support)
urllib2.install_opener(opener)
xml_string = u''
req = urllib2.Request(urlstring)
- req.add_header(u'User-Agent', 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)')
+ req.add_header('User-Agent', 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)')
try:
handle = urllib2.urlopen(req)
- xml_string = unicode(handle.read())
+ html = handle.read()
+ details = chardet.detect(html)
+ xml_string = unicode(html, details['encoding'])
except IOError, e:
if hasattr(e, u'reason'):
- log.error(u'Reason : ')
- log.error( e.reason)
+ log.error(u'Reason : %s', e.reason)
return xml_string
def _clean_text(self, text):
"""
- Clean up text and remove extra characters
- after been downloaded from web
+ Clean up text and remove extra characters after it has been downloaded
+ from the Internet.
+
+ ``text``
+ The text from the web page that needs to be cleaned up.
"""
#return text.rstrip()
# Remove Headings from the Text
- i = text.find(u'<h')
- while i > -1:
- j=text.find(u'</h', i)
- text = text[ : (i - 1)]+text[(j+4)]
- i = text.find(u'<h')
-
+ start_tag = text.find(u'<h')
+ while start_tag > -1:
+ end_tag = text.find(u'</h', start_tag)
+ text = text[:start_tag] + text[(end_tag + 4):]
+ start_tag = text.find(u'<h')
# Remove Support References from the Text
- x = text.find(u'<sup>')
- while x > -1:
- y = text.find(u'</sup>')
- text= text[:x] + text[y + 6:len(text)]
- x = text.find(u'<sup>')
-
+ start_tag = text.find(u'<sup>')
+ while start_tag > -1:
+ end_tag = text.find(u'</sup>', start_tag)
+ text = text[:start_tag] + text[end_tag + 6:]
+ start_tag = text.find(u'<sup>')
# Static Clean ups
- text= text.replace(u'\n', u'')
- text= text.replace(u'\r', u'')
- text= text.replace(u' ', u'')
- text= text.replace(u'<P>', u'')
- text= text.replace(u'<I>', u'')
- text= text.replace(u'</I>', u'')
- text= text.replace(u'<P />', u'')
- text= text.replace(u'<p />', u'')
- text= text.replace(u'</P>', u'')
- text= text.replace(u'<BR>', u'')
- text= text.replace(u'<BR />', u'')
- #text= text.replace(chr(189), u'1/2');print "l"
- text= text.replace(u'"', "'")
- text= text.replace(u''', "'")
-
- i = text.find(u'<')
- while i > -1 :
- j = text.find(u'>', i)
- text= text[:i] + text[j+1:]
- i = text.find(u'<')
-
- text= text.replace(u'>', u'')
+ text = text.replace(u'\n', u'')
+ text = text.replace(u'\r', u'')
+ text = text.replace(u' ', u'')
+ text = text.replace(u'<P>', u'')
+ text = text.replace(u'<I>', u'')
+ text = text.replace(u'</I>', u'')
+ text = text.replace(u'<P />', u'')
+ text = text.replace(u'<p />', u'')
+ text = text.replace(u'</P>', u'')
+ text = text.replace(u'<BR>', u'')
+ text = text.replace(u'<BR />', u'')
+ #text = text.replace(chr(189), u'1/2');print "l"
+ text = text.replace(u'"', u'\"')
+ text = text.replace(u''', u'\'')
+ # Remove some other tags
+ start_tag = text.find(u'<')
+ while start_tag > -1:
+ end_tag = text.find(u'>', start_tag)
+ text = text[:start_tag] + text[end_tag + 1:]
+ start_tag = text.find(u'<')
+ text = text.replace(u'>', u'')
return text.rstrip()
+
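For what it's worth, all three clean-up loops in _clean_text() are the same find-and-slice pattern. A minimal sketch of that pattern, assuming well-formed tags (the helper name is just for illustration):

def strip_tags(text):
    # Repeatedly cut out everything from a '<' up to the following '>'.
    start_tag = text.find(u'<')
    while start_tag > -1:
        end_tag = text.find(u'>', start_tag)
        text = text[:start_tag] + text[end_tag + 1:]
        start_tag = text.find(u'<')
    return text

print strip_tags(u'<p>In the <i>beginning</i></p>')
# In the beginning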