ibid-dev team mailing list archive
-
ibid-dev team
-
Mailing list archive
-
Message #00005
[Merge] lp:~marco-gallotta/ibid/whens-it-up into lp:ibid
marcog has proposed merging lp:~marco-gallotta/ibid/whens-it-up into lp:ibid.
Requested reviews:
Ibid Core Team (ibid-core)
--
https://code.launchpad.net/~marco-gallotta/ibid/whens-it-up/+merge/17661
Your team Ibid Dev Team is subscribed to branch lp:ibid.
=== modified file 'ibid/plugins/network.py'
--- ibid/plugins/network.py 2010-01-18 23:20:33 +0000
+++ ibid/plugins/network.py 2010-01-19 15:52:13 +0000
@@ -1,18 +1,29 @@
+<<<<<<< TREE
# Copyright (c) 2008-2010, Michael Gorven, Stefano Rivera
# Released under terms of the MIT/X/Expat Licence. See COPYING for details.
+=======
+# Copyright (c) 2008-2010, Michael Gorven, Stefano Rivera, Marco Gallotta
+# Released under terms of the MIT/X/Expat Licence. See COPYING for details.
+
+>>>>>>> MERGE-SOURCE
import re
+import socket
+from httplib import HTTPConnection, HTTPSConnection
from subprocess import Popen, PIPE
+from urllib import getproxies_environment
+from urlparse import urlparse
+from sys import version_info
from dns.resolver import Resolver, NoAnswer, NXDOMAIN
from dns.reversename import from_address
-from httplib import HTTPConnection, HTTPSConnection
from urllib import getproxies_environment
from urlparse import urlparse
+import ibid
from ibid.plugins import Processor, match
-from ibid.config import Option, IntOption
-from ibid.utils import file_in_path, unicode_output, human_join
+from ibid.config import Option, IntOption, FloatOption, DictOption
+from ibid.utils import file_in_path, unicode_output, human_join, url_to_bytestring
from ibid.utils.html import get_country_codes
help = {}
@@ -174,22 +185,133 @@
error = unicode_output(error.strip())
event.addresponse(error.replace(u'\n', u' '))
-help['get'] = u'Retrieves a URL and returns the HTTP status and optionally the HTML title.'
+class HTTPException(Exception):
+ pass
+
+help['http'] = u'Tests if an HTTP site is up and retrieves HTTP URLs.'
class HTTP(Processor):
- u"""(get|head) <url>"""
- feature = 'get'
+ u"""(get|head) <url>
+ is <domain> (up|down)
+ tell me when <domain|url> is up"""
+ feature = 'http'
+ priority = -10
max_size = IntOption('max_size', 'Only request this many bytes', 500)
-
- @match(r'^(get|head)\s+(\S+\.\S+)$')
- def handler(self, event, action, url):
- if not url.lower().startswith("http://") and not url.lower().startswith("https://"):
- url = "http://" + url
- if url.count("/") < 3:
- url += "/"
-
- action = action.upper()
-
+ timeout = IntOption('timeout', 'Timeout for HTTP connections in seconds', 15)
+ sites = DictOption('sites', 'Mapping of site names to domains', {})
+ max_hops = IntOption('max_hops', 'Maximum hops in get/head when receiving a 30[12]', 3)
+ whensitup_delay = IntOption('whensitup_delay', 'Initial delay between whensitup attemtps in seconds', 60)
+ whensitup_factor = FloatOption('whensitup_factor', 'Factor to mutliply subsequent delays by for whensitup', 1.03)
+ whensitup_maxdelay = IntOption('whensitup_maxdelay', 'Maximum delay between whensitup attempts in seconds', 30*60)
+ whensitup_maxperiod = FloatOption('whensitup_maxperiod', 'Maximum period after which to stop checking the url for whensitup in hours', 72)
+
+ def _get_header(self, headers, name):
+ for header in headers:
+ if header[0] == name:
+ return header[1]
+ return None
+
+ @match(r'^(get|head)\s+(\S+)$')
+ def get(self, event, action, url):
+ try:
+ status, reason, data, headers = self._request(self._makeurl(url), action.upper())
+ reply = u'%s %s' % (status, reason)
+
+ hops = 0
+ while status == 301 or status == 302 and self._get_header(headers, 'location'):
+ location = self._get_header(headers, 'location')
+ status, reason, data, headers = self._request(location, 'GET')
+ if hops >= self.max_hops:
+ reply += u' to %s' % location
+ break
+ hops += 1
+ reply += u' to %(location)s, which gets a %(status)d %(reason)s' % {
+ u'location': location,
+ u'status': status,
+ u'reason': reason,
+ }
+
+ if action.upper() == 'GET':
+ match = title.search(data)
+ print data
+ if match:
+ reply += u' "%s"' % match.groups()[0].strip()
+
+ event.addresponse(reply)
+
+ except HTTPException, e:
+ event.addresponse(unicode(e))
+
+ def _makeurl(self, url):
+ if url in self.sites:
+ url = self.sites[url]
+ else:
+ if not urlparse(url).netloc:
+ if '.' not in url:
+ url += '.com'
+ url = 'http://' + url
+ if not urlparse(url).path:
+ url += '/'
+ return url
+
+ def _isitup(self, url):
+ try:
+ status, reason, data, headers = self._request(self._makeurl(url), 'HEAD')
+ if not urlparse(url).netloc and not urlparse('http://' + url).path:
+ up = True # only domain provided, so since the server responded it is up
+ else:
+ up = status < 400 # url provided, so check the status returned
+ reason = u'%(status)d %(reason)s' % {
+ u'status': status,
+ u'reason': reason,
+ }
+ except HTTPException, e:
+ up = False
+ reason = u'Server is not responding'
+
+ return up, reason
+
+ @match(r'^is\s+(\S+)\s+(up|down)$')
+ def isit(self, event, url, type):
+ up, reason = self._isitup(url)
+ if up:
+ if type.lower() == 'up':
+ event.addresponse(u'Yes, %s is up', url)
+ else:
+ event.addresponse(u"No, it's just you")
+ else:
+ if type.lower() == 'up':
+ event.addresponse(u'No, %(url)s is down (%(reason)s)', {
+ u'url': url,
+ u'reason': reason,
+ })
+ else:
+ event.addresponse(u'Yes, %(url)s is down (%(reason)s)', {
+ u'url': url,
+ u'reason': reason,
+ })
+
+ def _whensitup(self, event, url, delay, total_delay = 0):
+ up, _, _ = self._isitup(url)
+ if up:
+ event.addresponse(u'%s is now up', self._makeurl(url))
+ return
+ total_delay += delay
+ if total_delay >= self.whensitup_maxperiod * 60 * 60:
+ event.addresponse(u"Sorry, it appears %s is never coming up. I'm not going to check any more.", self._makeurl(url))
+ delay *= self.whensitup_factor
+ delay = max(delay, self.whensitup_maxdelay)
+ ibid.dispatcher.call_later(delay, self._whensitup, event, url, delay)
+
+ @match(r'^(?:tell\s+me|let\s+me\s+know)\s+when\s+(\S+)\s+is\s+(?:back\s+)?up$')
+ def whensitup(self, event, url):
+ if self._isitup(url):
+ event.addresponse(u'%s is up right now', self._makeurl(url))
+ return
+ ibid.dispatcher.call_later(self.whensitup_delay, self._whensitup, event, url, self.whensitup_delay)
+ event.addresponse(u"I'll let you know when %s is up", url)
+
+ def _request(self, url, method):
scheme, host = urlparse(url)[:2]
scheme = scheme.lower()
proxies = getproxies_environment()
@@ -197,28 +319,34 @@
scheme, host = urlparse(proxies[scheme])[:2]
scheme = scheme.lower()
+ kwargs = {}
+ if version_info[1] >= 6:
+ kwargs['timeout'] = self.timeout
+ else:
+ socket.setdefaulttimeout(self.timeout)
+
if scheme == "https":
- conn = HTTPSConnection(host)
+ conn = HTTPSConnection(host, **kwargs)
else:
- conn = HTTPConnection(host)
+ conn = HTTPConnection(host, **kwargs)
headers={}
- if action == 'GET':
+ if method == 'GET':
headers['Range'] = 'bytes=0-%s' % self.max_size
- conn.request(action.upper(), url, headers=headers)
-
- response = conn.getresponse()
- reply = u'%s %s' % (response.status, response.reason)
-
- data = response.read()
- conn.close()
-
- if action == 'GET':
- match = title.search(data)
- if match:
- reply += u' "%s"' % match.groups()[0].strip()
-
- event.addresponse(reply)
+
+ try:
+ conn.request(method.upper(), url_to_bytestring(url), headers=headers)
+ response = conn.getresponse()
+ data = response.read(self.max_size)
+ conn.close()
+ except socket.error, e:
+ raise HTTPException(e.message or e.args[1])
+
+ contenttype = response.getheader('Content-Type', 'text/html; charset=utf-8')
+ match = re.search('charset=([a-zA-Z0-9-]+)', contenttype)
+ charset = match and match.group(1) or 'utf-8'
+
+ return response.status, response.reason, data.decode(charset), response.getheaders()
help['tld'] = u"Resolve country TLDs (ISO 3166)"
class TLD(Processor):
=== modified file 'ibid/utils/__init__.py'
--- ibid/utils/__init__.py 2010-01-18 22:45:15 +0000
+++ ibid/utils/__init__.py 2010-01-19 15:52:13 +0000
@@ -9,7 +9,7 @@
from StringIO import StringIO
from threading import Lock
import time
-from urllib import urlencode
+from urllib import urlencode, quote
import urllib2
from urlparse import urlparse, urlunparse
import zlib
@@ -168,6 +168,7 @@
host = parts[1].split(':')
host[0] = host[0].encode('idna')
parts[1] = ':'.join(host)
+ parts[2] = quote(parts[2].encode('utf-8'))
return urlunparse(parts).encode('utf-8')
def json_webservice(url, params={}, headers={}):
Follow ups