
ibid-dev team mailing list archive

[Merge] lp:~marco-gallotta/ibid/whens-it-up into lp:ibid

 

marcog has proposed merging lp:~marco-gallotta/ibid/whens-it-up into lp:ibid.

    Requested reviews:
    Ibid Core Team (ibid-core)
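
This branch turns the old 'get' feature into an 'http' feature in
ibid/plugins/network.py: "(get|head) <url>" now follows 301/302 redirects up
to max_hops, "is <domain> (up|down)" does a HEAD request and reports whether
the site responded, and "tell me when <domain> is up" re-checks the site on a
growing delay until it comes up or whensitup_maxperiod expires. It also makes
ibid.utils.url_to_bytestring() percent-encode the URL path.

As a rough illustration only (not part of the diff), the polling schedule
implied by the default whensitup_* options looks like this:

    # Sketch of the whensitup retry schedule built from the option defaults
    # below; the loop itself is illustrative, not code from the branch.
    delay = 60.0                  # whensitup_delay
    factor = 1.03                 # whensitup_factor
    maxdelay = 30 * 60            # whensitup_maxdelay
    maxperiod = 72 * 60 * 60      # whensitup_maxperiod (hours -> seconds)
    total, checks = 0.0, 0
    while total < maxperiod:
        total += delay
        checks += 1
        delay = min(delay * factor, maxdelay)
    print '%d checks over %.1f hours' % (checks, total / 3600.0)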

-- 
https://code.launchpad.net/~marco-gallotta/ibid/whens-it-up/+merge/17661
Your team Ibid Dev Team is subscribed to branch lp:ibid.
=== modified file 'ibid/plugins/network.py'
--- ibid/plugins/network.py	2010-01-18 23:20:33 +0000
+++ ibid/plugins/network.py	2010-01-19 15:52:13 +0000
@@ -1,18 +1,29 @@
-# Copyright (c) 2008-2010, Michael Gorven, Stefano Rivera
+# Copyright (c) 2008-2010, Michael Gorven, Stefano Rivera, Marco Gallotta
 # Released under terms of the MIT/X/Expat Licence. See COPYING for details.
 
 import re
+import socket
+from httplib import HTTPConnection, HTTPSConnection
 from subprocess import Popen, PIPE
+from sys import version_info
 
 from dns.resolver import Resolver, NoAnswer, NXDOMAIN
 from dns.reversename import from_address
-from httplib import HTTPConnection, HTTPSConnection
 from urllib import getproxies_environment
 from urlparse import urlparse
 
+import ibid
 from ibid.plugins import Processor, match
-from ibid.config import Option, IntOption
-from ibid.utils import file_in_path, unicode_output, human_join
+from ibid.config import Option, IntOption, FloatOption, DictOption
+from ibid.utils import file_in_path, unicode_output, human_join, url_to_bytestring
 from ibid.utils.html import get_country_codes
 
 help = {}
@@ -174,22 +185,133 @@
             error = unicode_output(error.strip())
             event.addresponse(error.replace(u'\n', u' '))
 
-help['get'] = u'Retrieves a URL and returns the HTTP status and optionally the HTML title.'
+class HTTPException(Exception):
+    pass
+
+help['http'] = u'Tests if an HTTP site is up and retrieves HTTP URLs.'
 class HTTP(Processor):
-    u"""(get|head) <url>"""
-    feature = 'get'
+    u"""(get|head) <url>
+    is <domain> (up|down)
+    tell me when <domain|url> is up"""
+    feature = 'http'
+    priority = -10
 
     max_size = IntOption('max_size', 'Only request this many bytes', 500)
-
-    @match(r'^(get|head)\s+(\S+\.\S+)$')
-    def handler(self, event, action, url):
-        if not url.lower().startswith("http://") and not url.lower().startswith("https://"):
-            url = "http://" + url
-        if url.count("/") < 3:
-            url += "/"
-
-        action = action.upper()
-
+    timeout = IntOption('timeout', 'Timeout for HTTP connections in seconds', 15)
+    sites = DictOption('sites', 'Mapping of site names to domains', {})
+    max_hops = IntOption('max_hops', 'Maximum hops in get/head when receiving a 30[12]', 3)
+    whensitup_delay = IntOption('whensitup_delay', 'Initial delay between whensitup attempts in seconds', 60)
+    whensitup_factor = FloatOption('whensitup_factor', 'Factor to multiply subsequent delays by for whensitup', 1.03)
+    whensitup_maxdelay = IntOption('whensitup_maxdelay', 'Maximum delay between whensitup attempts in seconds', 30*60)
+    whensitup_maxperiod = FloatOption('whensitup_maxperiod', 'Maximum period after which to stop checking the url for whensitup in hours', 72)
+
+    def _get_header(self, headers, name):
+        for header in headers:
+            if header[0] == name:
+                return header[1]
+        return None
+
+    @match(r'^(get|head)\s+(\S+)$')
+    def get(self, event, action, url):
+        try:
+            status, reason, data, headers = self._request(self._makeurl(url), action.upper())
+            reply = u'%s %s' % (status, reason)
+
+            hops = 0
+            while status in (301, 302) and self._get_header(headers, 'location'):
+                location = self._get_header(headers, 'location')
+                status, reason, data, headers = self._request(location, 'GET')
+                if hops >= self.max_hops:
+                    reply += u' to %s' % location
+                    break
+                hops += 1
+                reply += u' to %(location)s, which gets a %(status)d %(reason)s' % {
+                    u'location': location,
+                    u'status': status,
+                    u'reason': reason,
+                }
+
+            if action.upper() == 'GET':
+                match = title.search(data)
+                if match:
+                    reply += u' "%s"' % match.groups()[0].strip()
+
+            event.addresponse(reply)
+
+        except HTTPException, e:
+            event.addresponse(unicode(e))
+
+    def _makeurl(self, url):
+        if url in self.sites:
+            url = self.sites[url]
+        else:
+            if not urlparse(url).netloc:
+                if '.' not in url:
+                    url += '.com'
+                url = 'http://' + url
+            if not urlparse(url).path:
+                url += '/'
+        return url
+
+    def _isitup(self, url):
+        try:
+            status, reason, data, headers = self._request(self._makeurl(url), 'HEAD')
+            if not urlparse(url).netloc and not urlparse('http://' + url).path:
+                up = True # only domain provided, so since the server responded it is up
+            else:
+                up = status < 400 # url provided, so check the status returned
+                reason = u'%(status)d %(reason)s' % {
+                    u'status': status,
+                    u'reason': reason,
+                }
+        except HTTPException, e:
+            up = False
+            reason = u'Server is not responding'
+
+        return up, reason
+
+    @match(r'^is\s+(\S+)\s+(up|down)$')
+    def isit(self, event, url, type):
+        up, reason = self._isitup(url)
+        if up:
+            if type.lower() == 'up':
+                event.addresponse(u'Yes, %s is up', url)
+            else:
+                event.addresponse(u"No, it's just you")
+        else:
+            if type.lower() == 'up':
+                event.addresponse(u'No, %(url)s is down (%(reason)s)', {
+                    u'url': url,
+                    u'reason': reason,
+                })
+            else:
+                event.addresponse(u'Yes, %(url)s is down (%(reason)s)', {
+                    u'url': url,
+                    u'reason': reason,
+                })
+
+    def _whensitup(self, event, url, delay, total_delay = 0):
+        up, _ = self._isitup(url)
+        if up:
+            event.addresponse(u'%s is now up', self._makeurl(url))
+            return
+        total_delay += delay
+        if total_delay >= self.whensitup_maxperiod * 60 * 60:
+            event.addresponse(u"Sorry, it appears %s is never coming up. I'm not going to check any more.", self._makeurl(url))
+            return
+        delay *= self.whensitup_factor
+        delay = min(delay, self.whensitup_maxdelay)
+        ibid.dispatcher.call_later(delay, self._whensitup, event, url, delay, total_delay)
+
+    @match(r'^(?:tell\s+me|let\s+me\s+know)\s+when\s+(\S+)\s+is\s+(?:back\s+)?up$')
+    def whensitup(self, event, url):
+        up, _ = self._isitup(url)
+        if up:
+            event.addresponse(u'%s is up right now', self._makeurl(url))
+            return
+        ibid.dispatcher.call_later(self.whensitup_delay, self._whensitup, event, url, self.whensitup_delay)
+        event.addresponse(u"I'll let you know when %s is up", url)
+
+    def _request(self, url, method):
         scheme, host = urlparse(url)[:2]
         scheme = scheme.lower()
         proxies = getproxies_environment()
@@ -197,28 +319,34 @@
             scheme, host = urlparse(proxies[scheme])[:2]
             scheme = scheme.lower()
 
+        kwargs = {}
+        if version_info[1] >= 6:
+            kwargs['timeout'] = self.timeout
+        else:
+            socket.setdefaulttimeout(self.timeout)
+
         if scheme == "https":
-            conn = HTTPSConnection(host)
+            conn = HTTPSConnection(host, **kwargs)
         else:
-            conn = HTTPConnection(host)
+            conn = HTTPConnection(host, **kwargs)
 
         headers={}
-        if action == 'GET':
+        if method == 'GET':
             headers['Range'] = 'bytes=0-%s' % self.max_size
-        conn.request(action.upper(), url, headers=headers)
-
-        response = conn.getresponse()
-        reply = u'%s %s' % (response.status, response.reason)
-
-        data = response.read()
-        conn.close()
-
-        if action == 'GET':
-            match = title.search(data)
-            if match:
-                reply += u' "%s"' % match.groups()[0].strip()
-
-        event.addresponse(reply)
+
+        try:
+            conn.request(method.upper(), url_to_bytestring(url), headers=headers)
+            response = conn.getresponse()
+            data = response.read(self.max_size)
+            conn.close()
+        except socket.error, e:
+            raise HTTPException(e.message or e.args[1])
+
+        contenttype = response.getheader('Content-Type', 'text/html; charset=utf-8')
+        match = re.search('charset=([a-zA-Z0-9-]+)', contenttype)
+        charset = match and match.group(1) or 'utf-8'
+
+        return response.status, response.reason, data.decode(charset), response.getheaders()
 
 help['tld'] = u"Resolve country TLDs (ISO 3166)"
 class TLD(Processor):
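
An aside on the timeout handling added in _request() above (a sketch only,
assuming a Python 2.x interpreter, as the version_info[1] check implies):
httplib connections only accept a timeout keyword from Python 2.6 onwards, so
older interpreters fall back to the process-wide socket default.

    # Sketch of the version-dependent timeout handling used by _request();
    # the 15-second value stands in for the plugin's 'timeout' option.
    import socket
    from sys import version_info
    from httplib import HTTPConnection

    kwargs = {}
    if version_info[1] >= 6:
        kwargs['timeout'] = 15          # per-connection timeout (Python 2.6+)
    else:
        socket.setdefaulttimeout(15)    # process-wide fallback on 2.4/2.5
    conn = HTTPConnection('example.com', **kwargs)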

=== modified file 'ibid/utils/__init__.py'
--- ibid/utils/__init__.py	2010-01-18 22:45:15 +0000
+++ ibid/utils/__init__.py	2010-01-19 15:52:13 +0000
@@ -9,7 +9,7 @@
 from StringIO import StringIO
 from threading import Lock
 import time
-from urllib import urlencode
+from urllib import urlencode, quote
 import urllib2
 from urlparse import urlparse, urlunparse
 import zlib
@@ -168,6 +168,7 @@
     host = parts[1].split(':')
     host[0] = host[0].encode('idna')
     parts[1] = ':'.join(host)
+    parts[2] = quote(parts[2].encode('utf-8'))
     return urlunparse(parts).encode('utf-8')
 
 def json_webservice(url, params={}, headers={}):
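
For reference, a minimal sketch of the effect of the one-line change to
url_to_bytestring() above (the standalone function and the example URL are
illustrative; it assumes the lines before the hunk build parts with
parts = list(urlparse(url))):

    # Sketch only: the hunk above makes url_to_bytestring() percent-encode
    # the path in addition to IDNA-encoding the host.
    from urllib import quote
    from urlparse import urlparse, urlunparse

    def to_bytestring(url):
        parts = list(urlparse(url))
        host = parts[1].split(':')
        host[0] = host[0].encode('idna')
        parts[1] = ':'.join(host)
        parts[2] = quote(parts[2].encode('utf-8'))      # the new line
        return urlunparse(parts).encode('utf-8')

    print to_bytestring(u'http://example.com/caf\xe9')  # http://example.com/caf%C3%A9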

