launchpad-reviewers team mailing list archive

Re: [Merge] lp:~cjwatson/launchpad/timeout-with-requests into lp:launchpad


Review: Approve code



Diff comments:

> 
> === modified file 'lib/lp/services/timeout.py'
> --- lib/lp/services/timeout.py	2014-08-29 01:41:14 +0000
> +++ lib/lp/services/timeout.py	2016-03-22 16:38:32 +0000
> @@ -145,47 +158,112 @@
>          return call_with_timeout
>  
>  
> -class CleanableHTTPHandler(urllib2.HTTPHandler):
> -    """Subclass of `urllib2.HTTPHandler` that can be cleaned-up."""
> -
> -    def http_open(self, req):
> -        """See `urllib2.HTTPHandler`."""
> -        def connection_factory(*args, **kwargs):
> -            """Save the created connection so that we can clean it up."""
> -            self.__conn = httplib.HTTPConnection(*args, **kwargs)
> -            return self.__conn
> -        return self.do_open(connection_factory, req)
> -
> -    def reset_connection(self):
> -        """Reset the underlying HTTP connection."""
> -        try:
> -            self.__conn.sock.shutdown(socket.SHUT_RDWR)
> -        except AttributeError:
> -            # It's possible that the other thread closed the socket
> -            # beforehand.
> -            pass
> -        self.__conn.close()
> +class CleanableConnectionPoolMixin:
> +    """Enhance urllib3's connection pools to support forced socket cleanup."""
> +
> +    def __init__(self, *args, **kwargs):
> +        super(CleanableConnectionPoolMixin, self).__init__(*args, **kwargs)
> +        self._all_connections = []
> +        self._all_connections_mutex = Lock()
> +
> +    def _new_conn(self):
> +        self._all_connections_mutex.acquire()
> +        try:
> +            if self._all_connections is None:
> +                raise ClosedPoolError(self, "Pool is closed.")

I'm disappointed that you don't blithely catch AttributeError around a complex function call like the superclass does, but I suppose I can live with no further perpetuation of that horror.

> +            conn = super(CleanableConnectionPoolMixin, self)._new_conn()
> +            self._all_connections.append(conn)
> +            return conn
> +        finally:
> +            self._all_connections_mutex.release()

This mutex could be used in a with statement instead.
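
Something like this, as a sketch of what I mean (same behaviour, just letting the lock's context manager do the release):

    def _new_conn(self):
        with self._all_connections_mutex:
            if self._all_connections is None:
                raise ClosedPoolError(self, "Pool is closed.")
            conn = super(CleanableConnectionPoolMixin, self)._new_conn()
            self._all_connections.append(conn)
            return conn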

> +
> +    def close(self):
> +        self._all_connections_mutex.acquire()
> +        try:
> +            if self._all_connections is None:
> +                return
> +            for conn in self._all_connections:
> +                sock = getattr(conn, "sock", None)
> +                if sock is not None:
> +                    sock.shutdown(socket.SHUT_RDWR)
> +                    sock.close()
> +                    conn.sock = None
> +            self._all_connections = None
> +        finally:
> +            self._all_connections_mutex.release()
> +        super(CleanableConnectionPoolMixin, self).close()

Would it be more polite to let pooled connections be closed by the superclass and then close the leftovers? Does it matter?
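
If it does matter, I'd picture something roughly like this — only a sketch, and I haven't checked how urllib3's close() treats connections that are checked out of the pool at the time:

    def close(self):
        # Let urllib3 drain and close whatever is still sitting in the pool...
        super(CleanableConnectionPoolMixin, self).close()
        # ...then forcibly shut down any leftovers we ever handed out.
        with self._all_connections_mutex:
            if self._all_connections is None:
                return
            for conn in self._all_connections:
                sock = getattr(conn, "sock", None)
                if sock is not None:
                    sock.shutdown(socket.SHUT_RDWR)
                    sock.close()
                    conn.sock = None
            self._all_connections = None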

> +
> +
> +class CleanableHTTPConnectionPool(
> +    CleanableConnectionPoolMixin, HTTPConnectionPool):
> +    pass
> +
> +
> +class CleanableHTTPSConnectionPool(
> +    CleanableConnectionPoolMixin, HTTPSConnectionPool):
> +    pass
> +
> +
> +cleanable_pool_classes_by_scheme = {
> +    "http": CleanableHTTPConnectionPool,
> +    "https": CleanableHTTPSConnectionPool,
> +    }
> +
> +
> +class CleanablePoolManager(PoolManager):
> +    """A version of urllib3's PoolManager supporting forced socket cleanup."""
> +
> +    # XXX cjwatson 2015-03-11: Reimplements PoolManager._new_pool; check
> +    # this when upgrading requests.
> +    def _new_pool(self, scheme, host, port):
> +        if scheme not in cleanable_pool_classes_by_scheme:
> +            raise ValueError("Unhandled scheme: %s" % scheme)
> +        pool_cls = cleanable_pool_classes_by_scheme[scheme]
> +        if scheme == 'http':
> +            kwargs = self.connection_pool_kw.copy()
> +            for kw in ('key_file', 'cert_file', 'cert_reqs', 'ca_certs',
> +                       'ssl_version'):

Ew. I see a commit with the message "Remove import of non-public name.", though, so I'm just going to blindly assume this line was because of that and not check, because I don't want my evening ruined by such a popular library. Please don't tell me.

> +                kwargs.pop(kw, None)
> +
> +        return pool_cls(host, port, **kwargs)
> +
> +
> +class CleanableHTTPAdapter(HTTPAdapter):
> +    """Enhance HTTPAdapter to use CleanablePoolManager."""
> +
> +    # XXX cjwatson 2015-03-11: Reimplements HTTPAdapter.init_poolmanager;
> +    # check this when upgrading requests.
> +    def init_poolmanager(self, connections, maxsize, block=DEFAULT_POOLBLOCK):

This is missing the superclass's **kwargs. Is that a problem?
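
If it turns out to matter, forwarding it is cheap enough — roughly this, as a sketch (I haven't checked exactly which extra keyword arguments requests passes through here):

    def init_poolmanager(self, connections, maxsize, block=DEFAULT_POOLBLOCK,
                         **pool_kwargs):
        # save these values for pickling (as the superclass does)
        self._pool_connections = connections
        self._pool_maxsize = maxsize
        self._pool_block = block

        self.poolmanager = CleanablePoolManager(
            num_pools=connections, maxsize=maxsize, block=block, **pool_kwargs)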

> +        # save these values for pickling
> +        self._pool_connections = connections
> +        self._pool_maxsize = maxsize
> +        self._pool_block = block
> +
> +        self.poolmanager = CleanablePoolManager(
> +            num_pools=connections, maxsize=maxsize, block=block)
>  
>  
>  class URLFetcher:
>      """Object fetching remote URLs with a time out."""
>  
>      @with_timeout(cleanup='cleanup')
> -    def fetch(self, url, data=None):
> +    def fetch(self, url, **request_kwargs):
>          """Fetch the URL using a custom HTTP handler supporting timeout."""
> -        assert url.startswith('http://'), "only http is supported."
> -        self.handler = CleanableHTTPHandler()
> -        opener = urllib2.build_opener(self.handler)
> -        return opener.open(url, data).read()
> +        request_kwargs.setdefault("method", "GET")
> +        self.session = Session()
> +        # Mount our custom adapters.
> +        self.session.mount("https://", CleanableHTTPAdapter())
> +        self.session.mount("http://", CleanableHTTPAdapter())
> +        return self.session.request(url=url, **request_kwargs).content
>  
>      def cleanup(self):
>          """Reset the connection when the operation timed out."""
> -        self.handler.reset_connection()
> -
> -
> -def urlfetch(url, data=None):
> -    """Wrapper for `urllib2.urlopen()` that times out."""
> -    return URLFetcher().fetch(url, data)
> +        self.session.close()
> +
> +
> +def urlfetch(url, **request_kwargs):
> +    """Wrapper for `requests.get()` that times out."""
> +    return URLFetcher().fetch(url, **request_kwargs)
>  
>  
>  class TransportWithTimeout(Transport):


-- 
https://code.launchpad.net/~cjwatson/launchpad/timeout-with-requests/+merge/252570
Your team Launchpad code reviewers is subscribed to branch lp:launchpad.

