← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~jameinel/maas/accept-gzip into lp:maas

 

John A Meinel has proposed merging lp:~jameinel/maas/accept-gzip into lp:maas.

Commit message:
Change the MAAS application so that it can return content with 'Content-Encoding: gzip'

This should be useful for general web browsers, so that any large page gets compressed before being sent on the wire. It helps the Tag builders immensely, because we are requesting many MB of data in each request, which compresses very well. This also teaches the MAASDispatcher how to set the Accept-Encoding header, and how to handle the Content-Encoding response.

In testing with many nodes, rebuilding tags can saturate a 1GB network link. So compressing the stream speeds up the tag processing from 22s to 15s on the nodes that have compression enabled.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~jameinel/maas/accept-gzip/+merge/130302
-- 
https://code.launchpad.net/~jameinel/maas/accept-gzip/+merge/130302
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~jameinel/maas/accept-gzip into lp:maas.
=== modified file 'src/apiclient/maas_client.py'
--- src/apiclient/maas_client.py	2012-10-09 14:19:33 +0000
+++ src/apiclient/maas_client.py	2012-10-18 09:03:41 +0000
@@ -16,6 +16,8 @@
     'MAASOAuth',
     ]
 
+import gzip
+from io import BytesIO
 import urllib2
 
 from apiclient.encode_json import encode_json_data
@@ -82,8 +84,29 @@
 
         :return: A open file-like object that contains the response.
         """
+        headers = dict(headers)
+        # header keys are case insensitive, so we have to pass over them
+        set_accept_encoding = False
+        for key in headers:
+            if key.lower() == 'accept-encoding':
+                # The user already supplied a requested encoding, so just pass
+                # it along.
+                break
+        else:
+            set_accept_encoding = True
+            headers['Accept-encoding'] = 'gzip'
         req = urllib2.Request(request_url, data, headers)
-        return urllib2.urlopen(req)
+        res = urllib2.urlopen(req)
+        # If we set the Accept-encoding header, then we decode the header for
+        # the caller
+        if set_accept_encoding and res.info().get('Content-Encoding') == 'gzip':
+            # Workaround python's gzip failure, gzip.GzipFile wants to be able
+            # to seek the file object.
+            res_content_io = BytesIO(res.read())
+            ungz = gzip.GzipFile(mode='rb', fileobj=res_content_io)
+            ungz.code = res.code
+            return ungz
+        return res
 
 
 class MAASClient:

=== modified file 'src/apiclient/tests/test_maas_client.py'
--- src/apiclient/tests/test_maas_client.py	2012-10-09 14:19:33 +0000
+++ src/apiclient/tests/test_maas_client.py	2012-10-18 09:03:41 +0000
@@ -12,8 +12,11 @@
 __metaclass__ = type
 __all__ = []
 
+import gzip
+from io import BytesIO
 import json
 from random import randint
+import urllib2
 from urlparse import (
     parse_qs,
     urljoin,
@@ -29,7 +32,9 @@
     parse_headers_and_body_with_django,
     parse_headers_and_body_with_mimer,
     )
+from maastesting.fixtures import TempWDFixture
 from maastesting.factory import factory
+from maastesting.httpd import HTTPServerFixture
 from maastesting.testcase import TestCase
 from testtools.matchers import (
     AfterPreprocessing,
@@ -55,6 +60,61 @@
         self.assertEqual(
             contents, MAASDispatcher().dispatch_query(url, {}).read())
 
+    def test_request_from_http(self):
+        # We can't just call self.make_file because HTTPServerFixture will only
+        # serve content from the current WD. And we don't want to create random
+        # content in the original WD.
+        self.useFixture(TempWDFixture())
+        name = factory.getRandomString()
+        content = factory.getRandomString().encode('ascii')
+        factory.make_file(location='.', name=name, contents=content)
+        with HTTPServerFixture() as httpd:
+            url = urljoin(httpd.url, name)
+            response = MAASDispatcher().dispatch_query(url, {})
+            self.assertEqual(200, response.code)
+            self.assertEqual(content, response.read())
+
+    def test_supports_content_encoding_gzip(self):
+        # The client will set the Accept-Encoding: gzip header, and it will
+        # also decompress the response if it comes back with Content-Encoding:
+        # gzip.
+        self.useFixture(TempWDFixture())
+        name = factory.getRandomString()
+        content = factory.getRandomString(300).encode('ascii')
+        factory.make_file(location='.', name=name, contents=content)
+        called = []
+        orig_urllib = urllib2.urlopen
+        def logging_urlopen(*args, **kwargs):
+            called.append((args, kwargs))
+            return orig_urllib(*args, **kwargs)
+        self.patch(urllib2, 'urlopen', logging_urlopen)
+        with HTTPServerFixture() as httpd:
+            url = urljoin(httpd.url, name)
+            res = MAASDispatcher().dispatch_query(url, {})
+            self.assertEqual(200, res.code)
+            self.assertEqual(content, res.read())
+        request = called[0][0][0]
+        self.assertEqual([((request,), {})], called)
+        self.assertEqual('gzip', request.headers.get('Accept-encoding'))
+
+    def test_doesnt_override_accept_encoding_headers(self):
+        # If someone passes their own Accept-Encoding header, then dispatch
+        # just passes it through.
+        self.useFixture(TempWDFixture())
+        name = factory.getRandomString()
+        content = factory.getRandomString(300).encode('ascii')
+        factory.make_file(location='.', name=name, contents=content)
+        with HTTPServerFixture() as httpd:
+            url = urljoin(httpd.url, name)
+            headers = {'Accept-encoding': 'gzip'}
+            res = MAASDispatcher().dispatch_query(url, headers)
+            self.assertEqual(200, res.code)
+            self.assertEqual('gzip', res.info().get('Content-Encoding'))
+            raw_content = res.read()
+        read_content = gzip.GzipFile(
+            mode='rb', fileobj=BytesIO(raw_content)).read()
+        self.assertEqual(content, read_content)
+
 
 def make_url():
     """Create an arbitrary URL."""

=== modified file 'src/maas/settings.py'
--- src/maas/settings.py	2012-10-15 15:20:22 +0000
+++ src/maas/settings.py	2012-10-18 09:03:41 +0000
@@ -247,6 +247,7 @@
     'django.contrib.auth.middleware.AuthenticationMiddleware',
     'django.contrib.messages.middleware.MessageMiddleware',
     'maasserver.middleware.AccessMiddleware',
+    'django.middleware.gzip.GzipMiddleware',
 )
 
 ROOT_URLCONF = 'maas.urls'

=== modified file 'src/maastesting/fixtures.py'
--- src/maastesting/fixtures.py	2012-10-06 11:05:44 +0000
+++ src/maastesting/fixtures.py	2012-10-18 09:03:41 +0000
@@ -18,10 +18,12 @@
     ]
 
 import logging
+import os
 
 from fixtures import (
     EnvironmentVariableFixture,
     Fixture,
+    TempDir,
     )
 from pyvirtualdisplay import Display
 from sst.actions import (
@@ -91,3 +93,16 @@
         super(ProxiesDisabledFixture, self).setUp()
         self.useFixture(EnvironmentVariableFixture("http_proxy"))
         self.useFixture(EnvironmentVariableFixture("https_proxy"))
+
+
+class TempWDFixture(TempDir):
+    """Change the current working directory into a temp dir.
+
+    This will restore the original WD and delete the temp directory on cleanup.
+    """
+
+    def setUp(self):
+        cwd = os.getcwd()
+        super(TempWDFixture, self).setUp()
+        self.addCleanup(os.chdir, cwd)
+        os.chdir(self.path)

=== modified file 'src/maastesting/httpd.py'
--- src/maastesting/httpd.py	2012-05-22 09:41:20 +0000
+++ src/maastesting/httpd.py	2012-10-18 09:03:41 +0000
@@ -15,6 +15,9 @@
     ]
 
 from BaseHTTPServer import HTTPServer
+import gzip
+from io import BytesIO
+import os
 from SimpleHTTPServer import SimpleHTTPRequestHandler
 from SocketServer import ThreadingMixIn
 import threading
@@ -31,6 +34,99 @@
     log_request = lambda *args, **kwargs: None
     log_error = lambda *args, **kwargs: None
 
+    def _gzip_compress(self, f):
+        gz_out = BytesIO()
+        gz = gzip.GzipFile(mode='wb', fileobj=gz_out)
+        gz.write(f.read())
+        gz.flush()
+        gz_content = gz_out.getvalue()
+        return gz_out
+
+    def is_gzip_accepted(self):
+        accepted = set()
+        for header in self.headers.getallmatchingheaders('Accept-Encoding'):
+            # getallmatchingheaders returns the whole line, so first we have to
+            # split off the header definition
+            _, content = header.split(':', 1)
+            content = content.strip()
+            # Then, you are allowed to specify a comma separated list of
+            # acceptable encodings. You are also allowed to specify
+            # 'encoding;q=XXX' to specify what encodings you would prefer.
+            # We'll allow it to be set, but just ignore it.
+            #   http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html
+            encodings = [encoding.strip().split(';', )[0]
+                         for encoding in content.split(',')]
+            accepted.update(encodings)
+        if 'gzip' in accepted:
+            return True
+        return False
+
+    # This is a copy & paste and minor modification of
+    # SimpleHTTPRequestHandler's send_head code. Because to support
+    # Content-Encoding gzip, we have to change what headers get returned (as it
+    # affects Content-Length headers.
+    def send_head(self):
+        """Common code for GET and HEAD commands.
+
+        This sends the response code and MIME headers.
+
+        Return value is either a file object (which has to be copied
+        to the outputfile by the caller unless the command was HEAD,
+        and must be closed by the caller under all circumstances), or
+        None, in which case the caller has nothing further to do.
+
+        """
+        path = self.translate_path(self.path)
+        self.AS_GZIP = False
+        f = None
+        if os.path.isdir(path):
+            if not self.path.endswith('/'):
+                # redirect browser - doing basically what apache does
+                self.send_response(301)
+                self.send_header("Location", self.path + "/")
+                self.end_headers()
+                return None
+            for index in "index.html", "index.htm":
+                index = os.path.join(path, index)
+                if os.path.exists(index):
+                    path = index
+                    break
+            else:
+                return self.list_directory(path)
+        ctype = self.guess_type(path)
+        try:
+            # Always read in binary mode. Opening files in text mode may cause
+            # newline translations, making the actual size of the content
+            # transmitted *less* than the content-length!
+            f = open(path, 'rb')
+        except IOError:
+            self.send_error(404, "File not found")
+            return None
+        if self.is_gzip_accepted():
+            self.AS_GZIP = True
+            return self.start_gz_response(ctype, f)
+        else:
+            return self.start_response(ctype, f)
+
+    def start_gz_response(self, ctype, f):
+        self.send_response(200)
+        self.send_header("Content-type", ctype)
+        self.send_header("Content-Encoding", 'gzip')
+        gz_out = self._gzip_compress(f)
+        self.send_header("Content-Length", str(gz_out.tell()))
+        gz_out.seek(0)
+        self.end_headers()
+        return gz_out
+
+    def start_response(self, ctype, f):
+        self.send_response(200)
+        self.send_header("Content-type", ctype)
+        fs = os.fstat(f.fileno())
+        self.send_header("Content-Length", str(fs[6]))
+        self.send_header("Last-Modified", self.date_time_string(fs.st_mtime))
+        self.end_headers()
+        return f
+
 
 class HTTPServerFixture(Fixture):
     """Bring up a very simple, threaded, web server.

=== modified file 'src/maastesting/tests/test_fixtures.py'
--- src/maastesting/tests/test_fixtures.py	2012-10-06 11:05:44 +0000
+++ src/maastesting/tests/test_fixtures.py	2012-10-18 09:03:41 +0000
@@ -16,7 +16,10 @@
 
 from fixtures import EnvironmentVariableFixture
 from maastesting.factory import factory
-from maastesting.fixtures import ProxiesDisabledFixture
+from maastesting.fixtures import (
+    ProxiesDisabledFixture,
+    TempWDFixture,
+    )
 from maastesting.testcase import TestCase
 
 
@@ -42,3 +45,17 @@
             self.assertNotIn("https_proxy", os.environ)
         # On exit, http_proxy is restored.
         self.assertEqual(https_proxy, os.environ.get("https_proxy"))
+
+
+class TestTempWDFixture(TestCase):
+
+    def test_changes_dir_and_cleans_up(self):
+        orig_cwd = os.getcwd()
+        with TempWDFixture() as temp_wd:
+            new_cwd = os.getcwd()
+            self.assertTrue(os.path.isdir(temp_wd.path))
+            self.assertNotEqual(orig_cwd, new_cwd)
+            self.assertEqual(new_cwd, temp_wd.path)
+        final_cwd = os.getcwd()
+        self.assertEqual(orig_cwd, final_cwd)
+        self.assertFalse(os.path.isdir(new_cwd))

=== modified file 'src/maastesting/tests/test_httpd.py'
--- src/maastesting/tests/test_httpd.py	2012-10-06 11:35:20 +0000
+++ src/maastesting/tests/test_httpd.py	2012-10-18 09:03:41 +0000
@@ -13,12 +13,17 @@
 __all__ = []
 
 from contextlib import closing
+import gzip
+from io import BytesIO
 from os.path import relpath
 from socket import (
     gethostbyname,
     gethostname,
     )
-from urllib2 import urlopen
+from urllib2 import (
+    Request,
+    urlopen,
+    )
 from urlparse import urljoin
 
 from maastesting.fixtures import ProxiesDisabledFixture
@@ -56,3 +61,24 @@
         self.assertEqual(
             file_data_in, http_data_in,
             "The content of %s differs from %s." % (url, filename))
+
+    def ungzip(self, content):
+        gz = gzip.GzipFile(fileobj=BytesIO(content))
+        return gz.read()
+
+    def test_supports_gzip(self):
+        filename = relpath(__file__)
+        with HTTPServerFixture() as httpd:
+            url = urljoin(httpd.url, filename)
+            headers = {'Accept-Encoding': 'gzip, deflate'}
+            request = Request(url, None, headers=headers)
+            with closing(urlopen(request)) as http_in:
+                http_headers = http_in.info()
+                http_data_in = http_in.read()
+        self.assertEqual('gzip', http_headers['Content-Encoding'])
+        with open(filename, "rb") as file_in:
+            file_data_in = file_in.read()
+        http_data_decompressed = self.ungzip(http_data_in)
+        self.assertEqual(
+            file_data_in, http_data_decompressed,
+            "The content of %s differs from %s." % (url, filename))