← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~gary/launchpad/bug676489 into lp:launchpad/devel

 

Gary Poster has proposed merging lp:~gary/launchpad/bug676489 into lp:launchpad/devel.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)


This change fixes bug 676489 by adding code to handle a space in the path.  I added tests for these changes, as well as for an existing code path pertinent to HTTP 1.0 possibly not including the protocol.

To run all the pertinent tests I know about, run ``./bin/test -vvt test_apachelogparser``.

``make lint`` is happy.

Thank you.
-- 
https://code.launchpad.net/~gary/launchpad/bug676489/+merge/41069
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~gary/launchpad/bug676489 into lp:launchpad/devel.
=== modified file 'lib/lp/services/apachelogparser/base.py'
--- lib/lp/services/apachelogparser/base.py	2010-09-11 19:25:13 +0000
+++ lib/lp/services/apachelogparser/base.py	2010-11-17 16:43:20 +0000
@@ -204,15 +204,16 @@
 
 def get_method_and_path(request):
     """Extract the method of the request and path of the requested file."""
-    L = request.split()
-    # HTTP 1.0 requests might omit the HTTP version so we must cope with them.
-    if len(L) == 2:
-        method, path = L
-    else:
-        method, path, protocol = L
-
+    method, ignore, rest = request.partition(' ')
+    path, ignore, protocol = rest.rpartition(' ')
+    if not path:
+        # HTTP 1.0 requests might omit the HTTP version so we cope with them.
+        path = protocol
+    elif not protocol.startswith('HTTP'):
+        # We cope with HTTP 1.0 protocol without HTTP version *and* a
+        # space in the path (see bug 676489 for example).
+        path = rest
     if path.startswith('http://') or path.startswith('https://'):
         uri = URI(path)
         path = uri.path
-
     return method, path

=== modified file 'lib/lp/services/apachelogparser/tests/test_apachelogparser.py'
--- lib/lp/services/apachelogparser/tests/test_apachelogparser.py	2010-10-04 19:50:45 +0000
+++ lib/lp/services/apachelogparser/tests/test_apachelogparser.py	2010-11-17 16:43:20 +0000
@@ -29,6 +29,7 @@
     get_fd_and_file_size,
     get_files_to_parse,
     get_host_date_status_and_request,
+    get_method_and_path,
     parse_file,
     )
 from lp.services.apachelogparser.model.parsedapachelog import ParsedApacheLog
@@ -71,6 +72,35 @@
         date = '[13/Jun/2008:18:38:57 +0100]'
         self.assertEqual(get_day(date), datetime(2008, 6, 13))
 
+    def test_parsing_path_with_missing_protocol(self):
+        request = (r'GET /56222647/deluge-gtk_1.3.0-0ubuntu1_all.deb?'
+                   r'N\x1f\x9b')
+        method, path = get_method_and_path(request)
+        self.assertEqual(method, 'GET')
+        self.assertEqual(
+            path,
+            r'/56222647/deluge-gtk_1.3.0-0ubuntu1_all.deb?N\x1f\x9b')
+
+    def test_parsing_path_with_space(self):
+        # See bug 676489.
+        request = (r'GET /56222647/deluge-gtk_1.3.0-0ubuntu1_all.deb?'
+                   r'N\x1f\x9b Z%7B... HTTP/1.0')
+        method, path = get_method_and_path(request)
+        self.assertEqual(method, 'GET')
+        self.assertEqual(
+            path,
+            r'/56222647/deluge-gtk_1.3.0-0ubuntu1_all.deb?N\x1f\x9b Z%7B...')
+
+    def test_parsing_path_with_space_and_missing_protocol(self):
+        # This is a variation of bug 676489.
+        request = (r'GET /56222647/deluge-gtk_1.3.0-0ubuntu1_all.deb?'
+                   r'N\x1f\x9b Z%7B...')
+        method, path = get_method_and_path(request)
+        self.assertEqual(method, 'GET')
+        self.assertEqual(
+            path,
+            r'/56222647/deluge-gtk_1.3.0-0ubuntu1_all.deb?N\x1f\x9b Z%7B...')
+
 
 class Test_get_fd_and_file_size(TestCase):