launchpad-reviewers team mailing list archive
  
  - 
     launchpad-reviewers team launchpad-reviewers team
- 
    Mailing list archive
  
- 
    Message #04782
  
	lp:~matsubara/python-oops-datedir-repo/833857-blankline-reqvars	into	lp:python-oops-datedir-repo
  
Diogo Matsubara has proposed merging lp:~matsubara/python-oops-datedir-repo/833857-blankline-reqvars into lp:python-oops-datedir-repo.
Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  Bug #833857 in Python OOPS Date-dir repository: "serializer can't read isd oops report file"
  https://bugs.launchpad.net/python-oops-datedir-repo/+bug/833857
For more details, see:
https://code.launchpad.net/~matsubara/python-oops-datedir-repo/833857-blankline-reqvars/+merge/72955
This branch fixes the serializer to correctly parse request variables with a blank line between them as found in some ISD oops reports.
-- 
https://code.launchpad.net/~matsubara/python-oops-datedir-repo/833857-blankline-reqvars/+merge/72955
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~matsubara/python-oops-datedir-repo/833857-blankline-reqvars into lp:python-oops-datedir-repo.
=== modified file 'oops_datedir_repo/serializer_rfc822.py'
--- oops_datedir_repo/serializer_rfc822.py	2011-08-17 00:46:13 +0000
+++ oops_datedir_repo/serializer_rfc822.py	2011-08-25 20:19:26 +0000
@@ -78,7 +78,10 @@
         topic = msg.getheader('page-id')
     username = msg.getheader('user')
     url = msg.getheader('url')
-    duration = int(float(msg.getheader('duration', '-1')))
+    try:
+        duration = int(float(msg.getheader('duration', '-1')))
+    except ValueError:
+        duration = -1
     informational = msg.getheader('informational')
     branch_nick = msg.getheader('branch')
     revno = msg.getheader('revision')
@@ -90,33 +93,42 @@
     # support iteration.
     lines = iter(msg.fp)
 
-    # Request variables until the first blank line.
+    def is_req_var(line):
+        return "=" in line
+
+    def is_statement(line):
+        return re.match(r'^(\d+)-(\d+)(?:@([\w-]+))?\s+(.*)', line)
+
+    def is_traceback(line):
+        return 'traceback' in line.lower() or '== EXTRA DATA ==' in line
+
     req_vars = []
-    for line in lines:
-        line = line.strip()
-        if line == '':
-            break
-        key, value = line.split('=', 1)
-        req_vars.append((urllib.unquote(key), urllib.unquote(value)))
-
-    # Statements until the next blank line.
     statements = []
+    first_tb_line = ''
     for line in lines:
         line = line.strip()
         if line == '':
-            break
-        match = re.match(
-            r'^(\d+)-(\d+)(?:@([\w-]+))?\s+(.*)', line)
-        assert match is not None, (
-            "Unable to interpret oops line: %s" % line)
-        start, end, db_id, statement = match.groups()
-        if db_id is not None:
-            db_id = intern(db_id)  # This string is repeated lots.
-        statements.append(
-            (int(start), int(end), db_id, statement))
+            continue
+        else:
+            if is_req_var(line):
+                key, value = line.split('=', 1)
+                req_vars.append((urllib.unquote(key), urllib.unquote(value)))
+            elif is_statement(line):
+                match = re.match(
+                    r'^(\d+)-(\d+)(?:@([\w-]+))?\s+(.*)', line)
+                assert match is not None, (
+                    "Unable to interpret oops line: %s" % line)
+                start, end, db_id, statement = match.groups()
+                if db_id is not None:
+                    db_id = intern(db_id)  # This string is repeated lots.
+                statements.append(
+                    (int(start), int(end), db_id, statement))
+            elif is_traceback(line):
+                first_tb_line = line
+                break
 
     # The rest is traceback.
-    tb_text = ''.join(lines)
+    tb_text = ''.join([first_tb_line] + list(lines))
 
     result = dict(id=id, type=exc_type, value=exc_value, time=date,
             topic=topic, tb_text=tb_text, username=username, url=url,
=== modified file 'oops_datedir_repo/tests/test_serializer_rfc822.py'
--- oops_datedir_repo/tests/test_serializer_rfc822.py	2011-08-17 00:38:02 +0000
+++ oops_datedir_repo/tests/test_serializer_rfc822.py	2011-08-25 20:19:26 +0000
@@ -79,6 +79,37 @@
             report['db_statements'][1],
             (5, 10, 'store_b', 'SELECT 2'))
 
+    def test_read_blankline_req_vars(self):
+        """Test ErrorReport.read() for old logs with a blankline between
+        reqvars."""
+        fp = StringIO.StringIO(dedent("""\
+            Oops-Id: OOPS-A0001
+            Exception-Type: NotFound
+            Exception-Value: error message
+            Date: 2005-04-01T00:00:00+00:00
+            Topic: IFoo:+foo-template
+            User: Sample User
+            URL: http://localhost:9000/foo
+            Duration: 42
+
+            HTTP_USER_AGENT=Mozilla/5.0
+
+            HTTP_REFERER=http://localhost:9000/
+            name%3Dfoo=hello%0Aworld
+
+            00001-00005@store_a SELECT 1
+            00005-00010@store_b SELECT 2
+
+            traceback-text"""))
+        report = read(fp)
+        self.assertEqual(report['id'], 'OOPS-A0001')
+        self.assertEqual(report['req_vars'][0], ('HTTP_USER_AGENT',
+                                             'Mozilla/5.0'))
+        self.assertEqual(report['req_vars'][1], ('HTTP_REFERER',
+                                             'http://localhost:9000/'))
+        self.assertEqual(report['req_vars'][2], ('name=foo', 'hello\nworld'))
+        self.assertEqual(len(report['db_statements']), 2)
+
     def test_read_no_store_id(self):
         """Test ErrorReport.read() for old logs with no store_id."""
         fp = StringIO.StringIO(dedent("""\
@@ -145,6 +176,16 @@
         self.assertEqual(report['branch_nick'], 'mybranch')
         self.assertEqual(report['revno'], '45')
 
+    def test_read_duration_as_string(self):
+        """Test ErrorReport.read()."""
+        fp = StringIO.StringIO(dedent("""\
+            Oops-Id: OOPS-A0001
+            Duration: foo/bar
+
+            """))
+        report = read(fp)
+        self.assertEqual(report['duration'], -1)
+
     def test_read_reporter(self):
         """Test ErrorReport.read()."""
         fp = StringIO.StringIO(dedent("""\