[Merge] lp:~lifeless/python-oops-tools/bug-879309 into lp:python-oops-tools

 

Robert Collins has proposed merging lp:~lifeless/python-oops-tools/bug-879309 into lp:python-oops-tools.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  Bug #880641 in python-oops-tools: "pageid (topic) is set to url if missing"
  https://bugs.launchpad.net/python-oops-tools/+bug/880641

For more details, see:
https://code.launchpad.net/~lifeless/python-oops-tools/bug-879309/+merge/80175

This branch makes two key changes:
 - it stops forcing the pageid to be the url when no pageid is present. This will (eventually) permit dropping the custom code in dbsummaries that checks for identical values, and it also lets events without topics aggregate together (I think this is a good thing).

 - it treats unicode url inputs as garbage and defensively converts them to bytestring urls by utf8-encoding and url-escaping them (a short sketch follows below).

This won't help existing data, but as most reports work on new data, applying the fixes should let an existing oops-tools instance behave better from about a day after deployment.
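
To illustrate the url handling, here is a minimal sketch (not part of the
branch) of the conversion models.py now applies to unicode urls; the example
value is invented and Python 2 semantics are assumed:

    import urllib

    url = u'http://example.com/foo\u2019s asset'  # a unicode "ringer"
    if type(url) is unicode:
        # utf8 encode (an arbitrary but stable choice) to get a bytestring,
        # then url-escape so the stored value is a plain ascii url.
        url = urllib.quote(url.encode('utf8'))
    print url  # http%3A//example.com/foo%E2%80%99s%20asset
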
-- 
https://code.launchpad.net/~lifeless/python-oops-tools/bug-879309/+merge/80175
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~lifeless/python-oops-tools/bug-879309 into lp:python-oops-tools.
=== modified file 'src/oopstools/NEWS.txt'
--- src/oopstools/NEWS.txt	2011-10-18 14:18:51 +0000
+++ src/oopstools/NEWS.txt	2011-10-24 05:36:23 +0000
@@ -15,6 +15,14 @@
 * amqp2disk -v will print the received OOPS ids on the console, for
   entertainment and delight. (Robert Collins)
 
+* OOPS reports with unicode url values are now handled during oops loading: the
+  unicode string is utf8 encoded (an arbitrary choice) and url escaped.
+  (Robert Collins, #879309)
+
+* OOPS reports with no topic (formerly called pageid) will no longer use their
+  url instead. Rather, '' is used, and reports will show Unknown for empty
+  pageids. (Robert Collins, #880641)
+
 0.6
 ===
 

=== modified file 'src/oopstools/oops/dbsummaries.py'
--- src/oopstools/oops/dbsummaries.py	2011-10-13 20:18:51 +0000
+++ src/oopstools/oops/dbsummaries.py	2011-10-24 05:36:23 +0000
@@ -126,6 +126,8 @@
         for data in res:
             if data['url'].startswith(data['pageid']):
                 data['pageid'] = 'Unknown'
+            if data['pageid'] == '':
+                data['pageid'] = 'Unknown'
             data['escaped_url'] = _escape(data['url'])
             data['errors'] = data['errors'].split(',')
             data['errors'].sort()
@@ -418,6 +420,8 @@
             # might end up with multiple instances of the same top value
             # and pageid. Here we store all those OOPSes in a list but
             # self.render* methods display only the first one.
+            if not pageid:
+                pageid = 'Unknown'
             if self.top_errors and self.top_errors[-1][2] == pageid:
                 assert self.top_errors[-1][0] == value, 'programmer error'
                 self.top_errors[-1][1].append(oopsid)

=== modified file 'src/oopstools/oops/models.py'
--- src/oopstools/oops/models.py	2011-10-16 22:35:01 +0000
+++ src/oopstools/oops/models.py	2011-10-24 05:36:23 +0000
@@ -367,6 +367,10 @@
     if most_expensive_statement is not None:
         most_expensive_statement = conform(most_expensive_statement, 200)
     url = conform(oops.get('url') or '', MAX_URL_LEN)
+    if type(url) is unicode:
+        # We have gotten a ringer: URLs are bytestrings. Encode to UTF-8 to
+        # get a bytestring, then urllib.quote to get a url.
+        url = urllib.quote(url.encode('utf8'))
     informational = oops.get('informational', 'False').lower() == 'true'
     oops_date = oops.get('time')
     if oops_date is None:
@@ -375,8 +379,10 @@
         oopsid = oopsid,
         prefix = prefix,
         date = oops_date.replace(microsecond=0),
-        # Missing pageids are urls because that suits our queries.
-        pageid = oops.get('topic') or url,
+        # Missing topics are '': this will group all such reports together in
+        # some ways, but that's tolerable compared to something (near) unique
+        # like the url, which prevents aggregation.
+        pageid = oops.get('topic') or '',
         url = url,
         duration = duration,
         informational = informational,

=== modified file 'src/oopstools/oops/test/db-summary.txt'
--- src/oopstools/oops/test/db-summary.txt	2011-10-13 20:18:51 +0000
+++ src/oopstools/oops/test/db-summary.txt	2011-10-24 05:36:23 +0000
@@ -45,10 +45,10 @@
     >>> print get_section_contents("Top 10 Durations", webapp_summary)
     26.46s  OOPS-689S4      FooBar:+questions
     20.67s  OOPS-689S6      FooBar:+bug-text
-    0.28s  OOPS-1308X1     https://launchpad.dev/foobar
+    0.28s  OOPS-1308X1      Unknown
 
     >>> print get_section_contents("Top 10 Statement Counts", webapp_summary)
-    14  OOPS-1308X1     https://launchpad.dev/foobar
+    14  OOPS-1308X1     Unknown
     4 OOPS-689S6        FooBar:+bug-text
     3  OOPS-689S4       FooBar:+questions
 

=== modified file 'src/oopstools/oops/test/test_dboopsloader.py'
--- src/oopstools/oops/test/test_dboopsloader.py	2011-10-13 20:18:51 +0000
+++ src/oopstools/oops/test/test_dboopsloader.py	2011-10-24 05:36:23 +0000
@@ -19,6 +19,7 @@
     datetime,
     )
 import os
+import urllib
 
 from fixtures import TempDir
 from oops_datedir_repo.serializer_bson import write as write_bson
@@ -34,6 +35,7 @@
 from oopstools.oops.models import (
     DBOopsRootDirectory,
     Oops,
+    parsed_oops_to_model_oops,
     )
 
 
@@ -137,3 +139,18 @@
         loader = OopsLoader()
         list(loader.find_oopses(start_date))
         self.assertNotEqual(None, Oops.objects.get(oopsid='OOPS-123S202'))
+
+
+class TestParsedToModel(TestCase):
+
+    def test_url_handling(self):
+        unicode_url = u'http://example.com/foo\u2019s asset'
+        report = { 'url': unicode_url, 'id': 'testurlhandling'}
+        expected_url = urllib.quote(unicode_url.encode('utf8'))
+        oops = parsed_oops_to_model_oops(report, 'test_url_handling')
+        self.assertEqual(expected_url, oops.url)
+
+    def test_no_topic_pageid_empty_bug_880641(self):
+        report = { 'url': 'foo', 'id': 'testnotopichandling'}
+        oops = parsed_oops_to_model_oops(report, 'bug_880641')
+        self.assertEqual('', oops.pageid)