launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #05310
[Merge] lp:~lifeless/python-oops-tools/bug-879309 into lp:python-oops-tools
Robert Collins has proposed merging lp:~lifeless/python-oops-tools/bug-879309 into lp:python-oops-tools.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
Related bugs:
Bug #880641 in python-oops-tools: "pageid (topic) is set to url if missing"
https://bugs.launchpad.net/python-oops-tools/+bug/880641
For more details, see:
https://code.launchpad.net/~lifeless/python-oops-tools/bug-879309/+merge/80175
This branch makes two key changes:
- it stops forcing the pageid to be the url if there is no pageid. This will (eventually) permit dropping custom code checking for identical values in dbsummaries. It also permits events without topics to aggregate together (I think this is a good thing).
- it treats unicode url inputs as garbage and defensively converts them to urls.
This won't help existing data, but since most reports work on new data, applying the fixes should let an existing oops-tools instance behave better starting about a day later.
--
https://code.launchpad.net/~lifeless/python-oops-tools/bug-879309/+merge/80175
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~lifeless/python-oops-tools/bug-879309 into lp:python-oops-tools.
=== modified file 'src/oopstools/NEWS.txt'
--- src/oopstools/NEWS.txt 2011-10-18 14:18:51 +0000
+++ src/oopstools/NEWS.txt 2011-10-24 05:36:23 +0000
@@ -15,6 +15,14 @@
* amqp2disk -v will print the received OOPS ids on the console, for
entertainment and delight. (Robert Collins)
+* OOPS reports with unicode url values are now handled during oops loading: the
+ unicode string is utf8 encoded (an arbitrary choice) and url escaped.
+ (Robert Collins, #879309)
+
+* OOPS reports with no topic (formerly called pageid) will no longer use their
+ url instead. Rather '' is used, and reports will show Unknown for empty
+ pageids. (Robert Collins, #880641)
+
0.6
===
=== modified file 'src/oopstools/oops/dbsummaries.py'
--- src/oopstools/oops/dbsummaries.py 2011-10-13 20:18:51 +0000
+++ src/oopstools/oops/dbsummaries.py 2011-10-24 05:36:23 +0000
@@ -126,6 +126,8 @@
for data in res:
if data['url'].startswith(data['pageid']):
data['pageid'] = 'Unknown'
+ if data['pageid'] == '':
+ data['pageid'] = 'Unknown'
data['escaped_url'] = _escape(data['url'])
data['errors'] = data['errors'].split(',')
data['errors'].sort()
@@ -418,6 +420,8 @@
# might end up with multiple instances of the same top value
# and pageid. Here we store all those OOPSes in a list but
# self.render* methods display only the first one.
+ if not pageid:
+ pageid = 'Unknown'
if self.top_errors and self.top_errors[-1][2] == pageid:
assert self.top_errors[-1][0] == value, 'programmer error'
self.top_errors[-1][1].append(oopsid)
=== modified file 'src/oopstools/oops/models.py'
--- src/oopstools/oops/models.py 2011-10-16 22:35:01 +0000
+++ src/oopstools/oops/models.py 2011-10-24 05:36:23 +0000
@@ -367,6 +367,10 @@
if most_expensive_statement is not None:
most_expensive_statement = conform(most_expensive_statement, 200)
url = conform(oops.get('url') or '', MAX_URL_LEN)
+ if type(url) is unicode:
+        # We have gotten a ringer: URLs are bytestrings. Encode to UTF8 to get
+ # a bytestring and urllib.quote to get a url.
+ url = urllib.quote(url.encode('utf8'))
informational = oops.get('informational', 'False').lower() == 'true'
oops_date = oops.get('time')
if oops_date is None:
@@ -375,8 +379,10 @@
oopsid = oopsid,
prefix = prefix,
date = oops_date.replace(microsecond=0),
- # Missing pageids are urls because that suits our queries.
- pageid = oops.get('topic') or url,
+ # Missing topics are '': will group all such reports together in some
+            # ways, but that's tolerable vs something (near) unique like url: that
+ # prevents aggregation.
+ pageid = oops.get('topic') or '',
url = url,
duration = duration,
informational = informational,
=== modified file 'src/oopstools/oops/test/db-summary.txt'
--- src/oopstools/oops/test/db-summary.txt 2011-10-13 20:18:51 +0000
+++ src/oopstools/oops/test/db-summary.txt 2011-10-24 05:36:23 +0000
@@ -45,10 +45,10 @@
>>> print get_section_contents("Top 10 Durations", webapp_summary)
26.46s OOPS-689S4 FooBar:+questions
20.67s OOPS-689S6 FooBar:+bug-text
- 0.28s OOPS-1308X1 https://launchpad.dev/foobar
+ 0.28s OOPS-1308X1 Unknown
>>> print get_section_contents("Top 10 Statement Counts", webapp_summary)
- 14 OOPS-1308X1 https://launchpad.dev/foobar
+ 14 OOPS-1308X1 Unknown
4 OOPS-689S6 FooBar:+bug-text
3 OOPS-689S4 FooBar:+questions
=== modified file 'src/oopstools/oops/test/test_dboopsloader.py'
--- src/oopstools/oops/test/test_dboopsloader.py 2011-10-13 20:18:51 +0000
+++ src/oopstools/oops/test/test_dboopsloader.py 2011-10-24 05:36:23 +0000
@@ -19,6 +19,7 @@
datetime,
)
import os
+import urllib
from fixtures import TempDir
from oops_datedir_repo.serializer_bson import write as write_bson
@@ -34,6 +35,7 @@
from oopstools.oops.models import (
DBOopsRootDirectory,
Oops,
+ parsed_oops_to_model_oops,
)
@@ -137,3 +139,18 @@
loader = OopsLoader()
list(loader.find_oopses(start_date))
self.assertNotEqual(None, Oops.objects.get(oopsid='OOPS-123S202'))
+
+
+class TestParsedToModel(TestCase):
+
+ def test_url_handling(self):
+ unicode_url = u'http://example.com/foo\u2019s asset'
+ report = { 'url': unicode_url, 'id': 'testurlhandling'}
+ expected_url = urllib.quote(unicode_url.encode('utf8'))
+ oops = parsed_oops_to_model_oops(report, 'test_url_handling')
+ self.assertEqual(expected_url, oops.url)
+
+ def test_no_topic_pageid_empty_bug_880641(self):
+ report = { 'url': 'foo', 'id': 'testnotopichandling'}
+ oops = parsed_oops_to_model_oops(report, 'bug_880641')
+ self.assertEqual('', oops.pageid)