launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #27265
[Merge] lp:~cjwatson/lp-dev-utils/ppr-talisker into lp:lp-dev-utils
Colin Watson has proposed merging lp:~cjwatson/lp-dev-utils/ppr-talisker into lp:lp-dev-utils.
Commit message:
Add Talisker log format support to the PPR.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~cjwatson/lp-dev-utils/ppr-talisker/+merge/405633
--
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/lp-dev-utils/ppr-talisker into lp:lp-dev-utils.
=== modified file 'page-performance-report.ini'
--- page-performance-report.ini 2021-07-09 13:19:44 +0000
+++ page-performance-report.ini 2021-07-13 13:44:38 +0000
@@ -51,8 +51,8 @@
Translations - Front page=^https?://translations\.[^/]+/translations/\+index$
Translations - Overview=^https?://translations\..*/\+lang/\w+(/\+index)?$
-Public XML-RPC=^https://(launchpad|xmlrpc)[^/]+/bazaar/\w+$
-Private XML-RPC=^https://(launchpad|xmlrpc)[^/]+/
+Public XML-RPC=^https?://(launchpad|xmlrpc)[^/]+/bazaar/\w+$
+Private XML-RPC=^https?://(launchpad|xmlrpc)[^/]+/
(archive|authserver|bugs|codehosting|
codeimportscheduler|mailinglists|
featureflags|git)/\w+$
=== modified file 'pageperformancereport.py'
--- pageperformancereport.py 2021-07-09 13:15:54 +0000
+++ pageperformancereport.py 2021-07-13 13:44:38 +0000
@@ -23,6 +23,7 @@
import sre_constants
from textwrap import dedent
import time
+from urlparse import urlunsplit
logging.basicConfig()
@@ -136,6 +137,44 @@
return interval.seconds + interval.microseconds / 1000000.0
+class TaliskerRequest:
+
+ def __init__(self, dt, logfmt):
+ self.dt = dt
+ host = logfmt['host']
+ if logfmt['scheme'] == 'https' and host.endswith(':443'):
+ host = host[:-len(':443')]
+ self.url = urlunsplit(
+ (logfmt['scheme'], host, logfmt['path'], logfmt.get('qs', ''), ''))
+ self.interval = timedelta(milliseconds=float(logfmt['duration_ms']))
+ self.traversal_interval = (
+ timedelta(milliseconds=float(logfmt['traversal_duration_ms']))
+ if 'traversal_duration_ms' in logfmt else None)
+ self.publication_interval = (
+ timedelta(milliseconds=float(logfmt['publication_duration_ms']))
+ if 'publication_duration_ms' in logfmt else None)
+ self.pageid = logfmt.get('pageid')
+ self.sql_statements = (
+ int(logfmt['sql_statements'])
+ if 'sql_statements' in logfmt else None)
+ self.sql_seconds = (
+ float(logfmt['sql_ms']) / 1000 if 'sql_ms' in logfmt else None)
+
+ @property
+ def app_seconds(self):
+ if self.traversal_interval is not None:
+ interval = self.traversal_interval
+ if self.publication_interval is not None:
+ interval += self.publication_interval
+ else:
+ interval = self.interval
+ return interval.total_seconds()
+
+ @property
+ def total_seconds(self):
+ return self.interval.total_seconds()
+
+
class Category:
"""A Category in our report.
@@ -872,6 +911,8 @@
parse_launchpad(args, times, options)
elif log_format == 'access':
parse_access(args, times, options)
+ elif log_format == 'talisker':
+ parse_talisker(args, times, options)
else:
log.fatal("No parser available for log_format: %r" % log_format)
return 1
@@ -998,12 +1039,15 @@
_zserver_ts_re = re.compile(
- '^(\d{4})-(\d\d)-(\d\d)\s(\d\d):(\d\d):(\d\d)(?:.(\d{6}))?$')
+ r'^(\d{4})-(\d\d)-(\d\d)\s(\d\d):(\d\d):(\d\d)(?:.(\d{6}))?$')
_access_ts_re = re.compile(
- '^\[(\d{2})/([A-Za-z]+)/(\d{4}):(\d{2}):(\d{2}):(\d{2})[+-]\d{4}\]$')
-
-
+ r'^\[(\d{2})/([A-Za-z]+)/(\d{4}):(\d{2}):(\d{2}):(\d{2})[+-]\d{4}\]$')
+
+_talisker_ts_re = re.compile(
+ r'^(\d{4})-(\d\d)-(\d\d)\s(\d\d):(\d\d):(\d\d)\.(\d{3})Z$')
+
+
def parse_timestamp_zserverlog(ts_string):
match = _zserver_ts_re.search(ts_string)
if match is None:
@@ -1020,7 +1064,16 @@
month = time.strptime(alpha_month,'%b').tm_mon # get numeric month
return datetime(
*(int(elem) for elem in (year, month, day, hour, min, sec)))
-
+
+
+def parse_timestamp_taliskerlog(ts_string):
+ match = _talisker_ts_re.search(ts_string)
+ if match is None:
+ raise ValueError("Invalid timestamp")
+ year, month, day, hour, min, sec, millisec = match.groups()
+ return datetime(
+ int(year), int(month), int(day), int(hour), int(min), int(sec),
+ int(millisec) * 1000)
def parse_launchpad(tracefiles, times, options):
@@ -1180,8 +1233,75 @@
except MalformedLine as x:
log.error(
"Malformed line %s (%s)" % (repr(line), x))
-
-
+
+
+def unquote_logfmt_value(value):
+ if value.startswith('"') and value.endswith('"'):
+ value = value[1:-1]
+ return value.replace(r'\"', '"')
+
+
+def parse_talisker(tracefiles, times, options):
+ talisker_re = re.compile(
+ r'^(?P<timestamp>\d+-\d+-\d+ \d+:\d+:\d+\.\d+Z)'
+ r' (?P<levelname>\S+) (?P<module>[0-9a-zA-Z_]+(?:\.[0-9a-zA-Z_]+)*)'
+ r' "(?P<message>(?:[^"\\]|\\.)*(?:"|\.\.\.))'
+ r' ?(?P<logfmt>.*)')
+ logfmt_re = re.compile(
+ r'(?P<key>.+?)=(?P<value>"(?:[^"\\]|\\.)*?"|\S+)\s*')
+ requests = {}
+ total_requests = 0
+ for tracefile in tracefiles:
+ log.info('Processing %s', tracefile)
+ for line in smart_open(tracefile):
+ line = line.rstrip()
+ if not line or not line[0].isdigit():
+ # Log entries start with a digit. Other lines are things
+ # like tracebacks or warnings, so just ignore those.
+ continue
+
+ try:
+ match = talisker_re.match(line)
+ if not match:
+ raise MalformedLine()
+
+ try:
+ dt = parse_timestamp_taliskerlog(
+ match.group('timestamp'))
+ except ValueError:
+ raise MalformedLine(
+ 'Invalid timestamp %r' % match.group('timestamp'))
+
+ # Filter entries by command line date range.
+ if options.from_ts is not None and dt < options.from_ts:
+ continue # Skip to next line.
+ if options.until_ts is not None and dt > options.until_ts:
+ break # Skip to next log file.
+
+ # Parse logfmt.
+ logfmt = {
+ m.group('key'): unquote_logfmt_value(m.group('value'))
+ for m in logfmt_re.finditer(match.group('logfmt'))}
+ if not all(
+ key in logfmt for key in
+ ('request_id', 'method', 'scheme', 'host', 'path')):
+ # Some other log entry, not an HTTP request.
+ continue
+
+ url = urlunsplit(
+ (logfmt['scheme'], logfmt['host'], logfmt['path'],
+ logfmt.get('qs', ''), ''))
+ request = TaliskerRequest(dt, logfmt)
+ requests[logfmt['request_id']] = request
+ total_requests += 1
+ if total_requests % 10000 == 0:
+ log.debug("Parsed %d requests", total_requests)
+ # Add the request to any matching categories.
+ times.add_request(request)
+ except MalformedLine as x:
+ log.error("Malformed line %r (%s)" % (line, x))
+
+
def html_report(
outf, category_times, pageid_times, url_times,
ninetyninth_percentile_threshold=None, histogram_resolution=0.5,