launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #27260
[Merge] ~cjwatson/launchpad:more-talisker-logging into launchpad:master
Colin Watson has proposed merging ~cjwatson/launchpad:more-talisker-logging into launchpad:master.
Commit message:
Add several more details to the Talisker logging context
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/+git/launchpad/+merge/405551
This adds:
* host
* nonpython_actions
* pageid
* publication_duration_ms
* publication_thread_duration_ms
* scheme
* traversal_duration_ms
* traversal_thread_duration_ms
* userid
Most of these are used in one way or another by the page performance report. I chose to log the durations in milliseconds to match Talisker's built-in "duration_ms" entry.
--
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/launchpad:more-talisker-logging into launchpad:master.
diff --git a/lib/lp/services/webapp/adapter.py b/lib/lp/services/webapp/adapter.py
index cbb3bde..9494141 100644
--- a/lib/lp/services/webapp/adapter.py
+++ b/lib/lp/services/webapp/adapter.py
@@ -1,4 +1,4 @@
-# Copyright 2009-2018 Canonical Ltd. This software is licensed under the
+# Copyright 2009-2021 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
__metaclass__ = type
@@ -35,6 +35,7 @@ from storm.databases.postgres import (
from storm.exceptions import TimeoutError
from storm.store import Store
from storm.tracer import install_tracer
+from talisker.logs import logging_context
from timeline.timeline import Timeline
import transaction
from zope.component import getUtility
@@ -266,8 +267,11 @@ def store_sql_statements_and_request_duration(event):
actions = get_request_timeline(get_current_browser_request()).actions
event.request.setInWSGIEnvironment(
'launchpad.nonpythonactions', len(actions))
+ logging_context.push(nonpython_actions=len(actions))
event.request.setInWSGIEnvironment(
'launchpad.requestduration', get_request_duration())
+ # Talisker already tracks the request duration itself, so there's no
+ # need to push that onto the logging context here.
def get_request_statements():
diff --git a/lib/lp/services/webapp/doc/webapp-publication.txt b/lib/lp/services/webapp/doc/webapp-publication.txt
index a4b4e41..13c2b25 100644
--- a/lib/lp/services/webapp/doc/webapp-publication.txt
+++ b/lib/lp/services/webapp/doc/webapp-publication.txt
@@ -589,10 +589,14 @@ Page ID
-------
Our publication implementation sets a WSGI variable 'launchpad.pageid'.
-This is an identifier of the form ContextName:ViewName.
+This is an identifier of the form ContextName:ViewName. We also set the
+'pageid' key in the Talisker logging context.
+ >>> from talisker.context import Context
+ >>> from talisker.logs import logging_context
>>> from lp.services.webapp.interfaces import (
... IPlacelessAuthUtility)
+ >>> _ = Context.new()
>>> auth_utility = getUtility(IPlacelessAuthUtility)
>>> request, publication = get_request_and_publication()
>>> request.setPrincipal(auth_utility.unauthenticatedPrincipal())
@@ -601,6 +605,8 @@ Originally, this variable isn't set.
>>> 'launchpad.pageid' in request._orig_env
False
+ >>> 'pageid' in logging_context
+ False
>>> logout()
It is set during the afterTraversal() hook. The pageid is made of the
@@ -624,6 +630,8 @@ name of the context class and the view class name.
>>> publication.afterTraversal(request, view)
>>> print(request._orig_env['launchpad.pageid'])
TestContext:TestView
+ >>> print(logging_context['pageid'])
+ TestContext:TestView
>>> from lp.services.webapp.adapter import (
... clear_request_started, set_request_started)
>>> clear_request_started()
@@ -637,6 +645,7 @@ durations for the traversal and object publication processes in WSGI
variables which aren't set originally.
>>> import time
+ >>> _ = Context.new()
>>> request, publication = get_request_and_publication()
>>> request.setPrincipal(auth_utility.unauthenticatedPrincipal())
>>> logout()
@@ -649,17 +658,28 @@ current thread.
>>> 'launchpad.traversalduration' in request._orig_env
False
+ >>> 'traversal_duration_ms' in logging_context
+ False
>>> 'launchpad.traversalthreadduration' in request._orig_env
False
+ >>> 'traversal_thread_duration_ms' in logging_context
+ False
>>> publication.beforeTraversal(request)
>>> publication.afterTraversal(request, None)
>>> 'launchpad.traversalduration' in request._orig_env
True
+ >>> 'traversal_duration_ms' in logging_context
+ True
>>> if hasattr(time, 'CLOCK_THREAD_CPUTIME_ID'):
... 'launchpad.traversalthreadduration' in request._orig_env
... else:
... True
True
+ >>> if hasattr(time, 'CLOCK_THREAD_CPUTIME_ID'):
+ ... 'traversal_thread_duration_ms' in logging_context
+ ... else:
+ ... True
+ True
For publication we start counting the duration during the callObject()
hook and stop the count in afterCall(). The duration is then available as
@@ -669,19 +689,30 @@ current thread.
>>> 'launchpad.publicationduration' in request._orig_env
False
+ >>> 'publication_duration_ms' in logging_context
+ False
>>> 'launchpad.publicationthreadduration' in request._orig_env
False
+ >>> 'publication_thread_duration_ms' in logging_context
+ False
>>> print(publication.callObject(
... request, TestView(TestContext(), request)))
Result
>>> publication.afterCall(request, None)
>>> 'launchpad.publicationduration' in request._orig_env
True
+ >>> 'publication_duration_ms' in logging_context
+ True
>>> if hasattr(time, 'CLOCK_THREAD_CPUTIME_ID'):
... 'launchpad.publicationthreadduration' in request._orig_env
... else:
... True
True
+ >>> if hasattr(time, 'CLOCK_THREAD_CPUTIME_ID'):
+ ... 'publication_thread_duration_ms' in logging_context
+ ... else:
+ ... True
+ True
>>> publication.endRequest(request, None)
If an exception is raised during traversal or object publication, we'll
@@ -694,6 +725,7 @@ If the exception is raised before we even start the traversal, then
there's nothing to store.
>>> logger.setLevel(logging.CRITICAL)
+ >>> _ = Context.new()
>>> request, publication = get_request_and_publication()
>>> request.setPrincipal(auth_utility.unauthenticatedPrincipal())
>>> import sys
@@ -706,12 +738,20 @@ there's nothing to store.
... None, request, exc_info, retry_allowed=False)
>>> 'launchpad.traversalduration' in request._orig_env
False
+ >>> 'traversal_duration_ms' in logging_context
+ False
>>> 'launchpad.traversalthreadduration' in request._orig_env
False
+ >>> 'traversal_thread_duration_ms' in logging_context
+ False
>>> 'launchpad.publicationduration' in request._orig_env
False
+ >>> 'publication_duration_ms' in logging_context
+ False
>>> 'launchpad.publicationthreadduration' in request._orig_env
False
+ >>> 'publication_thread_duration_ms' in logging_context
+ False
>>> clear_request_started()
If we started the traversal, but haven't finished it, we'll only have
@@ -722,15 +762,26 @@ the duration for the traversal and not for the publication.
... None, request, exc_info, retry_allowed=False)
>>> 'launchpad.traversalduration' in request._orig_env
True
+ >>> 'traversal_duration_ms' in logging_context
+ True
>>> if hasattr(time, 'CLOCK_THREAD_CPUTIME_ID'):
... 'launchpad.traversalthreadduration' in request._orig_env
... else:
... True
True
+ >>> if hasattr(time, 'CLOCK_THREAD_CPUTIME_ID'):
+ ... 'traversal_thread_duration_ms' in logging_context
+ ... else:
+ ... True
+ True
>>> 'launchpad.publicationduration' in request._orig_env
False
+ >>> 'publication_duration_ms' in logging_context
+ False
>>> 'launchpad.publicationthreadduration' in request._orig_env
False
+ >>> 'publication_thread_duration_ms' in logging_context
+ False
>>> clear_request_started()
If we started the publication (which means the traversal has been
@@ -746,32 +797,52 @@ the publication, up to the point where it was forcefully stopped.
... None, request, exc_info, retry_allowed=False)
>>> 'launchpad.traversalduration' in request._orig_env
True
+ >>> 'traversal_duration_ms' in logging_context
+ True
>>> if hasattr(time, 'CLOCK_THREAD_CPUTIME_ID'):
... 'launchpad.traversalthreadduration' in request._orig_env
... else:
... True
True
+ >>> if hasattr(time, 'CLOCK_THREAD_CPUTIME_ID'):
+ ... 'traversal_thread_duration_ms' in logging_context
+ ... else:
+ ... True
+ True
>>> 'launchpad.publicationduration' in request._orig_env
True
+ >>> 'publication_duration_ms' in logging_context
+ True
>>> if hasattr(time, 'CLOCK_THREAD_CPUTIME_ID'):
... 'launchpad.publicationthreadduration' in request._orig_env
... else:
... True
True
+ >>> if hasattr(time, 'CLOCK_THREAD_CPUTIME_ID'):
+ ... 'publication_thread_duration_ms' in logging_context
+ ... else:
+ ... True
+ True
>>> publication.endRequest(request, None)
>>> logger.setLevel(old_level)
When a Retry or DisconnectionError exception is raised and the request
supports retry, it will be retried with a copy of the WSGI environment.
If that happens, though, we'll remove the
-{publication,traversal}{,thread}duration variables from there.
+{publication,traversal}{,thread}duration variables from there, and unwind
+the Talisker logging context.
+ >>> _ = Context.new()
>>> request, publication = get_request_and_publication()
+ >>> publication.initializeLoggingContext(request)
>>> request.setPrincipal(auth_utility.unauthenticatedPrincipal())
>>> request._orig_env['launchpad.traversalduration'] = 0.5
>>> request._orig_env['launchpad.traversalthreadduration'] = 0.4
>>> request._orig_env['launchpad.publicationduration'] = 0.5
>>> request._orig_env['launchpad.publicationthreadduration'] = 0.4
+ >>> _ = logging_context.push(
+ ... traversal_duration_ms=500, traversal_thread_duration_ms=400,
+ ... publication_duration_ms=500, publication_thread_duration_ms=400)
>>> request.supportsRetry()
True
>>> from zope.publisher.interfaces import Retry
@@ -788,19 +859,32 @@ If that happens, though, we'll remove the
>>> 'launchpad.publicationduration' in request._orig_env
False
+ >>> 'publication_duration_ms' in logging_context
+ False
>>> 'launchpad.publicationthreadduration' in request._orig_env
False
+ >>> 'publication_thread_duration_ms' in logging_context
+ False
>>> 'launchpad.traversalduration' in request._orig_env
False
+ >>> 'traversal_duration_ms' in logging_context
+ False
>>> 'launchpad.traversalthreadduration' in request._orig_env
False
+ >>> 'traversal_thread_duration_ms' in logging_context
+ False
+ >>> _ = Context.new()
>>> request, publication = get_request_and_publication()
+ >>> publication.initializeLoggingContext(request)
>>> request.setPrincipal(auth_utility.unauthenticatedPrincipal())
>>> request._orig_env['launchpad.traversalduration'] = 0.5
>>> request._orig_env['launchpad.traversalthreadduration'] = 0.4
>>> request._orig_env['launchpad.publicationduration'] = 0.5
>>> request._orig_env['launchpad.publicationthreadduration'] = 0.4
+ >>> _ = logging_context.push(
+ ... traversal_duration_ms=500, traversal_thread_duration_ms=400,
+ ... publication_duration_ms=500, publication_thread_duration_ms=400)
>>> request.supportsRetry()
True
>>> from storm.exceptions import DisconnectionError
@@ -817,23 +901,36 @@ If that happens, though, we'll remove the
>>> 'launchpad.publicationduration' in request._orig_env
False
+ >>> 'publication_duration_ms' in logging_context
+ False
+ >>> 'publication_thread_duration_ms' in logging_context
+ False
>>> 'launchpad.publicationthreadduration' in request._orig_env
False
>>> 'launchpad.traversalduration' in request._orig_env
False
+ >>> 'traversal_duration_ms' in logging_context
+ False
>>> 'launchpad.traversalthreadduration' in request._orig_env
False
+ >>> 'traversal_thread_duration_ms' in logging_context
+ False
Of course, any request can only be retried a certain number of times and
when we reach that number of retries we don't pop the durations from the
WSGI env.
+ >>> _ = Context.new()
>>> request, publication = get_request_and_publication()
+ >>> publication.initializeLoggingContext(request)
>>> request.setPrincipal(auth_utility.unauthenticatedPrincipal())
>>> request._orig_env['launchpad.traversalduration'] = 0.5
>>> request._orig_env['launchpad.traversalthreadduration'] = 0.4
>>> request._orig_env['launchpad.publicationduration'] = 0.5
>>> request._orig_env['launchpad.publicationthreadduration'] = 0.4
+ >>> _ = logging_context.push(
+ ... traversal_duration_ms=500, traversal_thread_duration_ms=400,
+ ... publication_duration_ms=500, publication_thread_duration_ms=400)
>>> request.supportsRetry = lambda: False
>>> request.supportsRetry()
False
@@ -850,12 +947,20 @@ WSGI env.
>>> request._orig_env['launchpad.publicationduration']
0.5
+ >>> logging_context['publication_duration_ms']
+ 500
>>> request._orig_env['launchpad.publicationthreadduration']
0.4
+ >>> logging_context['publication_thread_duration_ms']
+ 400
>>> request._orig_env['launchpad.traversalduration']
0.5
+ >>> logging_context['traversal_duration_ms']
+ 500
>>> request._orig_env['launchpad.traversalthreadduration']
0.4
+ >>> logging_context['traversal_thread_duration_ms']
+ 400
(A bit of cleanup so the test can continue)
diff --git a/lib/lp/services/webapp/publication.py b/lib/lp/services/webapp/publication.py
index 85a8ab3..e316f91 100644
--- a/lib/lp/services/webapp/publication.py
+++ b/lib/lp/services/webapp/publication.py
@@ -28,6 +28,7 @@ from storm.exceptions import (
TimeoutError,
)
from storm.zope.interfaces import IZStorm
+from talisker.logs import logging_context
import transaction
from zc.zservertracelog.interfaces import ITraceLog
import zope.app.publication.browser
@@ -242,6 +243,12 @@ class LaunchpadBrowserPublication(
else:
return getUtility(self.root_object_interface)
+ def initializeLoggingContext(self, request):
+ # Remember the Talisker logging context stack level, so that we can
+ # unwind to it on retry.
+ request._initial_logging_context_level = logging_context.push()
+ logging_context.pop()
+
# The below overrides to zopepublication (callTraversalHooks,
# afterTraversal, and _maybePlacefullyAuthenticate) make the
# assumption that there will never be a ZODB "local"
@@ -266,6 +273,7 @@ class LaunchpadBrowserPublication(
request_txt += 'Unable to render traceback!'
threadrequestfile.write(request_txt.encode('UTF-8'))
threadrequestfile.close()
+ self.initializeLoggingContext(request)
# Tell our custom database adapter that the request has started.
da.set_request_started()
@@ -435,6 +443,7 @@ class LaunchpadBrowserPublication(
request.setInWSGIEnvironment(
'launchpad.userid', request.principal.id)
+ logging_context.push(userid=request.principal.id)
# pageid is calculated at `afterTraversal`, but can be missing
# if callObject is used directly, so either use the one we've got
@@ -493,10 +502,15 @@ class LaunchpadBrowserPublication(
publication_thread_duration = None
request.setInWSGIEnvironment(
'launchpad.publicationduration', publication_duration)
+ logging_context.push(
+ publication_duration_ms=round(publication_duration * 1000, 3))
if publication_thread_duration is not None:
request.setInWSGIEnvironment(
'launchpad.publicationthreadduration',
publication_thread_duration)
+ logging_context.push(
+ publication_thread_duration_ms=round(
+ publication_thread_duration * 1000, 3))
# Update statsd, timing is in milliseconds
getUtility(IStatsdClient).timing(
'publication_duration', publication_duration * 1000,
@@ -584,6 +598,7 @@ class LaunchpadBrowserPublication(
context = removeSecurityProxy(getattr(view, 'context', None))
pageid = self.constructPageID(view, context)
request.setInWSGIEnvironment('launchpad.pageid', pageid)
+ logging_context.push(pageid=pageid)
return pageid
def _prepPageIDForMetrics(self, pageid):
@@ -610,11 +625,16 @@ class LaunchpadBrowserPublication(
traversal_duration = time.time() - request._traversal_start
request.setInWSGIEnvironment(
'launchpad.traversalduration', traversal_duration)
+ logging_context.push(
+ traversal_duration_ms=round(traversal_duration * 1000, 3))
if request._traversal_thread_start is not None:
traversal_thread_duration = (
_get_thread_time() - request._traversal_thread_start)
request.setInWSGIEnvironment(
'launchpad.traversalthreadduration', traversal_thread_duration)
+ logging_context.push(
+ traversal_thread_duration_ms=round(
+ traversal_thread_duration * 1000, 3))
# Update statsd, timing is in milliseconds
getUtility(IStatsdClient).timing(
'traversal_duration', traversal_duration * 1000,
@@ -649,12 +669,17 @@ class LaunchpadBrowserPublication(
publication_duration = now - request._publication_start
request.setInWSGIEnvironment(
'launchpad.publicationduration', publication_duration)
+ logging_context.push(
+ publication_duration_ms=round(publication_duration * 1000, 3))
if thread_now is not None:
publication_thread_duration = (
thread_now - request._publication_thread_start)
request.setInWSGIEnvironment(
'launchpad.publicationthreadduration',
publication_thread_duration)
+ logging_context.push(
+ publication_thread_duration_ms=round(
+ publication_thread_duration * 1000, 3))
# Update statsd, timing is in milliseconds
getUtility(IStatsdClient).timing(
'publication_duration', publication_duration * 1000,
@@ -669,12 +694,17 @@ class LaunchpadBrowserPublication(
traversal_duration = now - request._traversal_start
request.setInWSGIEnvironment(
'launchpad.traversalduration', traversal_duration)
+ logging_context.push(
+ traversal_duration_ms=round(traversal_duration * 1000, 3))
if thread_now is not None:
traversal_thread_duration = (
thread_now - request._traversal_thread_start)
request.setInWSGIEnvironment(
'launchpad.traversalthreadduration',
traversal_thread_duration)
+ logging_context.push(
+ traversal_thread_duration_ms=round(
+ traversal_thread_duration * 1000, 3))
# Update statsd, timing is in milliseconds
getUtility(IStatsdClient).timing(
'traversal_duration', traversal_duration * 1000,
@@ -747,6 +777,12 @@ class LaunchpadBrowserPublication(
orig_env.pop('launchpad.traversalthreadduration', None)
orig_env.pop('launchpad.publicationduration', None)
orig_env.pop('launchpad.publicationthreadduration', None)
+ # If we made it as far as beforeTraversal, then unwind the
+ # Talisker logging context to its state on entering that
+ # method.
+ if hasattr(request, '_initial_logging_context_level'):
+ logging_context.unwind(
+ request._initial_logging_context_level)
# Our endRequest needs to know if a retry is pending or not.
request._wants_retry = True
# Abort any in-progress transaction and reset any
diff --git a/lib/lp/services/webapp/servers.py b/lib/lp/services/webapp/servers.py
index 57de022..1e040b3 100644
--- a/lib/lp/services/webapp/servers.py
+++ b/lib/lp/services/webapp/servers.py
@@ -23,6 +23,7 @@ from lazr.uri import URI
import six
from six.moves import xmlrpc_client
from six.moves.urllib.parse import parse_qs
+from talisker.logs import logging_context
import transaction
from transaction.interfaces import ISynchronizer
from zc.zservertracelog.tracelog import Server as ZServerTracelogServer
@@ -627,6 +628,19 @@ class BasicLaunchpadRequest(LaunchpadBrowserRequestMixin):
# Publish revision information.
self.response.setHeader('X-Launchpad-Revision', versioninfo.revision)
+ # Talisker doesn't normally bother logging the Host: header, but
+ # since we have a number of different virtual hosts it's useful to
+ # have it do so. Log the scheme as well so that log parsers can
+ # reconstruct the full URL.
+ extra = {}
+ if 'HTTP_HOST' in environ:
+ extra['host'] = environ['HTTP_HOST']
+ if environ.get('HTTPS', '').lower() == 'on':
+ extra['scheme'] = 'https'
+ else:
+ extra['scheme'] = 'http'
+ logging_context.push(**extra)
+
@property
def stepstogo(self):
return StepsToGo(self)
diff --git a/lib/lp/services/webapp/tests/test_servers.py b/lib/lp/services/webapp/tests/test_servers.py
index adfd1a6..e224bab 100644
--- a/lib/lp/services/webapp/tests/test_servers.py
+++ b/lib/lp/services/webapp/tests/test_servers.py
@@ -1,4 +1,4 @@
-# Copyright 2009-2019 Canonical Ltd. This software is licensed under the
+# Copyright 2009-2021 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
__metaclass__ = type
@@ -21,6 +21,8 @@ from lazr.restful.testing.webservice import (
IGenericEntry,
WebServiceTestCase,
)
+from talisker.context import Context
+from talisker.logs import logging_context
from zope.component import (
getGlobalSiteManager,
getUtility,
@@ -414,6 +416,27 @@ class TestBasicLaunchpadRequest(TestCase):
request = LaunchpadBrowserRequest(io.BytesIO(b''), env)
self.assertEqual(u'fnord/trunk\xE4', request.getHeader('PATH_INFO'))
+ def test_baserequest_logging_context_no_host_header(self):
+ Context.new()
+ LaunchpadBrowserRequest(io.BytesIO(b''), {})
+ self.assertNotIn('host', logging_context)
+
+ def test_baserequest_logging_context_host_header(self):
+ Context.new()
+ env = {'HTTP_HOST': 'launchpad.test'}
+ LaunchpadBrowserRequest(io.BytesIO(b''), env)
+ self.assertEqual('launchpad.test', logging_context['host'])
+
+ def test_baserequest_logging_context_https(self):
+ Context.new()
+ LaunchpadBrowserRequest(io.BytesIO(b''), {'HTTPS': 'on'})
+ self.assertEqual('https', logging_context['scheme'])
+
+ def test_baserequest_logging_context_http(self):
+ Context.new()
+ LaunchpadBrowserRequest(io.BytesIO(b''), {})
+ self.assertEqual('http', logging_context['scheme'])
+
def test_request_with_invalid_query_string_recovers(self):
# When the query string has invalid utf-8, it is decoded with
# replacement.
diff --git a/lib/lp/testing/layers.py b/lib/lp/testing/layers.py
index 3b72c73..b8842e5 100644
--- a/lib/lp/testing/layers.py
+++ b/lib/lp/testing/layers.py
@@ -85,6 +85,7 @@ from six.moves.urllib.parse import (
urlparse,
)
from six.moves.urllib.request import urlopen
+from talisker.context import Context
import transaction
from webob.request import environ_from_url as orig_environ_from_url
import wsgi_intercept
@@ -955,6 +956,22 @@ class LaunchpadLayer(LibrarianLayer, MemcachedLayer, RabbitMQLayer):
"DELETE FROM SessionData")
+class BasicTaliskerMiddleware:
+ """Middleware to set up a Talisker context.
+
+ The full `talisker.wsgi.TaliskerMiddleware` does a lot of things we
+ don't need in our tests, but it's useful to at least have a context so
+ that we can test logging behaviour.
+ """
+
+ def __init__(self, app):
+ self.app = app
+
+ def __call__(self, environ, start_response):
+ Context.new()
+ return self.app(environ, start_response)
+
+
class TransactionMiddleware:
"""Middleware to commit the current transaction before the test.
@@ -1022,6 +1039,7 @@ class _FunctionalBrowserLayer(zope.testbrowser.wsgi.Layer, ZCMLFileLayer):
RemoteAddrMiddleware,
SortHeadersMiddleware,
TransactionMiddleware,
+ BasicTaliskerMiddleware,
]
def setUp(self):