launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #25565
[Merge] ~twom/launchpad:stats-better-builder-stats-stacking into launchpad:master
Tom Wardill has proposed merging ~twom/launchpad:stats-better-builder-stats-stacking into launchpad:master.
Commit message:
Move to use statsd environment config variable
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~twom/launchpad/+git/launchpad/+merge/392892
The prefix setting in statsd doesn't let us do the final queries in quite the way we wanted, so add another setting for it and apply it as a label in all the metrics.
Also increase number cruncher logging level to DEBUG to see if we can work out why it appears to stop from the logs.
--
Your team Launchpad code reviewers is requested to review the proposed merge of ~twom/launchpad:stats-better-builder-stats-stacking into launchpad:master.
diff --git a/lib/lp/buildmaster/manager.py b/lib/lp/buildmaster/manager.py
index 7bbe4aa..e9732ea 100644
--- a/lib/lp/buildmaster/manager.py
+++ b/lib/lp/buildmaster/manager.py
@@ -532,10 +532,13 @@ class SlaveScanner:
if builder.current_build is not None:
builder.current_build.gotFailure()
self.statsd_client.incr(
- 'builders.judged_failed,build=True,arch={}'.format(
- builder.current_build.processor.name))
+ 'builders.judged_failed,build=True,arch={},env={}'.format(
+ builder.current_build.processor.name,
+ self.statsd_client.lp_environment))
else:
- self.statsd_client.incr('builders.judged_failed,build=False')
+ self.statsd_client.incr(
+ 'builders.judged_failed,build=False,env={}'.format(
+ self.statsd_client.lp_environment))
recover_failure(self.logger, vitals, builder, retry, failure.value)
transaction.commit()
except Exception:
diff --git a/lib/lp/buildmaster/model/buildfarmjobbehaviour.py b/lib/lp/buildmaster/model/buildfarmjobbehaviour.py
index edff53b..88ede6a 100644
--- a/lib/lp/buildmaster/model/buildfarmjobbehaviour.py
+++ b/lib/lp/buildmaster/model/buildfarmjobbehaviour.py
@@ -156,10 +156,12 @@ class BuildFarmJobBehaviourBase:
# Update stats
job_type = getattr(self.build, 'job_type', None)
job_type_name = job_type.name if job_type else 'UNKNOWN'
- getUtility(IStatsdClient).incr(
- 'build.count,job_type={},builder_name={}'.format(
+ statsd_client = getUtility(IStatsdClient)
+ statsd_client.incr(
+ 'build.count,job_type={},builder_name={},env={}'.format(
job_type_name,
- self._builder.name))
+ self._builder.name,
+ statsd_client.lp_environment))
logger.info(
"Job %s (%s) started on %s: %s %s"
diff --git a/lib/lp/buildmaster/tests/test_buildfarmjobbehaviour.py b/lib/lp/buildmaster/tests/test_buildfarmjobbehaviour.py
index 3bdf53d..06b0e3f 100644
--- a/lib/lp/buildmaster/tests/test_buildfarmjobbehaviour.py
+++ b/lib/lp/buildmaster/tests/test_buildfarmjobbehaviour.py
@@ -275,7 +275,8 @@ class TestDispatchBuildToSlave(StatsMixin, TestCase):
self.assertEqual(1, self.stats_client.incr.call_count)
self.assertEqual(
self.stats_client.incr.call_args_list[0][0],
- ('build.count,job_type=UNKNOWN,builder_name=mock-builder',))
+ ('build.count,job_type=UNKNOWN,'
+ 'builder_name=mock-builder,env=test',))
class TestGetUploadMethodsMixin:
diff --git a/lib/lp/services/config/schema-lazr.conf b/lib/lp/services/config/schema-lazr.conf
index c16270a..aaa9d30 100644
--- a/lib/lp/services/config/schema-lazr.conf
+++ b/lib/lp/services/config/schema-lazr.conf
@@ -2028,3 +2028,4 @@ concurrency: 1
host: none
port: none
prefix: none
+environment: none
diff --git a/lib/lp/services/statsd/model/statsd_client.py b/lib/lp/services/statsd/model/statsd_client.py
index 868af30..6ccf027 100644
--- a/lib/lp/services/statsd/model/statsd_client.py
+++ b/lib/lp/services/statsd/model/statsd_client.py
@@ -43,6 +43,12 @@ class StatsdClient:
self._make_client()
def __getattr__(self, name):
+ # This is a convenience to keep all statsd client related
+ # items on the client. Having this separate from prefix
+ # allows us to use it as a label, rather than as part of the
+ # gauge name
+ if name == 'lp_environment':
+ return config.statsd.environment
if self._client is not None:
return getattr(self._client, name)
else:
diff --git a/lib/lp/services/statsd/numbercruncher.py b/lib/lp/services/statsd/numbercruncher.py
index d2301d0..0a0552c 100644
--- a/lib/lp/services/statsd/numbercruncher.py
+++ b/lib/lp/services/statsd/numbercruncher.py
@@ -47,7 +47,7 @@ class NumberCruncher(service.Service):
def _setupLogger(self):
"""Set up a 'number-cruncher' logger that redirects to twisted.
"""
- level = logging.INFO
+ level = logging.DEBUG
logger = logging.getLogger(NUMBER_CRUNCHER_LOG_NAME)
logger.propagate = False
@@ -74,8 +74,8 @@ class NumberCruncher(service.Service):
for queue_type, contents in queue_details.items():
virt = queue_type == 'virt'
for arch, value in contents.items():
- gauge_name = "buildqueue,virtualized={},arch={}".format(
- virt, arch)
+ gauge_name = "buildqueue,virtualized={},arch={},env={}".format(
+ virt, arch, self.statsd_client.lp_environment)
self.logger.debug("{}: {}".format(gauge_name, value[0]))
self.statsd_client.gauge(gauge_name, value[0])
self.logger.debug("Build queue stats update complete.")
@@ -107,8 +107,8 @@ class NumberCruncher(service.Service):
counts['idle'] += 1
for processor, counts in counts_by_processor.items():
for count_name, count_value in counts.items():
- gauge_name = "builders.{},arch={}".format(
- count_name, processor)
+ gauge_name = "builders,status={},arch={},env={}".format(
+ count_name, processor, self.statsd_client.lp_environment)
self.logger.debug("{}: {}".format(gauge_name, count_value))
self.statsd_client.gauge(gauge_name, count_value)
self.logger.debug("Builder stats update complete.")
diff --git a/lib/lp/services/statsd/tests/__init__.py b/lib/lp/services/statsd/tests/__init__.py
index 20885a4..23fb067 100644
--- a/lib/lp/services/statsd/tests/__init__.py
+++ b/lib/lp/services/statsd/tests/__init__.py
@@ -19,5 +19,6 @@ class StatsMixin:
# Install a mock statsd client so we can assert against the call
# counts and args.
self.stats_client = mock.Mock()
+ self.stats_client.lp_environment = "test"
self.useFixture(
ZopeUtilityFixture(self.stats_client, IStatsdClient))
diff --git a/lib/lp/services/statsd/tests/test_numbercruncher.py b/lib/lp/services/statsd/tests/test_numbercruncher.py
index 47c633f..c5a4ff6 100644
--- a/lib/lp/services/statsd/tests/test_numbercruncher.py
+++ b/lib/lp/services/statsd/tests/test_numbercruncher.py
@@ -85,10 +85,18 @@ class TestNumberCruncher(StatsMixin, TestCaseWithFactory):
if 'amd64' in c[0][0]]
self.assertThat(
calls, MatchesListwise(
- [Equals(('builders.disabled,arch=amd64,virtualized=True', 0)),
- Equals(('builders.building,arch=amd64,virtualized=True', 0)),
- Equals(('builders.idle,arch=amd64,virtualized=True', 0)),
- Equals(('builders.cleaning,arch=amd64,virtualized=True', 1))
+ [Equals((
+ 'builders,status=disabled,arch=amd64,'
+ 'virtualized=True,env=test', 0)),
+ Equals((
+ 'builders,status=building,arch=amd64,'
+ 'virtualized=True,env=test', 0)),
+ Equals((
+ 'builders,status=idle,arch=amd64,'
+ 'virtualized=True,env=test', 0)),
+ Equals((
+ 'builders,status=cleaning,arch=amd64,'
+ 'virtualized=True,env=test', 1))
]))
def test_updateBuilderQueues(self):
@@ -109,9 +117,9 @@ class TestNumberCruncher(StatsMixin, TestCaseWithFactory):
self.assertThat(
[x[0] for x in self.stats_client.gauge.call_args_list],
MatchesListwise(
- [Equals(('buildqueue,virtualized=True,arch={}'.format(
+ [Equals(('buildqueue,virtualized=True,arch={},env=test'.format(
build.processor.name), 1)),
- Equals(('buildqueue,virtualized=False,arch=386', 1))
+ Equals(('buildqueue,virtualized=False,arch=386,env=test', 1))
]))
def test_startService_starts_update_queues_loop(self):
diff --git a/lib/lp/services/statsd/tests/test_statsd_client.py b/lib/lp/services/statsd/tests/test_statsd_client.py
index 6852af7..2e1a8e1 100644
--- a/lib/lp/services/statsd/tests/test_statsd_client.py
+++ b/lib/lp/services/statsd/tests/test_statsd_client.py
@@ -26,7 +26,8 @@ class TestClientConfiguration(TestCase):
self.addCleanup(client.reload)
config.push(
'statsd_test',
- "[statsd]\nhost: 127.0.01\nport: 9999\nprefix: test\n")
+ "[statsd]\nhost: 127.0.01\n"
+ "port: 9999\nprefix: test\nenvironment: test\n")
client.reload()
self.assertIsInstance(client._client, StatsClient)
@@ -45,6 +46,17 @@ class TestClientConfiguration(TestCase):
self.addCleanup(client.reload)
config.push(
'statsd_test',
- "[statsd]\nhost: 127.0.01\nport: 9999\nprefix: test\n")
+ "[statsd]\nhost: 127.0.01\n"
+ "port: 9999\nprefix: test\nenvironment: test\n")
client.reload()
self.assertIsInstance(client._client, StatsClient)
+
+ def test_lp_environment_is_available(self):
+ client = getUtility(IStatsdClient)
+ self.addCleanup(client.reload)
+ config.push(
+ 'statsd_test',
+ "[statsd]\nhost: 127.0.01\n"
+ "port: 9999\nprefix: test\nenvironment: test\n")
+ client.reload()
+ self.assertEqual(client.lp_environment, "test")
diff --git a/lib/lp/services/webapp/publication.py b/lib/lp/services/webapp/publication.py
index 7325d9f..9a9a0e3 100644
--- a/lib/lp/services/webapp/publication.py
+++ b/lib/lp/services/webapp/publication.py
@@ -795,13 +795,16 @@ class LaunchpadBrowserPublication(
status = request.response.getStatus()
if status == 404: # Not Found
OpStats.stats['404s'] += 1
- statsd_client.incr('errors.404')
+ statsd_client.incr('errors.404,env={}'.format(
+ statsd_client.lp_environment))
elif status == 500: # Unhandled exceptions
OpStats.stats['500s'] += 1
- statsd_client.incr('errors.500')
+ statsd_client.incr('errors.500,env={}'.format(
+ statsd_client.lp_environment))
elif status == 503: # Timeouts
OpStats.stats['503s'] += 1
- statsd_client.incr('errors.503')
+ statsd_client.incr('errors.503,env={}'.format(
+ statsd_client.lp_environment))
# Increment counters for status code groups.
status_group = str(status)[0] + 'XXs'
@@ -810,7 +813,8 @@ class LaunchpadBrowserPublication(
# Increment counter for 5XXs_b.
if is_browser(request) and status_group == '5XXs':
OpStats.stats['5XXs_b'] += 1
- statsd_client.incr('errors.5XX')
+ statsd_client.incr('errors.5XX,env={}'.format(
+ statsd_client.lp_environment))
# Make sure our databases are in a sane state for the next request.
thread_name = threading.current_thread().name