[Merge] lp:~stub/launchpad/db-stats into lp:launchpad
Stuart Bishop has proposed merging lp:~stub/launchpad/db-stats into lp:launchpad.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
Related bugs:
Bug #588673 in Launchpad itself: "update-database-stats.py explodes on production"
https://bugs.launchpad.net/launchpad/+bug/588673
Bug #590684 in Launchpad itself: "report-database-stats.py gives bogus cpu utilization"
https://bugs.launchpad.net/launchpad/+bug/590684
For more details, see:
https://code.launchpad.net/~stub/launchpad/db-stats/+merge/54666
Add bloat information to the database reports. It will appear in the daily and monthly reports only, since we collect this information just once a day.
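Because the disk utilization data is sampled only about once a day, the report widens its reporting window with a fuzz interval so the single daily sample still falls inside it (see get_where_clause() in the diff below). A minimal sketch of the idea; widen_window() is a hypothetical name, not part of this branch:

    # Sketch only: the branch builds the same shape of clause in
    # get_where_clause(options, fuzz=...).
    def widen_window(from_sql, until_sql,
                     fuzz_sql="CAST('6 hours' AS interval)"):
        # Samples arrive roughly, not exactly, 24 hours apart, so a
        # strict BETWEEN can miss the day's only bloat sample.
        return "date_created BETWEEN (%s - %s) AND (%s + %s)" % (
            from_sql, fuzz_sql, until_sql, fuzz_sql)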
The report now looks like this:
$ utilities/report-database-stats.py -U postgres -i '6 months' -b 0 --min-bloat=0
== Most Active Users ==
stub || 1.45% CPU
lpnet || 0.00% CPU
== Most Written Tables ==
databasediskutilization || 10.45 tuples/sec
databasetablestats || 1.39 tuples/sec
databasecpustats || 0.00 tuples/sec
wikiname || 0.00 tuples/sec
webserviceban || 0.00 tuples/sec
votecast || 0.00 tuples/sec
vote || 0.00 tuples/sec
usertouseremail || 0.00 tuples/sec
translator || 0.00 tuples/sec
translationtemplatesbuild || 0.00 tuples/sec
translationtemplateitem || 0.00 tuples/sec
translationrelicensingagreement || 0.00 tuples/sec
translationmessage || 0.00 tuples/sec
translationimportqueueentry || 0.00 tuples/sec
translationgroup || 0.00 tuples/sec
== Most Read Tables ==
databasetablestats || 1247.99 tuples/sec
databasediskutilization || 12.00 tuples/sec
translationmessage || 3.07 tuples/sec
potranslation || 2.94 tuples/sec
language || 2.63 tuples/sec
spokenin || 1.30 tuples/sec
country || 1.13 tuples/sec
teamparticipation || 0.96 tuples/sec
lp_teamparticipation || 0.96 tuples/sec
potmsgset || 0.94 tuples/sec
translationtemplateitem || 0.92 tuples/sec
pomsgid || 0.74 tuples/sec
teammembership || 0.48 tuples/sec
personsettings || 0.46 tuples/sec
person || 0.46 tuples/sec
== Most Bloated Tables ==
pg_index || 3% || 9530 bytes of 256 kB
pg_index || 3% || 9530 bytes of 256 kB
pg_index || 3% || 9530 bytes of 256 kB
pg_shdepend || 17% || 927 kB of 5232 kB
pg_shdepend || 17% || 927 kB of 5232 kB
pg_shdepend || 17% || 927 kB of 5232 kB
pg_attrdef || 3% || 8676 bytes of 248 kB
pg_attrdef || 3% || 8676 bytes of 248 kB
pg_attrdef || 3% || 8676 bytes of 248 kB
pg_conversion || 33% || 8124 bytes of 24 kB
pg_conversion || 33% || 8124 bytes of 24 kB
pg_conversion || 33% || 8124 bytes of 24 kB
specificationbug || 99% || 8120 bytes of 8192 bytes
specificationbug || 99% || 8120 bytes of 8192 bytes
specificationbug || 99% || 8120 bytes of 8192 bytes
== Most Bloated Indexes ==
pg_shdepend_reference_index || 85% || 8545 kB of 10032 kB
pg_shdepend_reference_index || 85% || 8545 kB of 10032 kB
pg_shdepend_reference_index || 85% || 8545 kB of 10032 kB
pg_attribute_relid_attnam_index || 16% || 84 kB of 504 kB
pg_attribute_relid_attnam_index || 16% || 84 kB of 504 kB
pg_attribute_relid_attnam_index || 16% || 84 kB of 504 kB
pg_toast_3852116_index || 49% || 8128 bytes of 16 kB
pg_ts_parser_oid_index || 49% || 8128 bytes of 16 kB
translationgroup__owner__idx || 49% || 8128 bytes of 16 kB
pg_ts_parser_oid_index || 49% || 8128 bytes of 16 kB
specificationbug_specification_idx || 49% || 8128 bytes of 16 kB
pg_ts_parser_oid_index || 49% || 8128 bytes of 16 kB
translationgroup__owner__idx || 49% || 8128 bytes of 16 kB
pg_toast_3851167_index || 49% || 8128 bytes of 16 kB
translationgroup__owner__idx || 49% || 8128 bytes of 16 kB
== Most Bloating Tables ==
databasetablestats || +3.49% || +152 kB
databasetablestats || +3.49% || +152 kB
databasetablestats || +3.49% || +152 kB
databasecpustats || +98.39% || +8192 bytes
databasecpustats || +98.39% || +8192 bytes
databasecpustats || +98.39% || +8192 bytes
== Most Bloating Indexes ==
databasetablestats_pkey || +22.31% || +48 kB
databasetablestats_pkey || +22.31% || +48 kB
databasetablestats_pkey || +22.31% || +48 kB
databasecpustats_pkey || +49.39% || +8192 bytes
databasecpustats_pkey || +49.39% || +8192 bytes
databasecpustats_pkey || +49.39% || +8192 bytes
--
https://code.launchpad.net/~stub/launchpad/db-stats/+merge/54666
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~stub/launchpad/db-stats into lp:launchpad.
=== modified file 'lib/canonical/database/sqlbase.py'
--- lib/canonical/database/sqlbase.py 2010-11-19 17:41:18 +0000
+++ lib/canonical/database/sqlbase.py 2011-03-24 09:42:31 +0000
@@ -185,7 +185,8 @@
         """
         from canonical.launchpad.interfaces.lpstorm import IMasterStore
         # Make it simple to write dumb-invalidators - initialised
-        # _cached_properties to a valid list rather than just-in-time creation.
+        # _cached_properties to a valid list rather than just-in-time
+        # creation.
         self._cached_properties = []
         store = IMasterStore(self.__class__)
@@ -643,7 +644,7 @@
     >>> print quoteIdentifier('\\"')
     "\"""
     '''
-    return '"%s"' % identifier.replace('"','""')
+    return '"%s"' % identifier.replace('"', '""')
 quoteIdentifier = quote_identifier # Backwards compatibility for now.
@@ -743,6 +744,7 @@
 def block_implicit_flushes(func):
     """A decorator that blocks implicit flushes on the main store."""
+
     def block_implicit_flushes_decorator(*args, **kwargs):
         from canonical.launchpad.webapp.interfaces import DisallowedStore
         try:
@@ -759,6 +761,7 @@
 def reset_store(func):
     """Function decorator that resets the main store."""
+
     def reset_store_decorator(*args, **kwargs):
         try:
             return func(*args, **kwargs)
@@ -852,6 +855,10 @@
     def rowcount(self):
         return self._result._raw_cursor.rowcount
+    @property
+    def description(self):
+        return self._result._raw_cursor.description
+
     def fetchone(self):
         assert self._result is not None, "No results to fetch"
         return self._result.get_one()
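The new description property on the cursor wrapper simply re-exports the underlying DB-API cursor's column metadata, which is what the namedrow module below relies on. For illustration only (sqlite3 standing in for the real driver), cursor.description is a sequence of 7-item tuples whose first element is the column name:

    # Illustration, not part of the branch; sqlite3 is a stand-in.
    import sqlite3
    cur = sqlite3.connect(':memory:').cursor()
    cur.execute("SELECT 1 AS id, 'stub' AS name")
    print [d[0] for d in cur.description]  # prints ['id', 'name']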
=== added file 'lib/lp/services/database/namedrow.py'
--- lib/lp/services/database/namedrow.py 1970-01-01 00:00:00 +0000
+++ lib/lp/services/database/namedrow.py 2011-03-24 09:42:31 +0000
@@ -0,0 +1,17 @@
+# Copyright 2011 Canonical Ltd. This software is licensed under the
+# GNU Affero General Public License version 3 (see the file LICENSE).
+
+"""Convert the tuples returned by Cursor.fetchall() into namedtuples."""
+
+__metaclass__ = type
+__all__ = []
+
+from collections import namedtuple
+
+
+def named_fetchall(cur):
+    row_type = namedtuple(
+        'DatabaseRow',
+        (description[0] for description in cur.description))
+    for row in cur.fetchall():
+        yield row_type(*row)
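A usage sketch for named_fetchall, assuming the branch's lp tree is importable and again using sqlite3 as a stand-in for the real connection: each fetched tuple comes back as a namedtuple, so the report code can write row.name rather than row[1]:

    # Usage sketch; any DB-API cursor with a description works.
    import sqlite3
    from lp.services.database.namedrow import named_fetchall
    cur = sqlite3.connect(':memory:').cursor()
    cur.execute("SELECT 'pg_index' AS name, 262144 AS table_len")
    for row in named_fetchall(cur):
        print row.name, row.table_len  # prints: pg_index 262144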
=== modified file 'utilities/report-database-stats.py'
--- utilities/report-database-stats.py 2010-11-03 07:52:04 +0000
+++ utilities/report-database-stats.py 2011-03-24 09:42:31 +0000
@@ -10,18 +10,22 @@
 from datetime import datetime
 from operator import attrgetter
-from textwrap import dedent
+from textwrap import (
+    dedent,
+    fill,
+    )
 from canonical.database.sqlbase import connect, sqlvalues
 from canonical.launchpad.scripts import db_options
 from lp.scripts.helpers import LPOptionParser
+from lp.services.database.namedrow import named_fetchall
 class Table:
     pass
-def get_where_clause(options):
+def get_where_clause(options, fuzz='0 seconds'):
     "Generate a WHERE clause referencing the date_created column."
     # We have two of the from timestamp, the until timestamp and an
     # interval. The interval is in a format unsuitable for processing in
@@ -55,7 +59,9 @@
     else:
         until_sql = "CURRENT_TIMESTAMP AT TIME ZONE 'UTC'"
-    clause = "date_created BETWEEN (%s) AND (%s)" % (from_sql, until_sql)
+    fuzz_sql = "CAST(%s AS interval)" % sqlvalues(fuzz)
+    clause = "date_created BETWEEN (%s - %s) AND (%s + %s)" % (
+        from_sql, fuzz_sql, until_sql, fuzz_sql)
     return clause
@@ -152,6 +158,61 @@
     return cpu_stats
+def get_bloat_stats(cur, options, kind):
+    # Return information on bloated tables and indexes, as of the end of
+    # the requested time period.
+    params = {
+        # We only collect these statistics daily, so add some fuzz
+        # to ensure bloat information ends up on the daily reports;
+        # we cannot guarantee the disk utilization statistics occur
+        # exactly 24 hours apart.
+        'where': get_where_clause(options, fuzz='6 hours'),
+        'bloat': options.bloat,
+        'min_bloat': options.min_bloat,
+        'kind': kind,
+        }
+    query = dedent("""
+        SELECT * FROM (
+            SELECT
+                namespace,
+                name,
+                sub_namespace,
+                sub_name,
+                count(*) OVER t AS num_samples,
+                last_value(table_len) OVER t AS table_len,
+                pg_size_pretty(last_value(table_len) OVER t) AS table_size,
+                pg_size_pretty(last_value(dead_tuple_len + free_space) OVER t)
+                    AS bloat_size,
+                first_value(dead_tuple_percent + free_percent) OVER t
+                    AS start_bloat_percent,
+                last_value(dead_tuple_percent + free_percent) OVER t
+                    AS end_bloat_percent,
+                (last_value(dead_tuple_percent + free_percent) OVER t
+                    - first_value(dead_tuple_percent + free_percent) OVER t
+                    ) AS delta_bloat_percent,
+                (last_value(table_len) OVER t
+                    - first_value(table_len) OVER t) AS delta_bloat_len,
+                pg_size_pretty(
+                    last_value(table_len) OVER t
+                    - first_value(table_len) OVER t) AS delta_bloat_size
+            FROM DatabaseDiskUtilization
+            WHERE
+                %(where)s
+                AND kind = %%(kind)s
+            WINDOW t AS (
+                PARTITION BY sort ORDER BY date_created
+                ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
+            ) AS whatever
+        WHERE
+            table_len >= %(min_bloat)s
+            AND end_bloat_percent >= %(bloat)s
+        ORDER BY bloat_size DESC
+        """ % params)
+    cur.execute(query, params)
+    bloat_stats = named_fetchall(cur)
+    return list(bloat_stats)
+
+
 def main():
     parser = LPOptionParser()
     db_options(parser)
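The window in get_bloat_stats() partitions the samples per object (PARTITION BY sort) and spans the whole frame, so first_value()/last_value() compare the oldest and newest samples inside the reporting range; since nothing deduplicates the rows, each object comes back once per sample, which is presumably why each table appears repeatedly in the example output above. A pure-Python picture of the delta columns, with made-up numbers:

    # Pure-Python picture of the delta_* columns; numbers are made up.
    samples = [  # (date_created, table_len, bloat_percent), oldest first
        ('2011-03-22', 4350000, 1.10),
        ('2011-03-23', 4420000, 2.40),
        ('2011-03-24', 4502464, 3.49),
    ]
    first, last = samples[0], samples[-1]
    delta_bloat_percent = last[2] - first[2]  # +2.39
    delta_bloat_len = last[1] - first[1]      # +152464 bytes (~149 kB)
    num_samples = len(samples)                # 3, one output row each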
@@ -174,6 +235,14 @@
         "-n", "--limit", dest="limit", type=int,
         default=15, metavar="NUM",
         help="Display the top NUM items in each category.")
+    parser.add_option(
+        "-b", "--bloat", dest="bloat", type=float,
+        default=40, metavar="BLOAT",
+        help="Display tables and indexes bloated by more than BLOAT%.")
+    parser.add_option(
+        "--min-bloat", dest="min_bloat", type=int,
+        default=10000000, metavar="BLOAT",
+        help="Don't report tables bloated less than BLOAT bytes.")
     parser.set_defaults(dbuser="database_stats_report")
     options, args = parser.parse_args()
@@ -190,7 +259,27 @@
     arbitrary_table = tables[0]
     interval = arbitrary_table.date_end - arbitrary_table.date_start
     per_second = float(interval.days * 24 * 60 * 60 + interval.seconds)
-
+    if per_second == 0:
+        parser.error("Only one sample in that time range.")
+
+    user_cpu = get_cpu_stats(cur, options)
+    print "== Most Active Users =="
+    print
+    for cpu, username in sorted(user_cpu, reverse=True)[:options.limit]:
+        print "%40s || %10.2f%% CPU" % (username, float(cpu) / 10)
+
+    print
+    print "== Most Written Tables =="
+    print
+    tables_sort = [
+        'total_tup_written', 'n_tup_upd', 'n_tup_ins', 'n_tup_del', 'relname']
+    most_written_tables = sorted(
+        tables, key=attrgetter(*tables_sort), reverse=True)
+    for table in most_written_tables[:options.limit]:
+        print "%40s || %10.2f tuples/sec" % (
+            table.relname, table.total_tup_written / per_second)
+
+    print
     print "== Most Read Tables =="
     print
     # These match the pg_user_table_stats view. schemaname is the
@@ -203,24 +292,77 @@
     for table in most_read_tables[:options.limit]:
         print "%40s || %10.2f tuples/sec" % (
             table.relname, table.total_tup_read / per_second)
-    print
-
-    print "== Most Written Tables =="
-    print
-    tables_sort = [
-        'total_tup_written', 'n_tup_upd', 'n_tup_ins', 'n_tup_del', 'relname']
-    most_written_tables = sorted(
-        tables, key=attrgetter(*tables_sort), reverse=True)
-    for table in most_written_tables[:options.limit]:
-        print "%40s || %10.2f tuples/sec" % (
-            table.relname, table.total_tup_written / per_second)
-    print
-
-    user_cpu = get_cpu_stats(cur, options)
-    print "== Most Active Users =="
-    print
-    for cpu, username in sorted(user_cpu, reverse=True)[:options.limit]:
-        print "%40s || %10.2f%% CPU" % (username, float(cpu) / 10)
+
+    table_bloat_stats = get_bloat_stats(cur, options, 'r')
+
+    if not table_bloat_stats:
+        print
+        print "(There is no bloat information available in this time range.)"
+
+    else:
+        print
+        print "== Most Bloated Tables =="
+        print
+        for bloated_table in table_bloat_stats[:options.limit]:
+            print "%40s || %2d%% || %s of %s" % (
+                bloated_table.name,
+                bloated_table.end_bloat_percent,
+                bloated_table.bloat_size,
+                bloated_table.table_size)
+
+        index_bloat_stats = get_bloat_stats(cur, options, 'i')
+
+        print
+        print "== Most Bloated Indexes =="
+        print
+        for bloated_index in index_bloat_stats[:options.limit]:
+            print "%40s || %2d%% || %s of %s" % (
+                bloated_index.sub_name,
+                bloated_index.end_bloat_percent,
+                bloated_index.bloat_size,
+                bloated_index.table_size)
+
+        # Order bloat delta report by size of bloat increase.
+        # We might want to change this to percentage bloat increase.
+        bloating_sort_key = lambda x: x.delta_bloat_len
+
+        table_bloating_stats = sorted(
+            table_bloat_stats, key=bloating_sort_key, reverse=True)
+
+        if table_bloating_stats[0].num_samples <= 1:
+            print
+            print fill(dedent("""\
+                (There are not enough samples in this time range to display
+                bloat change statistics)
+                """))
+        else:
+            print
+            print "== Most Bloating Tables =="
+            print
+
+            for bloated_table in table_bloating_stats[:options.limit]:
+                # Bloat decreases are uninteresting, and would need to be in
+                # a separate table sorted in reverse anyway.
+                if bloated_table.delta_bloat_percent > 0:
+                    print "%40s || +%4.2f%% || +%s" % (
+                        bloated_table.name,
+                        bloated_table.delta_bloat_percent,
+                        bloated_table.delta_bloat_size)
+
+            index_bloating_stats = sorted(
+                index_bloat_stats, key=bloating_sort_key, reverse=True)
+
+            print
+            print "== Most Bloating Indexes =="
+            print
+            for bloated_index in index_bloating_stats[:options.limit]:
+                # Bloat decreases are uninteresting, and would need to be in
+                # a separate table sorted in reverse anyway.
+                if bloated_index.delta_bloat_percent > 0:
+                    print "%40s || +%4.2f%% || +%s" % (
+                        bloated_index.sub_name,
+                        bloated_index.delta_bloat_percent,
+                        bloated_index.delta_bloat_size)
 if __name__ == '__main__':
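For reference, the bloat figures in the report derive from the pgstattuple-style columns stored in DatabaseDiskUtilization (table_len, dead_tuple_len, free_space, and the matching percentages). Roughly, with made-up values chosen to mirror the pg_shdepend line in the example output:

    # How the report's bloat columns relate; values are made up.
    table_len = 5232 * 1024        # total bytes on disk
    dead_tuple_len = 700 * 1024    # bytes held by dead tuples
    free_space = 227 * 1024        # reusable free bytes
    bloat_size = dead_tuple_len + free_space         # ~927 kB
    bloat_percent = 100.0 * bloat_size / table_len   # ~17%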