launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #05431
[Merge] lp:~allenap/launchpad/participation-keyerror-bug-810114-3 into lp:launchpad
Gavin Panella has proposed merging lp:~allenap/launchpad/participation-keyerror-bug-810114-3 into lp:launchpad.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
Related bugs:
Bug #810114 in Launchpad itself: "KeyError in +participation page."
https://bugs.launchpad.net/launchpad/+bug/810114
For more details, see:
https://code.launchpad.net/~allenap/launchpad/participation-keyerror-bug-810114-3/+merge/81340
This branch adds load and save support for the team participation info
that check-teamparticipation.py deals with. This was originally so
that I could get hold of a data set to figure out why it was so slow.
I bundled my work and asked a LOSA to run it and pass me the data set,
which quickly helped me understand the problem: set.intersection(dict)
is slow. I assume it iterates through the dict instead of doing some
clever optimization. I've heard that sets and dicts share an
underlying implementation so I kind of assumed that they would work
really well together. Er, no.
So I fixed that problem and it seems to be fairly fast now.
I have left the load and save support in because it may come in handy
another day should we need to diagnose another team membership or
participation problem.
--
https://code.launchpad.net/~allenap/launchpad/participation-keyerror-bug-810114-3/+merge/81340
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~allenap/launchpad/participation-keyerror-bug-810114-3 into lp:launchpad.
=== modified file 'cronscripts/check-teamparticipation.py'
--- cronscripts/check-teamparticipation.py 2011-10-31 21:21:16 +0000
+++ cronscripts/check-teamparticipation.py 2011-11-04 22:19:25 +0000
@@ -1,6 +1,6 @@
#!/usr/bin/python -S
#
-# Copyright 2009 Canonical Ltd. This software is licensed under the
+# Copyright 2009-2011 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
# pylint: disable-msg=C0103,W0403
@@ -20,15 +20,47 @@
import _pythonpath
-from lp.registry.scripts.teamparticipation import check_teamparticipation
+from lp.registry.scripts.teamparticipation import (
+ check_teamparticipation_circular,
+ check_teamparticipation_consistency,
+ check_teamparticipation_self,
+ fetch_team_participation_info,
+ )
from lp.services.scripts.base import LaunchpadScript
+from lp.services.utils import (
+ load_bz2_pickle,
+ save_bz2_pickle,
+ )
class CheckTeamParticipationScript(LaunchpadScript):
description = "Check for invalid/missing TeamParticipation entries."
+ def add_my_options(self):
+ self.parser.add_option(
+ "--load-participation-info",
+ dest="load_info", metavar="FILE", help=(
+ "File from which to load participation information "
+ "instead of going to the database."))
+ self.parser.add_option(
+ "--save-participation-info",
+ dest="save_info", metavar="FILE", help=(
+ "File in which to save participation information, for "
+ "later processing with --load-participation-info."))
+
def main(self):
- check_teamparticipation(self.logger)
+ """Perform various checks on the `TeamParticipation` table."""
+ if self.options.load_info:
+ participation_info = load_bz2_pickle(self.options.load_info)
+ else:
+ check_teamparticipation_self(self.logger)
+ check_teamparticipation_circular(self.logger)
+ participation_info = fetch_team_participation_info(self.logger)
+ if self.options.save_info:
+ save_bz2_pickle(participation_info, self.options.save_info)
+ else:
+ check_teamparticipation_consistency(
+ self.logger, participation_info)
if __name__ == '__main__':
=== modified file 'lib/lp/registry/scripts/teamparticipation.py'
--- lib/lp/registry/scripts/teamparticipation.py 2011-11-03 14:27:06 +0000
+++ lib/lp/registry/scripts/teamparticipation.py 2011-11-04 22:19:25 +0000
@@ -5,7 +5,10 @@
__metaclass__ = type
__all__ = [
- "check_teamparticipation",
+ "check_teamparticipation_circular",
+ "check_teamparticipation_consistency",
+ "check_teamparticipation_self",
+ "fetch_team_participation_info",
]
from collections import (
@@ -84,6 +87,21 @@
"ConsistencyError", ("type", "team", "people"))
+def report_progress(log, interval, results, what):
+ """Iterate through `results`, reporting on progress.
+
+ :param log: A logger.
+ :param interval: How many results to report progress about.
+ :param results: An iterable of things.
+ :param what: A string descriping what the results are.
+ """
+ for num, result in izip(count(1), results):
+ if num % interval == 0:
+ log.debug("%d %s", num, what)
+ yield result
+ log.debug("%d %s", num, what)
+
+
def execute_long_query(store, log, interval, query):
"""Execute the given query, reporting as results are fetched.
@@ -91,21 +109,16 @@
the total number of rows fetched thus far.
"""
log.debug(query)
- for rows, result in izip(count(1), store.execute(query)):
- if rows % interval == 0:
- log.debug("%d rows", rows)
- yield result
-
-
-def check_teamparticipation_consistency(log):
- """Check for missing or spurious participations.
-
- For example, participations for people who are not members, or missing
- participations for people who are members.
- """
+ results = store.execute(query)
+ # Hackish; the default is 10 which seems fairly low.
+ results._raw_cursor.arraysize = interval
+ return report_progress(log, interval, results, "rows")
+
+
+def fetch_team_participation_info(log):
+ """Fetch people, teams, memberships and participations."""
slurp = partial(execute_long_query, get_store(), log, 10000)
- # Slurp everything in.
people = dict(
slurp(
"SELECT id, name FROM Person"
@@ -131,35 +144,54 @@
# Don't hold any locks.
transaction.commit()
+ return people, teams, team_memberships, team_participations
+
+
+def check_teamparticipation_consistency(log, info):
+ """Check for missing or spurious participations.
+
+ For example, participations for people who are not members, or missing
+ participations for people who are members.
+ """
+ people, teams, team_memberships, team_participations = info
+
+ # set.intersection() with a dict is slow.
+ people_set = frozenset(people)
+ teams_set = frozenset(teams)
+
def get_participants(team):
"""Recurse through membership records to get participants."""
- member_people = team_memberships[team].intersection(people)
+ member_people = team_memberships[team].intersection(people_set)
member_people.add(team) # Teams always participate in themselves.
- member_teams = team_memberships[team].intersection(teams)
+ member_teams = team_memberships[team].intersection(teams_set)
return member_people.union(
chain.from_iterable(imap(get_participants, member_teams)))
- def check_participants(expected, observed):
+ def check_participants(person, expected, observed):
spurious = observed - expected
missing = expected - observed
if len(spurious) > 0:
- yield ConsistencyError("spurious", team, sorted(spurious))
+ yield ConsistencyError("spurious", person, sorted(spurious))
if len(missing) > 0:
- yield ConsistencyError("missing", team, sorted(missing))
+ yield ConsistencyError("missing", person, sorted(missing))
errors = []
- for person in people:
+ log.debug("Checking consistency of %d people", len(people))
+ for person in report_progress(log, 50000, people, "people"):
participants_expected = set((person,))
participants_observed = team_participations[person]
errors.extend(
- check_participants(participants_expected, participants_observed))
+ check_participants(
+ person, participants_expected, participants_observed))
- for team in teams:
+ log.debug("Checking consistency of %d teams", len(teams))
+ for team in report_progress(log, 1000, teams, "teams"):
participants_expected = get_participants(team)
participants_observed = team_participations[team]
errors.extend(
- check_participants(participants_expected, participants_observed))
+ check_participants(
+ team, participants_expected, participants_observed))
def get_repr(id):
if id in people:
@@ -177,10 +209,3 @@
get_repr(error.team), error.type, people_repr)
return errors
-
-
-def check_teamparticipation(log):
- """Perform various checks on the `TeamParticipation` table."""
- check_teamparticipation_self(log)
- check_teamparticipation_circular(log)
- check_teamparticipation_consistency(log)
=== modified file 'lib/lp/registry/tests/test_teammembership.py'
--- lib/lp/registry/tests/test_teammembership.py 2011-11-03 11:58:09 +0000
+++ lib/lp/registry/tests/test_teammembership.py 2011-11-04 22:19:25 +0000
@@ -3,14 +3,18 @@
__metaclass__ = type
+import bz2
from datetime import (
datetime,
timedelta,
)
+import os
+import pickle
import re
import subprocess
from unittest import TestLoader
+from fixtures import TempDir
import pytz
from testtools.content import text_content
from testtools.matchers import Equals
@@ -53,9 +57,11 @@
TeamParticipation,
)
from lp.registry.scripts.teamparticipation import (
- check_teamparticipation,
+ check_teamparticipation_circular,
check_teamparticipation_consistency,
+ check_teamparticipation_self,
ConsistencyError,
+ fetch_team_participation_info,
)
from lp.services.log.logger import BufferLogger
from lp.testing import (
@@ -1054,22 +1060,23 @@
layer = DatabaseFunctionalLayer
- def _runScript(self, expected_returncode=0):
+ def _runScript(self, *args):
+ cmd = ["cronscripts/check-teamparticipation.py"]
+ cmd.extend(args)
process = subprocess.Popen(
- 'cronscripts/check-teamparticipation.py', shell=True,
- stdin=subprocess.PIPE, stdout=subprocess.PIPE,
+ cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
out, err = process.communicate()
if out != "":
self.addDetail("stdout", text_content(out))
if err != "":
self.addDetail("stderr", text_content(err))
- self.assertEqual(process.returncode, expected_returncode)
- return out, err
+ return process.poll(), out, err
def test_no_output_if_no_invalid_entries(self):
"""No output if there's no invalid teamparticipation entries."""
- out, err = self._runScript()
+ code, out, err = self._runScript()
+ self.assertEqual(0, code)
self.assertEqual(0, len(out))
self.assertEqual(0, len(err))
@@ -1111,7 +1118,8 @@
""" % sqlvalues(TeamMembershipStatus.APPROVED))
transaction.commit()
- out, err = self._runScript()
+ code, out, err = self._runScript()
+ self.assertEqual(0, code)
self.assertEqual(0, len(out))
self.failUnless(
re.search('missing TeamParticipation entries for zzzzz', err))
@@ -1149,7 +1157,8 @@
VALUES (9997, 9998);
""" % sqlvalues(approved=TeamMembershipStatus.APPROVED))
transaction.commit()
- out, err = self._runScript(expected_returncode=1)
+ code, out, err = self._runScript()
+ self.assertEqual(1, code)
self.assertEqual(0, len(out))
self.failUnless(re.search('Circular references found', err))
@@ -1173,13 +1182,37 @@
""" % sqlvalues(approved=TeamMembershipStatus.APPROVED))
transaction.commit()
logger = BufferLogger()
- errors = check_teamparticipation_consistency(logger)
- if logger.getLogBuffer() != "":
- self.addDetail("log", text_content(logger.getLogBuffer()))
+ self.addDetail("log", logger.content)
+ errors = check_teamparticipation_consistency(
+ logger, fetch_team_participation_info(logger))
self.assertEqual(
[ConsistencyError("spurious", 6969, [6970])],
errors)
+ def test_load_and_save_team_participation(self):
+ """The script can load and save participation info."""
+ logger = BufferLogger()
+ self.addDetail("log", logger.content)
+ info = fetch_team_participation_info(logger)
+ tempdir = self.useFixture(TempDir()).path
+ filename_in = os.path.join(tempdir, "info.in")
+ filename_out = os.path.join(tempdir, "info.out")
+ fout = bz2.BZ2File(filename_in, "w")
+ try:
+ pickle.dump(info, fout, pickle.HIGHEST_PROTOCOL)
+ finally:
+ fout.close()
+ code, out, err = self._runScript(
+ "--load-participation-info", filename_in,
+ "--save-participation-info", filename_out)
+ self.assertEqual(0, code)
+ fin = bz2.BZ2File(filename_out, "r")
+ try:
+ saved_info = pickle.load(fin)
+ finally:
+ fin.close()
+ self.assertEqual(info, saved_info)
+
class TestCheckTeamParticipationScriptPerformance(TestCaseWithFactory):
@@ -1201,9 +1234,13 @@
team.addMember(another_person, team.teamowner)
team = another_team
transaction.commit()
+ logger = BufferLogger()
+ self.addDetail("log", logger.content)
with StormStatementRecorder() as recorder:
- logger = BufferLogger()
- check_teamparticipation(logger)
+ check_teamparticipation_self(logger)
+ check_teamparticipation_circular(logger)
+ check_teamparticipation_consistency(
+ logger, fetch_team_participation_info(logger))
self.assertThat(recorder, HasQueryCount(Equals(6)))
=== modified file 'lib/lp/services/log/logger.py'
--- lib/lp/services/log/logger.py 2011-04-13 05:38:13 +0000
+++ lib/lp/services/log/logger.py 2011-11-04 22:19:25 +0000
@@ -18,6 +18,11 @@
import sys
import traceback
+from testtools.content import (
+ Content,
+ UTF8_TEXT,
+ )
+
from lp.services.log import loglevels
@@ -100,6 +105,8 @@
class FakeLogger:
"""Emulates a proper logger, just printing everything out the given file.
"""
+ # XXX: GavinPanella 2011-11-04 bug=886053: This is a test fixture not a
+ # service.
loglevel = loglevels.DEBUG
@@ -190,6 +197,8 @@
class DevNullLogger(FakeLogger):
"""A logger that drops all messages."""
+ # XXX: GavinPanella 2011-11-04 bug=886053: This is a test fixture not a
+ # service.
def message(self, *args, **kwargs):
"""Do absolutely nothing."""
@@ -197,6 +206,8 @@
class BufferLogger(FakeLogger):
"""A logger that logs to a StringIO object."""
+ # XXX: GavinPanella 2011-11-04 bug=886053: This is a test fixture not a
+ # service.
def __init__(self):
super(BufferLogger, self).__init__(StringIO())
@@ -214,3 +225,13 @@
messages = self.getLogBuffer()
self.clearLogBuffer()
return messages
+
+ @property
+ def content(self):
+ """Return a `testtools.content.Content` for this object's buffer.
+
+ Use with `testtools.TestCase.addDetail`, `fixtures.Fixture.addDetail`,
+ and anything else that understands details.
+ """
+ get_bytes = lambda: [self.getLogBuffer().encode("utf-8")]
+ return Content(UTF8_TEXT, get_bytes)
=== added file 'lib/lp/services/log/tests/test_logger.py'
--- lib/lp/services/log/tests/test_logger.py 1970-01-01 00:00:00 +0000
+++ lib/lp/services/log/tests/test_logger.py 2011-11-04 22:19:25 +0000
@@ -0,0 +1,23 @@
+# Copyright 2011 Canonical Ltd. This software is licensed under the
+# GNU Affero General Public License version 3 (see the file LICENSE).
+
+"""Tests for `lp.services.log.logger`."""
+
+__metaclass__ = type
+
+from lp.services.log.logger import BufferLogger
+from lp.testing import TestCase
+
+
+class TestBufferLogger(TestCase):
+ """Tests for `BufferLogger`."""
+
+ def test_content(self):
+ # The content property returns a `testtools.content.Content` object
+ # representing the contents of the logger's buffer.
+ logger = BufferLogger()
+ logger.info("Hello")
+ logger.warn("World")
+ self.assertEqual(
+ "INFO Hello\nWARNING World\n",
+ "".join(logger.content.iter_text()))
=== modified file 'lib/lp/services/tests/test_utils.py'
--- lib/lp/services/tests/test_utils.py 2011-07-01 10:28:47 +0000
+++ lib/lp/services/tests/test_utils.py 2011-11-04 22:19:25 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009 Canonical Ltd. This software is licensed under the
+# Copyright 2009-2011 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""Tests for lp.services.utils."""
@@ -12,6 +12,7 @@
import os
import sys
+from fixtures import TempDir
from pytz import UTC
from testtools.matchers import (
Equals,
@@ -23,13 +24,15 @@
from lp.services.utils import (
AutoDecorate,
base,
+ CachingIterator,
compress_hash,
- CachingIterator,
decorate_with,
docstring_dedent,
file_exists,
iter_split,
+ load_bz2_pickle,
run_capturing_output,
+ save_bz2_pickle,
traceback_info,
utc_now,
)
@@ -329,3 +332,14 @@
new_now = datetime.utcnow().replace(tzinfo=UTC)
self.assertThat(now, GreaterThanOrEqual(old_now))
self.assertThat(now, LessThanOrEqual(new_now))
+
+
+class TestBZ2Pickle(TestCase):
+ """Tests for `save_bz2_pickle` and `load_bz2_pickle`."""
+
+ def test_save_and_load(self):
+ data = {1: 2, "room": 101}
+ tempdir = self.useFixture(TempDir()).path
+ tempfile = os.path.join(tempdir, "dump")
+ save_bz2_pickle(data, tempfile)
+ self.assertEqual(data, load_bz2_pickle(tempfile))
=== modified file 'lib/lp/services/utils.py'
--- lib/lp/services/utils.py 2011-07-21 16:01:54 +0000
+++ lib/lp/services/utils.py 2011-11-04 22:19:25 +0000
@@ -18,9 +18,11 @@
'file_exists',
'iter_list_chunks',
'iter_split',
+ 'load_bz2_pickle',
'obfuscate_email',
're_email_address',
'run_capturing_output',
+ 'save_bz2_pickle',
'synchronize',
'text_delta',
'traceback_info',
@@ -28,16 +30,18 @@
'value_string',
]
+import bz2
from datetime import datetime
from itertools import tee
import os
import re
+import string
from StringIO import StringIO
-import string
import sys
from textwrap import dedent
from types import FunctionType
+import cPickle as pickle
from fixtures import (
Fixture,
MonkeyPatch,
@@ -135,7 +139,7 @@
I'm amazed this isn't in itertools (mwhudson).
"""
for i in range(0, len(a_list), size):
- yield a_list[i:i+size]
+ yield a_list[i:i + size]
def synchronize(source, target, add, remove):
@@ -245,7 +249,7 @@
then reassemble.
"""
# Make sure there is at least one newline so the split works.
- first, rest = (s+'\n').split('\n', 1)
+ first, rest = (s + '\n').split('\n', 1)
return (first + '\n' + dedent(rest)).strip()
@@ -333,3 +337,21 @@
text = text.replace(
"<<email address hidden>>", "<email address hidden>")
return text
+
+
+def save_bz2_pickle(obj, filename):
+ """Save a bz2 compressed pickle of `obj` to `filename`."""
+ fout = bz2.BZ2File(filename, "w")
+ try:
+ pickle.dump(obj, fout, pickle.HIGHEST_PROTOCOL)
+ finally:
+ fout.close()
+
+
+def load_bz2_pickle(filename):
+ """Load and return a bz2 compressed pickle from `filename`."""
+ fin = bz2.BZ2File(filename, "r")
+ try:
+ return pickle.load(fin)
+ finally:
+ fin.close()