← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~allenap/launchpad/participation-keyerror-bug-810114-3 into lp:launchpad

 

Gavin Panella has proposed merging lp:~allenap/launchpad/participation-keyerror-bug-810114-3 into lp:launchpad.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  Bug #810114 in Launchpad itself: "KeyError in +participation page."
  https://bugs.launchpad.net/launchpad/+bug/810114

For more details, see:
https://code.launchpad.net/~allenap/launchpad/participation-keyerror-bug-810114-3/+merge/81340

This branch adds load and save support for the team participation info
that check-teamparticipation.py deals with. This was originally so
that I could get hold of a data set to figure out why it was so slow.

I bundled my work and asked a LOSA to run it and pass me the data set,
which quickly helped me understand the problem: set.intersection(dict)
is slow. I assume it iterates through the dict instead of doing some
clever optimization. I've heard that sets and dicts share an
underlying implementation so I kind of assumed that they would work
really well together. Er, no.

So I fixed that problem and it seems to be fairly fast now.

I have left the load and save support in because it may come in handy
another day should we need to diagnose another team membership or
participation problem.

-- 
https://code.launchpad.net/~allenap/launchpad/participation-keyerror-bug-810114-3/+merge/81340
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~allenap/launchpad/participation-keyerror-bug-810114-3 into lp:launchpad.
=== modified file 'cronscripts/check-teamparticipation.py'
--- cronscripts/check-teamparticipation.py	2011-10-31 21:21:16 +0000
+++ cronscripts/check-teamparticipation.py	2011-11-04 22:19:25 +0000
@@ -1,6 +1,6 @@
 #!/usr/bin/python -S
 #
-# Copyright 2009 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2011 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 # pylint: disable-msg=C0103,W0403
@@ -20,15 +20,47 @@
 
 import _pythonpath
 
-from lp.registry.scripts.teamparticipation import check_teamparticipation
+from lp.registry.scripts.teamparticipation import (
+    check_teamparticipation_circular,
+    check_teamparticipation_consistency,
+    check_teamparticipation_self,
+    fetch_team_participation_info,
+    )
 from lp.services.scripts.base import LaunchpadScript
+from lp.services.utils import (
+    load_bz2_pickle,
+    save_bz2_pickle,
+    )
 
 
 class CheckTeamParticipationScript(LaunchpadScript):
     description = "Check for invalid/missing TeamParticipation entries."
 
+    def add_my_options(self):
+        self.parser.add_option(
+            "--load-participation-info",
+            dest="load_info", metavar="FILE", help=(
+                "File from which to load participation information "
+                "instead of going to the database."))
+        self.parser.add_option(
+            "--save-participation-info",
+            dest="save_info", metavar="FILE", help=(
+                "File in which to save participation information, for "
+                "later processing with --load-participation-info."))
+
     def main(self):
-        check_teamparticipation(self.logger)
+        """Perform various checks on the `TeamParticipation` table."""
+        if self.options.load_info:
+            participation_info = load_bz2_pickle(self.options.load_info)
+        else:
+            check_teamparticipation_self(self.logger)
+            check_teamparticipation_circular(self.logger)
+            participation_info = fetch_team_participation_info(self.logger)
+        if self.options.save_info:
+            save_bz2_pickle(participation_info, self.options.save_info)
+        else:
+            check_teamparticipation_consistency(
+                self.logger, participation_info)
 
 
 if __name__ == '__main__':

=== modified file 'lib/lp/registry/scripts/teamparticipation.py'
--- lib/lp/registry/scripts/teamparticipation.py	2011-11-03 14:27:06 +0000
+++ lib/lp/registry/scripts/teamparticipation.py	2011-11-04 22:19:25 +0000
@@ -5,7 +5,10 @@
 
 __metaclass__ = type
 __all__ = [
-    "check_teamparticipation",
+    "check_teamparticipation_circular",
+    "check_teamparticipation_consistency",
+    "check_teamparticipation_self",
+    "fetch_team_participation_info",
     ]
 
 from collections import (
@@ -84,6 +87,21 @@
     "ConsistencyError", ("type", "team", "people"))
 
 
+def report_progress(log, interval, results, what):
+    """Iterate through `results`, reporting on progress.
+
+    :param log: A logger.
+    :param interval: How many results to report progress about.
+    :param results: An iterable of things.
+    :param what: A string descriping what the results are.
+    """
+    for num, result in izip(count(1), results):
+        if num % interval == 0:
+            log.debug("%d %s", num, what)
+        yield result
+    log.debug("%d %s", num, what)
+
+
 def execute_long_query(store, log, interval, query):
     """Execute the given query, reporting as results are fetched.
 
@@ -91,21 +109,16 @@
     the total number of rows fetched thus far.
     """
     log.debug(query)
-    for rows, result in izip(count(1), store.execute(query)):
-        if rows % interval == 0:
-            log.debug("%d rows", rows)
-        yield result
-
-
-def check_teamparticipation_consistency(log):
-    """Check for missing or spurious participations.
-
-    For example, participations for people who are not members, or missing
-    participations for people who are members.
-    """
+    results = store.execute(query)
+    # Hackish; the default is 10 which seems fairly low.
+    results._raw_cursor.arraysize = interval
+    return report_progress(log, interval, results, "rows")
+
+
+def fetch_team_participation_info(log):
+    """Fetch people, teams, memberships and participations."""
     slurp = partial(execute_long_query, get_store(), log, 10000)
 
-    # Slurp everything in.
     people = dict(
         slurp(
             "SELECT id, name FROM Person"
@@ -131,35 +144,54 @@
     # Don't hold any locks.
     transaction.commit()
 
+    return people, teams, team_memberships, team_participations
+
+
+def check_teamparticipation_consistency(log, info):
+    """Check for missing or spurious participations.
+
+    For example, participations for people who are not members, or missing
+    participations for people who are members.
+    """
+    people, teams, team_memberships, team_participations = info
+
+    # set.intersection() with a dict is slow.
+    people_set = frozenset(people)
+    teams_set = frozenset(teams)
+
     def get_participants(team):
         """Recurse through membership records to get participants."""
-        member_people = team_memberships[team].intersection(people)
+        member_people = team_memberships[team].intersection(people_set)
         member_people.add(team)  # Teams always participate in themselves.
-        member_teams = team_memberships[team].intersection(teams)
+        member_teams = team_memberships[team].intersection(teams_set)
         return member_people.union(
             chain.from_iterable(imap(get_participants, member_teams)))
 
-    def check_participants(expected, observed):
+    def check_participants(person, expected, observed):
         spurious = observed - expected
         missing = expected - observed
         if len(spurious) > 0:
-            yield ConsistencyError("spurious", team, sorted(spurious))
+            yield ConsistencyError("spurious", person, sorted(spurious))
         if len(missing) > 0:
-            yield ConsistencyError("missing", team, sorted(missing))
+            yield ConsistencyError("missing", person, sorted(missing))
 
     errors = []
 
-    for person in people:
+    log.debug("Checking consistency of %d people", len(people))
+    for person in report_progress(log, 50000, people, "people"):
         participants_expected = set((person,))
         participants_observed = team_participations[person]
         errors.extend(
-            check_participants(participants_expected, participants_observed))
+            check_participants(
+                person, participants_expected, participants_observed))
 
-    for team in teams:
+    log.debug("Checking consistency of %d teams", len(teams))
+    for team in report_progress(log, 1000, teams, "teams"):
         participants_expected = get_participants(team)
         participants_observed = team_participations[team]
         errors.extend(
-            check_participants(participants_expected, participants_observed))
+            check_participants(
+                team, participants_expected, participants_observed))
 
     def get_repr(id):
         if id in people:
@@ -177,10 +209,3 @@
             get_repr(error.team), error.type, people_repr)
 
     return errors
-
-
-def check_teamparticipation(log):
-    """Perform various checks on the `TeamParticipation` table."""
-    check_teamparticipation_self(log)
-    check_teamparticipation_circular(log)
-    check_teamparticipation_consistency(log)

=== modified file 'lib/lp/registry/tests/test_teammembership.py'
--- lib/lp/registry/tests/test_teammembership.py	2011-11-03 11:58:09 +0000
+++ lib/lp/registry/tests/test_teammembership.py	2011-11-04 22:19:25 +0000
@@ -3,14 +3,18 @@
 
 __metaclass__ = type
 
+import bz2
 from datetime import (
     datetime,
     timedelta,
     )
+import os
+import pickle
 import re
 import subprocess
 from unittest import TestLoader
 
+from fixtures import TempDir
 import pytz
 from testtools.content import text_content
 from testtools.matchers import Equals
@@ -53,9 +57,11 @@
     TeamParticipation,
     )
 from lp.registry.scripts.teamparticipation import (
-    check_teamparticipation,
+    check_teamparticipation_circular,
     check_teamparticipation_consistency,
+    check_teamparticipation_self,
     ConsistencyError,
+    fetch_team_participation_info,
     )
 from lp.services.log.logger import BufferLogger
 from lp.testing import (
@@ -1054,22 +1060,23 @@
 
     layer = DatabaseFunctionalLayer
 
-    def _runScript(self, expected_returncode=0):
+    def _runScript(self, *args):
+        cmd = ["cronscripts/check-teamparticipation.py"]
+        cmd.extend(args)
         process = subprocess.Popen(
-            'cronscripts/check-teamparticipation.py', shell=True,
-            stdin=subprocess.PIPE, stdout=subprocess.PIPE,
+            cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
             stderr=subprocess.PIPE)
         out, err = process.communicate()
         if out != "":
             self.addDetail("stdout", text_content(out))
         if err != "":
             self.addDetail("stderr", text_content(err))
-        self.assertEqual(process.returncode, expected_returncode)
-        return out, err
+        return process.poll(), out, err
 
     def test_no_output_if_no_invalid_entries(self):
         """No output if there's no invalid teamparticipation entries."""
-        out, err = self._runScript()
+        code, out, err = self._runScript()
+        self.assertEqual(0, code)
         self.assertEqual(0, len(out))
         self.assertEqual(0, len(err))
 
@@ -1111,7 +1118,8 @@
             """ % sqlvalues(TeamMembershipStatus.APPROVED))
         transaction.commit()
 
-        out, err = self._runScript()
+        code, out, err = self._runScript()
+        self.assertEqual(0, code)
         self.assertEqual(0, len(out))
         self.failUnless(
             re.search('missing TeamParticipation entries for zzzzz', err))
@@ -1149,7 +1157,8 @@
                 VALUES (9997, 9998);
             """ % sqlvalues(approved=TeamMembershipStatus.APPROVED))
         transaction.commit()
-        out, err = self._runScript(expected_returncode=1)
+        code, out, err = self._runScript()
+        self.assertEqual(1, code)
         self.assertEqual(0, len(out))
         self.failUnless(re.search('Circular references found', err))
 
@@ -1173,13 +1182,37 @@
             """ % sqlvalues(approved=TeamMembershipStatus.APPROVED))
         transaction.commit()
         logger = BufferLogger()
-        errors = check_teamparticipation_consistency(logger)
-        if logger.getLogBuffer() != "":
-            self.addDetail("log", text_content(logger.getLogBuffer()))
+        self.addDetail("log", logger.content)
+        errors = check_teamparticipation_consistency(
+            logger, fetch_team_participation_info(logger))
         self.assertEqual(
             [ConsistencyError("spurious", 6969, [6970])],
             errors)
 
+    def test_load_and_save_team_participation(self):
+        """The script can load and save participation info."""
+        logger = BufferLogger()
+        self.addDetail("log", logger.content)
+        info = fetch_team_participation_info(logger)
+        tempdir = self.useFixture(TempDir()).path
+        filename_in = os.path.join(tempdir, "info.in")
+        filename_out = os.path.join(tempdir, "info.out")
+        fout = bz2.BZ2File(filename_in, "w")
+        try:
+            pickle.dump(info, fout, pickle.HIGHEST_PROTOCOL)
+        finally:
+            fout.close()
+        code, out, err = self._runScript(
+            "--load-participation-info", filename_in,
+            "--save-participation-info", filename_out)
+        self.assertEqual(0, code)
+        fin = bz2.BZ2File(filename_out, "r")
+        try:
+            saved_info = pickle.load(fin)
+        finally:
+            fin.close()
+        self.assertEqual(info, saved_info)
+
 
 class TestCheckTeamParticipationScriptPerformance(TestCaseWithFactory):
 
@@ -1201,9 +1234,13 @@
                 team.addMember(another_person, team.teamowner)
             team = another_team
         transaction.commit()
+        logger = BufferLogger()
+        self.addDetail("log", logger.content)
         with StormStatementRecorder() as recorder:
-            logger = BufferLogger()
-            check_teamparticipation(logger)
+            check_teamparticipation_self(logger)
+            check_teamparticipation_circular(logger)
+            check_teamparticipation_consistency(
+                logger, fetch_team_participation_info(logger))
         self.assertThat(recorder, HasQueryCount(Equals(6)))
 
 

=== modified file 'lib/lp/services/log/logger.py'
--- lib/lp/services/log/logger.py	2011-04-13 05:38:13 +0000
+++ lib/lp/services/log/logger.py	2011-11-04 22:19:25 +0000
@@ -18,6 +18,11 @@
 import sys
 import traceback
 
+from testtools.content import (
+    Content,
+    UTF8_TEXT,
+    )
+
 from lp.services.log import loglevels
 
 
@@ -100,6 +105,8 @@
 class FakeLogger:
     """Emulates a proper logger, just printing everything out the given file.
     """
+    # XXX: GavinPanella 2011-11-04 bug=886053: This is a test fixture not a
+    # service.
 
     loglevel = loglevels.DEBUG
 
@@ -190,6 +197,8 @@
 
 class DevNullLogger(FakeLogger):
     """A logger that drops all messages."""
+    # XXX: GavinPanella 2011-11-04 bug=886053: This is a test fixture not a
+    # service.
 
     def message(self, *args, **kwargs):
         """Do absolutely nothing."""
@@ -197,6 +206,8 @@
 
 class BufferLogger(FakeLogger):
     """A logger that logs to a StringIO object."""
+    # XXX: GavinPanella 2011-11-04 bug=886053: This is a test fixture not a
+    # service.
 
     def __init__(self):
         super(BufferLogger, self).__init__(StringIO())
@@ -214,3 +225,13 @@
         messages = self.getLogBuffer()
         self.clearLogBuffer()
         return messages
+
+    @property
+    def content(self):
+        """Return a `testtools.content.Content` for this object's buffer.
+
+        Use with `testtools.TestCase.addDetail`, `fixtures.Fixture.addDetail`,
+        and anything else that understands details.
+        """
+        get_bytes = lambda: [self.getLogBuffer().encode("utf-8")]
+        return Content(UTF8_TEXT, get_bytes)

=== added file 'lib/lp/services/log/tests/test_logger.py'
--- lib/lp/services/log/tests/test_logger.py	1970-01-01 00:00:00 +0000
+++ lib/lp/services/log/tests/test_logger.py	2011-11-04 22:19:25 +0000
@@ -0,0 +1,23 @@
+# Copyright 2011 Canonical Ltd.  This software is licensed under the
+# GNU Affero General Public License version 3 (see the file LICENSE).
+
+"""Tests for `lp.services.log.logger`."""
+
+__metaclass__ = type
+
+from lp.services.log.logger import BufferLogger
+from lp.testing import TestCase
+
+
+class TestBufferLogger(TestCase):
+    """Tests for `BufferLogger`."""
+
+    def test_content(self):
+        # The content property returns a `testtools.content.Content` object
+        # representing the contents of the logger's buffer.
+        logger = BufferLogger()
+        logger.info("Hello")
+        logger.warn("World")
+        self.assertEqual(
+            "INFO Hello\nWARNING World\n",
+            "".join(logger.content.iter_text()))

=== modified file 'lib/lp/services/tests/test_utils.py'
--- lib/lp/services/tests/test_utils.py	2011-07-01 10:28:47 +0000
+++ lib/lp/services/tests/test_utils.py	2011-11-04 22:19:25 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2011 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """Tests for lp.services.utils."""
@@ -12,6 +12,7 @@
 import os
 import sys
 
+from fixtures import TempDir
 from pytz import UTC
 from testtools.matchers import (
     Equals,
@@ -23,13 +24,15 @@
 from lp.services.utils import (
     AutoDecorate,
     base,
+    CachingIterator,
     compress_hash,
-    CachingIterator,
     decorate_with,
     docstring_dedent,
     file_exists,
     iter_split,
+    load_bz2_pickle,
     run_capturing_output,
+    save_bz2_pickle,
     traceback_info,
     utc_now,
     )
@@ -329,3 +332,14 @@
         new_now = datetime.utcnow().replace(tzinfo=UTC)
         self.assertThat(now, GreaterThanOrEqual(old_now))
         self.assertThat(now, LessThanOrEqual(new_now))
+
+
+class TestBZ2Pickle(TestCase):
+    """Tests for `save_bz2_pickle` and `load_bz2_pickle`."""
+
+    def test_save_and_load(self):
+        data = {1: 2, "room": 101}
+        tempdir = self.useFixture(TempDir()).path
+        tempfile = os.path.join(tempdir, "dump")
+        save_bz2_pickle(data, tempfile)
+        self.assertEqual(data, load_bz2_pickle(tempfile))

=== modified file 'lib/lp/services/utils.py'
--- lib/lp/services/utils.py	2011-07-21 16:01:54 +0000
+++ lib/lp/services/utils.py	2011-11-04 22:19:25 +0000
@@ -18,9 +18,11 @@
     'file_exists',
     'iter_list_chunks',
     'iter_split',
+    'load_bz2_pickle',
     'obfuscate_email',
     're_email_address',
     'run_capturing_output',
+    'save_bz2_pickle',
     'synchronize',
     'text_delta',
     'traceback_info',
@@ -28,16 +30,18 @@
     'value_string',
     ]
 
+import bz2
 from datetime import datetime
 from itertools import tee
 import os
 import re
+import string
 from StringIO import StringIO
-import string
 import sys
 from textwrap import dedent
 from types import FunctionType
 
+import cPickle as pickle
 from fixtures import (
     Fixture,
     MonkeyPatch,
@@ -135,7 +139,7 @@
     I'm amazed this isn't in itertools (mwhudson).
     """
     for i in range(0, len(a_list), size):
-        yield a_list[i:i+size]
+        yield a_list[i:i + size]
 
 
 def synchronize(source, target, add, remove):
@@ -245,7 +249,7 @@
     then reassemble.
     """
     # Make sure there is at least one newline so the split works.
-    first, rest = (s+'\n').split('\n', 1)
+    first, rest = (s + '\n').split('\n', 1)
     return (first + '\n' + dedent(rest)).strip()
 
 
@@ -333,3 +337,21 @@
     text = text.replace(
         "<<email address hidden>>", "<email address hidden>")
     return text
+
+
+def save_bz2_pickle(obj, filename):
+    """Save a bz2 compressed pickle of `obj` to `filename`."""
+    fout = bz2.BZ2File(filename, "w")
+    try:
+        pickle.dump(obj, fout, pickle.HIGHEST_PROTOCOL)
+    finally:
+        fout.close()
+
+
+def load_bz2_pickle(filename):
+    """Load and return a bz2 compressed pickle from `filename`."""
+    fin = bz2.BZ2File(filename, "r")
+    try:
+        return pickle.load(fin)
+    finally:
+        fin.close()