← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~jml/launchpad/generate-ppa-logging into lp:launchpad

 

Jonathan Lange has proposed merging lp:~jml/launchpad/generate-ppa-logging into lp:launchpad.

Requested reviews:
  Jelmer Vernooij (jelmer): code

For more details, see:
https://code.launchpad.net/~jml/launchpad/generate-ppa-logging/+merge/108040

Instrument generate_ppa_htaccess and also do some clean ups along the way.

 * Add a debug log to all cronscripts to say how long they spent running
 * Add total_seconds helper to get the number of seconds in a timedelta
 * Rename deactivateTokens to deactivateInvalidTokens, split its implementation into something that figures out what the tokens are, and something that deactivates arbitrary toukens
 * Lots of instrumentation
-- 
https://code.launchpad.net/~jml/launchpad/generate-ppa-logging/+merge/108040
Your team Launchpad code reviewers is subscribed to branch lp:launchpad.
=== modified file 'database/schema/security.cfg'
--- database/schema/security.cfg	2012-05-25 18:08:42 +0000
+++ database/schema/security.cfg	2012-05-30 22:51:21 +0000
@@ -2219,6 +2219,7 @@
 
 [generateppahtaccess]
 groups=script
+public.account                          = SELECT
 public.archive                          = SELECT
 public.archiveauthtoken                 = SELECT, UPDATE
 public.archivesubscriber                = SELECT, UPDATE

=== modified file 'lib/lp/archivepublisher/htaccess.py'
--- lib/lp/archivepublisher/htaccess.py	2012-05-30 16:32:50 +0000
+++ lib/lp/archivepublisher/htaccess.py	2012-05-30 22:51:21 +0000
@@ -22,6 +22,7 @@
 
 from zope.component import getUtility
 
+from lp.registry.interfaces.person import IPersonSet
 from lp.soyuz.interfaces.archiveauthtoken import IArchiveAuthTokenSet
 from lp.soyuz.model.archiveauthtoken import ArchiveAuthToken
 
@@ -80,14 +81,20 @@
     assert archive.private, "Archive %r must be private" % archive
 
     if tokens is None:
-        tokens = getUtility(IArchiveAuthTokenSet).getByArchive(
-            archive).order_by(ArchiveAuthToken.id)
+        tokens = getUtility(IArchiveAuthTokenSet).getByArchive(archive)
+    tokens = list(tokens)
 
     # The first .htpasswd entry is the buildd_secret.
     yield (BUILDD_USER_NAME, archive.buildd_secret, BUILDD_USER_NAME[:2])
 
+    person_ids = map(attrgetter('person_id'), tokens)
+    valid_persons = dict((person.id, person) for person in
+        getUtility(IPersonSet).getPrecachedPersonsFromIDs(
+            person_ids, need_validity=True) if person.is_valid_person)
+    usable_tokens = [(token, valid_persons[token.person_id])
+        for token in tokens if token.person_id in valid_persons]
     # Iterate over tokens and write the appropriate htpasswd
-    # entries for them.  For consistent sort order, the tokens
-    # should be ordered by id.
-    for token in tokens:
-        yield (token.person.name, token.token, token.person.name[:2])
+    # entries for them.  Use a consistent sort order so that the
+    # generated file can be compared to an existing one later.
+    for token, person in sorted(usable_tokens, key=lambda item:item[0].id):
+        yield (person.name, token.token, person.name[:2])

=== modified file 'lib/lp/archivepublisher/scripts/generate_ppa_htaccess.py'
--- lib/lp/archivepublisher/scripts/generate_ppa_htaccess.py	2012-05-30 17:05:21 +0000
+++ lib/lp/archivepublisher/scripts/generate_ppa_htaccess.py	2012-05-30 22:51:21 +0000
@@ -33,6 +33,7 @@
     )
 from lp.services.scripts.base import LaunchpadCronScript
 from lp.services.scripts.interfaces.scriptactivity import IScriptActivitySet
+from lp.services.utils import total_seconds
 from lp.services.webapp import canonical_url
 from lp.soyuz.enums import (
     ArchiveStatus,
@@ -159,16 +160,12 @@
 
         simple_sendmail(from_address, to_address, subject, body, headers)
 
-    def deactivateTokens(self, send_email=False):
-        """Deactivate tokens as necessary.
-
-        If a subscriber no longer has an active token for the PPA, we
-        deactivate it.
-
-        :param send_email: Whether to send a cancellation email to the owner
-            of the token.  This defaults to False to speed up the test
-            suite.
-        :return: the set of ppas affected by token deactivations.
+    def _getInvalidTokens(self):
+        """Return all invalid tokens.
+
+        A token is invalid if it is active and the token owner is *not* a
+        subscriber to the archive that the token is for. The subscription can
+        be either direct or through a team.
         """
         # First we grab all the active tokens for which there is a
         # matching current archive subscription for a team of which the
@@ -187,19 +184,44 @@
         all_active_tokens = store.find(
             ArchiveAuthToken,
             ArchiveAuthToken.date_deactivated == None)
-        invalid_tokens = all_active_tokens.difference(valid_tokens)
-
-        # We note the ppas affected by these token deactivations so that
-        # we can later update their htpasswd files.
+
+        return all_active_tokens.difference(valid_tokens)
+
+    def deactivateTokens(self, tokens, send_email=False):
+        """Deactivate the given tokens.
+
+        :return: A set of PPAs affected by the deactivations.
+        """
         affected_ppas = set()
-        for token in invalid_tokens:
+        # XXX: Perhaps we can do this with one call, rather than looping
+        # through everything.
+        num_tokens = 0
+        for token in tokens:
             if send_email:
                 self.sendCancellationEmail(token)
             token.deactivate()
             affected_ppas.add(token.archive)
-
+            num_tokens += 1
+        self.logger.debug(
+            "Deactivated %s tokens, %s PPAs affected"
+            % (num_tokens, len(affected_ppas)))
         return affected_ppas
 
+    def deactivateInvalidTokens(self, send_email=False):
+        """Deactivate tokens as necessary.
+
+        If an active token for a PPA no longer has any subscribers,
+        we deactivate the token.
+
+        :param send_email: Whether to send a cancellation email to the owner
+            of the token.  This defaults to False to speed up the test
+            suite.
+        :return: the set of ppas affected by token deactivations so that we
+            can later update their htpasswd files.
+        """
+        invalid_tokens = self._getInvalidTokens()
+        return self.deactivateTokens(invalid_tokens, send_email=send_email)
+
     def expireSubscriptions(self):
         """Expire subscriptions as necessary.
 
@@ -235,6 +257,8 @@
             # NTP is running on our servers and therefore we can assume
             # only minimal skew, we include a fudge-factor of 1s so that
             # even the minimal skew cannot demonstrate bug 627608.
+            #
+            # XXX: Pass date_started in.
             last_script_start_with_skew = last_success.date_started - (
                 timedelta(seconds=1))
             extra_expr = [
@@ -257,6 +281,7 @@
         last_success = getUtility(IScriptActivitySet).getLastActivity(
             'generate-ppa-htaccess')
         extra_expr = []
+        # XXX: This uses date_completed, other point uses date_started.
         if last_success:
             extra_expr = [
                 Archive.date_created >= last_success.date_completed]
@@ -268,16 +293,32 @@
         """Script entry point."""
         self.logger.info('Starting the PPA .htaccess generation')
         self.expireSubscriptions()
-        affected_ppas = self.deactivateTokens(send_email=True)
+        affected_ppas = self.deactivateInvalidTokens(send_email=True)
+        current_ppa_count = len(affected_ppas)
+        self.logger.debug(
+            '%s PPAs with deactivated tokens' % current_ppa_count)
 
         # In addition to the ppas that are affected by deactivated
         # tokens, we also want to include any ppas that have tokens
         # created since the last time we ran.
+        num_tokens = 0
         for token in self.getNewTokensSinceLastRun():
             affected_ppas.add(token.archive)
+            num_tokens += 1
+
+        new_ppa_count = len(affected_ppas)
+        self.logger.debug(
+            "%s new tokens since last run, %s PPAs affected"
+            % (num_tokens, new_ppa_count - current_ppa_count))
+        current_ppa_count = new_ppa_count
 
         affected_ppas.update(self.getNewPrivatePPAs())
+        new_ppa_count = len(affected_ppas)
+        self.logger.debug(
+            "%s new private PPAs since last run"
+            % (new_ppa_count - current_ppa_count))
 
+        self.logger.debug('%s PPAs require updating' % new_ppa_count)
         for ppa in affected_ppas:
             # If this PPA is blacklisted, do not touch it's htaccess/pwd
             # files.
@@ -285,23 +326,28 @@
                 ppa.owner.name, [])
             if ppa.name in blacklisted_ppa_names_for_owner:
                 self.logger.info(
-                    "Skipping htacess updates for blacklisted PPA "
+                    "Skipping htaccess updates for blacklisted PPA "
                     " '%s' owned by %s.",
                         ppa.name,
                         ppa.owner.displayname)
                 continue
             elif ppa.status == ArchiveStatus.DELETED or ppa.enabled is False:
                 self.logger.info(
-                    "Skipping htacess updates for deleted or disabled PPA "
+                    "Skipping htaccess updates for deleted or disabled PPA "
                     " '%s' owned by %s.",
                         ppa.name,
                         ppa.owner.displayname)
                 continue
 
             self.ensureHtaccess(ppa)
+            htpasswd_write_start = datetime.now()
             temp_htpasswd = self.generateHtpasswd(ppa)
             if not self.replaceUpdatedHtpasswd(ppa, temp_htpasswd):
                 os.remove(temp_htpasswd)
+            htpasswd_write_duration = datetime.now() - htpasswd_write_start
+            self.logger.debug(
+                "Wrote htpasswd for '%s': %ss"
+                % (ppa.name, total_seconds(htpasswd_write_duration)))
 
         if self.options.no_deactivation or self.options.dryrun:
             self.logger.info('Dry run, so not committing transaction.')

=== modified file 'lib/lp/archivepublisher/tests/test_generate_ppa_htaccess.py'
--- lib/lp/archivepublisher/tests/test_generate_ppa_htaccess.py	2012-05-30 17:05:21 +0000
+++ lib/lp/archivepublisher/tests/test_generate_ppa_htaccess.py	2012-05-30 22:51:21 +0000
@@ -12,6 +12,7 @@
 import subprocess
 import sys
 import tempfile
+import time
 
 import pytz
 import transaction
@@ -38,7 +39,10 @@
     lp_dbuser,
     switch_dbuser,
     )
-from lp.testing.layers import LaunchpadZopelessLayer
+from lp.testing.layers import (
+    LaunchpadZopelessLayer,
+    ZopelessDatabaseLayer,
+    )
 from lp.testing.mail_helpers import pop_notifications
 
 
@@ -269,7 +273,7 @@
 
         # Initially, nothing is eligible for deactivation.
         script = self.getScript()
-        script.deactivateTokens()
+        script.deactivateInvalidTokens()
         for person in tokens:
             self.assertNotDeactivated(tokens[person])
 
@@ -280,7 +284,7 @@
         # Clear out emails generated when leaving a team.
         pop_notifications()
 
-        script.deactivateTokens(send_email=True)
+        script.deactivateInvalidTokens(send_email=True)
         self.assertDeactivated(tokens[team1_person])
         del tokens[team1_person]
         for person in tokens:
@@ -298,7 +302,7 @@
             promiscuous_person.leave(team1)
         # Clear out emails generated when leaving a team.
         pop_notifications()
-        script.deactivateTokens(send_email=True)
+        script.deactivateInvalidTokens(send_email=True)
         self.assertNotDeactivated(tokens[promiscuous_person])
         for person in tokens:
             self.assertNotDeactivated(tokens[person])
@@ -317,7 +321,7 @@
             parent_team.setMembershipData(
                 team2, TeamMembershipStatus.DEACTIVATED, name12)
             self.assertFalse(team2.inTeam(parent_team))
-        script.deactivateTokens()
+        script.deactivateInvalidTokens()
         for person in persons2:
             self.assertDeactivated(tokens[person])
 
@@ -667,3 +671,52 @@
         self.assertTrue(os.path.isfile(htpasswd))
         os.remove(htaccess)
         os.remove(htpasswd)
+
+
+
+class TestPerformance(TestCaseWithFactory):
+
+    layer = ZopelessDatabaseLayer
+
+    def add_subscribers(self, ppa, num_subscribers):
+        for i in range(num_subscribers):
+            dude = self.factory.makePerson()
+            ppa.newSubscription(dude, ppa.owner)
+            ppa.newAuthToken(dude)
+        print 'Added %s subscribers to %s' % (num_subscribers, ppa.name)
+
+    def run_script(self):
+        """Run the expiry script.
+
+        :return: a tuple of return code, stdout and stderr.
+        """
+        script = os.path.join(
+            config.root, "cronscripts", "generate-ppa-htaccess.py")
+        temp_dir = self.makeTemporaryDirectory()
+        filename = os.path.join(temp_dir, 'debug.log')
+        args = [
+            sys.executable, script, "-q", "--log-file=DEBUG:%s" % (filename,)]
+        process = subprocess.Popen(
+            args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        stdout, stderr = process.communicate()
+        if process.returncode:
+            raise RuntimeError(stderr)
+        return open(filename).read()
+
+
+    def test_initial_ppa_creation(self):
+        ppa = self.factory.makeArchive(private=True)
+        print 'adding subscribers'
+        self.add_subscribers(ppa, 5000)
+        transaction.commit()
+        print 'running script'
+        self.run_script()
+        print 'adding single subscriber to popular ppa'
+        self.add_subscribers(ppa, 1)
+        transaction.commit()
+        print 'running script again'
+        start_time = time.time()
+        output = self.run_script()
+        print output
+        duration = time.time() - start_time
+        print "Script took %ss" % (duration,)

=== modified file 'lib/lp/services/scripts/base.py'
--- lib/lp/services/scripts/base.py	2012-01-06 11:08:30 +0000
+++ lib/lp/services/scripts/base.py	2012-05-30 22:51:21 +0000
@@ -47,6 +47,7 @@
 from lp.services.mail.sendmail import set_immediate_mail_delivery
 from lp.services.scripts.interfaces.scriptactivity import IScriptActivitySet
 from lp.services.scripts.logger import OopsHandler
+from lp.services.utils import total_seconds
 from lp.services.webapp.errorlog import globalErrorUtility
 from lp.services.webapp.interaction import (
     ANONYMOUS,
@@ -424,6 +425,12 @@
             date_started=date_started,
             date_completed=date_completed)
         self.txn.commit()
+        # date_started is recorded *after* the lock is acquired and we've
+        # initialized Zope components and the database.  Thus this time is
+        # only for the script proper, rather than total execution time.
+        seconds_taken = total_seconds(date_completed - date_started)
+        self.logger.debug(
+            "%s ran in %ss (excl. load & lock)" % (self.name, seconds_taken))
 
 
 @contextmanager

=== modified file 'lib/lp/services/utils.py'
--- lib/lp/services/utils.py	2012-05-24 20:25:54 +0000
+++ lib/lp/services/utils.py	2012-05-30 22:51:21 +0000
@@ -26,6 +26,7 @@
     'save_bz2_pickle',
     'synchronize',
     'text_delta',
+    'total_seconds',
     'traceback_info',
     'utc_now',
     'value_string',
@@ -383,3 +384,16 @@
             for key, value in o.iteritems())
     else:
         return o
+
+
+def total_seconds(duration):
+    """The number of total seconds in a timedelta.
+
+    In Python 2.7, spell this as duration.total_seconds().  Only needed for
+    Python 2.6 or earlier.
+    """
+    # XXX: jml didn't TDD this. Naughty, naughty.
+    return (
+        (duration.microseconds +
+         (duration.seconds + duration.days * 24 * 3600) * 1e6)
+        / 1e6)

=== modified file 'lib/lp/soyuz/interfaces/archiveauthtoken.py'
--- lib/lp/soyuz/interfaces/archiveauthtoken.py	2011-12-24 16:54:44 +0000
+++ lib/lp/soyuz/interfaces/archiveauthtoken.py	2012-05-30 22:51:21 +0000
@@ -13,7 +13,10 @@
     ]
 
 from lazr.restful.fields import Reference
-from zope.interface import Interface
+from zope.interface import (
+    Attribute,
+    Interface,
+    )
 from zope.schema import (
     Datetime,
     Int,
@@ -36,6 +39,7 @@
     person = Reference(
         IPerson, title=_("Person"), required=True, readonly=True,
         description=_("The person for this authorisation token."))
+    person_id = Attribute('db person value')
 
     date_created = Datetime(
         title=_("Date Created"), required=True, readonly=True,


Follow ups