launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #08414
[Merge] lp:~jml/launchpad/generate-ppa-logging into lp:launchpad
Jonathan Lange has proposed merging lp:~jml/launchpad/generate-ppa-logging into lp:launchpad.
Requested reviews:
Jelmer Vernooij (jelmer): code
For more details, see:
https://code.launchpad.net/~jml/launchpad/generate-ppa-logging/+merge/108040
Instrument generate_ppa_htaccess and also do some clean ups along the way.
* Add a debug log to all cronscripts to say how long they spent running
* Add total_seconds helper to get the number of seconds in a timedelta
* Rename deactivateTokens to deactivateInvalidTokens, split its implementation into something that figures out what the tokens are, and something that deactivates arbitrary toukens
* Lots of instrumentation
--
https://code.launchpad.net/~jml/launchpad/generate-ppa-logging/+merge/108040
Your team Launchpad code reviewers is subscribed to branch lp:launchpad.
=== modified file 'database/schema/security.cfg'
--- database/schema/security.cfg 2012-05-25 18:08:42 +0000
+++ database/schema/security.cfg 2012-05-30 22:51:21 +0000
@@ -2219,6 +2219,7 @@
[generateppahtaccess]
groups=script
+public.account = SELECT
public.archive = SELECT
public.archiveauthtoken = SELECT, UPDATE
public.archivesubscriber = SELECT, UPDATE
=== modified file 'lib/lp/archivepublisher/htaccess.py'
--- lib/lp/archivepublisher/htaccess.py 2012-05-30 16:32:50 +0000
+++ lib/lp/archivepublisher/htaccess.py 2012-05-30 22:51:21 +0000
@@ -22,6 +22,7 @@
from zope.component import getUtility
+from lp.registry.interfaces.person import IPersonSet
from lp.soyuz.interfaces.archiveauthtoken import IArchiveAuthTokenSet
from lp.soyuz.model.archiveauthtoken import ArchiveAuthToken
@@ -80,14 +81,20 @@
assert archive.private, "Archive %r must be private" % archive
if tokens is None:
- tokens = getUtility(IArchiveAuthTokenSet).getByArchive(
- archive).order_by(ArchiveAuthToken.id)
+ tokens = getUtility(IArchiveAuthTokenSet).getByArchive(archive)
+ tokens = list(tokens)
# The first .htpasswd entry is the buildd_secret.
yield (BUILDD_USER_NAME, archive.buildd_secret, BUILDD_USER_NAME[:2])
+ person_ids = map(attrgetter('person_id'), tokens)
+ valid_persons = dict((person.id, person) for person in
+ getUtility(IPersonSet).getPrecachedPersonsFromIDs(
+ person_ids, need_validity=True) if person.is_valid_person)
+ usable_tokens = [(token, valid_persons[token.person_id])
+ for token in tokens if token.person_id in valid_persons]
# Iterate over tokens and write the appropriate htpasswd
- # entries for them. For consistent sort order, the tokens
- # should be ordered by id.
- for token in tokens:
- yield (token.person.name, token.token, token.person.name[:2])
+ # entries for them. Use a consistent sort order so that the
+ # generated file can be compared to an existing one later.
+ for token, person in sorted(usable_tokens, key=lambda item:item[0].id):
+ yield (person.name, token.token, person.name[:2])
=== modified file 'lib/lp/archivepublisher/scripts/generate_ppa_htaccess.py'
--- lib/lp/archivepublisher/scripts/generate_ppa_htaccess.py 2012-05-30 17:05:21 +0000
+++ lib/lp/archivepublisher/scripts/generate_ppa_htaccess.py 2012-05-30 22:51:21 +0000
@@ -33,6 +33,7 @@
)
from lp.services.scripts.base import LaunchpadCronScript
from lp.services.scripts.interfaces.scriptactivity import IScriptActivitySet
+from lp.services.utils import total_seconds
from lp.services.webapp import canonical_url
from lp.soyuz.enums import (
ArchiveStatus,
@@ -159,16 +160,12 @@
simple_sendmail(from_address, to_address, subject, body, headers)
- def deactivateTokens(self, send_email=False):
- """Deactivate tokens as necessary.
-
- If a subscriber no longer has an active token for the PPA, we
- deactivate it.
-
- :param send_email: Whether to send a cancellation email to the owner
- of the token. This defaults to False to speed up the test
- suite.
- :return: the set of ppas affected by token deactivations.
+ def _getInvalidTokens(self):
+ """Return all invalid tokens.
+
+ A token is invalid if it is active and the token owner is *not* a
+ subscriber to the archive that the token is for. The subscription can
+ be either direct or through a team.
"""
# First we grab all the active tokens for which there is a
# matching current archive subscription for a team of which the
@@ -187,19 +184,44 @@
all_active_tokens = store.find(
ArchiveAuthToken,
ArchiveAuthToken.date_deactivated == None)
- invalid_tokens = all_active_tokens.difference(valid_tokens)
-
- # We note the ppas affected by these token deactivations so that
- # we can later update their htpasswd files.
+
+ return all_active_tokens.difference(valid_tokens)
+
+ def deactivateTokens(self, tokens, send_email=False):
+ """Deactivate the given tokens.
+
+ :return: A set of PPAs affected by the deactivations.
+ """
affected_ppas = set()
- for token in invalid_tokens:
+ # XXX: Perhaps we can do this with one call, rather than looping
+ # through everything.
+ num_tokens = 0
+ for token in tokens:
if send_email:
self.sendCancellationEmail(token)
token.deactivate()
affected_ppas.add(token.archive)
-
+ num_tokens += 1
+ self.logger.debug(
+ "Deactivated %s tokens, %s PPAs affected"
+ % (num_tokens, len(affected_ppas)))
return affected_ppas
+ def deactivateInvalidTokens(self, send_email=False):
+ """Deactivate tokens as necessary.
+
+ If an active token for a PPA no longer has any subscribers,
+ we deactivate the token.
+
+ :param send_email: Whether to send a cancellation email to the owner
+ of the token. This defaults to False to speed up the test
+ suite.
+ :return: the set of ppas affected by token deactivations so that we
+ can later update their htpasswd files.
+ """
+ invalid_tokens = self._getInvalidTokens()
+ return self.deactivateTokens(invalid_tokens, send_email=send_email)
+
def expireSubscriptions(self):
"""Expire subscriptions as necessary.
@@ -235,6 +257,8 @@
# NTP is running on our servers and therefore we can assume
# only minimal skew, we include a fudge-factor of 1s so that
# even the minimal skew cannot demonstrate bug 627608.
+ #
+ # XXX: Pass date_started in.
last_script_start_with_skew = last_success.date_started - (
timedelta(seconds=1))
extra_expr = [
@@ -257,6 +281,7 @@
last_success = getUtility(IScriptActivitySet).getLastActivity(
'generate-ppa-htaccess')
extra_expr = []
+ # XXX: This uses date_completed, other point uses date_started.
if last_success:
extra_expr = [
Archive.date_created >= last_success.date_completed]
@@ -268,16 +293,32 @@
"""Script entry point."""
self.logger.info('Starting the PPA .htaccess generation')
self.expireSubscriptions()
- affected_ppas = self.deactivateTokens(send_email=True)
+ affected_ppas = self.deactivateInvalidTokens(send_email=True)
+ current_ppa_count = len(affected_ppas)
+ self.logger.debug(
+ '%s PPAs with deactivated tokens' % current_ppa_count)
# In addition to the ppas that are affected by deactivated
# tokens, we also want to include any ppas that have tokens
# created since the last time we ran.
+ num_tokens = 0
for token in self.getNewTokensSinceLastRun():
affected_ppas.add(token.archive)
+ num_tokens += 1
+
+ new_ppa_count = len(affected_ppas)
+ self.logger.debug(
+ "%s new tokens since last run, %s PPAs affected"
+ % (num_tokens, new_ppa_count - current_ppa_count))
+ current_ppa_count = new_ppa_count
affected_ppas.update(self.getNewPrivatePPAs())
+ new_ppa_count = len(affected_ppas)
+ self.logger.debug(
+ "%s new private PPAs since last run"
+ % (new_ppa_count - current_ppa_count))
+ self.logger.debug('%s PPAs require updating' % new_ppa_count)
for ppa in affected_ppas:
# If this PPA is blacklisted, do not touch it's htaccess/pwd
# files.
@@ -285,23 +326,28 @@
ppa.owner.name, [])
if ppa.name in blacklisted_ppa_names_for_owner:
self.logger.info(
- "Skipping htacess updates for blacklisted PPA "
+ "Skipping htaccess updates for blacklisted PPA "
" '%s' owned by %s.",
ppa.name,
ppa.owner.displayname)
continue
elif ppa.status == ArchiveStatus.DELETED or ppa.enabled is False:
self.logger.info(
- "Skipping htacess updates for deleted or disabled PPA "
+ "Skipping htaccess updates for deleted or disabled PPA "
" '%s' owned by %s.",
ppa.name,
ppa.owner.displayname)
continue
self.ensureHtaccess(ppa)
+ htpasswd_write_start = datetime.now()
temp_htpasswd = self.generateHtpasswd(ppa)
if not self.replaceUpdatedHtpasswd(ppa, temp_htpasswd):
os.remove(temp_htpasswd)
+ htpasswd_write_duration = datetime.now() - htpasswd_write_start
+ self.logger.debug(
+ "Wrote htpasswd for '%s': %ss"
+ % (ppa.name, total_seconds(htpasswd_write_duration)))
if self.options.no_deactivation or self.options.dryrun:
self.logger.info('Dry run, so not committing transaction.')
=== modified file 'lib/lp/archivepublisher/tests/test_generate_ppa_htaccess.py'
--- lib/lp/archivepublisher/tests/test_generate_ppa_htaccess.py 2012-05-30 17:05:21 +0000
+++ lib/lp/archivepublisher/tests/test_generate_ppa_htaccess.py 2012-05-30 22:51:21 +0000
@@ -12,6 +12,7 @@
import subprocess
import sys
import tempfile
+import time
import pytz
import transaction
@@ -38,7 +39,10 @@
lp_dbuser,
switch_dbuser,
)
-from lp.testing.layers import LaunchpadZopelessLayer
+from lp.testing.layers import (
+ LaunchpadZopelessLayer,
+ ZopelessDatabaseLayer,
+ )
from lp.testing.mail_helpers import pop_notifications
@@ -269,7 +273,7 @@
# Initially, nothing is eligible for deactivation.
script = self.getScript()
- script.deactivateTokens()
+ script.deactivateInvalidTokens()
for person in tokens:
self.assertNotDeactivated(tokens[person])
@@ -280,7 +284,7 @@
# Clear out emails generated when leaving a team.
pop_notifications()
- script.deactivateTokens(send_email=True)
+ script.deactivateInvalidTokens(send_email=True)
self.assertDeactivated(tokens[team1_person])
del tokens[team1_person]
for person in tokens:
@@ -298,7 +302,7 @@
promiscuous_person.leave(team1)
# Clear out emails generated when leaving a team.
pop_notifications()
- script.deactivateTokens(send_email=True)
+ script.deactivateInvalidTokens(send_email=True)
self.assertNotDeactivated(tokens[promiscuous_person])
for person in tokens:
self.assertNotDeactivated(tokens[person])
@@ -317,7 +321,7 @@
parent_team.setMembershipData(
team2, TeamMembershipStatus.DEACTIVATED, name12)
self.assertFalse(team2.inTeam(parent_team))
- script.deactivateTokens()
+ script.deactivateInvalidTokens()
for person in persons2:
self.assertDeactivated(tokens[person])
@@ -667,3 +671,52 @@
self.assertTrue(os.path.isfile(htpasswd))
os.remove(htaccess)
os.remove(htpasswd)
+
+
+
+class TestPerformance(TestCaseWithFactory):
+
+ layer = ZopelessDatabaseLayer
+
+ def add_subscribers(self, ppa, num_subscribers):
+ for i in range(num_subscribers):
+ dude = self.factory.makePerson()
+ ppa.newSubscription(dude, ppa.owner)
+ ppa.newAuthToken(dude)
+ print 'Added %s subscribers to %s' % (num_subscribers, ppa.name)
+
+ def run_script(self):
+ """Run the expiry script.
+
+ :return: a tuple of return code, stdout and stderr.
+ """
+ script = os.path.join(
+ config.root, "cronscripts", "generate-ppa-htaccess.py")
+ temp_dir = self.makeTemporaryDirectory()
+ filename = os.path.join(temp_dir, 'debug.log')
+ args = [
+ sys.executable, script, "-q", "--log-file=DEBUG:%s" % (filename,)]
+ process = subprocess.Popen(
+ args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ stdout, stderr = process.communicate()
+ if process.returncode:
+ raise RuntimeError(stderr)
+ return open(filename).read()
+
+
+ def test_initial_ppa_creation(self):
+ ppa = self.factory.makeArchive(private=True)
+ print 'adding subscribers'
+ self.add_subscribers(ppa, 5000)
+ transaction.commit()
+ print 'running script'
+ self.run_script()
+ print 'adding single subscriber to popular ppa'
+ self.add_subscribers(ppa, 1)
+ transaction.commit()
+ print 'running script again'
+ start_time = time.time()
+ output = self.run_script()
+ print output
+ duration = time.time() - start_time
+ print "Script took %ss" % (duration,)
=== modified file 'lib/lp/services/scripts/base.py'
--- lib/lp/services/scripts/base.py 2012-01-06 11:08:30 +0000
+++ lib/lp/services/scripts/base.py 2012-05-30 22:51:21 +0000
@@ -47,6 +47,7 @@
from lp.services.mail.sendmail import set_immediate_mail_delivery
from lp.services.scripts.interfaces.scriptactivity import IScriptActivitySet
from lp.services.scripts.logger import OopsHandler
+from lp.services.utils import total_seconds
from lp.services.webapp.errorlog import globalErrorUtility
from lp.services.webapp.interaction import (
ANONYMOUS,
@@ -424,6 +425,12 @@
date_started=date_started,
date_completed=date_completed)
self.txn.commit()
+ # date_started is recorded *after* the lock is acquired and we've
+ # initialized Zope components and the database. Thus this time is
+ # only for the script proper, rather than total execution time.
+ seconds_taken = total_seconds(date_completed - date_started)
+ self.logger.debug(
+ "%s ran in %ss (excl. load & lock)" % (self.name, seconds_taken))
@contextmanager
=== modified file 'lib/lp/services/utils.py'
--- lib/lp/services/utils.py 2012-05-24 20:25:54 +0000
+++ lib/lp/services/utils.py 2012-05-30 22:51:21 +0000
@@ -26,6 +26,7 @@
'save_bz2_pickle',
'synchronize',
'text_delta',
+ 'total_seconds',
'traceback_info',
'utc_now',
'value_string',
@@ -383,3 +384,16 @@
for key, value in o.iteritems())
else:
return o
+
+
+def total_seconds(duration):
+ """The number of total seconds in a timedelta.
+
+ In Python 2.7, spell this as duration.total_seconds(). Only needed for
+ Python 2.6 or earlier.
+ """
+ # XXX: jml didn't TDD this. Naughty, naughty.
+ return (
+ (duration.microseconds +
+ (duration.seconds + duration.days * 24 * 3600) * 1e6)
+ / 1e6)
=== modified file 'lib/lp/soyuz/interfaces/archiveauthtoken.py'
--- lib/lp/soyuz/interfaces/archiveauthtoken.py 2011-12-24 16:54:44 +0000
+++ lib/lp/soyuz/interfaces/archiveauthtoken.py 2012-05-30 22:51:21 +0000
@@ -13,7 +13,10 @@
]
from lazr.restful.fields import Reference
-from zope.interface import Interface
+from zope.interface import (
+ Attribute,
+ Interface,
+ )
from zope.schema import (
Datetime,
Int,
@@ -36,6 +39,7 @@
person = Reference(
IPerson, title=_("Person"), required=True, readonly=True,
description=_("The person for this authorisation token."))
+ person_id = Attribute('db person value')
date_created = Datetime(
title=_("Date Created"), required=True, readonly=True,
Follow ups