← Back to team overview

launchpad-reviewers team mailing list archive

lp:~michael.nelson/launchpad/628711-generate-ppa-htacess-too-slow-try-2 into lp:launchpad/devel

 

Michael Nelson has proposed merging lp:~michael.nelson/launchpad/628711-generate-ppa-htacess-too-slow-try-2 into lp:launchpad/devel.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  #628711 generate-ppa-htaccess is too slow
  https://bugs.launchpad.net/bugs/628711


= Summary =
This branch addresses bug 628711 - generate-ppa-htaccess is too slow.

== Proposed fix ==
Based on the discussion on the bug, this branch updates the script so that it:

1) Queries for *all* current subscriptions that should be marked as expired
and expires them,
2) Queries for all tokens that are now invalid (ie. based on subscriptions
that have been cancelled or expired) and deactivates them, noting the ppas
affected.
3) Finds all the ppas that have new tokens created since the last successful
run of the script, combines these with the ppas affected by token deactivation
and checks/recreates their htpasswd files.

== Pre-implementation notes ==
See bug 628711

== Implementation details ==

== Tests ==
bin/test -vvm test_generate_ppa_htaccess

== Demo and Q/A ==
I'll be starting the QA on dogfood now.


= Launchpad lint =

Checking for conflicts and issues in changed files.

Linting changed files:
  lib/lp/archivepublisher/scripts/generate_ppa_htaccess.py
  lib/lp/archivepublisher/tests/test_generate_ppa_htaccess.py
-- 
https://code.launchpad.net/~michael.nelson/launchpad/628711-generate-ppa-htacess-too-slow-try-2/+merge/34546
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~michael.nelson/launchpad/628711-generate-ppa-htacess-too-slow-try-2 into lp:launchpad/devel.
=== modified file 'lib/lp/archivepublisher/scripts/generate_ppa_htaccess.py'
--- lib/lp/archivepublisher/scripts/generate_ppa_htaccess.py	2010-08-23 17:26:42 +0000
+++ lib/lp/archivepublisher/scripts/generate_ppa_htaccess.py	2010-09-03 14:17:44 +0000
@@ -6,34 +6,34 @@
 # pylint: disable-msg=C0103,W0403
 
 import crypt
-from datetime import datetime
 import filecmp
-from operator import attrgetter
 import os
+import pytz
 import tempfile
 
-import pytz
+from collections import defaultdict
+from datetime import datetime
+from operator import attrgetter
 from zope.component import getUtility
 
 from canonical.config import config
 from canonical.launchpad.helpers import get_email_template
+from canonical.launchpad.interfaces.lpstorm import IStore
 from canonical.launchpad.mail import (
     format_address,
     simple_sendmail,
     )
 from canonical.launchpad.webapp import canonical_url
 from lp.archivepublisher.config import getPubConfig
+from lp.registry.model.teammembership import TeamParticipation
 from lp.services.mail.mailwrapper import MailWrapper
 from lp.services.scripts.base import LaunchpadCronScript
+from lp.services.scripts.interfaces.scriptactivity import IScriptActivitySet
 from lp.soyuz.enums import ArchiveStatus
-from lp.soyuz.interfaces.archive import (
-    IArchiveSet,
-    )
-from lp.soyuz.interfaces.archiveauthtoken import IArchiveAuthTokenSet
-from lp.soyuz.interfaces.archivesubscriber import (
-    IArchiveSubscriberSet,
-    )
 from lp.soyuz.enums import ArchiveSubscriberStatus
+from lp.soyuz.model.archiveauthtoken import ArchiveAuthToken
+from lp.soyuz.model.archivesubscriber import ArchiveSubscriber
+
 
 # These PPAs should never have their htaccess/pwd files touched.
 BLACKLISTED_PPAS = {
@@ -96,7 +96,7 @@
             # It's not there, so create it.
             if not os.path.exists(pub_config.htaccessroot):
                 os.makedirs(pub_config.htaccessroot)
-            interpolations = {"path" : pub_config.htaccessroot}
+            interpolations = {"path": pub_config.htaccessroot}
             file = open(htaccess_filename, "w")
             file.write(HTACCESS_TEMPLATE % interpolations)
             file.close()
@@ -169,9 +169,9 @@
 
         to_address = [send_to_person.preferredemail.email]
         replacements = {
-            'recipient_name' : send_to_person.displayname,
-            'ppa_name' : ppa_name,
-            'ppa_owner_url' : ppa_owner_url,
+            'recipient_name': send_to_person.displayname,
+            'ppa_name': ppa_name,
+            'ppa_owner_url': ppa_owner_url,
             }
         body = MailWrapper(72).format(
             template % replacements, force_wrap=True)
@@ -181,64 +181,112 @@
             config.canonical.noreply_from_address)
 
         headers = {
-            'Sender' : config.canonical.bounce_address,
+            'Sender': config.canonical.bounce_address,
             }
 
         simple_sendmail(from_address, to_address, subject, body, headers)
 
-    def deactivateTokens(self, ppa, send_email=False):
+    def deactivateTokens(self, send_email=False):
         """Deactivate tokens as necessary.
 
         If a subscriber no longer has an active token for the PPA, we
         deactivate it.
 
-        :param ppa: The PPA to check tokens for.
         :param send_email: Whether to send a cancellation email to the owner
             of the token.  This defaults to False to speed up the test
             suite.
-        :return: a list of valid tokens.
+        :return: the list of valid tokens for each affected PPA.
         """
-        tokens = getUtility(IArchiveAuthTokenSet).getByArchive(ppa)
-        valid_tokens = []
-        for token in tokens:
-            result = getUtility(
-                IArchiveSubscriberSet).getBySubscriberWithActiveToken(
-                    token.person, ppa)
-            if result.count() == 0:
-                # The subscriber's token is no longer active,
-                # deactivate it.
-                if send_email:
-                    self.sendCancellationEmail(token)
-                token.deactivate()
-            else:
-                valid_tokens.append(token)
-        return valid_tokens
-
-    def expireSubscriptions(self, ppa):
+        store = IStore(ArchiveSubscriber)
+        valid_tokens = store.find(
+            ArchiveAuthToken,
+            ArchiveAuthToken.date_deactivated == None,
+            ArchiveAuthToken.archive_id == ArchiveSubscriber.archive_id,
+            ArchiveSubscriber.status == ArchiveSubscriberStatus.CURRENT,
+            ArchiveSubscriber.subscriber_id == TeamParticipation.teamID,
+            TeamParticipation.personID == ArchiveAuthToken.person_id)
+
+        all_active_tokens = store.find(
+            ArchiveAuthToken,
+            ArchiveAuthToken.date_deactivated == None)
+        invalid_tokens = all_active_tokens.difference(valid_tokens)
+
+        affected_ppa_ids = set()
+        for token in invalid_tokens:
+            if send_email:
+                self.sendCancellationEmail(token)
+            token.deactivate()
+            affected_ppa_ids.add(token.archive_id)
+
+        # Now get the list of valid tokens for each of the affected
+        # PPAs, and return a dict keyed on archive.
+        affected_ppas = defaultdict(list)
+        if affected_ppa_ids:
+            affected_ppa_tokens = store.find(
+                ArchiveAuthToken,
+                ArchiveAuthToken.date_deactivated == None,
+                ArchiveAuthToken.archive_id.is_in(affected_ppa_ids))
+
+            for token in affected_ppa_tokens:
+                affected_ppas[token.archive].append(token)
+
+        return affected_ppas
+
+    def expireSubscriptions(self):
         """Expire subscriptions as necessary.
 
         If an `ArchiveSubscriber`'s date_expires has passed, then
         set its status to EXPIRED.
-
-        :param ppa: The PPA to expire subscriptons for.
         """
         now = datetime.now(pytz.UTC)
-        subscribers = getUtility(IArchiveSubscriberSet).getByArchive(ppa)
-        for subscriber in subscribers:
-            date_expires = subscriber.date_expires
-            if date_expires is not None and date_expires <= now:
-                self.logger.info(
-                    "Expiring subscription: %s" % subscriber.displayname)
-                subscriber.status = ArchiveSubscriberStatus.EXPIRED
+
+        store = IStore(ArchiveSubscriber)
+        newly_expired_subscriptions = store.find(
+            ArchiveSubscriber,
+            ArchiveSubscriber.status == ArchiveSubscriberStatus.CURRENT,
+            ArchiveSubscriber.date_expires != None,
+            ArchiveSubscriber.date_expires <= now)
+
+        subscription_names = [
+            subs.displayname for subs in newly_expired_subscriptions]
+        if subscription_names:
+            newly_expired_subscriptions.set(
+                status=ArchiveSubscriberStatus.EXPIRED)
+            self.logger.info(
+                "Expired subscriptions: %s" % ", ".join(subscription_names))
+
+    def getNewTokensSinceLastRun(self):
+        """Return result set of new tokens created since the last run."""
+        store = IStore(ArchiveAuthToken)
+        # If we don't know when we last ran, we include all active
+        # tokens by default.
+        last_success = getUtility(IScriptActivitySet).getLastActivity(
+            'generate-ppa-htaccess')
+        extra_expr = []
+        if last_success:
+            extra_expr = [
+                ArchiveAuthToken.date_created >= last_success.date_completed]
+
+        new_ppa_tokens = store.find(
+            ArchiveAuthToken,
+            ArchiveAuthToken.date_deactivated == None,
+            *extra_expr)
+
+        return new_ppa_tokens
 
     def main(self):
         """Script entry point."""
         self.logger.info('Starting the PPA .htaccess generation')
-        ppas = getUtility(IArchiveSet).getPrivatePPAs()
-        for ppa in ppas:
-            self.expireSubscriptions(ppa)
-            valid_tokens = self.deactivateTokens(ppa, send_email=True)
-
+        self.expireSubscriptions()
+        affected_ppa_tokens = self.deactivateTokens(send_email=True)
+
+        # In addition to the ppas that are affected by deactivated
+        # tokens, we also want to include any ppas that have tokens
+        # created since the last time we ran.
+        for token in self.getNewTokensSinceLastRun():
+            affected_ppa_tokens[token.archive].append(token)
+
+        for ppa, valid_tokens in affected_ppa_tokens.iteritems():
             # If this PPA is blacklisted, do not touch it's htaccess/pwd
             # files.
             blacklisted_ppa_names_for_owner = self.blacklist.get(
@@ -271,4 +319,3 @@
             self.txn.commit()
 
         self.logger.info('Finished PPA .htaccess generation')
-

=== modified file 'lib/lp/archivepublisher/tests/test_generate_ppa_htaccess.py'
--- lib/lp/archivepublisher/tests/test_generate_ppa_htaccess.py	2010-08-23 17:26:42 +0000
+++ lib/lp/archivepublisher/tests/test_generate_ppa_htaccess.py	2010-09-03 14:17:44 +0000
@@ -15,6 +15,7 @@
 
 import pytz
 from zope.component import getUtility
+from zope.security.proxy import removeSecurityProxy
 
 from canonical.config import config
 from canonical.launchpad.interfaces import (
@@ -29,6 +30,7 @@
     HtaccessTokenGenerator,
     )
 from lp.services.mail import stub
+from lp.services.scripts.interfaces.scriptactivity import IScriptActivitySet
 from lp.soyuz.enums import (
     ArchiveStatus,
     ArchiveSubscriberStatus,
@@ -298,7 +300,7 @@
 
         # Initially, nothing is eligible for deactivation.
         script = self.getScript()
-        script.deactivateTokens(self.ppa)
+        script.deactivateTokens()
         for person in tokens:
             self.assertNotDeactivated(tokens[person])
 
@@ -311,7 +313,7 @@
         # Clear out emails generated when leaving a team.
         pop_notifications()
 
-        script.deactivateTokens(self.ppa, send_email=True)
+        script.deactivateTokens(send_email=True)
         self.assertDeactivated(tokens[team1_person])
         del tokens[team1_person]
         for person in tokens:
@@ -332,7 +334,7 @@
         self.layer.switchDbUser(self.dbuser)
         # Clear out emails generated when leaving a team.
         pop_notifications()
-        script.deactivateTokens(self.ppa, send_email=True)
+        script.deactivateTokens(send_email=True)
         self.assertNotDeactivated(tokens[promiscuous_person])
         for person in tokens:
             self.assertNotDeactivated(tokens[person])
@@ -353,7 +355,7 @@
         self.assertFalse(team2.inTeam(parent_team))
         self.layer.txn.commit()
         self.layer.switchDbUser(self.dbuser)
-        script.deactivateTokens(self.ppa)
+        script.deactivateTokens()
         for person in persons2:
             self.assertDeactivated(tokens[person])
 
@@ -573,3 +575,34 @@
             "feedback@xxxxxxxxxxxxx\n\n"
             "Regards,\n"
             "The Launchpad team")
+
+    def test_getNewTokensSinceLastRun_no_previous_run(self):
+        """All valid tokens returned if there is no record of previous run."""
+        now = datetime.now(pytz.UTC)
+        tokens = self.setupDummyTokens()[1]
+
+        # If there is no record of the script running previously, all
+        # valid tokens are returned.
+        script = self.getScript()
+        self.assertContentEqual(tokens, script.getNewTokensSinceLastRun())
+
+    def test_getNewTokensSinceLastRun_only_those_since_last_run(self):
+        """Only tokens created since the last run are returned."""
+        now = datetime.now(pytz.UTC)
+        tokens = self.setupDummyTokens()[1]
+        getUtility(IScriptActivitySet).recordSuccess(
+            'generate-ppa-htaccess', now, now - timedelta(minutes=3))
+        removeSecurityProxy(tokens[0]).date_created = (
+            now - timedelta(minutes=4))
+
+        script = self.getScript()
+        self.assertContentEqual(tokens[1:], script.getNewTokensSinceLastRun())
+
+    def test_getNewTokensSinceLastRun_only_active_tokens(self):
+        """Only active tokens are returned."""
+        now = datetime.now(pytz.UTC)
+        tokens = self.setupDummyTokens()[1]
+        tokens[0].deactivate()
+
+        script = self.getScript()
+        self.assertContentEqual(tokens[1:], script.getNewTokensSinceLastRun())