← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] ~cjwatson/launchpad:stormify-expire-archive-files into launchpad:master

 

Colin Watson has proposed merging ~cjwatson/launchpad:stormify-expire-archive-files into launchpad:master.

Commit message:
Convert expire-archive-files to Storm

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/+git/launchpad/+merge/398348

Using Storm's query compiler makes it straightforward to consolidate the two nearly-identical queries in determineSourceExpirables and determineBinaryExpirables into a parameterized helper method, and will make it easier for future work to add more options to fine-tune the behaviour of expire-archive-files.

The generated queries are substantially unchanged, except that I removed the unnecessary joins of SourcePackageRelease/BinaryPackageRelease; on dogfood this shaves over 50% off the combined time for these two queries (26m42s → 12m30s), although that doesn't matter too much since this script is only run daily.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/launchpad:stormify-expire-archive-files into launchpad:master.
diff --git a/lib/lp/soyuz/scripts/expire_archive_files.py b/lib/lp/soyuz/scripts/expire_archive_files.py
index 420fc10..0f7b93f 100755
--- a/lib/lp/soyuz/scripts/expire_archive_files.py
+++ b/lib/lp/soyuz/scripts/expire_archive_files.py
@@ -3,11 +3,38 @@
 # Copyright 2009 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
+from __future__ import absolute_import, print_function, unicode_literals
+
+from storm.expr import (
+    And,
+    Cast,
+    Except,
+    Not,
+    Or,
+    Select,
+    )
+
+from lp.registry.model.person import Person
+from lp.services.database.constants import UTC_NOW
 from lp.services.database.interfaces import IStore
-from lp.services.database.sqlbase import sqlvalues
+from lp.services.database.stormexpr import (
+    Concatenate,
+    IsTrue,
+    )
+from lp.services.librarian.model import LibraryFileAlias
 from lp.services.scripts.base import LaunchpadCronScript
 from lp.soyuz.enums import ArchivePurpose
 from lp.soyuz.model.archive import Archive
+from lp.soyuz.model.binarypackagerelease import BinaryPackageRelease
+from lp.soyuz.model.files import (
+    BinaryPackageFile,
+    SourcePackageReleaseFile,
+    )
+from lp.soyuz.model.publishing import (
+    BinaryPackagePublishingHistory,
+    SourcePackagePublishingHistory,
+    )
+from lp.soyuz.model.sourcepackagerelease import SourcePackageRelease
 
 # PPA owners or particular PPAs that we never want to expire.
 BLACKLISTED_PPAS = """
@@ -62,125 +89,61 @@ class ArchiveExpirer(LaunchpadCronScript):
             help=("The number of days after which to expire binaries. "
                   "Must be specified."))
 
-    def determineSourceExpirables(self, num_days):
-        """Return expirable libraryfilealias IDs."""
-        stay_of_execution = '%d days' % num_days
+    def _determineExpirables(self, num_days, binary):
+        stay_of_execution = Cast('%d days' % num_days, 'interval')
         archive_types = (ArchivePurpose.PPA, ArchivePurpose.PARTNER)
 
+        LFA = LibraryFileAlias
+        if binary:
+            xPF = BinaryPackageFile
+            xPPH = BinaryPackagePublishingHistory
+            xPR_join = xPF.binarypackagerelease == xPPH.binarypackagereleaseID
+        else:
+            xPF = SourcePackageReleaseFile
+            xPPH = SourcePackagePublishingHistory
+            xPR_join = xPF.sourcepackagerelease == xPPH.sourcepackagereleaseID
+        full_archive_name = Concatenate(
+            Person.name, Concatenate('/', Archive.name))
+
         # The subquery here has to repeat the checks for privacy and
         # blacklisting on *other* publications that are also done in
         # the main loop for the archive being considered.
-        results = self.store.execute("""
-            SELECT lfa.id
-            FROM
-                LibraryFileAlias AS lfa,
-                Archive,
-                SourcePackageReleaseFile AS sprf,
-                SourcePackageRelease AS spr,
-                SourcePackagePublishingHistory AS spph
-            WHERE
-                lfa.id = sprf.libraryfile
-                AND spr.id = sprf.sourcepackagerelease
-                AND spph.sourcepackagerelease = spr.id
-                AND spph.dateremoved < (
-                    CURRENT_TIMESTAMP AT TIME ZONE 'UTC' -
-                    interval %(stay_of_execution)s)
-                AND spph.archive = archive.id
-                AND archive.purpose IN %(archive_types)s
-                AND lfa.expires IS NULL
-            EXCEPT
-            SELECT sprf.libraryfile
-            FROM
-                SourcePackageRelease AS spr,
-                SourcePackageReleaseFile AS sprf,
-                SourcePackagePublishingHistory AS spph,
-                Archive AS a,
-                Person AS p
-            WHERE
-                spr.id = sprf.sourcepackagerelease
-                AND spph.sourcepackagerelease = spr.id
-                AND spph.archive = a.id
-                AND p.id = a.owner
-                AND (
-                    ((p.name IN %(blacklist)s
-                      OR (p.name || '/' || a.name) IN %(blacklist)s)
-                     AND a.purpose = %(ppa)s)
-                    OR (a.private IS TRUE
-                        AND (p.name || '/' || a.name) NOT IN %(whitelist)s)
-                    OR a.purpose NOT IN %(archive_types)s
-                    OR dateremoved >
-                        CURRENT_TIMESTAMP AT TIME ZONE 'UTC' -
-                        interval %(stay_of_execution)s
-                    OR dateremoved IS NULL);
-            """ % sqlvalues(
-                stay_of_execution=stay_of_execution,
-                archive_types=archive_types,
-                blacklist=self.blacklist,
-                whitelist=self.whitelist,
-                ppa=ArchivePurpose.PPA))
-
-        lfa_ids = results.get_all()
-        return lfa_ids
+        eligible = Select(
+            LFA.id,
+            where=And(
+                xPF.libraryfile == LFA.id,
+                xPR_join,
+                xPPH.dateremoved < UTC_NOW - stay_of_execution,
+                xPPH.archive == Archive.id,
+                Archive.purpose.is_in(archive_types),
+                LFA.expires == None))
+        denied = Select(
+            xPF.libraryfileID,
+            where=And(
+                xPR_join,
+                xPPH.archive == Archive.id,
+                Archive.owner == Person.id,
+                Or(
+                    And(
+                        Or(
+                            Person.name.is_in(self.blacklist),
+                            full_archive_name.is_in(self.blacklist)),
+                        Archive.purpose == ArchivePurpose.PPA),
+                    And(
+                        IsTrue(Archive._private),
+                        Not(full_archive_name.is_in(self.whitelist))),
+                    Not(Archive.purpose.is_in(archive_types)),
+                    xPPH.dateremoved > UTC_NOW - stay_of_execution,
+                    xPPH.dateremoved == None)))
+        return list(self.store.execute(Except(eligible, denied)))
 
-    def determineBinaryExpirables(self, num_days):
+    def determineSourceExpirables(self, num_days):
         """Return expirable libraryfilealias IDs."""
-        stay_of_execution = '%d days' % num_days
-        archive_types = (ArchivePurpose.PPA, ArchivePurpose.PARTNER)
+        return self._determineExpirables(num_days=num_days, binary=False)
 
-        # The subquery here has to repeat the checks for privacy and
-        # blacklisting on *other* publications that are also done in
-        # the main loop for the archive being considered.
-        results = self.store.execute("""
-            SELECT lfa.id
-            FROM
-                LibraryFileAlias AS lfa,
-                Archive,
-                BinaryPackageFile AS bpf,
-                BinaryPackageRelease AS bpr,
-                BinaryPackagePublishingHistory AS bpph
-            WHERE
-                lfa.id = bpf.libraryfile
-                AND bpr.id = bpf.binarypackagerelease
-                AND bpph.binarypackagerelease = bpr.id
-                AND bpph.dateremoved < (
-                    CURRENT_TIMESTAMP AT TIME ZONE 'UTC' -
-                    interval %(stay_of_execution)s)
-                AND bpph.archive = archive.id
-                AND archive.purpose IN %(archive_types)s
-                AND lfa.expires IS NULL
-            EXCEPT
-            SELECT bpf.libraryfile
-            FROM
-                BinaryPackageRelease AS bpr,
-                BinaryPackageFile AS bpf,
-                BinaryPackagePublishingHistory AS bpph,
-                Archive AS a,
-                Person AS p
-            WHERE
-                bpr.id = bpf.binarypackagerelease
-                AND bpph.binarypackagerelease = bpr.id
-                AND bpph.archive = a.id
-                AND p.id = a.owner
-                AND (
-                    ((p.name IN %(blacklist)s
-                      OR (p.name || '/' || a.name) IN %(blacklist)s)
-                     AND a.purpose = %(ppa)s)
-                    OR (a.private IS TRUE
-                        AND (p.name || '/' || a.name) NOT IN %(whitelist)s)
-                    OR a.purpose NOT IN %(archive_types)s
-                    OR dateremoved > (
-                        CURRENT_TIMESTAMP AT TIME ZONE 'UTC' -
-                        interval %(stay_of_execution)s)
-                    OR dateremoved IS NULL)
-            """ % sqlvalues(
-                stay_of_execution=stay_of_execution,
-                archive_types=archive_types,
-                blacklist=self.blacklist,
-                whitelist=self.whitelist,
-                ppa=ArchivePurpose.PPA))
-
-        lfa_ids = results.get_all()
-        return lfa_ids
+    def determineBinaryExpirables(self, num_days):
+        """Return expirable libraryfilealias IDs."""
+        return self._determineExpirables(num_days=num_days, binary=True)
 
     def main(self):
         self.logger.info('Starting the PPA binary expiration')