← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~jtv/launchpad/getBinariesForDomination-bulk into lp:launchpad

 

Jeroen T. Vermeulen has proposed merging lp:~jtv/launchpad/getBinariesForDomination-bulk into lp:launchpad.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  Bug #884649 in Launchpad itself: "Ubuntu publisher is taking more than an hour to complete"
  https://bugs.launchpad.net/launchpad/+bug/884649

For more details, see:
https://code.launchpad.net/~jtv/launchpad/getBinariesForDomination-bulk/+merge/81018

This is a little side-note to some Dominator optimization work I'm doing for bug 884649: bulk-fetch BinaryPackageReleases when querying for BinaryPackagePublishingHistory records.

There is no functional change, so existing tests apply unchanged.

As far as I can tell, the only performance downside to this is that the query returns more data; nothing else about the query changes, and there should be very little duplication between the extra objects (any duplication only happens, as far as I can see, when a package moves from one component to another or something relatively rare along those lines).

Existing tests:
{{{
./bin/test -vvc lp.archivepublisher.tests.test_dominator
}}}

No lint.


Jeroen
-- 
https://code.launchpad.net/~jtv/launchpad/getBinariesForDomination-bulk/+merge/81018
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~jtv/launchpad/getBinariesForDomination-bulk into lp:launchpad.
=== modified file 'lib/lp/archivepublisher/domination.py'
--- lib/lp/archivepublisher/domination.py	2011-10-18 11:56:09 +0000
+++ lib/lp/archivepublisher/domination.py	2011-11-02 13:33:36 +0000
@@ -53,6 +53,7 @@
 __all__ = ['Dominator']
 
 from datetime import timedelta
+from operator import itemgetter
 
 import apt_pkg
 from storm.expr import (
@@ -67,6 +68,9 @@
     flush_database_updates,
     sqlvalues,
     )
+from canonical.launchpad.components.decoratedresultset import (
+    DecoratedResultSet,
+    )
 from canonical.launchpad.interfaces.lpstorm import IStore
 from lp.registry.model.sourcepackagename import SourcePackageName
 from lp.services.database.bulk import load_related
@@ -258,7 +262,8 @@
         # Go through publications from latest version to oldest.  This
         # makes it easy to figure out which release superseded which:
         # the dominant is always the oldest live release that is newer
-        # than the one being superseded.
+        # than the one being superseded.  In this loop, that means the
+        # dominant is always the last live publication we saw.
         publications = sorted(
             publications, cmp=generalization.compare, reverse=True)
 
@@ -272,25 +277,29 @@
         for pub in publications:
             version = generalization.getPackageVersion(pub)
             # There should never be two published releases with the same
-            # version.  So this comparison is really a string
-            # comparison, not a version comparison: if the versions are
-            # equal by either measure, they're from the same release.
-            if dominant_version is not None and version == dominant_version:
+            # version.  So it doesn't matter whether this comparison is
+            # really a string comparison or a version comparison: if the
+            # versions are equal by either measure, they're from the same
+            # release.
+            if version == dominant_version:
                 # This publication is for a live version, but has been
                 # superseded by a newer publication of the same version.
                 # Supersede it.
                 pub.supersede(current_dominant, logger=self.logger)
                 self.logger.debug2(
                     "Superseding older publication for version %s.", version)
-            elif (version in live_versions or
-                  (not generalization.is_source and
-                   not self._checkArchIndep(pub))):
+            elif version in live_versions:
                 # This publication stays active; if any publications
                 # that follow right after this are to be superseded,
                 # this is the release that they are superseded by.
                 current_dominant = pub
                 dominant_version = version
                 self.logger.debug2("Keeping version %s.", version)
+            elif not (generalization.is_source or self._checkArchIndep(pub)):
+                # As a special case, we keep this version live as well.
+                current_dominant = pub
+                dominant_version = version
+                self.logger.debug2("Keeping version %s.", version)
             elif current_dominant is None:
                 # This publication is no longer live, but there is no
                 # newer version to supersede it either.  Therefore it
@@ -442,72 +451,90 @@
             # always equals to "scheduleddeletiondate - quarantine".
             pub_record.datemadepending = UTC_NOW
 
+    def findBinariesForDomination(self, distroarchseries, pocket):
+        """Find binary publications that need dominating.
+
+        This is only for traditional domination, where the latest published
+        publication is always kept published.  It will ignore publications
+        that have no other publications competing for the same binary package.
+        """
+        # Avoid circular imports.
+        from lp.soyuz.model.publishing import BinaryPackagePublishingHistory
+
+        bpph_location_clauses = [
+            BinaryPackagePublishingHistory.status ==
+                PackagePublishingStatus.PUBLISHED,
+            BinaryPackagePublishingHistory.distroarchseries ==
+                distroarchseries,
+            BinaryPackagePublishingHistory.archive == self.archive,
+            BinaryPackagePublishingHistory.pocket == pocket,
+            ]
+        candidate_binary_names = Select(
+            BinaryPackageName.id,
+            And(
+                BinaryPackageRelease.binarypackagenameID ==
+                    BinaryPackageName.id,
+                BinaryPackagePublishingHistory.binarypackagereleaseID ==
+                    BinaryPackageRelease.id,
+                bpph_location_clauses,
+            ),
+            group_by=BinaryPackageName.id,
+            having=Count(BinaryPackagePublishingHistory.id) > 1)
+        main_clauses = [
+            BinaryPackageRelease.id ==
+                BinaryPackagePublishingHistory.binarypackagereleaseID,
+            BinaryPackageRelease.binarypackagenameID.is_in(
+                candidate_binary_names),
+            BinaryPackageRelease.binpackageformat !=
+                BinaryPackageFormat.DDEB,
+            ]
+        main_clauses.extend(bpph_location_clauses)
+
+        store = IStore(BinaryPackagePublishingHistory)
+
+        # We're going to access the BPRs as well.  Since we make the
+        # database look them up anyway, and since there won't be many
+        # duplications among them, load them alongside the publications.
+        # We'll also want their BinaryPackageNames, but adding those to
+        # the join would complicate the query.
+        query = store.find(
+            (BinaryPackagePublishingHistory, BinaryPackageRelease),
+            *main_clauses)
+        return DecoratedResultSet(query, itemgetter(0))
+
     def dominateBinaries(self, distroseries, pocket):
         """Perform domination on binary package publications.
 
         Dominates binaries, restricted to `distroseries`, `pocket`, and
         `self.archive`.
         """
-        # Avoid circular imports.
-        from lp.soyuz.model.publishing import BinaryPackagePublishingHistory
-
         generalization = GeneralizedPublication(is_source=False)
 
         for distroarchseries in distroseries.architectures:
             self.logger.info(
                 "Performing domination across %s/%s (%s)",
-                distroseries.name, pocket.title,
+                distroarchseries.distroseries.name, pocket.title,
                 distroarchseries.architecturetag)
 
-            bpph_location_clauses = [
-                BinaryPackagePublishingHistory.status ==
-                    PackagePublishingStatus.PUBLISHED,
-                BinaryPackagePublishingHistory.distroarchseries ==
-                    distroarchseries,
-                BinaryPackagePublishingHistory.archive == self.archive,
-                BinaryPackagePublishingHistory.pocket == pocket,
-                ]
-            candidate_binary_names = Select(
-                BinaryPackageName.id,
-                And(
-                    BinaryPackageRelease.binarypackagenameID ==
-                        BinaryPackageName.id,
-                    BinaryPackagePublishingHistory.binarypackagereleaseID ==
-                        BinaryPackageRelease.id,
-                    bpph_location_clauses,
-                ),
-                group_by=BinaryPackageName.id,
-                having=Count(BinaryPackagePublishingHistory.id) > 1)
-            main_clauses = [
-                BinaryPackagePublishingHistory,
-                BinaryPackageRelease.id ==
-                    BinaryPackagePublishingHistory.binarypackagereleaseID,
-                BinaryPackageRelease.binarypackagenameID.is_in(
-                    candidate_binary_names),
-                BinaryPackageRelease.binpackageformat !=
-                    BinaryPackageFormat.DDEB,
-                ]
-            main_clauses.extend(bpph_location_clauses)
-
-            def do_domination(pass2_msg=""):
-                msg = "binaries..." + pass2_msg
-                self.logger.info("Finding %s" % msg)
-                bins = IStore(
-                    BinaryPackagePublishingHistory).find(*main_clauses)
-                self.logger.info("Dominating %s" % msg)
-                sorted_packages = self._sortPackages(bins, generalization)
-                self._dominatePublications(sorted_packages, generalization)
-
-            do_domination()
-
-            # We need to make a second pass to cover the cases where:
-            #  * arch-specific binaries were not all dominated before arch-all
-            #    ones that depend on them
-            #  * An arch-all turned into an arch-specific, or vice-versa
-            #  * A package is completely schizophrenic and changes all of
-            #    its binaries between arch-all and arch-any (apparently
-            #    occurs sometimes!)
-            do_domination("(2nd pass)")
+            self.logger.info("Finding binaries...")
+            bins = self.findBinariesForDomination(distroarchseries, pocket)
+            sorted_packages = self._sortPackages(bins, generalization)
+            self.logger.info("Dominating binaries...")
+            self._dominatePublications(sorted_packages, generalization)
+
+        # We need to make a second pass to cover the cases where:
+        #  * arch-specific binaries were not all dominated before arch-all
+        #    ones that depend on them
+        #  * An arch-all turned into an arch-specific, or vice-versa
+        #  * A package is completely schizophrenic and changes all of
+        #    its binaries between arch-all and arch-any (apparently
+        #    occurs sometimes!)
+        for distroarchseries in distroseries.architectures:
+            self.logger.info("Finding binaries...(2nd pass)")
+            bins = self.findBinariesForDomination(distroarchseries, pocket)
+            sorted_packages = self._sortPackages(bins, generalization)
+            self.logger.info("Dominating binaries...(2nd pass)")
+            self._dominatePublications(sorted_packages, generalization)
 
     def _composeActiveSourcePubsCondition(self, distroseries, pocket):
         """Compose ORM condition for restricting relevant source pubs."""
@@ -522,21 +549,11 @@
             SourcePackagePublishingHistory.pocket == pocket,
             )
 
-    def dominateSources(self, distroseries, pocket):
-        """Perform domination on source package publications.
-
-        Dominates sources, restricted to `distroseries`, `pocket`, and
-        `self.archive`.
-        """
+    def findSourcesForDomination(self, distroseries, pocket):
+        """Find binary publications that need dominating."""
         # Avoid circular imports.
         from lp.soyuz.model.publishing import SourcePackagePublishingHistory
 
-        generalization = GeneralizedPublication(is_source=True)
-
-        self.logger.debug(
-            "Performing domination across %s/%s (Source)",
-            distroseries.name, pocket.title)
-
         spph_location_clauses = self._composeActiveSourcePubsCondition(
             distroseries, pocket)
         having_multiple_active_publications = (
@@ -546,12 +563,33 @@
             And(join_spph_spr(), join_spr_spn(), spph_location_clauses),
             group_by=SourcePackageName.id,
             having=having_multiple_active_publications)
-        sources = IStore(SourcePackagePublishingHistory).find(
-            SourcePackagePublishingHistory,
+
+        # We'll also access the SourcePackageReleases associated with
+        # the publications we find.  Since they're in the join anyway,
+        # load them alongside the publications.
+        # Actually we'll also want the SourcePackageNames, but adding
+        # those to the (outer) query would complicate it, and
+        # potentially slow it down.
+        query = IStore(SourcePackagePublishingHistory).find(
+            (SourcePackagePublishingHistory, SourcePackageRelease),
             join_spph_spr(),
             SourcePackageRelease.sourcepackagenameID.is_in(
                 candidate_source_names),
             spph_location_clauses)
+        return DecoratedResultSet(query, itemgetter(0))
+
+    def dominateSources(self, distroseries, pocket):
+        """Perform domination on source package publications.
+
+        Dominates sources, restricted to `distroseries`, `pocket`, and
+        `self.archive`.
+        """
+        self.logger.debug(
+            "Performing domination across %s/%s (Source)",
+            distroseries.name, pocket.title)
+
+        generalization = GeneralizedPublication(is_source=True)
+        sources = self.findSourcesForDomination(distroseries, pocket)
 
         self.logger.debug("Dominating sources...")
         self._dominatePublications(

=== modified file 'lib/lp/archivepublisher/tests/test_dominator.py'
--- lib/lp/archivepublisher/tests/test_dominator.py	2011-10-27 11:36:13 +0000
+++ lib/lp/archivepublisher/tests/test_dominator.py	2011-11-02 13:33:36 +0000
@@ -361,7 +361,10 @@
 def make_spphs_for_versions(factory, versions):
     """Create publication records for each of `versions`.
 
-    They records are created in the same order in which they are specified.
+    All these publications will be in the same source package, archive,
+    distroseries, and pocket.  They will all be in Published status.
+
+    The records are created in the same order in which they are specified.
     Make the order irregular to prove that version ordering is not a
     coincidence of object creation order etc.
 
@@ -371,6 +374,7 @@
     spn = factory.makeSourcePackageName()
     distroseries = factory.makeDistroSeries()
     pocket = factory.getAnyPocket()
+    archive = distroseries.main_archive
     sprs = [
         factory.makeSourcePackageRelease(
             sourcepackagename=spn, version=version)
@@ -378,11 +382,34 @@
     return [
         factory.makeSourcePackagePublishingHistory(
             distroseries=distroseries, pocket=pocket,
-            sourcepackagerelease=spr,
+            sourcepackagerelease=spr, archive=archive,
             status=PackagePublishingStatus.PUBLISHED)
         for spr in sprs]
 
 
+def make_bpphs_for_versions(factory, versions):
+    """Create publication records for each of `versions`.
+
+    All these publications will be in the same binary package, source
+    package, archive, distroarchseries, and pocket.  They will all be in
+    Published status.
+    """
+    bpn = factory.makeBinaryPackageName()
+    spn = factory.makeSourcePackageName()
+    das = factory.makeDistroArchSeries()
+    archive = das.distroseries.main_archive
+    pocket = factory.getAnyPocket()
+    bprs = [
+        factory.makeBinaryPackageRelease(binarypackagename=bpn)
+        for version in versions]
+    return [
+        factory.makeBinaryPackagePublishingHistory(
+            binarypackagerelease=bpr, binarypackagename=bpn,
+            distroarchseries=das, pocket=pocket, archive=archive,
+            sourcepackagename=spn, status=PackagePublishingStatus.PUBLISHED)
+        for bpr in bprs]
+
+
 def list_source_versions(spphs):
     """Extract the versions from `spphs` as a list, in the same order."""
     return [spph.sourcepackagerelease.version for spph in spphs]
@@ -921,3 +948,144 @@
             [],
             dominator.findPublishedSPPHs(
                 spph.distroseries, spph.pocket, other_package.name))
+
+    def test_findBinariesForDomination_finds_published_publications(self):
+        bpphs = make_bpphs_for_versions(self.factory, ['1.0', '1.1'])
+        dominator = self.makeDominator(bpphs)
+        self.assertContentEqual(
+            bpphs, dominator.findBinariesForDomination(
+                bpphs[0].distroarchseries, bpphs[0].pocket))
+
+    def test_findBinariesForDomination_skips_single_pub_packages(self):
+        # The domination algorithm that uses findBinariesForDomination
+        # always keeps the latest version live.  Thus, a single
+        # publication isn't worth dominating.  findBinariesForDomination
+        # won't return it.
+        bpphs = make_bpphs_for_versions(self.factory, ['1.0'])
+        dominator = self.makeDominator(bpphs)
+        self.assertContentEqual(
+            [], dominator.findBinariesForDomination(
+                bpphs[0].distroarchseries, bpphs[0].pocket))
+
+    def test_findBinariesForDomination_ignores_other_distroseries(self):
+        bpphs = make_bpphs_for_versions(self.factory, ['1.0', '1.1'])
+        dominator = self.makeDominator(bpphs)
+        das = bpphs[0].distroarchseries
+        other_series = self.factory.makeDistroSeries(
+            distribution=das.distroseries.distribution)
+        other_das = self.factory.makeDistroArchSeries(
+            distroseries=other_series, architecturetag=das.architecturetag,
+            processorfamily=das.processorfamily)
+        self.assertContentEqual(
+            [], dominator.findBinariesForDomination(
+                other_das, bpphs[0].pocket))
+
+    def test_findBinariesForDomination_ignores_other_architectures(self):
+        bpphs = make_bpphs_for_versions(self.factory, ['1.0', '1.1'])
+        dominator = self.makeDominator(bpphs)
+        other_das = self.factory.makeDistroArchSeries(
+            distroseries=bpphs[0].distroseries)
+        self.assertContentEqual(
+            [], dominator.findBinariesForDomination(
+                other_das, bpphs[0].pocket))
+
+    def test_findBinariesForDomination_ignores_other_archive(self):
+        bpphs = make_bpphs_for_versions(self.factory, ['1.0', '1.1'])
+        dominator = self.makeDominator(bpphs)
+        dominator.archive = self.factory.makeArchive()
+        self.assertContentEqual(
+            [], dominator.findBinariesForDomination(
+                bpphs[0].distroarchseries, bpphs[0].pocket))
+
+    def test_findBinariesForDomination_ignores_other_pocket(self):
+        bpphs = make_bpphs_for_versions(self.factory, ['1.0', '1.1'])
+        dominator = self.makeDominator(bpphs)
+        for bpph in bpphs:
+            removeSecurityProxy(bpph).pocket = PackagePublishingPocket.UPDATES
+        self.assertContentEqual(
+            [], dominator.findBinariesForDomination(
+                bpphs[0].distroarchseries, PackagePublishingPocket.SECURITY))
+
+    def test_findBinariesForDomination_ignores_other_status(self):
+        # If we have one BPPH for each possible status, plus one
+        # Published one to stop findBinariesForDomination from skipping
+        # the package, findBinariesForDomination returns only the
+        # Published ones.
+        versions = [
+            '1.%d' % self.factory.getUniqueInteger()
+            for status in PackagePublishingStatus.items] + ['0.9']
+        bpphs = make_bpphs_for_versions(self.factory, versions)
+        dominator = self.makeDominator(bpphs)
+
+        for bpph, status in zip(bpphs, PackagePublishingStatus.items):
+            bpph.status = status
+
+        # These are the Published publications.  The other ones will all
+        # be ignored.
+        published_bpphs = [
+            bpph
+            for bpph in bpphs
+                if bpph.status == PackagePublishingStatus.PUBLISHED]
+
+        self.assertContentEqual(
+            published_bpphs,
+            dominator.findBinariesForDomination(
+                bpphs[0].distroarchseries, bpphs[0].pocket))
+
+    def test_findSourcesForDomination_finds_published_publications(self):
+        spphs = make_spphs_for_versions(self.factory, ['2.0', '2.1'])
+        dominator = self.makeDominator(spphs)
+        self.assertContentEqual(
+            spphs, dominator.findSourcesForDomination(
+                spphs[0].distroseries, spphs[0].pocket))
+
+    def test_findSourcesForDomination_skips_single_pub_packages(self):
+        # The domination algorithm that uses findSourcesForDomination
+        # always keeps the latest version live.  Thus, a single
+        # publication isn't worth dominating.  findSourcesForDomination
+        # won't return it.
+        spphs = make_spphs_for_versions(self.factory, ['2.0'])
+        dominator = self.makeDominator(spphs)
+        self.assertContentEqual(
+            [], dominator.findSourcesForDomination(
+                spphs[0].distroseries, spphs[0].pocket))
+
+    def test_findSourcesForDomination_ignores_other_distroseries(self):
+        spphs = make_spphs_for_versions(self.factory, ['2.0', '2.1'])
+        dominator = self.makeDominator(spphs)
+        other_series = self.factory.makeDistroSeries(
+            distribution=spphs[0].distroseries.distribution)
+        self.assertContentEqual(
+            [], dominator.findSourcesForDomination(
+                other_series, spphs[0].pocket))
+
+    def test_findSourcesForDomination_ignores_other_pocket(self):
+        spphs = make_spphs_for_versions(self.factory, ['2.0', '2.1'])
+        dominator = self.makeDominator(spphs)
+        for spph in spphs:
+            removeSecurityProxy(spph).pocket = PackagePublishingPocket.UPDATES
+        self.assertContentEqual(
+            [], dominator.findSourcesForDomination(
+                spphs[0].distroseries, PackagePublishingPocket.SECURITY))
+
+    def test_findSourcesForDomination_ignores_other_status(self):
+        versions = [
+            '1.%d' % self.factory.getUniqueInteger()
+            for status in PackagePublishingStatus.items] + ['0.9']
+        spphs = make_spphs_for_versions(self.factory, versions)
+        dominator = self.makeDominator(spphs)
+
+        for spph, status in zip(spphs, PackagePublishingStatus.items):
+            spph.status = status
+
+        # These are the Published publications.  The other ones will all
+        # be ignored.
+        published_spphs = [
+            spph
+            for spph in spphs
+                if spph.status == PackagePublishingStatus.PUBLISHED]
+
+        self.assertContentEqual(
+            published_spphs,
+            dominator.findSourcesForDomination(
+                spphs[0].distroseries, spphs[0].pocket))

=== modified file 'lib/lp/soyuz/model/publishing.py'
--- lib/lp/soyuz/model/publishing.py	2011-10-23 02:58:56 +0000
+++ lib/lp/soyuz/model/publishing.py	2011-11-02 13:33:36 +0000
@@ -33,7 +33,6 @@
     Desc,
     LeftJoin,
     Or,
-    Select,
     Sum,
     )
 from storm.store import Store
@@ -1154,19 +1153,10 @@
         # Avoid circular wotsits.
         from lp.soyuz.model.binarypackagebuild import BinaryPackageBuild
         from lp.soyuz.model.distroarchseries import DistroArchSeries
-        from lp.soyuz.model.sourcepackagerelease import SourcePackageRelease
-        source_select = Select(
-            SourcePackageRelease.id,
-            And(
-                BinaryPackageBuild.source_package_release_id ==
-                    SourcePackageRelease.id,
-                BinaryPackageRelease.build == BinaryPackageBuild.id,
-                self.binarypackagereleaseID == BinaryPackageRelease.id,
-            ))
+
         pubs = [
             BinaryPackageBuild.source_package_release_id ==
-                SourcePackageRelease.id,
-            SourcePackageRelease.id.is_in(source_select),
+                self.binarypackagerelease.build.source_package_release_id,
             BinaryPackageRelease.build == BinaryPackageBuild.id,
             BinaryPackagePublishingHistory.binarypackagereleaseID ==
                 BinaryPackageRelease.id,