launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #05370
[Merge] lp:~jtv/launchpad/getBinariesForDomination-bulk into lp:launchpad
Jeroen T. Vermeulen has proposed merging lp:~jtv/launchpad/getBinariesForDomination-bulk into lp:launchpad.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
Related bugs:
Bug #884649 in Launchpad itself: "Ubuntu publisher is taking more than an hour to complete"
https://bugs.launchpad.net/launchpad/+bug/884649
For more details, see:
https://code.launchpad.net/~jtv/launchpad/getBinariesForDomination-bulk/+merge/81018
This is a little side-note to some Dominator optimization work I'm doing for bug 884649: bulk-fetch BinaryPackageReleases when querying for BinaryPackagePublishingHistory records.
There is no functional change, so existing tests apply unchanged.
As far as I can tell, the only performance downside to this is that the query returns more data; nothing else about the query changes, and there should be very little duplication between the extra objects (any duplication only happens, as far as I can see, when a package moves from one component to another or something relatively rare along those lines).
Existing tests:
{{{
./bin/test -vvc lp.archivepublisher.tests.test_dominator
}}}
No lint.
Jeroen
--
https://code.launchpad.net/~jtv/launchpad/getBinariesForDomination-bulk/+merge/81018
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~jtv/launchpad/getBinariesForDomination-bulk into lp:launchpad.
=== modified file 'lib/lp/archivepublisher/domination.py'
--- lib/lp/archivepublisher/domination.py 2011-10-18 11:56:09 +0000
+++ lib/lp/archivepublisher/domination.py 2011-11-02 13:33:36 +0000
@@ -53,6 +53,7 @@
__all__ = ['Dominator']
from datetime import timedelta
+from operator import itemgetter
import apt_pkg
from storm.expr import (
@@ -67,6 +68,9 @@
flush_database_updates,
sqlvalues,
)
+from canonical.launchpad.components.decoratedresultset import (
+ DecoratedResultSet,
+ )
from canonical.launchpad.interfaces.lpstorm import IStore
from lp.registry.model.sourcepackagename import SourcePackageName
from lp.services.database.bulk import load_related
@@ -258,7 +262,8 @@
# Go through publications from latest version to oldest. This
# makes it easy to figure out which release superseded which:
# the dominant is always the oldest live release that is newer
- # than the one being superseded.
+ # than the one being superseded. In this loop, that means the
+ # dominant is always the last live publication we saw.
publications = sorted(
publications, cmp=generalization.compare, reverse=True)
@@ -272,25 +277,29 @@
for pub in publications:
version = generalization.getPackageVersion(pub)
# There should never be two published releases with the same
- # version. So this comparison is really a string
- # comparison, not a version comparison: if the versions are
- # equal by either measure, they're from the same release.
- if dominant_version is not None and version == dominant_version:
+ # version. So it doesn't matter whether this comparison is
+ # really a string comparison or a version comparison: if the
+ # versions are equal by either measure, they're from the same
+ # release.
+ if version == dominant_version:
# This publication is for a live version, but has been
# superseded by a newer publication of the same version.
# Supersede it.
pub.supersede(current_dominant, logger=self.logger)
self.logger.debug2(
"Superseding older publication for version %s.", version)
- elif (version in live_versions or
- (not generalization.is_source and
- not self._checkArchIndep(pub))):
+ elif version in live_versions:
# This publication stays active; if any publications
# that follow right after this are to be superseded,
# this is the release that they are superseded by.
current_dominant = pub
dominant_version = version
self.logger.debug2("Keeping version %s.", version)
+ elif not (generalization.is_source or self._checkArchIndep(pub)):
+ # As a special case, we keep this version live as well.
+ current_dominant = pub
+ dominant_version = version
+ self.logger.debug2("Keeping version %s.", version)
elif current_dominant is None:
# This publication is no longer live, but there is no
# newer version to supersede it either. Therefore it
@@ -442,72 +451,90 @@
# always equals to "scheduleddeletiondate - quarantine".
pub_record.datemadepending = UTC_NOW
+ def findBinariesForDomination(self, distroarchseries, pocket):
+ """Find binary publications that need dominating.
+
+ This is only for traditional domination, where the latest published
+ publication is always kept published. It will ignore publications
+ that have no other publications competing for the same binary package.
+ """
+ # Avoid circular imports.
+ from lp.soyuz.model.publishing import BinaryPackagePublishingHistory
+
+ bpph_location_clauses = [
+ BinaryPackagePublishingHistory.status ==
+ PackagePublishingStatus.PUBLISHED,
+ BinaryPackagePublishingHistory.distroarchseries ==
+ distroarchseries,
+ BinaryPackagePublishingHistory.archive == self.archive,
+ BinaryPackagePublishingHistory.pocket == pocket,
+ ]
+ candidate_binary_names = Select(
+ BinaryPackageName.id,
+ And(
+ BinaryPackageRelease.binarypackagenameID ==
+ BinaryPackageName.id,
+ BinaryPackagePublishingHistory.binarypackagereleaseID ==
+ BinaryPackageRelease.id,
+ bpph_location_clauses,
+ ),
+ group_by=BinaryPackageName.id,
+ having=Count(BinaryPackagePublishingHistory.id) > 1)
+ main_clauses = [
+ BinaryPackageRelease.id ==
+ BinaryPackagePublishingHistory.binarypackagereleaseID,
+ BinaryPackageRelease.binarypackagenameID.is_in(
+ candidate_binary_names),
+ BinaryPackageRelease.binpackageformat !=
+ BinaryPackageFormat.DDEB,
+ ]
+ main_clauses.extend(bpph_location_clauses)
+
+ store = IStore(BinaryPackagePublishingHistory)
+
+ # We're going to access the BPRs as well. Since we make the
+ # database look them up anyway, and since there won't be many
+ # duplications among them, load them alongside the publications.
+ # We'll also want their BinaryPackageNames, but adding those to
+ # the join would complicate the query.
+ query = store.find(
+ (BinaryPackagePublishingHistory, BinaryPackageRelease),
+ *main_clauses)
+ return DecoratedResultSet(query, itemgetter(0))
+
def dominateBinaries(self, distroseries, pocket):
"""Perform domination on binary package publications.
Dominates binaries, restricted to `distroseries`, `pocket`, and
`self.archive`.
"""
- # Avoid circular imports.
- from lp.soyuz.model.publishing import BinaryPackagePublishingHistory
-
generalization = GeneralizedPublication(is_source=False)
for distroarchseries in distroseries.architectures:
self.logger.info(
"Performing domination across %s/%s (%s)",
- distroseries.name, pocket.title,
+ distroarchseries.distroseries.name, pocket.title,
distroarchseries.architecturetag)
- bpph_location_clauses = [
- BinaryPackagePublishingHistory.status ==
- PackagePublishingStatus.PUBLISHED,
- BinaryPackagePublishingHistory.distroarchseries ==
- distroarchseries,
- BinaryPackagePublishingHistory.archive == self.archive,
- BinaryPackagePublishingHistory.pocket == pocket,
- ]
- candidate_binary_names = Select(
- BinaryPackageName.id,
- And(
- BinaryPackageRelease.binarypackagenameID ==
- BinaryPackageName.id,
- BinaryPackagePublishingHistory.binarypackagereleaseID ==
- BinaryPackageRelease.id,
- bpph_location_clauses,
- ),
- group_by=BinaryPackageName.id,
- having=Count(BinaryPackagePublishingHistory.id) > 1)
- main_clauses = [
- BinaryPackagePublishingHistory,
- BinaryPackageRelease.id ==
- BinaryPackagePublishingHistory.binarypackagereleaseID,
- BinaryPackageRelease.binarypackagenameID.is_in(
- candidate_binary_names),
- BinaryPackageRelease.binpackageformat !=
- BinaryPackageFormat.DDEB,
- ]
- main_clauses.extend(bpph_location_clauses)
-
- def do_domination(pass2_msg=""):
- msg = "binaries..." + pass2_msg
- self.logger.info("Finding %s" % msg)
- bins = IStore(
- BinaryPackagePublishingHistory).find(*main_clauses)
- self.logger.info("Dominating %s" % msg)
- sorted_packages = self._sortPackages(bins, generalization)
- self._dominatePublications(sorted_packages, generalization)
-
- do_domination()
-
- # We need to make a second pass to cover the cases where:
- # * arch-specific binaries were not all dominated before arch-all
- # ones that depend on them
- # * An arch-all turned into an arch-specific, or vice-versa
- # * A package is completely schizophrenic and changes all of
- # its binaries between arch-all and arch-any (apparently
- # occurs sometimes!)
- do_domination("(2nd pass)")
+ self.logger.info("Finding binaries...")
+ bins = self.findBinariesForDomination(distroarchseries, pocket)
+ sorted_packages = self._sortPackages(bins, generalization)
+ self.logger.info("Dominating binaries...")
+ self._dominatePublications(sorted_packages, generalization)
+
+ # We need to make a second pass to cover the cases where:
+ # * arch-specific binaries were not all dominated before arch-all
+ # ones that depend on them
+ # * An arch-all turned into an arch-specific, or vice-versa
+ # * A package is completely schizophrenic and changes all of
+ # its binaries between arch-all and arch-any (apparently
+ # occurs sometimes!)
+ for distroarchseries in distroseries.architectures:
+ self.logger.info("Finding binaries...(2nd pass)")
+ bins = self.findBinariesForDomination(distroarchseries, pocket)
+ sorted_packages = self._sortPackages(bins, generalization)
+ self.logger.info("Dominating binaries...(2nd pass)")
+ self._dominatePublications(sorted_packages, generalization)
def _composeActiveSourcePubsCondition(self, distroseries, pocket):
"""Compose ORM condition for restricting relevant source pubs."""
@@ -522,21 +549,11 @@
SourcePackagePublishingHistory.pocket == pocket,
)
- def dominateSources(self, distroseries, pocket):
- """Perform domination on source package publications.
-
- Dominates sources, restricted to `distroseries`, `pocket`, and
- `self.archive`.
- """
+ def findSourcesForDomination(self, distroseries, pocket):
+ """Find binary publications that need dominating."""
# Avoid circular imports.
from lp.soyuz.model.publishing import SourcePackagePublishingHistory
- generalization = GeneralizedPublication(is_source=True)
-
- self.logger.debug(
- "Performing domination across %s/%s (Source)",
- distroseries.name, pocket.title)
-
spph_location_clauses = self._composeActiveSourcePubsCondition(
distroseries, pocket)
having_multiple_active_publications = (
@@ -546,12 +563,33 @@
And(join_spph_spr(), join_spr_spn(), spph_location_clauses),
group_by=SourcePackageName.id,
having=having_multiple_active_publications)
- sources = IStore(SourcePackagePublishingHistory).find(
- SourcePackagePublishingHistory,
+
+ # We'll also access the SourcePackageReleases associated with
+ # the publications we find. Since they're in the join anyway,
+ # load them alongside the publications.
+ # Actually we'll also want the SourcePackageNames, but adding
+ # those to the (outer) query would complicate it, and
+ # potentially slow it down.
+ query = IStore(SourcePackagePublishingHistory).find(
+ (SourcePackagePublishingHistory, SourcePackageRelease),
join_spph_spr(),
SourcePackageRelease.sourcepackagenameID.is_in(
candidate_source_names),
spph_location_clauses)
+ return DecoratedResultSet(query, itemgetter(0))
+
+ def dominateSources(self, distroseries, pocket):
+ """Perform domination on source package publications.
+
+ Dominates sources, restricted to `distroseries`, `pocket`, and
+ `self.archive`.
+ """
+ self.logger.debug(
+ "Performing domination across %s/%s (Source)",
+ distroseries.name, pocket.title)
+
+ generalization = GeneralizedPublication(is_source=True)
+ sources = self.findSourcesForDomination(distroseries, pocket)
self.logger.debug("Dominating sources...")
self._dominatePublications(
=== modified file 'lib/lp/archivepublisher/tests/test_dominator.py'
--- lib/lp/archivepublisher/tests/test_dominator.py 2011-10-27 11:36:13 +0000
+++ lib/lp/archivepublisher/tests/test_dominator.py 2011-11-02 13:33:36 +0000
@@ -361,7 +361,10 @@
def make_spphs_for_versions(factory, versions):
"""Create publication records for each of `versions`.
- They records are created in the same order in which they are specified.
+ All these publications will be in the same source package, archive,
+ distroseries, and pocket. They will all be in Published status.
+
+ The records are created in the same order in which they are specified.
Make the order irregular to prove that version ordering is not a
coincidence of object creation order etc.
@@ -371,6 +374,7 @@
spn = factory.makeSourcePackageName()
distroseries = factory.makeDistroSeries()
pocket = factory.getAnyPocket()
+ archive = distroseries.main_archive
sprs = [
factory.makeSourcePackageRelease(
sourcepackagename=spn, version=version)
@@ -378,11 +382,34 @@
return [
factory.makeSourcePackagePublishingHistory(
distroseries=distroseries, pocket=pocket,
- sourcepackagerelease=spr,
+ sourcepackagerelease=spr, archive=archive,
status=PackagePublishingStatus.PUBLISHED)
for spr in sprs]
+def make_bpphs_for_versions(factory, versions):
+ """Create publication records for each of `versions`.
+
+ All these publications will be in the same binary package, source
+ package, archive, distroarchseries, and pocket. They will all be in
+ Published status.
+ """
+ bpn = factory.makeBinaryPackageName()
+ spn = factory.makeSourcePackageName()
+ das = factory.makeDistroArchSeries()
+ archive = das.distroseries.main_archive
+ pocket = factory.getAnyPocket()
+ bprs = [
+ factory.makeBinaryPackageRelease(binarypackagename=bpn)
+ for version in versions]
+ return [
+ factory.makeBinaryPackagePublishingHistory(
+ binarypackagerelease=bpr, binarypackagename=bpn,
+ distroarchseries=das, pocket=pocket, archive=archive,
+ sourcepackagename=spn, status=PackagePublishingStatus.PUBLISHED)
+ for bpr in bprs]
+
+
def list_source_versions(spphs):
"""Extract the versions from `spphs` as a list, in the same order."""
return [spph.sourcepackagerelease.version for spph in spphs]
@@ -921,3 +948,144 @@
[],
dominator.findPublishedSPPHs(
spph.distroseries, spph.pocket, other_package.name))
+
+ def test_findBinariesForDomination_finds_published_publications(self):
+ bpphs = make_bpphs_for_versions(self.factory, ['1.0', '1.1'])
+ dominator = self.makeDominator(bpphs)
+ self.assertContentEqual(
+ bpphs, dominator.findBinariesForDomination(
+ bpphs[0].distroarchseries, bpphs[0].pocket))
+
+ def test_findBinariesForDomination_skips_single_pub_packages(self):
+ # The domination algorithm that uses findBinariesForDomination
+ # always keeps the latest version live. Thus, a single
+ # publication isn't worth dominating. findBinariesForDomination
+ # won't return it.
+ bpphs = make_bpphs_for_versions(self.factory, ['1.0'])
+ dominator = self.makeDominator(bpphs)
+ self.assertContentEqual(
+ [], dominator.findBinariesForDomination(
+ bpphs[0].distroarchseries, bpphs[0].pocket))
+
+ def test_findBinariesForDomination_ignores_other_distroseries(self):
+ bpphs = make_bpphs_for_versions(self.factory, ['1.0', '1.1'])
+ dominator = self.makeDominator(bpphs)
+ das = bpphs[0].distroarchseries
+ other_series = self.factory.makeDistroSeries(
+ distribution=das.distroseries.distribution)
+ other_das = self.factory.makeDistroArchSeries(
+ distroseries=other_series, architecturetag=das.architecturetag,
+ processorfamily=das.processorfamily)
+ self.assertContentEqual(
+ [], dominator.findBinariesForDomination(
+ other_das, bpphs[0].pocket))
+
+ def test_findBinariesForDomination_ignores_other_architectures(self):
+ bpphs = make_bpphs_for_versions(self.factory, ['1.0', '1.1'])
+ dominator = self.makeDominator(bpphs)
+ other_das = self.factory.makeDistroArchSeries(
+ distroseries=bpphs[0].distroseries)
+ self.assertContentEqual(
+ [], dominator.findBinariesForDomination(
+ other_das, bpphs[0].pocket))
+
+ def test_findBinariesForDomination_ignores_other_archive(self):
+ bpphs = make_bpphs_for_versions(self.factory, ['1.0', '1.1'])
+ dominator = self.makeDominator(bpphs)
+ dominator.archive = self.factory.makeArchive()
+ self.assertContentEqual(
+ [], dominator.findBinariesForDomination(
+ bpphs[0].distroarchseries, bpphs[0].pocket))
+
+ def test_findBinariesForDomination_ignores_other_pocket(self):
+ bpphs = make_bpphs_for_versions(self.factory, ['1.0', '1.1'])
+ dominator = self.makeDominator(bpphs)
+ for bpph in bpphs:
+ removeSecurityProxy(bpph).pocket = PackagePublishingPocket.UPDATES
+ self.assertContentEqual(
+ [], dominator.findBinariesForDomination(
+ bpphs[0].distroarchseries, PackagePublishingPocket.SECURITY))
+
+ def test_findBinariesForDomination_ignores_other_status(self):
+ # If we have one BPPH for each possible status, plus one
+ # Published one to stop findBinariesForDomination from skipping
+ # the package, findBinariesForDomination returns only the
+ # Published ones.
+ versions = [
+ '1.%d' % self.factory.getUniqueInteger()
+ for status in PackagePublishingStatus.items] + ['0.9']
+ bpphs = make_bpphs_for_versions(self.factory, versions)
+ dominator = self.makeDominator(bpphs)
+
+ for bpph, status in zip(bpphs, PackagePublishingStatus.items):
+ bpph.status = status
+
+ # These are the Published publications. The other ones will all
+ # be ignored.
+ published_bpphs = [
+ bpph
+ for bpph in bpphs
+ if bpph.status == PackagePublishingStatus.PUBLISHED]
+
+ self.assertContentEqual(
+ published_bpphs,
+ dominator.findBinariesForDomination(
+ bpphs[0].distroarchseries, bpphs[0].pocket))
+
+ def test_findSourcesForDomination_finds_published_publications(self):
+ spphs = make_spphs_for_versions(self.factory, ['2.0', '2.1'])
+ dominator = self.makeDominator(spphs)
+ self.assertContentEqual(
+ spphs, dominator.findSourcesForDomination(
+ spphs[0].distroseries, spphs[0].pocket))
+
+ def test_findSourcesForDomination_skips_single_pub_packages(self):
+ # The domination algorithm that uses findSourcesForDomination
+ # always keeps the latest version live. Thus, a single
+ # publication isn't worth dominating. findSourcesForDomination
+ # won't return it.
+ spphs = make_spphs_for_versions(self.factory, ['2.0'])
+ dominator = self.makeDominator(spphs)
+ self.assertContentEqual(
+ [], dominator.findSourcesForDomination(
+ spphs[0].distroseries, spphs[0].pocket))
+
+ def test_findSourcesForDomination_ignores_other_distroseries(self):
+ spphs = make_spphs_for_versions(self.factory, ['2.0', '2.1'])
+ dominator = self.makeDominator(spphs)
+ other_series = self.factory.makeDistroSeries(
+ distribution=spphs[0].distroseries.distribution)
+ self.assertContentEqual(
+ [], dominator.findSourcesForDomination(
+ other_series, spphs[0].pocket))
+
+ def test_findSourcesForDomination_ignores_other_pocket(self):
+ spphs = make_spphs_for_versions(self.factory, ['2.0', '2.1'])
+ dominator = self.makeDominator(spphs)
+ for spph in spphs:
+ removeSecurityProxy(spph).pocket = PackagePublishingPocket.UPDATES
+ self.assertContentEqual(
+ [], dominator.findSourcesForDomination(
+ spphs[0].distroseries, PackagePublishingPocket.SECURITY))
+
+ def test_findSourcesForDomination_ignores_other_status(self):
+ versions = [
+ '1.%d' % self.factory.getUniqueInteger()
+ for status in PackagePublishingStatus.items] + ['0.9']
+ spphs = make_spphs_for_versions(self.factory, versions)
+ dominator = self.makeDominator(spphs)
+
+ for spph, status in zip(spphs, PackagePublishingStatus.items):
+ spph.status = status
+
+ # These are the Published publications. The other ones will all
+ # be ignored.
+ published_spphs = [
+ spph
+ for spph in spphs
+ if spph.status == PackagePublishingStatus.PUBLISHED]
+
+ self.assertContentEqual(
+ published_spphs,
+ dominator.findSourcesForDomination(
+ spphs[0].distroseries, spphs[0].pocket))
=== modified file 'lib/lp/soyuz/model/publishing.py'
--- lib/lp/soyuz/model/publishing.py 2011-10-23 02:58:56 +0000
+++ lib/lp/soyuz/model/publishing.py 2011-11-02 13:33:36 +0000
@@ -33,7 +33,6 @@
Desc,
LeftJoin,
Or,
- Select,
Sum,
)
from storm.store import Store
@@ -1154,19 +1153,10 @@
# Avoid circular wotsits.
from lp.soyuz.model.binarypackagebuild import BinaryPackageBuild
from lp.soyuz.model.distroarchseries import DistroArchSeries
- from lp.soyuz.model.sourcepackagerelease import SourcePackageRelease
- source_select = Select(
- SourcePackageRelease.id,
- And(
- BinaryPackageBuild.source_package_release_id ==
- SourcePackageRelease.id,
- BinaryPackageRelease.build == BinaryPackageBuild.id,
- self.binarypackagereleaseID == BinaryPackageRelease.id,
- ))
+
pubs = [
BinaryPackageBuild.source_package_release_id ==
- SourcePackageRelease.id,
- SourcePackageRelease.id.is_in(source_select),
+ self.binarypackagerelease.build.source_package_release_id,
BinaryPackageRelease.build == BinaryPackageBuild.id,
BinaryPackagePublishingHistory.binarypackagereleaseID ==
BinaryPackageRelease.id,