← Back to team overview

launchpad-reviewers team mailing list archive

Re: [Merge] lp:~cjwatson/launchpad/preload-publisher-indexes into lp:launchpad

 

Review: Needs Fixing code



Diff comments:

> === modified file 'lib/lp/registry/model/distroseries.py'
> --- lib/lp/registry/model/distroseries.py	2014-01-06 14:24:05 +0000
> +++ lib/lp/registry/model/distroseries.py	2014-06-11 09:31:16 +0000
> @@ -1,4 +1,4 @@
> -# Copyright 2009-2013 Canonical Ltd.  This software is licensed under the
> +# Copyright 2009-2014 Canonical Ltd.  This software is licensed under the
>  # GNU Affero General Public License version 3 (see the file LICENSE).
>  
>  """Database classes for a distribution series."""
> @@ -90,6 +90,10 @@
>  from lp.registry.model.series import SeriesMixin
>  from lp.registry.model.sourcepackage import SourcePackage
>  from lp.registry.model.sourcepackagename import SourcePackageName
> +from lp.services.database.bulk import (
> +    load_referencing,
> +    load_related,
> +    )
>  from lp.services.database.constants import (
>      DEFAULT,
>      UTC_NOW,
> @@ -106,7 +110,10 @@
>      )
>  from lp.services.database.stormexpr import fti_search
>  from lp.services.librarian.interfaces import ILibraryFileAliasSet
> -from lp.services.librarian.model import LibraryFileAlias
> +from lp.services.librarian.model import (
> +    LibraryFileAlias,
> +    LibraryFileContent,
> +    )
>  from lp.services.mail.signedmessage import signed_message_from_string
>  from lp.services.propertycache import (
>      cachedproperty,
> @@ -135,7 +142,9 @@
>  from lp.soyuz.interfaces.sourcepackageformat import (
>      ISourcePackageFormatSelectionSet,
>      )
> +from lp.soyuz.model.binarypackagebuild import BinaryPackageBuild
>  from lp.soyuz.model.binarypackagename import BinaryPackageName
> +from lp.soyuz.model.binarypackagerelease import BinaryPackageRelease
>  from lp.soyuz.model.component import Component
>  from lp.soyuz.model.distroarchseries import (
>      DistroArchSeries,
> @@ -146,6 +155,10 @@
>  from lp.soyuz.model.distroseriessourcepackagerelease import (
>      DistroSeriesSourcePackageRelease,
>      )
> +from lp.soyuz.model.files import (
> +    BinaryPackageFile,
> +    SourcePackageReleaseFile,
> +    )
>  from lp.soyuz.model.publishing import (
>      BinaryPackagePublishingHistory,
>      get_current_source_releases,
> @@ -985,7 +998,7 @@
>  
>      def getSourcePackagePublishing(self, pocket, component, archive):
>          """See `IDistroSeries`."""
> -        return Store.of(self).find(
> +        spphs = Store.of(self).find(
>              SourcePackagePublishingHistory,
>              SourcePackagePublishingHistory.archive == archive,
>              SourcePackagePublishingHistory.distroseries == self,
> @@ -993,10 +1006,22 @@
>              SourcePackagePublishingHistory.component == component,
>              SourcePackagePublishingHistory.status ==
>                  PackagePublishingStatus.PUBLISHED)
> +        load_related(Section, spphs, ["sectionID"])
> +        sprs = load_related(
> +            SourcePackageRelease, spphs, ["sourcepackagereleaseID"])
> +        load_related(SourcePackageName, sprs, ["sourcepackagenameID"])
> +        sprfs = load_referencing(
> +            SourcePackageReleaseFile, sprs, ["sourcepackagereleaseID"])
> +        for spr in sprs:
> +            get_property_cache(spr).files = [
> +                sprf for sprf in sprfs if sprf.sourcepackagerelease == spr]

This is Θ(n²), which is slightly terrifying once you get to a few thousand packages. Consider building a map from SPR ID to the SPRFs, or just iterating through the SPRFs and appending them to (or creating) the cached list.

We should also make the order less inconsistent, by either being consistently ordered by ID, or dropping the order from the normal SPR.files implementation.

> +        lfas = load_related(LibraryFileAlias, sprfs, ["libraryfileID"])
> +        load_related(LibraryFileContent, lfas, ["contentID"])

These changes will actually lead to the main SPPH query being executed three times: twice in load_related when this method is invoked, and then a third time when the callsite evaluates the result. Consider a DecoratedResultSet pre_iter_hook, so the preloading is done just in time and the main query is issued just once.

> +        return spphs
>  
>      def getBinaryPackagePublishing(self, archtag, pocket, component, archive):
>          """See `IDistroSeries`."""
> -        return Store.of(self).find(
> +        bpphs = Store.of(self).find(
>              BinaryPackagePublishingHistory,
>              DistroArchSeries.distroseries == self,
>              DistroArchSeries.architecturetag == archtag,
> @@ -1007,6 +1032,21 @@
>              BinaryPackagePublishingHistory.component == component,
>              BinaryPackagePublishingHistory.status ==
>                  PackagePublishingStatus.PUBLISHED)
> +        bprs = load_related(
> +            BinaryPackageRelease, bpphs, ["binarypackagereleaseID"])
> +        bpbs = load_related(BinaryPackageBuild, bprs, ["buildID"])
> +        sprs = load_related(
> +            SourcePackageRelease, bpbs, ["source_package_release_id"])
> +        bpfs = load_referencing(
> +            BinaryPackageFile, bprs, ["binarypackagereleaseID"])
> +        for bpr in bprs:
> +            get_property_cache(bpr).files = [
> +                bpf for bpf in bpfs if bpf.binarypackagerelease == bpr]

Please fix the Θ(n²) complexity as above.

> +        lfas = load_related(LibraryFileAlias, bpfs, ["libraryfileID"])
> +        load_related(LibraryFileContent, lfas, ["contentID"])
> +        load_related(SourcePackageName, sprs, ["sourcepackagenameID"])
> +        load_related(BinaryPackageName, bprs, ["binarypackagenameID"])
> +        return bpphs

Please also use pre_iter_hook here.

>  
>      def getBuildRecords(self, build_state=None, name=None, pocket=None,
>                          arch_tag=None, user=None, binary_only=True):
> 
> === modified file 'lib/lp/registry/tests/test_distroseries.py'
> --- lib/lp/registry/tests/test_distroseries.py	2013-09-11 06:05:44 +0000
> +++ lib/lp/registry/tests/test_distroseries.py	2014-06-11 09:31:16 +0000
> @@ -1,4 +1,4 @@
> -# Copyright 2009-2013 Canonical Ltd.  This software is licensed under the
> +# Copyright 2009-2014 Canonical Ltd.  This software is licensed under the
>  # GNU Affero General Public License version 3 (see the file LICENSE).
>  
>  """Tests for distroseries."""
> @@ -9,6 +9,7 @@
>      'CurrentSourceReleasesMixin',
>      ]
>  
> +from functools import partial
>  from logging import getLogger
>  
>  from testtools.matchers import Equals
> @@ -39,6 +40,7 @@
>      ANONYMOUS,
>      login,
>      person_logged_in,
> +    record_two_runs,
>      StormStatementRecorder,
>      TestCase,
>      TestCaseWithFactory,
> @@ -358,7 +360,7 @@
>  
>  class TestDistroSeriesPackaging(TestCaseWithFactory):
>  
> -    layer = DatabaseFunctionalLayer
> +    layer = LaunchpadFunctionalLayer
>  
>      def setUp(self):
>          super(TestDistroSeriesPackaging, self).setUp()
> @@ -385,9 +387,12 @@
>          transaction.commit()
>          login(ANONYMOUS)
>  
> -    def makeSeriesPackage(self, name, is_main=False, bugs=None, messages=None,
> -                          is_translations=False):
> +    def makeSeriesPackage(self, name=None, is_main=False, bugs=None,
> +                          messages=None, is_translations=False, pocket=None,
> +                          status=None):
>          # Make a published source package.
> +        if name is None:
> +            name = self.factory.getUniqueString()
>          if is_main:
>              component = self.main_component
>          else:
> @@ -397,9 +402,10 @@
>          else:
>              section = 'web'
>          sourcepackagename = self.factory.makeSourcePackageName(name)
> -        self.factory.makeSourcePackagePublishingHistory(
> +        spph = self.factory.makeSourcePackagePublishingHistory(
>              sourcepackagename=sourcepackagename, distroseries=self.series,
> -            component=component, section_name=section)
> +            component=component, section_name=section, pocket=pocket,
> +            status=status)
>          source_package = self.factory.makeSourcePackage(
>              sourcepackagename=sourcepackagename, distroseries=self.series)
>          if bugs is not None:
> @@ -411,9 +417,36 @@
>                  distroseries=self.series, sourcepackagename=sourcepackagename,
>                  owner=self.user)
>              removeSecurityProxy(template).messagecount = messages
> +        spr = spph.sourcepackagerelease
> +        for extension in ("dsc", "tar.gz"):
> +            filename = "%s_%s.%s" % (spr.name, spr.version, extension)
> +            spr.addFile(self.factory.makeLibraryFileAlias(filename=filename))

Consider using makeLibraryFileAlias(db_only=True) here, to avoid requiring the slow Librarian for these tests. That drops the layer back to DatabaseFunctionalLayer, and eliminates database commits and time-consuming resets.

>          self.packages[name] = source_package
> +        transaction.commit()
>          return source_package
>  
> +    def makeSeriesBinaryPackage(self, name=None, is_main=False,
> +                                is_translations=False, das=None, pocket=None,
> +                                status=None):
> +        # Make a published binary package.
> +        if name is None:
> +            name = self.factory.getUniqueString()
> +        source = self.makeSeriesPackage(
> +            name=name, is_main=is_main, is_translations=is_translations,
> +            pocket=pocket, status=status)
> +        spr = source.distinctreleases[0]
> +        binarypackagename = self.factory.makeBinaryPackageName(name)
> +        bpph = self.factory.makeBinaryPackagePublishingHistory(
> +            binarypackagename=binarypackagename, distroarchseries=das,
> +            component=spr.component, section_name=spr.section.name,
> +            status=status, pocket=pocket, source_package_release=spr)
> +        bpr = bpph.binarypackagerelease
> +        filename = "%s_%s_%s.deb" % (
> +            bpr.name, bpr.version, das.architecturetag)
> +        bpr.addFile(self.factory.makeLibraryFileAlias(filename=filename))

db_only=True here too?

> +        transaction.commit()
> +        return bpph
> +
>      def linkPackage(self, name):
>          product_series = self.factory.makeProductSeries()
>          product_series.setPackaging(
> @@ -486,6 +519,42 @@
>          expected = [u'translatable', u'linked', u'importabletranslatable']
>          self.assertEqual(expected, names)
>  
> +    def test_getSourcePackagePublishing_query_count(self):
> +        # Check that the number of queries required to publish source
> +        # packages is constant in the number of source packages.
> +        def get_index_stanzas():
> +            for spp in self.series.getSourcePackagePublishing(
> +                    PackagePublishingPocket.RELEASE, self.universe_component,
> +                    self.series.main_archive):
> +                spp.getIndexStanza()
> +
> +        recorder1, recorder2 = record_two_runs(
> +            get_index_stanzas,
> +            partial(
> +                self.makeSeriesPackage, pocket=PackagePublishingPocket.RELEASE,
> +                status=PackagePublishingStatus.PUBLISHED),
> +            5, 5)
> +        self.assertThat(recorder2, HasQueryCount(Equals(recorder1.count)))
> +
> +    def test_getBinaryPackagePublishing_query_count(self):
> +        # Check that the number of queries required to publish binary
> +        # packages is constant in the number of binary packages.
> +        def get_index_stanzas(das):
> +            for bpp in self.series.getBinaryPackagePublishing(
> +                    das.architecturetag, PackagePublishingPocket.RELEASE,
> +                    self.universe_component, self.series.main_archive):
> +                bpp.getIndexStanza()
> +
> +        das = self.factory.makeDistroArchSeries(distroseries=self.series)
> +        recorder1, recorder2 = record_two_runs(
> +            partial(get_index_stanzas, das),
> +            partial(
> +                self.makeSeriesBinaryPackage, das=das,
> +                pocket=PackagePublishingPocket.RELEASE,
> +                status=PackagePublishingStatus.PUBLISHED),
> +            5, 5)
> +        self.assertThat(recorder2, HasQueryCount(Equals(recorder1.count)))
> +
>  
>  class TestDistroSeriesSet(TestCaseWithFactory):
>  
> 
> === modified file 'lib/lp/soyuz/model/sourcepackagerelease.py'
> --- lib/lp/soyuz/model/sourcepackagerelease.py	2014-04-24 06:36:22 +0000
> +++ lib/lp/soyuz/model/sourcepackagerelease.py	2014-06-11 09:31:16 +0000
> @@ -1,4 +1,4 @@
> -# Copyright 2009-2013 Canonical Ltd.  This software is licensed under the
> +# Copyright 2009-2014 Canonical Ltd.  This software is licensed under the
>  # GNU Affero General Public License version 3 (see the file LICENSE).
>  
>  __metaclass__ = type
> @@ -128,8 +128,6 @@
>      dsc_binaries = StringCol(dbName='dsc_binaries')
>  
>      # MultipleJoins
> -    files = SQLMultipleJoin('SourcePackageReleaseFile',
> -        joinColumn='sourcepackagerelease', orderBy="libraryfile")
>      publishings = SQLMultipleJoin('SourcePackagePublishingHistory',
>          joinColumn='sourcepackagerelease', orderBy="-datecreated")
>  
> @@ -245,10 +243,20 @@
>  
>      def addFile(self, file):
>          """See ISourcePackageRelease."""
> -        return SourcePackageReleaseFile(
> +        sprf = SourcePackageReleaseFile(
>              sourcepackagerelease=self,
>              filetype=determine_source_file_type(file.filename),
>              libraryfile=file)
> +        Store.of(self).flush()
> +        del get_property_cache(self).files
> +        return sprf
> +
> +    @cachedproperty
> +    def files(self):
> +        """See `ISourcePackageRelease`."""
> +        return list(Store.of(self).find(
> +            SourcePackageReleaseFile, sourcepackagerelease=self).order_by(
> +                SourcePackageReleaseFile.libraryfileID))
>  
>      def getFileByName(self, filename):
>          """See `ISourcePackageRelease`."""
> 
> === modified file 'lib/lp/soyuz/scripts/gina/handlers.py'
> --- lib/lp/soyuz/scripts/gina/handlers.py	2013-09-11 08:17:34 +0000
> +++ lib/lp/soyuz/scripts/gina/handlers.py	2014-06-11 09:31:16 +0000
> @@ -1,4 +1,4 @@
> -# Copyright 2009-2012 Canonical Ltd.  This software is licensed under the
> +# Copyright 2009-2014 Canonical Ltd.  This software is licensed under the
>  # GNU Affero General Public License version 3 (see the file LICENSE).
>  
>  """Gina db handlers.
> @@ -33,10 +33,7 @@
>  from lp.archivepublisher.diskpool import poolify
>  from lp.archiveuploader.changesfile import ChangesFile
>  from lp.archiveuploader.tagfiles import parse_tagfile
> -from lp.archiveuploader.utils import (
> -    determine_binary_file_type,
> -    determine_source_file_type,
> -    )
> +from lp.archiveuploader.utils import determine_binary_file_type
>  from lp.buildmaster.enums import BuildStatus
>  from lp.registry.interfaces.person import (
>      IPersonSet,
> @@ -68,10 +65,7 @@
>  from lp.soyuz.model.binarypackagerelease import BinaryPackageRelease
>  from lp.soyuz.model.component import Component
>  from lp.soyuz.model.distroarchseries import DistroArchSeries
> -from lp.soyuz.model.files import (
> -    BinaryPackageFile,
> -    SourcePackageReleaseFile,
> -    )
> +from lp.soyuz.model.files import BinaryPackageFile
>  from lp.soyuz.model.processor import Processor
>  from lp.soyuz.model.publishing import (
>      BinaryPackagePublishingHistory,
> @@ -657,11 +651,7 @@
>          # Insert file into the library and create the
>          # SourcePackageReleaseFile entry on lp db.
>          for fname, path in to_upload:
> -            alias = getLibraryAlias(path, fname)
> -            SourcePackageReleaseFile(
> -                sourcepackagerelease=spr.id,
> -                libraryfile=alias,
> -                filetype=determine_source_file_type(fname))
> +            spr.addFile(getLibraryAlias(path, fname))

Yay. I fixed gina to use the common determine_source_file_type in my 3.0 (quilt) work, but didn't go quite far enough.

>              log.info('Package file %s included into library' % fname)
>  
>          return spr
> 
> === modified file 'lib/lp/testing/factory.py'
> --- lib/lp/testing/factory.py	2014-04-24 02:16:27 +0000
> +++ lib/lp/testing/factory.py	2014-06-11 09:31:16 +0000
> @@ -2,7 +2,7 @@
>  # NOTE: The first line above must stay first; do not move the copyright
>  # notice to the top.  See http://www.python.org/dev/peps/pep-0263/.
>  #
> -# Copyright 2009-2013 Canonical Ltd.  This software is licensed under the
> +# Copyright 2009-2014 Canonical Ltd.  This software is licensed under the
>  # GNU Affero General Public License version 3 (see the file LICENSE).
>  
>  """Testing infrastructure for the Launchpad application.
> @@ -250,7 +250,10 @@
>      )
>  from lp.services.oauth.interfaces import IOAuthConsumerSet
>  from lp.services.openid.model.openididentifier import OpenIdIdentifier
> -from lp.services.propertycache import clear_property_cache
> +from lp.services.propertycache import (
> +    clear_property_cache,
> +    get_property_cache,
> +    )
>  from lp.services.temporaryblobstorage.interfaces import (
>      ITemporaryStorageManager,
>      )
> @@ -3543,10 +3546,12 @@
>              library_file = self.makeLibraryFileAlias()
>          if filetype is None:
>              filetype = SourcePackageFileType.DSC
> -        return ProxyFactory(
> +        sprf = ProxyFactory(
>              SourcePackageReleaseFile(
>                  sourcepackagerelease=sourcepackagerelease,
>                  libraryfile=library_file, filetype=filetype))
> +        del get_property_cache(sourcepackagerelease).files

I'd probably just let SPR.addFile take a filetype override.

> +        return sprf
>  
>      def makeBinaryPackageBuild(self, source_package_release=None,
>              distroarchseries=None, archive=None, builder=None,
> 


-- 
https://code.launchpad.net/~cjwatson/launchpad/preload-publisher-indexes/+merge/222768
Your team Launchpad code reviewers is subscribed to branch lp:launchpad.


References