[Merge] lp:~stevenk/launchpad/drop-garbo-packageupload-searchables into lp:launchpad
Steve Kowalik has proposed merging lp:~stevenk/launchpad/drop-garbo-packageupload-searchables into lp:launchpad.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~stevenk/launchpad/drop-garbo-packageupload-searchables/+merge/140118
PopulatePackageUploadSearchables has completed on all five instances. To thank it for its work of populating at least 4.7 million PackageUpload rows on production, we will boot it out of the codebase.
This also cleans up some unused imports that were left hanging around.
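For anyone reading along, here is a minimal sketch of the TunableLoop contract that the removed class implemented (assuming TunableLoop comes from lp.services.looptuner; the class and its findIDs() helper are illustrative, not real code):

import transaction

from lp.services.looptuner import TunableLoop


class ExampleBackfillLoop(TunableLoop):
    """Sketch of a chunked backfill loop, in the shape of the one removed."""

    maximum_chunk_size = 5000

    def __init__(self, log, abort_time=None):
        super(ExampleBackfillLoop, self).__init__(log, abort_time)
        self.start_at = 1

    def findIDs(self):
        # Hypothetical helper: return the ordered IDs of rows still
        # needing population, starting from self.start_at.
        raise NotImplementedError

    def isDone(self):
        # The runner stops calling the loop once this returns True.
        return len(self.findIDs()) == 0

    def __call__(self, chunk_size):
        # Process up to chunk_size rows, remember where we got to, and
        # commit so progress survives an abort.
        ids = self.findIDs()[:chunk_size]
        # ... populate the rows identified by ids ...
        self.start_at = ids[-1] + 1
        transaction.commit()

BaseDatabaseGarbageCollector (unchanged below) drives each registered loop through isDone() and __call__(), tuning chunk_size towards a target iteration time, which is why retiring a loop amounts to deleting the class and one line in the tunable loop list.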
--
https://code.launchpad.net/~stevenk/launchpad/drop-garbo-packageupload-searchables/+merge/140118
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~stevenk/launchpad/drop-garbo-packageupload-searchables into lp:launchpad.
=== modified file 'database/schema/security.cfg'
--- database/schema/security.cfg 2012-12-14 00:36:37 +0000
+++ database/schema/security.cfg 2012-12-17 00:24:23 +0000
@@ -2253,7 +2253,6 @@
public.oauthnonce = SELECT, DELETE
public.openidconsumerassociation = SELECT, DELETE
public.openidconsumernonce = SELECT, DELETE
-public.packageupload = SELECT, UPDATE
public.person = SELECT, DELETE
public.product = SELECT, UPDATE
public.pofiletranslator = SELECT, INSERT, UPDATE, DELETE
=== modified file 'lib/lp/scripts/garbo.py'
--- lib/lp/scripts/garbo.py 2012-12-14 00:36:37 +0000
+++ lib/lp/scripts/garbo.py 2012-12-17 00:24:23 +0000
@@ -18,7 +18,6 @@
)
import logging
import multiprocessing
-from operator import itemgetter
import os
import threading
import time
@@ -40,9 +39,7 @@
Min,
Or,
Row,
- Select,
SQL,
- Update,
)
from storm.info import ClassAlias
from storm.store import EmptyResultSet
@@ -123,7 +120,6 @@
from lp.services.verification.model.logintoken import LoginToken
from lp.soyuz.model.archive import Archive
from lp.soyuz.model.publishing import SourcePackagePublishingHistory
-from lp.soyuz.model.queue import PackageUpload
from lp.soyuz.model.reporting import LatestPersonSourcePackageReleaseCache
from lp.soyuz.model.sourcepackagerelease import SourcePackageRelease
from lp.translations.interfaces.potemplate import IPOTemplateSet
@@ -1339,131 +1335,6 @@
transaction.commit()
-class PopulatePackageUploadSearchables(TunableLoop):
- """Populates PackageUpload.searchable_names and
- PackageUpload.searchable_versions."""
-
- maximum_chunk_size = 5000
-
- def __init__(self, log, abort_time=None):
- super(PopulatePackageUploadSearchables, self).__init__(log, abort_time)
- self.start_at = 1
- self.store = IMasterStore(PackageUpload)
-
- def findPackageUploadIDs(self):
- return self.store.find(
- (PackageUpload.id,),
- Or(PackageUpload.searchable_names == None,
- PackageUpload.searchable_versions == None),
- PackageUpload.id >= self.start_at).order_by(PackageUpload.id)
-
- def isDone(self):
- return self.findPackageUploadIDs().is_empty()
-
- def __call__(self, chunk_size):
- packageupload_ids = map(
- itemgetter(0), list(self.findPackageUploadIDs()[:chunk_size]))
- # The following SQL links from PU[SBC] to fetch all of the relevant
- # source names, binary names, libraryfile filenames and their versions.
- results = self.store.find(
- (PackageUpload.id, SQL("""
- (SELECT COALESCE(
- string_agg(DISTINCT name, ' ' ORDER BY name), '') FROM (
- (SELECT spn.name
- FROM
- packageuploadbuild
- JOIN binarypackagebuild AS bpb ON
- bpb.id = packageuploadbuild.build
- JOIN sourcepackagerelease AS spr ON
- spr.id = bpb.source_package_release
- JOIN sourcepackagename AS spn ON
- spn.id = spr.sourcepackagename
- WHERE packageuploadbuild.packageupload = packageupload.id
- )
- UNION
- (SELECT bpn.name
- FROM
- packageuploadbuild
- JOIN binarypackagerelease ON
- binarypackagerelease.build = packageuploadbuild.build
- JOIN binarypackagename AS bpn ON
- bpn.id = binarypackagerelease.binarypackagename
- WHERE packageuploadbuild.packageupload = packageupload.id
- )
- UNION
- (SELECT sourcepackagename.name
- FROM
- packageuploadsource
- JOIN sourcepackagerelease AS spr ON
- spr.id = packageuploadsource.sourcepackagerelease
- JOIN sourcepackagename ON
- sourcepackagename.id = spr.sourcepackagename
- WHERE packageuploadsource.packageupload = packageupload.id
- )
- UNION
- (SELECT lfa.filename
- FROM
- packageuploadcustom
- JOIN libraryfilealias AS lfa ON
- lfa.id = packageuploadcustom.libraryfilealias
- WHERE packageuploadcustom.packageupload = packageupload.id
- )
- UNION
- (SELECT package_name FROM packagecopyjob
- WHERE packageupload.package_copy_job = packagecopyjob.id
- )) AS names (name))
- """), SQL("""
- (SELECT COALESCE(array_agg(DISTINCT version ORDER BY version)::text[],
- ARRAY[]::text[]) FROM (
- (
- SELECT spr.version
- FROM packageuploadsource
- JOIN sourcepackagerelease AS spr ON
- spr.id = packageuploadsource.sourcepackagerelease
- WHERE packageuploadsource.packageupload = packageupload.id
- )
- UNION
- (
- SELECT binarypackagerelease.version
- FROM packageuploadbuild
- JOIN binarypackagerelease ON
- binarypackagerelease.build = packageuploadbuild.build
- WHERE packageuploadbuild.packageupload = packageupload.id
- )
- UNION
- (
- SELECT (regexp_matches(json_data,
- '"package_version": "([^"]+)"')::debversion[])[1]
- FROM packagecopyjob
- WHERE packageupload.package_copy_job = packagecopyjob.id
- )) AS versions (version))
- """)), PackageUpload.id.is_in(packageupload_ids))
- # Construct our cache data and populate our Values expression.
- cache_data = ClassAlias(PackageUpload, "cache_data")
- updated_columns = dict(
- [(PackageUpload.searchable_names, cache_data.searchable_names),
- (PackageUpload.searchable_versions,
- cache_data.searchable_versions)])
- values = [
- [dbify_value(col, val)[0]
- for (col, val) in zip(
- (PackageUpload.id, PackageUpload.searchable_names,
- PackageUpload.searchable_versions), data)]
- for data in results]
- cols = [
- ('id', 'integer'), ('searchable_names', 'text'),
- ('searchable_versions', 'text[]')]
- cache_data_expr = Values('cache_data', cols, values)
- # Using the PackageUpload table, and the pseudo-table Values, set
- # updated_columns for every row in this loop.
- self.store.execute(
- BulkUpdate(
- updated_columns, table=PackageUpload, values=cache_data_expr,
- where=PackageUpload.id == cache_data.id))
- self.start_at = packageupload_ids[-1] + 1
- transaction.commit()
-
-
class BaseDatabaseGarbageCollector(LaunchpadCronScript):
"""Abstract base class to run a collection of TunableLoops."""
script_name = None # Script name for locking and database user. Override.
@@ -1719,7 +1590,6 @@
UnusedSessionPruner,
DuplicateSessionPruner,
BugHeatUpdater,
- PopulatePackageUploadSearchables,
]
experimental_tunable_loops = []
=== modified file 'lib/lp/scripts/tests/test_garbo.py'
--- lib/lp/scripts/tests/test_garbo.py 2012-12-14 00:36:37 +0000
+++ lib/lp/scripts/tests/test_garbo.py 2012-12-17 00:24:23 +0000
@@ -22,7 +22,6 @@
Min,
Not,
SQL,
- Update,
)
from storm.locals import (
Int,
@@ -63,7 +62,6 @@
from lp.registry.interfaces.person import IPersonSet
from lp.registry.interfaces.teammembership import TeamMembershipStatus
from lp.registry.model.commercialsubscription import CommercialSubscription
-from lp.registry.model.product import Product
from lp.registry.model.teammembership import TeamMembership
from lp.scripts.garbo import (
AntiqueSessionPruner,
@@ -1275,55 +1273,6 @@
'PopulateLatestPersonSourcePackageReleaseCache')
self.assertEqual(spph_2.id, job_data['last_spph_id'])
- def test_PopulatePackageUploadSearchables(self):
- # PopulatePackageUploadSearchables sets searchable_names and
- # searchable_versions for existing uploads correctly.
- switch_dbuser('testadmin')
- distroseries = self.factory.makeDistroSeries()
- source = self.factory.makeSourcePackageUpload(distroseries)
- binary = self.factory.makeBuildPackageUpload(distroseries)
- build = self.factory.makeBinaryPackageBuild()
- self.factory.makeBinaryPackageRelease(build=build)
- binary.addBuild(build)
- custom = self.factory.makeCustomPackageUpload(distroseries)
- # They all have searchable_{names,versions} set, so unset them.
- for kind in (source, binary, custom):
- removeSecurityProxy(kind).searchable_names = None
- removeSecurityProxy(kind).searchable_versions = None
- transaction.commit()
- self.runHourly()
- source_name = source.sources[0].sourcepackagerelease.name
- binary_names = ' '.join(
- [build.build.binarypackages[0].name for build in binary.builds] + [
- build.build.source_package_release.name
- for build in binary.builds])
- filename = custom.customfiles[0].libraryfilealias.filename
- self.assertEqual(source.searchable_names, source_name)
- self.assertEqual(binary.searchable_names, binary_names)
- self.assertEqual(custom.searchable_names, filename)
- source_version = [source.sources[0].sourcepackagerelease.version]
- binary_versions = [
- build.build.binarypackages[0].version for build in binary.builds]
- self.assertContentEqual(source_version, source.searchable_versions)
- self.assertContentEqual(binary_versions, binary.searchable_versions)
- self.assertEqual([], custom.searchable_versions)
-
- def test_PopulatePackageUploadSearchables_deduplication(self):
- # When the SPN and the BPN are the same for a build, the
- # searchable_names field is set to just one name.
- switch_dbuser('testadmin')
- distroseries = self.factory.makeDistroSeries()
- spr = self.factory.makeSourcePackageRelease()
- bpn = self.factory.makeBinaryPackageName(name=spr.name)
- binary = self.factory.makeBuildPackageUpload(
- distroseries=distroseries, binarypackagename=bpn,
- source_package_release=spr)
- removeSecurityProxy(binary).searchable_names = None
- removeSecurityProxy(binary).searchable_versions = None
- transaction.commit()
- self.runHourly()
- self.assertEqual(spr.name, binary.searchable_names)
-
class TestGarboTasks(TestCaseWithFactory):
layer = LaunchpadZopelessLayer