launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #20383
[Merge] lp:~cjwatson/launchpad/package-cache-drop-changelog into lp:launchpad
Colin Watson has proposed merging lp:~cjwatson/launchpad/package-cache-drop-changelog into lp:launchpad.
Commit message:
Stop populating DistributionSourcePackageCache.changelog, and add a garbo job to depopulate it.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/package-cache-drop-changelog/+merge/294894
Stop populating DistributionSourcePackageCache.changelog, and add a garbo job to depopulate it.
The changelog was first added here in https://bugs.launchpad.net/launchpad/+bug/48735, and it is at least considered only as a bottom-priority item. But it doesn't make all that much sense, at least nowadays: we now have a pretty decent batched +changelog view, matching on changelog terms will often not be what you expected except in special-purpose circumstances, and general fishing expeditions are probably better handled by a general-purpose search engine. The changelog makes up about half of the table size (166 of 341 MB on dogfood), so I'd like to trim it down so that we can make the package cache quicker to update and easier to use for source package vocabulary pickers.
The next step will be to drop the column from the database, but we need this on production first to rebuild the fti column.
--
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/package-cache-drop-changelog into lp:launchpad.
=== modified file 'database/schema/security.cfg'
--- database/schema/security.cfg 2016-05-12 10:02:25 +0000
+++ database/schema/security.cfg 2016-05-17 09:47:55 +0000
@@ -2327,6 +2327,7 @@
public.codeimportresult = SELECT, DELETE
public.commercialsubscription = SELECT, UPDATE
public.diff = SELECT, DELETE
+public.distributionsourcepackagecache = SELECT, UPDATE
public.distroseries = SELECT, UPDATE
public.emailaddress = SELECT, UPDATE, DELETE
public.garbojobstate = SELECT, INSERT, UPDATE, DELETE
=== modified file 'lib/lp/scripts/garbo.py'
--- lib/lp/scripts/garbo.py 2016-04-05 15:44:16 +0000
+++ lib/lp/scripts/garbo.py 2016-05-17 09:47:55 +0000
@@ -119,6 +119,9 @@
from lp.services.webhooks.interfaces import IWebhookJobSource
from lp.services.webhooks.model import WebhookJob
from lp.soyuz.model.archive import Archive
+from lp.soyuz.model.distributionsourcepackagecache import (
+ DistributionSourcePackageCache,
+ )
from lp.soyuz.model.livefsbuild import LiveFSFile
from lp.soyuz.model.publishing import SourcePackagePublishingHistory
from lp.soyuz.model.reporting import LatestPersonSourcePackageReleaseCache
@@ -1435,6 +1438,36 @@
"""
+class DistributionSourcePackageCacheChangelogPruner(BulkPruner):
+ """Set DistributionSourcePackageCache.changelog to NULL and update fti.
+
+ This column is due to be deleted, but we force an fti rebuild without it
+ first to avoid leaving debris in the fti column.
+ """
+
+ target_table_class = DistributionSourcePackageCache
+ ids_to_prune_query = """
+ SELECT id
+ FROM DistributionSourcePackageCache
+ WHERE changelog IS NOT NULL
+ """
+
+ def __call__(self, chunk_size):
+ """See `ITunableLoop`."""
+ result = self.store.execute("""
+ UPDATE %s
+ SET changelog = NULL, fti = NULL
+ WHERE (%s) IN (
+ SELECT * FROM
+ cursor_fetch('%s', %d) AS f(%s))
+ """
+ % (
+ self.target_table_name, self.target_table_key,
+ self.cursor_name, chunk_size, self.target_table_key_type))
+ self._num_removed = result.rowcount
+ transaction.commit()
+
+
class BaseDatabaseGarbageCollector(LaunchpadCronScript):
"""Abstract base class to run a collection of TunableLoops."""
script_name = None # Script name for locking and database user. Override.
@@ -1645,6 +1678,7 @@
except Exception:
loop_logger.exception("Unhandled exception")
self.failure_count += 1
+ raise
finally:
loop_lock.release()
@@ -1713,6 +1747,7 @@
CodeImportEventPruner,
CodeImportResultPruner,
DiffPruner,
+ DistributionSourcePackageCacheChangelogPruner,
GitJobPruner,
HWSubmissionEmailLinker,
LiveFSFilePruner,
=== modified file 'lib/lp/scripts/tests/test_garbo.py'
--- lib/lp/scripts/tests/test_garbo.py 2016-04-05 15:44:16 +0000
+++ lib/lp/scripts/tests/test_garbo.py 2016-05-17 09:47:55 +0000
@@ -15,6 +15,7 @@
from StringIO import StringIO
import time
+from psycopg2 import ProgrammingError
from pytz import UTC
from storm.exceptions import LostObjectError
from storm.expr import (
@@ -115,6 +116,9 @@
from lp.services.worlddata.interfaces.language import ILanguageSet
from lp.soyuz.enums import PackagePublishingStatus
from lp.soyuz.interfaces.livefs import LIVEFS_FEATURE_FLAG
+from lp.soyuz.model.distributionsourcepackagecache import (
+ DistributionSourcePackageCache,
+ )
from lp.soyuz.model.livefsbuild import LiveFSFile
from lp.soyuz.model.reporting import LatestPersonSourcePackageReleaseCache
from lp.testing import (
@@ -1386,6 +1390,47 @@
self._test_LiveFSFilePruner(
'application/octet-stream', 0, expected_count=1)
+ def test_DistributionSourcePackageCacheChangelogPruner(self):
+ # Garbo prunes old changelog columns from
+ # DistributionSourcePackageCache.
+ switch_dbuser('testadmin')
+ store = IMasterStore(DistributionSourcePackageCache)
+ distribution = self.factory.makeDistribution()
+ dsps = [
+ self.factory.makeDSPCache(
+ distro_name=distribution.name,
+ package_name=self.factory.getUniqueUnicode(),
+ make_distro=False, official=False)[1]
+ for _ in range(3)]
+ store.flush()
+ try:
+ store.execute("""
+ UPDATE DistributionSourcePackageCache
+ SET changelog = 'placeholder'
+ WHERE
+ distribution = %s
+ AND sourcepackagename IN (%s, %s)
+ """ % (
+ sqlbase.quote(distribution),
+ sqlbase.quote(dsps[0].sourcepackagename),
+ sqlbase.quote(dsps[1].sourcepackagename)))
+ except ProgrammingError:
+ # The column must have been removed from the database.
+ return
+
+ self.assertEqual(
+ 3, distribution.searchSourcePackageCaches(u"").count())
+ self.assertEqual(
+ 2, distribution.searchSourcePackageCaches(u"placeholder").count())
+
+ transaction.commit()
+ self.runDaily()
+
+ self.assertEqual(
+ 3, distribution.searchSourcePackageCaches(u"").count())
+ self.assertEqual(
+ 0, distribution.searchSourcePackageCaches(u"placeholder").count())
+
class TestGarboTasks(TestCaseWithFactory):
layer = LaunchpadZopelessLayer
=== modified file 'lib/lp/soyuz/interfaces/distributionsourcepackagecache.py'
--- lib/lp/soyuz/interfaces/distributionsourcepackagecache.py 2015-10-21 09:37:08 +0000
+++ lib/lp/soyuz/interfaces/distributionsourcepackagecache.py 2016-05-17 09:47:55 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009 Canonical Ltd. This software is licensed under the
+# Copyright 2009-2016 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""Source package in Distribution Cache interfaces."""
@@ -29,9 +29,6 @@
binpkgdescriptions = Attribute("A concatenation of the descriptions "
"of the binary packages from this source package name in the "
"distro.")
- changelog = Attribute("A concatenation of the source package release "
- "changelog entries for this source package, where the status is "
- "not REMOVED.")
distributionsourcepackage = Attribute("The DistributionSourcePackage "
"for which this is a cache.")
=== modified file 'lib/lp/soyuz/model/distributionsourcepackagecache.py'
--- lib/lp/soyuz/model/distributionsourcepackagecache.py 2015-07-08 16:05:11 +0000
+++ lib/lp/soyuz/model/distributionsourcepackagecache.py 2016-05-17 09:47:55 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2011 Canonical Ltd. This software is licensed under the
+# Copyright 2009-2016 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
__metaclass__ = type
@@ -43,7 +43,6 @@
binpkgnames = StringCol(notNull=False, default=None)
binpkgsummaries = StringCol(notNull=False, default=None)
binpkgdescriptions = StringCol(notNull=False, default=None)
- changelog = StringCol(notNull=False, default=None)
@property
def distributionsourcepackage(self):
@@ -167,13 +166,8 @@
binpkgnames = set()
binpkgsummaries = set()
binpkgdescriptions = set()
- sprchangelog = set()
for spr in sprs:
log.debug("Considering source version %s" % spr.version)
- # changelog may be empty, in which case we don't want to add it
- # to the set as the join would fail below.
- if spr.changelog_entry is not None:
- sprchangelog.add(spr.changelog_entry)
binpkgs = IStore(BinaryPackageRelease).find(
(BinaryPackageName.name, BinaryPackageRelease.summary,
BinaryPackageRelease.description),
@@ -190,7 +184,6 @@
cache.binpkgnames = ' '.join(sorted(binpkgnames))
cache.binpkgsummaries = ' '.join(sorted(binpkgsummaries))
cache.binpkgdescriptions = ' '.join(sorted(binpkgdescriptions))
- cache.changelog = ' '.join(sorted(sprchangelog))
@classmethod
def updateAll(cls, distro, archive, log, ztm, commit_chunk=500):
=== modified file 'lib/lp/soyuz/stories/distribution/xx-distribution-packages.txt'
--- lib/lp/soyuz/stories/distribution/xx-distribution-packages.txt 2015-09-04 12:36:43 +0000
+++ lib/lp/soyuz/stories/distribution/xx-distribution-packages.txt 2016-05-17 09:47:55 +0000
@@ -61,47 +61,6 @@
The Mozilla Firefox web browser
(Matching binaries: mozilla-firefox, mozilla-firefox-data.)
-Now try searching for text that we know to be in a change log entry, to
-prove that FTI works on change logs. The text we're looking for is
-"placeholder" which is mentioned in the change log entry for pmount and
-libstdc++, so we are looking for two results here as the "placeholder"
-text is not mentioned in anything else that is indexed.
-
- >>> browser.open("http://localhost/ubuntu/+search")
- >>> field = browser.getControl(name="text")
- >>> field.value = 'placeholder'
- >>> browser.getControl('Search', index=0).click()
-
-Note, by default we only search on binary package names (as the fti is
-not currently so useful), so the initial result is empty, but contains
-a link to the fti/source package search:
-
- >>> print extract_text(find_tag_by_id(browser.contents, 'no-results'))
- Your search for “placeholder” did not return any results.
- ...
-
-Clicking on the provided link to retry the search against source packages
-finds the fti results:
-
- >>> browser.getLink(id='source-search').click()
- >>> for tag in find_tags_by_class(
- ... browser.contents, 'batch-navigation-index'):
- ... print extract_text(tag)
- All packages with sources matching your query “placeholder”
- 1...2 of 2 results
-
- >>> soup = find_main_content(browser.contents)
- >>> results = soup.findAll(attrs={'class': 'pagematch'})
- >>> len(results)
- 2
-
- >>> texts = [extract_text(html) for html in results]
- >>> texts.sort()
- >>> for text in texts:
- ... print text.encode('ascii', 'backslashreplace')
- libstdc++
- pmount
-
Distribution package change summary
-----------------------------------
Follow ups