launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #06040
[Merge] lp:~cjwatson/launchpad/gina-dsc-binaries into lp:launchpad
Colin Watson has proposed merging lp:~cjwatson/launchpad/gina-dsc-binaries into lp:launchpad.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
Related bugs:
Bug #911943 in Launchpad itself: "gina imports SourcePackageRelease.dsc_binaries incorrectly"
https://bugs.launchpad.net/launchpad/+bug/911943
For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/gina-dsc-binaries/+merge/87632
== Summary ==
There is some wrong data in SourcePackageRelease.dsc_binaries, which I believe is all gina's fault: that field is meant to be separated with comma-space, not space. This breaks lp:~cjwatson/launchpad/germinate-from-db, but would also be a blocker (one of several, but still) to switching to no-more-apt-ftparchive publishing for Ubuntu.
== Proposed fix ==
Fix gina to separate dsc_binaries correctly, and add a garbo-daily job to clean up the wreckage.
== Pre-implementation notes ==
Robert and Stuart educated me a bit on how to write garbo jobs.
== Implementation details ==
I found a few rows on dogfood where the Binary field had been incorrectly written as a dependency relationship field, complete with "(>= ...)". This is wildly wrong and it came from PPA uploads rather than gina; but it's in the past and s/ /, /g would not improve matters, so I just excluded anything containing '(', and included a test that such rows remain unchanged.
== Tests ==
bin/test -vvc -t gina -t garbo
== Demo and Q/A ==
I'd like to run this garbo job on dogfood and check that it really does clean everything up, which is probably most easily done by manual inspection of:
SELECT dsc_binaries FROM SourcePackageRelease WHERE dsc_binaries ~ '[a-z0-9+.-] ';
== lint ==
./lib/lp/soyuz/doc/gina.txt
162: want exceeds 78 characters.
179: want exceeds 78 characters.
223: want exceeds 78 characters.
236: want exceeds 78 characters.
242: want exceeds 78 characters.
Pre-existing, fiddly to fix, and should be ignoreable for the purposes of this branch.
--
https://code.launchpad.net/~cjwatson/launchpad/gina-dsc-binaries/+merge/87632
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/gina-dsc-binaries into lp:launchpad.
=== modified file 'database/schema/security.cfg'
--- database/schema/security.cfg 2012-01-04 11:49:08 +0000
+++ database/schema/security.cfg 2012-01-05 14:52:42 +0000
@@ -2156,7 +2156,7 @@
public.revisionauthor = SELECT, UPDATE
public.revisioncache = SELECT, DELETE
public.sourcepackagename = SELECT
-public.sourcepackagerelease = SELECT
+public.sourcepackagerelease = SELECT, UPDATE
public.sourcepackagepublishinghistory = SELECT, UPDATE
public.suggestivepotemplate = INSERT, DELETE
public.teamparticipation = SELECT, DELETE
=== modified file 'lib/lp/scripts/garbo.py'
--- lib/lp/scripts/garbo.py 2012-01-04 11:49:08 +0000
+++ lib/lp/scripts/garbo.py 2012-01-05 14:52:42 +0000
@@ -86,6 +86,7 @@
MAIN_STORE,
MASTER_FLAVOR,
)
+from lp.soyuz.model.sourcepackagerelease import SourcePackageRelease
from lp.translations.interfaces.potemplate import IPOTemplateSet
from lp.translations.model.potmsgset import POTMsgSet
from lp.translations.model.potranslation import POTranslation
@@ -901,6 +902,59 @@
self._update_oldest()
+class SourcePackageReleaseDscBinariesUpdater(TunableLoop):
+ """Fix incorrect values for SourcePackageRelease.dsc_binaries."""
+
+ maximum_chunk_size = 1000
+
+ def __init__(self, log, abort_time=None):
+ super(SourcePackageReleaseDscBinariesUpdater, self).__init__(
+ log, abort_time)
+ self.offset = 0
+ self.store = IMasterStore(SourcePackageRelease)
+ self.store.execute("DROP TABLE IF EXISTS MatchedSourcePackageRelease")
+ self.store.execute("""
+ CREATE TEMPORARY TABLE MatchedSourcePackageRelease AS
+ SELECT id AS spr
+ FROM SourcePackageRelease
+ -- Get all SPR IDs which have an incorrectly-separated
+ -- dsc_binaries value (space rather than comma-space).
+ WHERE dsc_binaries ~ '[a-z0-9+.-] '
+ -- Skip rows with dsc_binaries in dependency relationship
+ -- format. This is a different bug.
+ AND dsc_binaries NOT LIKE '%(%'
+ """, noresult=True)
+ self.store.execute("""
+ CREATE INDEX matchedsourcepackagerelease__spr__idx
+ ON MatchedSourcePackageRelease(spr)
+ """, noresult=True)
+ self.matched_spr_count = self.store.execute("""
+ SELECT COUNT(*) FROM MatchedSourcePackageRelease
+ """).get_one()[0]
+
+ def isDone(self):
+ """See `TunableLoop`."""
+ return self.offset >= self.matched_spr_count
+
+ def __call__(self, chunk_size):
+ """See `TunableLoop`."""
+ self.store.execute("""
+ UPDATE SourcePackageRelease
+ SET dsc_binaries = regexp_replace(
+ dsc_binaries, '([a-z0-9+.-]) ', E'\\\\1, ', 'g')
+ FROM (
+ SELECT spr
+ FROM MatchedSourcePackageRelease
+ ORDER BY spr
+ OFFSET %d
+ LIMIT %d
+ ) AS MatchedSourcePackageRelease
+ WHERE SourcePackageRelease.id = MatchedSourcePackageRelease.spr
+ """ % (self.offset, chunk_size), noresult=True)
+ self.offset += chunk_size
+ transaction.commit()
+
+
class SuggestiveTemplatesCacheUpdater(TunableLoop):
"""Refresh the SuggestivePOTemplate cache.
@@ -1265,6 +1319,7 @@
ObsoleteBugAttachmentPruner,
OldTimeLimitedTokenDeleter,
RevisionAuthorEmailLinker,
+ SourcePackageReleaseDscBinariesUpdater,
SuggestiveTemplatesCacheUpdater,
POTranslationPruner,
UnusedPOTMsgSetPruner,
=== modified file 'lib/lp/scripts/tests/test_garbo.py'
--- lib/lp/scripts/tests/test_garbo.py 2012-01-04 11:49:08 +0000
+++ lib/lp/scripts/tests/test_garbo.py 2012-01-05 14:52:42 +0000
@@ -996,6 +996,54 @@
self.runDaily()
self.assertEqual(0, unreferenced_msgsets.count())
+ def test_SourcePackageReleaseDscBinariesUpdater_updates_incorrect(self):
+ # SourcePackageReleaseDscBinariesUpdater fixes incorrectly-separated
+ # dsc_binaries values.
+ LaunchpadZopelessLayer.switchDbUser('testadmin')
+ three = [
+ self.factory.getUniqueString(),
+ self.factory.getUniqueString(),
+ self.factory.getUniqueString(),
+ ]
+ spr_three = self.factory.makeSourcePackageRelease(
+ dsc_binaries=" ".join(three))
+ transaction.commit()
+ self.runDaily()
+ self.assertEqual(", ".join(three), spr_three.dsc_binaries)
+
+ def test_SourcePackageReleaseDscBinariesUpdater_skips_correct(self):
+ # SourcePackageReleaseDscBinariesUpdater leaves correct dsc_binaries
+ # values alone.
+ LaunchpadZopelessLayer.switchDbUser('testadmin')
+ one = self.factory.getUniqueString()
+ spr_one = self.factory.makeSourcePackageRelease(dsc_binaries=one)
+ three = ", ".join([
+ self.factory.getUniqueString(),
+ self.factory.getUniqueString(),
+ self.factory.getUniqueString(),
+ ])
+ spr_three = self.factory.makeSourcePackageRelease(dsc_binaries=three)
+ transaction.commit()
+ self.runDaily()
+ self.assertEqual(one, spr_one.dsc_binaries)
+ self.assertEqual(three, spr_three.dsc_binaries)
+
+ def test_SourcePackageReleaseDscBinariesUpdater_skips_broken(self):
+ # There have been a few instances of Binary fields in PPA packages
+ # that are formatted like a dependency relationship field, complete
+ # with (>= ...). This is completely invalid (and failed to build),
+ # but does exist historically, so we have to deal with it.
+ # SourcePackageReleaseDscBinariesUpdater leaves such fields well
+ # alone.
+ LaunchpadZopelessLayer.switchDbUser('testadmin')
+ broken = "%s (>= 1), %s" % (
+ self.factory.getUniqueString(), self.factory.getUniqueString())
+ spr_broken = self.factory.makeSourcePackageRelease(
+ dsc_binaries=broken)
+ transaction.commit()
+ self.runDaily()
+ self.assertEqual(broken, spr_broken.dsc_binaries)
+
class TestGarboTasks(TestCaseWithFactory):
layer = LaunchpadZopelessLayer
=== modified file 'lib/lp/soyuz/doc/gina.txt'
--- lib/lp/soyuz/doc/gina.txt 2011-12-30 06:14:56 +0000
+++ lib/lp/soyuz/doc/gina.txt 2012-01-05 14:52:42 +0000
@@ -280,6 +280,8 @@
-----END PGP SIGNATURE-----
>>> print cap.maintainer.displayname
Michael Vogt
+ >>> print cap.dsc_binaries
+ libcap-dev, libcap-bin, libcap1
Test ubuntu-meta in breezy, which was forcefully imported.
=== modified file 'lib/lp/soyuz/scripts/gina/handlers.py'
--- lib/lp/soyuz/scripts/gina/handlers.py 2011-12-30 06:14:56 +0000
+++ lib/lp/soyuz/scripts/gina/handlers.py 2012-01-05 14:52:42 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2011 Canonical Ltd. This software is licensed under the
+# Copyright 2009-2012 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""Gina db handlers.
@@ -654,7 +654,7 @@
dsc_format=src.format,
dsc_maintainer_rfc822=maintainer_line,
dsc_standards_version=src.standards_version,
- dsc_binaries=" ".join(src.binaries),
+ dsc_binaries=", ".join(src.binaries),
upload_archive=distroseries.main_archive)
log.info('Source Package Release %s (%s) created' %
(name.name, src.version))