← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] ~cjwatson/launchpad:skip-conda-source-files-harder into launchpad:master

 

Colin Watson has proposed merging ~cjwatson/launchpad:skip-conda-source-files-harder into launchpad:master.

Commit message:
Always pretend that "source packages" for Conda have no files

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  Bug #2023315 in Launchpad itself: "Launchpad fails to delete packages from private non-deb PPAs"
  https://bugs.launchpad.net/launchpad/+bug/2023315

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/+git/launchpad/+merge/444873

`CIBuildUploadJob.format_by_repository_format` is supposed to ensure that no files are attached to Conda source packages, which are intended only as skeletons to fit into the rest of Launchpad's data model so that things like the web UI and copy operations work.  However, there appears to be exactly one case on production where a file somehow got attached to a source package in a Conda-format archive, and the attempt to reap that file from the archive is failing because we can't compute its path.  That file isn't actually published in Artifactory, so skipping it shouldn't be a problem, but we do need the reaper not to crash.  Since Conda "source packages" should never have any files, just enforce this in `SourcePackagePublishingHistory.files`.  That's slightly less awkward than the existing workaround in `ArchivePublisherBase.publish`, and it ensures that we handle this case both on publication and on removal.

I've left an XXX comment in place because I think we should remove this once we figure out how that file got attached in the first place and ensure that that doesn't happen again.  We'd also need to ensure that this query returns no rows:

    SELECT spph.id, lfa.filename
    FROM
        sourcepackagepublishinghistory spph,
        sourcepackagereleasefile sprf,
        libraryfilealias lfa
    WHERE
        spph.archive IN (
            SELECT id FROM archive WHERE repository_format = 2
        )
        AND spph.sourcepackagerelease = sprf.sourcepackagerelease
        AND sprf.libraryfile = lfa.id
        AND spph.dateremoved IS NULL;
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/launchpad:skip-conda-source-files-harder into launchpad:master.
diff --git a/lib/lp/archivepublisher/deathrow.py b/lib/lp/archivepublisher/deathrow.py
index cca0368..2dd0b9a 100644
--- a/lib/lp/archivepublisher/deathrow.py
+++ b/lib/lp/archivepublisher/deathrow.py
@@ -244,7 +244,8 @@ class DeathRow:
 
             See `canRemove` for more information.
             """
-            for pub_file in pub_record.files:
+            files = pub_record.files
+            for pub_file in files:
                 filename = pub_file.libraryfile.filename
                 file_md5 = pub_file.libraryfile.content.md5
 
@@ -280,6 +281,13 @@ class DeathRow:
                 condemned_files.add(file_path)
                 condemned_records.add(pub_record)
 
+            # A source package with no files at all (which can happen in
+            # some cases where the archive's repository format is not
+            # ArchiveRepositoryFormat.DEBIAN) cannot have any files which
+            # refer to other publishing records, so can always be removed.
+            if not files:
+                condemned_records.add(pub_record)
+
         # Check source and binary publishing records.
         for pub_record in condemned_source_files:
             checkPubRecord(pub_record, SourcePackagePublishingHistory)
diff --git a/lib/lp/archivepublisher/tests/test_deathrow.py b/lib/lp/archivepublisher/tests/test_deathrow.py
index 77bcf8b..0426381 100644
--- a/lib/lp/archivepublisher/tests/test_deathrow.py
+++ b/lib/lp/archivepublisher/tests/test_deathrow.py
@@ -9,17 +9,24 @@ from pathlib import Path
 
 from zope.component import getUtility
 
+from lp.archivepublisher.artifactory import ArtifactoryPool
 from lp.archivepublisher.deathrow import DeathRow
 from lp.archivepublisher.diskpool import DiskPool
 from lp.registry.interfaces.distribution import IDistributionSet
+from lp.registry.interfaces.sourcepackage import SourcePackageType
 from lp.services.log.logger import BufferLogger
+from lp.soyuz.enums import (
+    ArchivePublishingMethod,
+    ArchivePurpose,
+    ArchiveRepositoryFormat,
+)
 from lp.soyuz.interfaces.component import IComponentSet
 from lp.soyuz.tests.test_publishing import SoyuzTestPublisher
-from lp.testing import TestCase
+from lp.testing import TestCaseWithFactory
 from lp.testing.layers import LaunchpadZopelessLayer
 
 
-class TestDeathRow(TestCase):
+class TestDeathRow(TestCaseWithFactory):
 
     layer = LaunchpadZopelessLayer
 
@@ -138,3 +145,30 @@ class TestDeathRow(TestCase):
 
         self.assertDoesNotExist(main_dsc_path)
         self.assertDoesNotExist(universe_dsc_path)
+
+    def test_skips_conda_source_packages(self):
+        root_url = "https://foo.example.com/artifactory/repository";
+        distroseries = self.factory.makeDistroSeries()
+        archive = self.factory.makeArchive(
+            distribution=distroseries.distribution,
+            purpose=ArchivePurpose.PPA,
+            publishing_method=ArchivePublishingMethod.ARTIFACTORY,
+            repository_format=ArchiveRepositoryFormat.CONDA,
+        )
+        stp = self.getTestPublisher(distroseries)
+        logger = BufferLogger()
+        pool = ArtifactoryPool(archive, root_url, logger)
+        deathrow = DeathRow(archive, pool, logger)
+        pub_source = stp.getPubSource(
+            filecontent=b"Hello world",
+            archive=archive,
+            format=SourcePackageType.CI_BUILD,
+            user_defined_fields=[("bogus_field", "instead_of_subdir")],
+        )
+        pub_source.publish(pool, logger)
+        pub_source.requestObsolescence()
+        self.layer.commit()
+
+        self.assertIsNone(pub_source.dateremoved)
+        deathrow.reap()
+        self.assertIsNotNone(pub_source.dateremoved)
diff --git a/lib/lp/soyuz/model/publishing.py b/lib/lp/soyuz/model/publishing.py
index ba47074..cabddf4 100644
--- a/lib/lp/soyuz/model/publishing.py
+++ b/lib/lp/soyuz/model/publishing.py
@@ -68,7 +68,6 @@ from lp.soyuz.interfaces.component import IComponentSet
 from lp.soyuz.interfaces.distributionjob import (
     IDistroSeriesDifferenceJobSource,
 )
-from lp.soyuz.interfaces.files import ISourcePackageReleaseFile
 from lp.soyuz.interfaces.publishing import (
     DeletionError,
     IBinaryPackagePublishingHistory,
@@ -154,17 +153,6 @@ class ArchivePublisherBase:
         """See `IPublishing`"""
         try:
             for pub_file in self.files:
-                # XXX ilasc 2022-14-11 We want to exclude the Conda source
-                # packages as a temporary workaround to overcome the issue of
-                # the publisher crashing when the package contains files
-                # missing required metadata as was the case of the missing
-                # `subdir` from info/index.json file.
-                if (
-                    self.archive.repository_format
-                    == ArchiveRepositoryFormat.CONDA
-                    and ISourcePackageReleaseFile.providedBy(pub_file)
-                ):
-                    continue
                 pool_name = self.pool_name
                 pool_version = self.pool_version
                 component = (
@@ -476,6 +464,20 @@ class SourcePackagePublishingHistory(SQLBase, ArchivePublisherBase):
     @property
     def files(self):
         """See `IPublishing`."""
+        # XXX ilasc 2022-11-14 / cjwatson 2023-06-15: Source packages in
+        # Conda repositories should never have any files attached; they
+        # should be pure skeletons used to fit into the rest of Launchpad's
+        # data model (see commit 397fbebfa3).  However, there is one case
+        # where a file somehow got attached to a source package in a Conda
+        # repository (see
+        # https://bugs.launchpad.net/launchpad/+bug/2023315); this file
+        # doesn't have enough metadata for the publisher to compute its URL
+        # path, and so it crashes.  Until we figure out why this happened
+        # and ensure that it can't happen again, forcibly return the empty
+        # list for such source packages.
+        if self.archive.repository_format == ArchiveRepositoryFormat.CONDA:
+            return []
+
         files = self.sourcepackagerelease.files
         lfas = bulk.load_related(LibraryFileAlias, files, ["libraryfile_id"])
         bulk.load_related(LibraryFileContent, lfas, ["contentID"])
diff --git a/lib/lp/soyuz/tests/test_publishing.py b/lib/lp/soyuz/tests/test_publishing.py
index 2f0bb5d..ac86b56 100644
--- a/lib/lp/soyuz/tests/test_publishing.py
+++ b/lib/lp/soyuz/tests/test_publishing.py
@@ -888,7 +888,7 @@ class TestNativePublishing(TestNativePublishingBase):
             filecontent=b"Hello world",
             archive=archive,
             format=SourcePackageType.CI_BUILD,
-            user_defined_fields=[("bogus_filed", "instead_of_subdir")],
+            user_defined_fields=[("bogus_field", "instead_of_subdir")],
         )
         pub_source.publish(pool, self.logger)
         self.assertFalse(mock.called)

Follow ups