← Back to team overview

launchpad-reviewers team mailing list archive

Re: [Merge] ~tushar5526/launchpad:add-support-for-parallel-publishing into launchpad:master

 


Diff comments:

> diff --git a/lib/lp/archivepublisher/scripts/base.py b/lib/lp/archivepublisher/scripts/base.py
> index f590791..1182032 100644
> --- a/lib/lp/archivepublisher/scripts/base.py
> +++ b/lib/lp/archivepublisher/scripts/base.py
> @@ -36,6 +37,34 @@ class PublisherScript(LaunchpadCronScript):
>              help="Publish all Ubuntu-derived distributions.",
>          )
>  
> +    def addParallelPublisherOptions(self):

Yes, maybe addCommonPublisherOptions, do you have any other suggestions?

> +        self.parser.add_option(
> +            "--archive",
> +            action="append",
> +            dest="archives",
> +            metavar="REFERENCE",
> +            default=[],
> +            help="Only run over the archives identified by this reference. "
> +            "You can specify multiple archives by repeating the option",
> +        )
> +
> +        self.parser.add_option(
> +            "--exclude",
> +            action="append",
> +            dest="excluded_archives",
> +            metavar="REFERENCE",
> +            default=[],
> +            help="Skip the archives identified by this reference in the "
> +            "publisher run. You can specify multiple archives by repeating "
> +            "the option",
> +        )
> +
> +        self.parser.add_option(
> +            "--lockfilename",
> +            dest="lockfilename",
> +            help="lockfilename to be used by the publisher run",
> +        )
> +
>      def findSelectedDistro(self):
>          """Find the `Distribution` named by the --distribution option.
>  
> @@ -62,3 +91,35 @@ class PublisherScript(LaunchpadCronScript):
>              return self.findDerivedDistros()
>          else:
>              return [self.findSelectedDistro()]
> +
> +    def findArchives(self, archive_references, distribution=None):
> +        """
> +        Retrieve a list of archives based on the provided references and
> +        optional distribution.
> +
> +        Args:
> +            archive_references (list): A list of archive references to
> +            retrieve.
> +            distribution (str, optional): The distribution to filter archives
> +            by. Defaults to None.
> +
> +        Returns:
> +            list: A list of archives that match the provided references and
> +            distribution.
> +        """
> +        if not archive_references:
> +            return []
> +
> +        archives = []
> +        for reference in archive_references:
> +            archive = getUtility(IArchiveSet).getByReference(reference)

Agreed. Thanks!

> +            if not archive:
> +                self.logger.warning(
> +                    "Cannot find the excluded archive with reference: '%s', "

Thanks, resolved.

> +                    % reference
> +                )
> +                continue
> +            if distribution and archive.distribution != distribution:
> +                continue
> +            archives.append(archive)
> +        return archives
> diff --git a/lib/lp/archivepublisher/scripts/processaccepted.py b/lib/lp/archivepublisher/scripts/processaccepted.py
> index da27fb9..dc46718 100644
> --- a/lib/lp/archivepublisher/scripts/processaccepted.py
> +++ b/lib/lp/archivepublisher/scripts/processaccepted.py
> @@ -62,6 +63,18 @@ class ProcessAccepted(PublisherScript):
>              help="Run only over COPY archives.",
>          )
>  
> +    def countExclusiveOptions(self):
> +        """Return the number of exclusive "mode" options that were set.
> +
> +        In valid use, at most one of them should be set.

I did this to stay consistent with how mutually exclusive options were validated in publish-distro script.

> +        """
> +        exclusive_options = [
> +            self.options.ppa,
> +            self.options.copy_archives,
> +            self.options.archives,
> +        ]
> +        return len(list(filter(None, exclusive_options)))
> +
>      def validateArguments(self):
>          """Validate command-line arguments."""
>          if self.options.ppa and self.options.copy_archives:
> diff --git a/lib/lp/archivepublisher/scripts/publishdistro.py b/lib/lp/archivepublisher/scripts/publishdistro.py
> index 38ee406..90c8116 100644
> --- a/lib/lp/archivepublisher/scripts/publishdistro.py
> +++ b/lib/lp/archivepublisher/scripts/publishdistro.py
> @@ -375,20 +371,32 @@ class PublishDistro(PublisherScript):
>  
>      def getTargetArchives(self, distribution):
>          """Find the archive(s) selected by the script's options."""
> -        if self.options.archive:
> -            archive = getUtility(IArchiveSet).getByReference(
> -                self.options.archive
> -            )
> -            if archive.distribution == distribution:
> -                return [archive]
> -            else:
> -                return []
> +        if self.options.archives:
> +            return self.findArchives(self.options.archives, distribution)
>          elif self.options.partner:
>              return [distribution.getArchiveByComponent("partner")]
>          elif self.options.ppa:
> -            return filter(is_ppa_public, self.getPPAs(distribution))
> +            return [
> +                archive
> +                for archive in filter(
> +                    is_ppa_public, self.getPPAs(distribution)
> +                )
> +                if archive
> +                not in self.findArchives(
> +                    self.options.excluded_archives, distribution
> +                )
> +            ]

Agreed. Thanks!

>          elif self.options.private_ppa:
> -            return filter(is_ppa_private, self.getPPAs(distribution))
> +            return [
> +                archive
> +                for archive in filter(
> +                    is_ppa_private, self.getPPAs(distribution)
> +                )
> +                if archive
> +                not in self.findArchives(
> +                    self.options.excluded_archives, distribution
> +                )
> +            ]
>          elif self.options.copy_archive:
>              return self.getCopyArchives(distribution)
>          else:
> @@ -598,6 +606,10 @@ class PublishDistro(PublisherScript):
>                  Store.of(archive).reset()
>  
>      def rsyncOVALData(self):
> +        # rsync supports --exclude option to exclude specific folders to
> +        # exclude. We can use that to exclude rsync for excluded PPAs and

Sorry, this was just a placeholder comment. It was removed in the latest commit. Thanks!

> +        # then we have to add in additional functionality to run it
> +        # specifically for included archive.
>          # Ensure that the rsync paths have a trailing slash.
>          rsync_src = os.path.join(
>              config.archivepublisher.oval_data_rsync_endpoint, ""
> @@ -634,6 +646,9 @@ class PublishDistro(PublisherScript):
>          """Compare the published OVAL files with the incoming one."""
>          start_dir = Path(config.archivepublisher.oval_data_root)
>          archive_set = getUtility(IArchiveSet)
> +        excluded_archives = self.findArchives(
> +            self.options.excluded_archives, distribution
> +        )

I have tried to reuse them where possible. Bulk querying them will also help as suggested. I was thinking of keeping it simple and not introduce any caches. I don't think we are going to scale it for more than 25 PPAs as it will become cumbersome to manage that via secrets.

>          for owner_path in start_dir.iterdir():
>              if not owner_path.name.startswith("~"):
>                  continue
> @@ -653,6 +673,15 @@ class PublishDistro(PublisherScript):
>                          archive_path.name,
>                      )
>                      continue
> +                if archive in excluded_archives:
> +                    self.logger.info(

I see. I added that to explicitly log what all was excluded in the publisher run, but it make sense to not have anything related to excluded archives in run.

> +                        "Skipping OVAL data for '~%s/%s/%s' "
> +                        "(archive excluded from the publisher run).",
> +                        owner_path.name[1:],
> +                        distribution.name,
> +                        archive_path.name,
> +                    )
> +                    continue
>                  for suite_path in archive_path.iterdir():
>                      try:
>                          series, pocket = distribution.getDistroSeriesAndPocket(
> diff --git a/lib/lp/archivepublisher/tests/test_processaccepted.py b/lib/lp/archivepublisher/tests/test_processaccepted.py
> index 34c459e..8c95f7a 100644
> --- a/lib/lp/archivepublisher/tests/test_processaccepted.py
> +++ b/lib/lp/archivepublisher/tests/test_processaccepted.py
> @@ -269,3 +270,104 @@ class TestProcessAccepted(TestCaseWithFactory):
>                  ]
>              ),
>          )
> +
> +    def test_countExclusiveOptions_is_zero_if_none_set(self):
> +        # If none of the exclusive options is set, countExclusiveOptions
> +        # counts zero.
> +        self.assertEqual(0, self.getScript().countExclusiveOptions())
> +
> +    def test_countExclusiveOptions_counts_ppa(self):
> +        # countExclusiveOptions includes the "ppa" option.
> +        self.assertEqual(
> +            1, self.getScript(test_args=["--ppa"]).countExclusiveOptions()
> +        )
> +
> +    def test_countExclusiveOptions_counts_copy_archives(self):
> +        # countExclusiveOptions includes the "copy-archive" option.
> +        self.assertEqual(
> +            1,
> +            self.getScript(
> +                test_args=["--copy-archives"]
> +            ).countExclusiveOptions(),
> +        )
> +
> +    def test_countExclusiveOptions_counts_archive(self):
> +        # countExclusiveOptions includes the "copy-archive" option.
> +        self.assertEqual(
> +            1, self.getScript(test_args=["--archive"]).countExclusiveOptions()
> +        )
> +
> +    def test_lockfilename_option_overrides_default_lock(self):
> +        script = self.getScript(test_args=["--lockfilename", "foo.lock"])
> +        assert script.lockfilepath == "/var/lock/foo.lock"

Agreed. I missed the pattern to use self.assertEqual. 

Switched to LOCK_PATH instead of hardcoded path.

> +
> +    def test_default_lock(self):
> +        script = self.getScript()
> +        assert script.lockfilepath == "/var/lock/%s" % GLOBAL_PUBLISHER_LOCK

ack, no more relevant after the new changes.

> +
> +    def test_getTargetArchives_ignores_excluded_archives_for_ppa(self):
> +        # If the selected exclusive option is "ppa," getTargetArchives
> +        # leaves out excluded PPAs.
> +        distroseries = self.factory.makeDistroSeries(distribution=self.distro)
> +
> +        ppa = self.factory.makeArchive(self.distro, purpose=ArchivePurpose.PPA)
> +        excluded_ppa_1 = self.factory.makeArchive(
> +            self.distro, purpose=ArchivePurpose.PPA
> +        )
> +        excluded_ppa_2 = self.factory.makeArchive(
> +            self.distro, purpose=ArchivePurpose.PPA, private=True
> +        )
> +
> +        for archive in [ppa, excluded_ppa_1, excluded_ppa_2]:
> +            self.createWaitingAcceptancePackage(
> +                archive=archive, distroseries=distroseries
> +            )
> +        script = self.getScript(
> +            test_args=[
> +                "--ppa",
> +                "--exclude",
> +                excluded_ppa_1.reference,
> +                "--exclude",
> +                excluded_ppa_2.reference,
> +            ],
> +        )
> +        self.assertContentEqual([ppa], script.getTargetArchives(self.distro))
> +
> +    def test_getTargetArchives_gets_specific_archives(self):
> +        # If the selected exclusive option is "archive,"
> +        # getTargetArchives looks for the specified archives.
> +
> +        distroseries = self.factory.makeDistroSeries(distribution=self.distro)
> +
> +        ppa1 = self.factory.makeArchive(
> +            self.distro, purpose=ArchivePurpose.PPA
> +        )
> +        ppa2 = self.factory.makeArchive(
> +            self.distro, purpose=ArchivePurpose.PPA, private=True
> +        )
> +        ppa3 = self.factory.makeArchive(
> +            self.distro, purpose=ArchivePurpose.PPA
> +        )
> +        ppa4 = self.factory.makeArchive(
> +            self.distro, purpose=ArchivePurpose.PPA, private=True
> +        )
> +
> +        # create another random archive in the same distro
> +        self.factory.makeArchive(self.distro, purpose=ArchivePurpose.PPA)
> +
> +        for archive in [ppa1, ppa2, ppa3, ppa4]:
> +            self.createWaitingAcceptancePackage(
> +                archive=archive, distroseries=distroseries
> +            )
> +
> +        script = self.getScript(
> +            test_args=[
> +                "--archive",
> +                ppa1.reference,
> +                "--archive",
> +                ppa2.reference,
> +            ],
> +        )
> +        self.assertContentEqual(
> +            [ppa1, ppa2], script.getTargetArchives(self.distro)
> +        )


-- 
https://code.launchpad.net/~tushar5526/launchpad/+git/launchpad/+merge/480435
Your team Launchpad code reviewers is requested to review the proposed merge of ~tushar5526/launchpad:add-support-for-parallel-publishing into launchpad:master.



References