← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] ~cjwatson/launchpad:archive-file-history-backfill into launchpad:master

 

Colin Watson has proposed merging ~cjwatson/launchpad:archive-file-history-backfill into launchpad:master with ~cjwatson/launchpad:archive-file-history as a prerequisite.

Commit message:
Backfill ArchiveFile.date_superseded

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  Bug #1765933 in Launchpad itself: "Allow building livefses against a view of the archive at a fixed point in time"
  https://bugs.launchpad.net/launchpad/+bug/1765933

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/+git/launchpad/+merge/390761

We can currently derive ArchiveFile.date_superseded reliably from ArchiveFile.scheduled_deletion_date (by subtracting BY_HASH_STAY_OF_EXECUTION days), and backfilling it gives historical queries a good chance of being mostly accurate.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/launchpad:archive-file-history-backfill into launchpad:master.
diff --git a/database/schema/security.cfg b/database/schema/security.cfg
index 8a9a31b..b43a05b 100644
--- a/database/schema/security.cfg
+++ b/database/schema/security.cfg
@@ -2398,6 +2398,7 @@ public.accesspolicy                     = SELECT, DELETE
 public.accesspolicygrant                = SELECT, DELETE
 public.account                          = SELECT, DELETE
 public.answercontact                    = SELECT, DELETE
+public.archivefile                      = SELECT, UPDATE
 public.branch                           = SELECT, UPDATE
 public.branchjob                        = SELECT, DELETE
 public.branchmergeproposal              = SELECT, UPDATE, DELETE
diff --git a/lib/lp/archivepublisher/publishing.py b/lib/lp/archivepublisher/publishing.py
index 4735b89..3caf46f 100644
--- a/lib/lp/archivepublisher/publishing.py
+++ b/lib/lp/archivepublisher/publishing.py
@@ -2,6 +2,7 @@
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 __all__ = [
+    'BY_HASH_STAY_OF_EXECUTION',
     'cannot_modify_suite',
     'DirectoryHash',
     'FORMAT_TO_SUBCOMPONENT',
diff --git a/lib/lp/scripts/garbo.py b/lib/lp/scripts/garbo.py
index 09a64ed..104a9e9 100644
--- a/lib/lp/scripts/garbo.py
+++ b/lib/lp/scripts/garbo.py
@@ -50,6 +50,7 @@ from zope.component import getUtility
 from zope.security.proxy import removeSecurityProxy
 
 from lp.answers.model.answercontact import AnswerContact
+from lp.archivepublisher.publishing import BY_HASH_STAY_OF_EXECUTION
 from lp.bugs.interfaces.bug import IBugSet
 from lp.bugs.model.bug import Bug
 from lp.bugs.model.bugattachment import BugAttachment
@@ -123,6 +124,7 @@ from lp.snappy.model.snapbuild import SnapFile
 from lp.snappy.model.snapbuildjob import SnapBuildJobType
 from lp.soyuz.interfaces.publishing import active_publishing_status
 from lp.soyuz.model.archive import Archive
+from lp.soyuz.model.archivefile import ArchiveFile
 from lp.soyuz.model.distributionsourcepackagecache import (
     DistributionSourcePackageCache,
     )
@@ -1552,6 +1554,37 @@ class GitRepositoryPruner(TunableLoop):
         transaction.commit()
 
 
+class ArchiveFileDatePopulator(TunableLoop):
+    """Backfill ArchiveFile.date_superseded from scheduled_deletion_date."""
+
+    maximum_chunk_size = 5000
+
+    def __init__(self, log, abort_time=None):
+        super(ArchiveFileDatePopulator, self).__init__(log, abort_time)
+        self.start_at = 1  # Lowest ArchiveFile.id not yet examined.
+        self.store = IMasterStore(ArchiveFile)
+
+    def findArchiveFiles(self):
+        """Return rows from start_at on that still need backfilling."""
+        return self.store.find(
+            ArchiveFile, ArchiveFile.id >= self.start_at,
+            ArchiveFile.date_superseded == None,
+            ArchiveFile.scheduled_deletion_date != None,
+            ).order_by(ArchiveFile.id)
+
+    def isDone(self):
+        return self.findArchiveFiles().is_empty()
+
+    def __call__(self, chunk_size):
+        """Set date_superseded on up to chunk_size rows, then commit."""
+        stay = timedelta(days=BY_HASH_STAY_OF_EXECUTION)
+        for row in list(self.findArchiveFiles()[:chunk_size]):
+            row.date_superseded = row.scheduled_deletion_date - stay
+            # Advance per row, so an empty chunk cannot raise IndexError.
+            self.start_at = row.id + 1
+        transaction.commit()
+
+
 class BaseDatabaseGarbageCollector(LaunchpadCronScript):
     """Abstract base class to run a collection of TunableLoops."""
     script_name = None  # Script name for locking and database user. Override.
@@ -1825,6 +1858,7 @@ class DailyDatabaseGarbageCollector(BaseDatabaseGarbageCollector):
     script_name = 'garbo-daily'
     tunable_loops = [
         AnswerContactPruner,
+        ArchiveFileDatePopulator,
         BranchJobPruner,
         BugNotificationPruner,
         BugWatchActivityPruner,
diff --git a/lib/lp/scripts/tests/test_garbo.py b/lib/lp/scripts/tests/test_garbo.py
index 2fbc80d..ac17212 100644
--- a/lib/lp/scripts/tests/test_garbo.py
+++ b/lib/lp/scripts/tests/test_garbo.py
@@ -87,6 +87,7 @@ from lp.registry.model.commercialsubscription import CommercialSubscription
 from lp.registry.model.teammembership import TeamMembership
 from lp.scripts.garbo import (
     AntiqueSessionPruner,
+    ArchiveFileDatePopulator,
     BulkPruner,
     DailyDatabaseGarbageCollector,
     DuplicateSessionPruner,
@@ -134,6 +135,7 @@ from lp.snappy.model.snapbuildjob import (
     SnapStoreUploadJob,
     )
 from lp.soyuz.enums import PackagePublishingStatus
+from lp.soyuz.interfaces.archivefile import IArchiveFileSet
 from lp.soyuz.interfaces.livefs import LIVEFS_FEATURE_FLAG
 from lp.soyuz.model.distributionsourcepackagecache import (
     DistributionSourcePackageCache,
@@ -433,7 +435,8 @@ class TestGarbo(FakeAdapterMixin, TestCaseWithFactory):
         self.log_buffer = six.StringIO()
         handler = logging.StreamHandler(self.log_buffer)
         self.log.addHandler(handler)
-        self.addDetail('garbo-log', text_content(self.log_buffer.getvalue()))
+        self.addCleanup(lambda: self.addDetail(
+            'garbo-log', text_content(self.log_buffer.getvalue())))
 
     def runFrequently(self, maximum_chunk_size=2, test_args=()):
         switch_dbuser('garbo_daily')
@@ -1725,6 +1728,49 @@ class TestGarbo(FakeAdapterMixin, TestCaseWithFactory):
         # retained.
         self._test_SnapFilePruner('foo.snap', None, 30, expected_count=1)
 
+    def test_ArchiveFileDatePopulator(self):
+        # Only files with a scheduled_deletion_date are backfilled.
+        switch_dbuser('testadmin')
+        now = datetime.now(UTC)
+        untouched, scheduled = (
+            self.factory.makeArchiveFile() for _ in range(2))
+        removeSecurityProxy(scheduled).scheduled_deletion_date = (
+            now + timedelta(hours=6))
+
+        self.runDaily()
+
+        self.assertIsNone(untouched.date_superseded)
+        self.assertEqual(
+            now - timedelta(hours=18), scheduled.date_superseded)
+
+    def test_ArchiveFileDatePopulator_findArchiveFiles_filters_correctly(self):
+        switch_dbuser('testadmin')
+
+        # One file with date_superseded already set, one with only
+        # scheduled_deletion_date set, and one with neither date set.
+        superseded, pending, untouched = (
+            self.factory.makeArchiveFile() for _ in range(3))
+
+        Store.of(superseded).flush()
+        getUtility(IArchiveFileSet).scheduleDeletion(
+            [superseded], timedelta(days=1))
+        self.assertIsNotNone(superseded.date_superseded)
+
+        removeSecurityProxy(pending).scheduled_deletion_date = (
+            datetime.now(UTC) + timedelta(days=1))
+        self.assertIsNone(pending.date_superseded)
+
+        self.assertIsNone(untouched.date_superseded)
+        self.assertIsNone(untouched.scheduled_deletion_date)
+
+        populator = ArchiveFileDatePopulator(None)
+        # Ignore any ArchiveFiles that predate this test.
+        populator.start_at = superseded.id
+
+        matches = populator.findArchiveFiles()
+        self.assertEqual(1, matches.count())
+        self.assertEqual(pending, matches.one())
+
 
 class TestGarboTasks(TestCaseWithFactory):
     layer = LaunchpadZopelessLayer