launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #25294
[Merge] ~cjwatson/launchpad:archive-file-history-backfill into launchpad:master
Colin Watson has proposed merging ~cjwatson/launchpad:archive-file-history-backfill into launchpad:master with ~cjwatson/launchpad:archive-file-history as a prerequisite.
Commit message:
Backfill ArchiveFile.date_superseded
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
Related bugs:
Bug #1765933 in Launchpad itself: "Allow building livefses against a view of the archive at a fixed point in time"
https://bugs.launchpad.net/launchpad/+bug/1765933
For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/+git/launchpad/+merge/390761
We can currently derive this reliably from ArchiveFile.scheduled_deletion_date, and doing so gives us a chance of historical queries being mostly accurate.
--
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/launchpad:archive-file-history-backfill into launchpad:master.
diff --git a/database/schema/security.cfg b/database/schema/security.cfg
index 8a9a31b..b43a05b 100644
--- a/database/schema/security.cfg
+++ b/database/schema/security.cfg
@@ -2398,6 +2398,7 @@ public.accesspolicy = SELECT, DELETE
public.accesspolicygrant = SELECT, DELETE
public.account = SELECT, DELETE
public.answercontact = SELECT, DELETE
+public.archivefile = SELECT, UPDATE
public.branch = SELECT, UPDATE
public.branchjob = SELECT, DELETE
public.branchmergeproposal = SELECT, UPDATE, DELETE
diff --git a/lib/lp/archivepublisher/publishing.py b/lib/lp/archivepublisher/publishing.py
index 4735b89..3caf46f 100644
--- a/lib/lp/archivepublisher/publishing.py
+++ b/lib/lp/archivepublisher/publishing.py
@@ -2,6 +2,7 @@
# GNU Affero General Public License version 3 (see the file LICENSE).
__all__ = [
+ 'BY_HASH_STAY_OF_EXECUTION',
'cannot_modify_suite',
'DirectoryHash',
'FORMAT_TO_SUBCOMPONENT',
diff --git a/lib/lp/scripts/garbo.py b/lib/lp/scripts/garbo.py
index 09a64ed..104a9e9 100644
--- a/lib/lp/scripts/garbo.py
+++ b/lib/lp/scripts/garbo.py
@@ -50,6 +50,7 @@ from zope.component import getUtility
from zope.security.proxy import removeSecurityProxy
from lp.answers.model.answercontact import AnswerContact
+from lp.archivepublisher.publishing import BY_HASH_STAY_OF_EXECUTION
from lp.bugs.interfaces.bug import IBugSet
from lp.bugs.model.bug import Bug
from lp.bugs.model.bugattachment import BugAttachment
@@ -123,6 +124,7 @@ from lp.snappy.model.snapbuild import SnapFile
from lp.snappy.model.snapbuildjob import SnapBuildJobType
from lp.soyuz.interfaces.publishing import active_publishing_status
from lp.soyuz.model.archive import Archive
+from lp.soyuz.model.archivefile import ArchiveFile
from lp.soyuz.model.distributionsourcepackagecache import (
DistributionSourcePackageCache,
)
@@ -1552,6 +1554,37 @@ class GitRepositoryPruner(TunableLoop):
transaction.commit()
+class ArchiveFileDatePopulator(TunableLoop):
+    """Populates ArchiveFile.date_superseded.
+
+    Backfills rows that predate the date_superseded column by deriving
+    the value from scheduled_deletion_date, which is set
+    BY_HASH_STAY_OF_EXECUTION days after a file is superseded (see the
+    merge description: the derivation is currently reliable).
+    """
+
+    maximum_chunk_size = 5000
+
+    def __init__(self, log, abort_time=None):
+        super(ArchiveFileDatePopulator, self).__init__(log, abort_time)
+        # Resume point for the batched id-ordered walk; advanced by
+        # __call__ after each committed chunk.
+        self.start_at = 1
+        self.store = IMasterStore(ArchiveFile)
+
+    def findArchiveFiles(self):
+        """Return backfillable rows at or beyond the resume point.
+
+        Only rows with date_superseded unset AND scheduled_deletion_date
+        set qualify: without the latter there is nothing to derive the
+        former from.
+        """
+        archive_files = self.store.find(
+            ArchiveFile,
+            ArchiveFile.id >= self.start_at,
+            ArchiveFile.date_superseded == None,
+            ArchiveFile.scheduled_deletion_date != None)
+        return archive_files.order_by(ArchiveFile.id)
+
+    def isDone(self):
+        # Done once no backfillable rows remain past the resume point.
+        return self.findArchiveFiles().is_empty()
+
+    def __call__(self, chunk_size):
+        """Backfill date_superseded for the next chunk_size rows."""
+        archive_files = list(self.findArchiveFiles()[:chunk_size])
+        for archive_file in archive_files:
+            # Invert the scheduling calculation:
+            # scheduled_deletion_date was date_superseded plus the
+            # BY_HASH_STAY_OF_EXECUTION grace period (in days).
+            archive_file.date_superseded = (
+                archive_file.scheduled_deletion_date -
+                timedelta(days=BY_HASH_STAY_OF_EXECUTION))
+        # Safe: isDone() guarantees at least one row when we are called.
+        self.start_at = archive_files[-1].id + 1
+        transaction.commit()
+
+
class BaseDatabaseGarbageCollector(LaunchpadCronScript):
"""Abstract base class to run a collection of TunableLoops."""
script_name = None # Script name for locking and database user. Override.
@@ -1825,6 +1858,7 @@ class DailyDatabaseGarbageCollector(BaseDatabaseGarbageCollector):
script_name = 'garbo-daily'
tunable_loops = [
AnswerContactPruner,
+ ArchiveFileDatePopulator,
BranchJobPruner,
BugNotificationPruner,
BugWatchActivityPruner,
diff --git a/lib/lp/scripts/tests/test_garbo.py b/lib/lp/scripts/tests/test_garbo.py
index 2fbc80d..ac17212 100644
--- a/lib/lp/scripts/tests/test_garbo.py
+++ b/lib/lp/scripts/tests/test_garbo.py
@@ -87,6 +87,7 @@ from lp.registry.model.commercialsubscription import CommercialSubscription
from lp.registry.model.teammembership import TeamMembership
from lp.scripts.garbo import (
AntiqueSessionPruner,
+ ArchiveFileDatePopulator,
BulkPruner,
DailyDatabaseGarbageCollector,
DuplicateSessionPruner,
@@ -134,6 +135,7 @@ from lp.snappy.model.snapbuildjob import (
SnapStoreUploadJob,
)
from lp.soyuz.enums import PackagePublishingStatus
+from lp.soyuz.interfaces.archivefile import IArchiveFileSet
from lp.soyuz.interfaces.livefs import LIVEFS_FEATURE_FLAG
from lp.soyuz.model.distributionsourcepackagecache import (
DistributionSourcePackageCache,
@@ -433,7 +435,8 @@ class TestGarbo(FakeAdapterMixin, TestCaseWithFactory):
self.log_buffer = six.StringIO()
handler = logging.StreamHandler(self.log_buffer)
self.log.addHandler(handler)
- self.addDetail('garbo-log', text_content(self.log_buffer.getvalue()))
+ self.addCleanup(lambda: self.addDetail(
+ 'garbo-log', text_content(self.log_buffer.getvalue())))
def runFrequently(self, maximum_chunk_size=2, test_args=()):
switch_dbuser('garbo_daily')
@@ -1725,6 +1728,49 @@ class TestGarbo(FakeAdapterMixin, TestCaseWithFactory):
# retained.
self._test_SnapFilePruner('foo.snap', None, 30, expected_count=1)
+    def test_ArchiveFileDatePopulator(self):
+        """The daily run backfills date_superseded from
+        scheduled_deletion_date, skipping rows where the latter is unset.
+        """
+        switch_dbuser('testadmin')
+        now = datetime.now(UTC)
+        archive_files = [self.factory.makeArchiveFile() for _ in range(2)]
+        # Give only the second file a scheduled_deletion_date, so the
+        # first one should be left untouched by the populator.
+        removeSecurityProxy(archive_files[1]).scheduled_deletion_date = (
+            now + timedelta(hours=6))
+
+        self.runDaily()
+
+        # now + 6 hours minus the stay of execution yields now - 18
+        # hours, i.e. BY_HASH_STAY_OF_EXECUTION amounts to one day.
+        self.assertThat(archive_files, MatchesListwise([
+            MatchesStructure(date_superseded=Is(None)),
+            MatchesStructure.byEquality(
+                date_superseded=now - timedelta(hours=18)),
+            ]))
+
+    def test_ArchiveFileDatePopulator_findArchiveFiles_filters_correctly(self):
+        """findArchiveFiles only matches rows needing a backfill: those
+        with date_superseded unset and scheduled_deletion_date set.
+        """
+        switch_dbuser('testadmin')
+
+        # Create three ArchiveFiles: one with date_superseded set, one with
+        # date_superseded unset and scheduled_deletion_date set, and one
+        # with both unset.
+        archive_files = [self.factory.makeArchiveFile() for _ in range(3)]
+
+        # scheduleDeletion sets both dates, so this row must be skipped.
+        Store.of(archive_files[0]).flush()
+        getUtility(IArchiveFileSet).scheduleDeletion(
+            [archive_files[0]], timedelta(days=1))
+        self.assertIsNotNone(archive_files[0].date_superseded)
+
+        # Only this row qualifies: deletion scheduled but
+        # date_superseded never recorded.
+        removeSecurityProxy(archive_files[1]).scheduled_deletion_date = (
+            datetime.now(UTC) + timedelta(days=1))
+        self.assertIsNone(archive_files[1].date_superseded)
+
+        # Neither date set: nothing to derive, so it must be skipped too.
+        self.assertIsNone(archive_files[2].date_superseded)
+        self.assertIsNone(archive_files[2].scheduled_deletion_date)
+
+        populator = ArchiveFileDatePopulator(None)
+        # Consider only ArchiveFiles created by this test.
+        populator.start_at = archive_files[0].id
+
+        rs = populator.findArchiveFiles()
+        self.assertEqual(1, rs.count())
+        self.assertEqual(archive_files[1], rs.one())
+
class TestGarboTasks(TestCaseWithFactory):
layer = LaunchpadZopelessLayer