← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~cjwatson/launchpad/prune-snap-files into lp:launchpad

 

Colin Watson has proposed merging lp:~cjwatson/launchpad/prune-snap-files into lp:launchpad.

Commit message:
Prune old SnapFiles that have been uploaded to the store.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/prune-snap-files/+merge/344825

Based on test queries on staging, this should remove around 5 TiB from the librarian.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/prune-snap-files into lp:launchpad.
=== modified file 'lib/lp/scripts/garbo.py'
--- lib/lp/scripts/garbo.py	2018-04-04 13:51:16 +0000
+++ lib/lp/scripts/garbo.py	2018-04-30 10:45:14 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2016 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2018 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """Database garbage collection."""
@@ -101,6 +101,7 @@
 from lp.services.identity.interfaces.emailaddress import EmailAddressStatus
 from lp.services.identity.model.account import Account
 from lp.services.identity.model.emailaddress import EmailAddress
+from lp.services.job.interfaces.job import JobStatus
 from lp.services.job.model.job import Job
 from lp.services.librarian.model import TimeLimitedToken
 from lp.services.log.logger import PrefixFilter
@@ -122,6 +123,8 @@
 from lp.services.webhooks.model import WebhookJob
 from lp.snappy.interfaces.snappyseries import ISnappyDistroSeriesSet
 from lp.snappy.model.snap import Snap
+from lp.snappy.model.snapbuild import SnapFile
+from lp.snappy.model.snapbuildjob import SnapBuildJobType
 from lp.soyuz.enums import PackagePublishingStatus
 from lp.soyuz.model.archive import Archive
 from lp.soyuz.model.distributionsourcepackagecache import (
@@ -1564,6 +1567,32 @@
         """
 
 
+class SnapFilePruner(BulkPruner):
+    """Prune old `SnapFile`s that have been uploaded to the store.
+
+    Binary files attached to `SnapBuild`s are typically very large, and once
+    they've been uploaded to the store we don't really need to keep them in
+    Launchpad as well.  Text files are typically small (<1MiB) and useful
+    for retrospective analysis, so we preserve those indefinitely.
+    """
+    target_table_class = SnapFile
+    ids_to_prune_query = """
+        SELECT DISTINCT SnapFile.id
+        FROM SnapFile, SnapBuild, SnapBuildJob, Job, LibraryFileAlias
+        WHERE
+            SnapFile.snapbuild = SnapBuild.id
+            AND SnapBuildJob.snapbuild = SnapBuild.id
+            AND SnapBuildJob.job_type = %s
+            AND SnapBuildJob.job = Job.id
+            AND Job.status = %s
+            AND Job.date_finished <
+                CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
+                - CAST('30 days' AS INTERVAL)
+            AND SnapFile.libraryfile = LibraryFileAlias.id
+            AND LibraryFileAlias.mimetype != 'text/plain'
+        """ % (SnapBuildJobType.STORE_UPLOAD.value, JobStatus.COMPLETED.value)
+
+
 class SnapStoreSeriesPopulator(TunableLoop):
     """Populates Snap.store_series based on Snap.distro_series.
 
@@ -1894,6 +1923,7 @@
         RevisionAuthorEmailLinker,
         ScrubPOFileTranslator,
         SnapBuildJobPruner,
+        SnapFilePruner,
         SnapStoreSeriesPopulator,
         SuggestiveTemplatesCacheUpdater,
         TeamMembershipPruner,

=== modified file 'lib/lp/scripts/tests/test_garbo.py'
--- lib/lp/scripts/tests/test_garbo.py	2018-04-04 14:14:30 +0000
+++ lib/lp/scripts/tests/test_garbo.py	2018-04-30 10:45:14 +0000
@@ -99,6 +99,7 @@
 from lp.services.features.testing import FeatureFixture
 from lp.services.identity.interfaces.account import AccountStatus
 from lp.services.identity.interfaces.emailaddress import EmailAddressStatus
+from lp.services.job.interfaces.job import JobStatus
 from lp.services.job.model.job import Job
 from lp.services.librarian.model import TimeLimitedToken
 from lp.services.messages.model.message import Message
@@ -114,6 +115,7 @@
 from lp.services.verification.model.logintoken import LoginToken
 from lp.services.worlddata.interfaces.language import ILanguageSet
 from lp.snappy.interfaces.snap import SNAP_TESTING_FLAGS
+from lp.snappy.model.snapbuild import SnapFile
 from lp.snappy.model.snapbuildjob import (
     SnapBuildJob,
     SnapStoreUploadJob,
@@ -1518,6 +1520,65 @@
         self._test_LiveFSFilePruner(
             'application/octet-stream', 0, expected_count=1)
 
+    def _test_SnapFilePruner(self, content_type, job_status, interval,
+                             expected_count=0):
+        # Garbo should (or should not, if `expected_count=1`) remove snap
+        # files of MIME type `content_type` with a store upload job of
+        # status `job_status` that finished more than `interval` days ago.
+        now = datetime.now(UTC)
+        switch_dbuser('testadmin')
+        store = IMasterStore(SnapFile)
+
+        db_build = self.factory.makeSnapBuild(
+            date_created=now - timedelta(days=interval, minutes=15),
+            status=BuildStatus.FULLYBUILT, duration=timedelta(minutes=10))
+        db_lfa = self.factory.makeLibraryFileAlias(content_type=content_type)
+        db_file = self.factory.makeSnapFile(
+            snapbuild=db_build, libraryfile=db_lfa)
+        if job_status is not None:
+            db_build_job = SnapStoreUploadJob.create(db_build)
+            db_build_job.job._status = job_status
+            db_build_job.job.date_finished = (
+                now - timedelta(days=interval, minutes=5))
+        Store.of(db_file).flush()
+        self.assertEqual(1, store.find(SnapFile).count())
+
+        self.runDaily()
+
+        switch_dbuser('testadmin')
+        self.assertEqual(expected_count, store.find(SnapFile).count())
+
+    def test_SnapFilePruner_old_binary_files(self):
+        # Snap binary files whose store upload job completed more than 30
+        # days ago are pruned.
+        self._test_SnapFilePruner(
+            'application/octet-stream', JobStatus.COMPLETED, 30)
+
+    def test_SnapFilePruner_old_text_files(self):
+        # Snap text files are retained even if their store upload job
+        # completed more than 30 days ago.
+        self._test_SnapFilePruner(
+            'text/plain', JobStatus.COMPLETED, 30, expected_count=1)
+
+    def test_SnapFilePruner_recent_binary_files(self):
+        # Snap binary files whose store upload job completed less than 30
+        # days ago are retained.
+        self._test_SnapFilePruner(
+            'application/octet-stream', JobStatus.COMPLETED, 29,
+            expected_count=1)
+
+    def test_SnapFilePruner_binary_files_failed_to_upload(self):
+        # Snap binary files attached to builds that failed to be uploaded to
+        # the store are retained.
+        self._test_SnapFilePruner(
+            'application/octet-stream', JobStatus.FAILED, 30, expected_count=1)
+
+    def test_SnapFilePruner_binary_files_no_upload_job(self):
+        # Snap binary files attached to builds with no store upload job are
+        # retained.
+        self._test_SnapFilePruner(
+            'application/octet-stream', None, 30, expected_count=1)
+
     def test_SnapStoreSeriesPopulator(self):
         switch_dbuser('testadmin')
         # Make some series.


Follow ups