← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~stub/launchpad/garbo-bulk-pruner into lp:launchpad

 

Stuart Bishop has proposed merging lp:~stub/launchpad/garbo-bulk-pruner into lp:launchpad with lp:~stub/launchpad/garbo as a prerequisite.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~stub/launchpad/garbo-bulk-pruner/+merge/55497

Clean up a lot of our garbo tasks by switching them to use BulkPruner.
-- 
https://code.launchpad.net/~stub/launchpad/garbo-bulk-pruner/+merge/55497
Your team, Launchpad code reviewers, is requested to review the proposed merge of lp:~stub/launchpad/garbo-bulk-pruner into lp:launchpad.
=== modified file 'lib/lp/scripts/garbo.py'
--- lib/lp/scripts/garbo.py	2011-03-09 23:43:02 +0000
+++ lib/lp/scripts/garbo.py	2011-03-30 09:56:17 +0000
@@ -27,7 +27,6 @@
     Count,
     Max,
     Min,
-    Select,
     SQL,
     )
 import transaction
@@ -36,7 +35,6 @@
 
 from canonical.config import config
 from canonical.database import postgresql
-from canonical.database.constants import THIRTY_DAYS_AGO
 from canonical.database.sqlbase import (
     cursor,
     session_store,
@@ -65,13 +63,12 @@
 from lp.bugs.model.bug import Bug
 from lp.bugs.model.bugattachment import BugAttachment
 from lp.bugs.model.bugnotification import BugNotification
-from lp.bugs.model.bugwatch import BugWatch
+from lp.bugs.model.bugwatch import BugWatchActivity
 from lp.bugs.scripts.checkwatches.scheduler import (
     BugWatchScheduler,
     MAX_SAMPLE_SIZE,
     )
 from lp.code.interfaces.revision import IRevisionSet
-from lp.code.model.branchjob import BranchJob
 from lp.code.model.codeimportevent import CodeImportEvent
 from lp.code.model.codeimportresult import CodeImportResult
 from lp.code.model.revision import (
@@ -204,40 +201,17 @@
         """
 
 
-class OAuthNoncePruner(TunableLoop):
+class OAuthNoncePruner(BulkPruner):
     """An ITunableLoop to prune old OAuthNonce records.
 
     We remove all OAuthNonce records older than 1 day.
     """
-    maximum_chunk_size = 6*60*60 # 6 hours in seconds.
-
-    def __init__(self, log, abort_time=None):
-        super(OAuthNoncePruner, self).__init__(log, abort_time)
-        self.store = IMasterStore(OAuthNonce)
-        self.oldest_age = self.store.execute("""
-            SELECT COALESCE(EXTRACT(EPOCH FROM
-                CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
-                - MIN(request_timestamp)), 0)
-            FROM OAuthNonce
-            """).get_one()[0]
-
-    def isDone(self):
-        return self.oldest_age <= ONE_DAY_IN_SECONDS
-
-    def __call__(self, chunk_size):
-        self.oldest_age = max(
-            ONE_DAY_IN_SECONDS, self.oldest_age - chunk_size)
-
-        self.log.debug(
-            "Removed OAuthNonce rows older than %d seconds"
-            % self.oldest_age)
-
-        self.store.find(
-            OAuthNonce,
-            OAuthNonce.request_timestamp < SQL(
-                "CURRENT_TIMESTAMP AT TIME ZONE 'UTC' - interval '%d seconds'"
-                % self.oldest_age)).remove()
-        transaction.commit()
+    target_table_class = OAuthNonce
+    ids_to_prune_query = """
+        SELECT id FROM OauthNonce
+        WHERE request_timestamp
+            < CURRENT_TIMESTAMP AT TIME ZONE 'UTC' - CAST('1 day' AS interval)
+        """
 
 
 class OpenIDConsumerNoncePruner(TunableLoop):
@@ -323,75 +297,34 @@
         transaction.commit()
 
 
-class CodeImportEventPruner(TunableLoop):
+class CodeImportEventPruner(BulkPruner):
     """Prune `CodeImportEvent`s that are more than a month old.
 
     Events that happened more than 30 days ago are really of no
     interest to us.
     """
-
-    maximum_chunk_size = 10000
-    minimum_chunk_size = 500
-
-    def isDone(self):
-        store = IMasterStore(CodeImportEvent)
-        events = store.find(
-            CodeImportEvent,
-            CodeImportEvent.date_created < THIRTY_DAYS_AGO)
-        return events.any() is None
-
-    def __call__(self, chunk_size):
-        chunk_size = int(chunk_size)
-        store = IMasterStore(CodeImportEvent)
-        event_ids = Select(
-            [CodeImportEvent.id],
-            CodeImportEvent.date_created < THIRTY_DAYS_AGO,
-            limit=chunk_size)
-        num_removed = store.find(
-            CodeImportEvent, CodeImportEvent.id.is_in(event_ids)).remove()
-        transaction.commit()
-        self.log.debug("Removed %d old CodeImportEvents" % num_removed)
-
-
-class CodeImportResultPruner(TunableLoop):
+    target_table_class = CodeImportEvent
+    ids_to_prune_query = """
+        SELECT id FROM CodeImportEvent
+        WHERE date_created < CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
+            - CAST('30 days' AS interval)
+        """
+
+
+class CodeImportResultPruner(BulkPruner):
     """A TunableLoop to prune unwanted CodeImportResult rows.
 
     Removes CodeImportResult rows if they are older than 30 days
     and they are not one of the most recent results for that
     CodeImport.
     """
-    maximum_chunk_size = 1000
-
-    def __init__(self, log, abort_time=None):
-        super(CodeImportResultPruner, self).__init__(log, abort_time)
-        self.store = IMasterStore(CodeImportResult)
-
-        self.min_code_import = self.store.find(
-            Min(CodeImportResult.code_importID)).one()
-        self.max_code_import = self.store.find(
-            Max(CodeImportResult.code_importID)).one()
-
-        self.next_code_import_id = self.min_code_import
-
-    def isDone(self):
-        return (
-            self.min_code_import is None
-            or self.next_code_import_id > self.max_code_import)
-
-    def __call__(self, chunk_size):
-        self.log.debug(
-            "Removing expired CodeImportResults for CodeImports %d -> %d" % (
-                self.next_code_import_id,
-                self.next_code_import_id + chunk_size - 1))
-
-        self.store.execute("""
-            DELETE FROM CodeImportResult
+    target_table_class = CodeImportResult
+    ids_to_prune_query = """
+            SELECT id FROM CodeImportResult
             WHERE
                 CodeImportResult.date_created
                     < CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
                         - interval '30 days'
-                AND CodeImportResult.code_import >= %s
-                AND CodeImportResult.code_import < %s + %s
                 AND CodeImportResult.id NOT IN (
                     SELECT LatestResult.id
                     FROM CodeImportResult AS LatestResult
@@ -400,13 +333,7 @@
                             = CodeImportResult.code_import
                     ORDER BY LatestResult.date_created DESC
                     LIMIT %s)
-            """ % sqlvalues(
-                self.next_code_import_id,
-                self.next_code_import_id,
-                chunk_size,
-                config.codeimport.consecutive_failure_limit - 1))
-        self.next_code_import_id += chunk_size
-        transaction.commit()
+            """ % sqlvalues(config.codeimport.consecutive_failure_limit - 1)
 
 
 class RevisionAuthorEmailLinker(TunableLoop):
@@ -654,63 +581,35 @@
                 % chunk_size)
 
 
-class BugNotificationPruner(TunableLoop):
+class BugNotificationPruner(BulkPruner):
     """Prune `BugNotificationRecipient` records no longer of interest.
 
     We discard all rows older than 30 days that have been sent. We
     keep 30 days worth or records to help diagnose email delivery issues.
     """
-    maximum_chunk_size = 10000
-
-    def _to_remove(self):
-        return IMasterStore(BugNotification).find(
-            BugNotification.id,
-            BugNotification.date_emailed < THIRTY_DAYS_AGO)
-
-    def isDone(self):
-        return self._to_remove().any() is None
-
-    def __call__(self, chunk_size):
-        chunk_size = int(chunk_size)
-        ids_to_remove = list(self._to_remove()[:chunk_size])
-        num_removed = IMasterStore(BugNotification).find(
-            BugNotification,
-            BugNotification.id.is_in(ids_to_remove)).remove()
-        transaction.commit()
-        self.log.debug("Removed %d rows" % num_removed)
-
-
-class BranchJobPruner(TunableLoop):
+    target_table_class = BugNotification
+    ids_to_prune_query = """
+        SELECT BugNotification.id FROM BugNotification
+        WHERE date_emailed < CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
+            - CAST('30 days' AS interval)
+        """
+
+
+class BranchJobPruner(BulkPruner):
     """Prune `BranchJob`s that are in a final state and more than a month old.
 
     When a BranchJob is completed, it gets set to a final state.  These jobs
     should be pruned from the database after a month.
     """
-
-    maximum_chunk_size = 10000
-    minimum_chunk_size = 500
-
-    _is_done = False
-
-    def isDone(self):
-        return self._is_done
-
-    def __call__(self, chunk_size):
-        chunk_size = int(chunk_size)
-        store = IMasterStore(BranchJob)
-        ids_to_remove = list(store.find(
-            Job.id,
-            BranchJob.job == Job.id,
-            Job.date_finished < THIRTY_DAYS_AGO)[:chunk_size])
-        if len(ids_to_remove) > 0:
-            # BranchJob is removed too, as the BranchJob.job foreign key
-            # constraint is ON DELETE CASCADE.
-            IMasterStore(Job).find(
-                Job,
-                Job.id.is_in(ids_to_remove)).remove()
-        else:
-            self._is_done = True
-        transaction.commit()
+    target_table_class = Job
+    ids_to_prune_query = """
+        SELECT DISTINCT Job.id
+        FROM Job, BranchJob
+        WHERE
+            Job.id = BranchJob.job
+            AND Job.date_finished < CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
+                - CAST('30 days' AS interval)
+        """
 
 
 class BugHeatUpdater(TunableLoop):
@@ -764,61 +663,22 @@
         transaction.commit()
 
 
-class BugWatchActivityPruner(TunableLoop):
+class BugWatchActivityPruner(BulkPruner):
     """A TunableLoop to prune BugWatchActivity entries."""
-
-    maximum_chunk_size = 1000
-
-    def getPrunableBugWatchIds(self, chunk_size):
-        """Return the set of BugWatch IDs whose activity is prunable."""
-        query = """
-            SELECT
-                watch_activity.id
-            FROM (
-                SELECT
-                    BugWatch.id AS id,
-                    COUNT(BugWatchActivity.id) as activity_count
-                FROM BugWatch, BugWatchActivity
-                WHERE BugWatchActivity.bug_watch = BugWatch.id
-                GROUP BY BugWatch.id) AS watch_activity
-            WHERE watch_activity.activity_count > %s
-            LIMIT %s;
-        """ % sqlvalues(MAX_SAMPLE_SIZE, chunk_size)
-        store = IMasterStore(BugWatch)
-        results = store.execute(query)
-        return set(result[0] for result in results)
-
-    def pruneBugWatchActivity(self, bug_watch_ids):
-        """Prune the BugWatchActivity for bug_watch_ids."""
-        query = """
-            DELETE FROM BugWatchActivity
-            WHERE id IN (
-                SELECT id
-                FROM BugWatchActivity
-                WHERE bug_watch = %s
-                ORDER BY id DESC
-                OFFSET %s);
-        """
-        store = IMasterStore(BugWatch)
-        for bug_watch_id in bug_watch_ids:
-            results = store.execute(
-                query % sqlvalues(bug_watch_id, MAX_SAMPLE_SIZE))
-            self.log.debug(
-                "Pruned %s BugWatchActivity entries for watch %s" %
-                (results.rowcount, bug_watch_id))
-
-    def __call__(self, chunk_size):
-        transaction.begin()
-        prunable_ids = self.getPrunableBugWatchIds(chunk_size)
-        self.pruneBugWatchActivity(prunable_ids)
-        transaction.commit()
-
-    def isDone(self):
-        """Return True if there are no watches left to prune."""
-        return len(self.getPrunableBugWatchIds(1)) == 0
-
-
-class ObsoleteBugAttachmentDeleter(TunableLoop):
+    target_table_class = BugWatchActivity
+    # For each bug_watch, remove all but the most recent MAX_SAMPLE_SIZE
+    # entries.
+    ids_to_prune_query = """
+        SELECT id FROM (
+            SELECT id, rank() OVER w AS rank
+            FROM BugWatchActivity
+            WINDOW w AS (PARTITION BY bug_watch ORDER BY id DESC)
+            ) AS whatever
+        WHERE rank > %s
+        """ % sqlvalues(MAX_SAMPLE_SIZE)
+
+
+class ObsoleteBugAttachmentPruner(BulkPruner):
     """Delete bug attachments without a LibraryFileContent record.
 
     Our database schema allows LibraryFileAlias records that have no
@@ -827,28 +687,14 @@
     This class deletes bug attachments that reference such "content free"
     and thus completely useless LFA records.
     """
-
-    maximum_chunk_size = 1000
-
-    def __init__(self, log, abort_time=None):
-        super(ObsoleteBugAttachmentDeleter, self).__init__(log, abort_time)
-        self.store = IMasterStore(BugAttachment)
-
-    def _to_remove(self):
-        return self.store.find(
-            BugAttachment.id,
-            BugAttachment.libraryfile == LibraryFileAlias.id,
-            LibraryFileAlias.content == None)
-
-    def isDone(self):
-        return self._to_remove().any() is None
-
-    def __call__(self, chunk_size):
-        chunk_size = int(chunk_size)
-        ids_to_remove = list(self._to_remove()[:chunk_size])
-        self.store.find(
-            BugAttachment, BugAttachment.id.is_in(ids_to_remove)).remove()
-        transaction.commit()
+    target_table_class = BugAttachment
+    ids_to_prune_query = """
+        SELECT BugAttachment.id
+        FROM BugAttachment, LibraryFileAlias
+        WHERE
+            BugAttachment.libraryfile = LibraryFileAlias.id
+            AND LibraryFileAlias.content IS NULL
+        """
 
 
 class OldTimeLimitedTokenDeleter(TunableLoop):
@@ -1134,7 +980,7 @@
         CodeImportEventPruner,
         CodeImportResultPruner,
         HWSubmissionEmailLinker,
-        ObsoleteBugAttachmentDeleter,
+        ObsoleteBugAttachmentPruner,
         OldTimeLimitedTokenDeleter,
         RevisionAuthorEmailLinker,
         SuggestiveTemplatesCacheUpdater,

=== modified file 'lib/lp/scripts/tests/test_garbo.py'
--- lib/lp/scripts/tests/test_garbo.py	2011-03-09 23:43:02 +0000
+++ lib/lp/scripts/tests/test_garbo.py	2011-03-30 09:56:17 +0000
@@ -662,7 +662,7 @@
         LaunchpadZopelessLayer.switchDbUser('testadmin')
         self.assertEqual(store.find(BranchJob).count(), 1)
 
-    def test_ObsoleteBugAttachmentDeleter(self):
+    def test_ObsoleteBugAttachmentPruner(self):
         # Bug attachments without a LibraryFileContent record are removed.
 
         LaunchpadZopelessLayer.switchDbUser('testadmin')