← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~stub/launchpad/session-prune into lp:launchpad/db-devel

 

Stuart Bishop has proposed merging lp:~stub/launchpad/session-prune into lp:launchpad/db-devel.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  #357516 Session cookies are being held for too long - from a users perspective the old ones are now invalid
  https://bugs.launchpad.net/bugs/357516
  #729019 session pruner should be in tree
  https://bugs.launchpad.net/bugs/729019

For more details, see:
https://code.launchpad.net/~stub/launchpad/session-prune/+merge/52532

If a user multiple authenticated sessions, remove all but the most recent 6.

Also, pull out extra_prune_clause from the BulkPruner, as it is currently only useful for pathalogical cases that don't really matter and it broke our end of loop detection (if all items of a batch are skipped due to the extra_prune_clause, then no rows would be deleted and the task would terminate). It would be possible to support this, but it will require refactoring and perform worse.
-- 
https://code.launchpad.net/~stub/launchpad/session-prune/+merge/52532
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~stub/launchpad/session-prune into lp:launchpad/db-devel.
=== modified file 'lib/lp/scripts/garbo.py'
--- lib/lp/scripts/garbo.py	2011-03-03 15:19:07 +0000
+++ lib/lp/scripts/garbo.py	2011-03-08 10:11:59 +0000
@@ -124,12 +124,6 @@
     # See `TunableLoop`. May be overridden.
     maximum_chunk_size = 10000
 
-    # Optional extra WHERE clause fragment for the deletion to skip
-    # arbitrary rows flagged for deletion. For example, skip rows
-    # that might have been modified since the set of ids_to_prune
-    # was calculated.
-    extra_prune_clause = None
-
     def getStore(self):
         """The master Store for the table we are pruning.
 
@@ -156,20 +150,15 @@
 
     def __call__(self, chunk_size):
         """See `ITunableLoop`."""
-        if self.extra_prune_clause:
-            extra = "AND (%s)" % self.extra_prune_clause
-        else:
-            extra = ""
         result = self.store.execute("""
             DELETE FROM %s
             WHERE %s IN (
                 SELECT id FROM
                 cursor_fetch('bulkprunerid', %d) AS f(id %s))
-                %s
             """
             % (
                 self.target_table_name, self.target_table_key,
-                chunk_size, self.target_table_key_type, extra))
+                chunk_size, self.target_table_key_type))
         self._num_removed = result.rowcount
         transaction.commit()
 
@@ -242,10 +231,33 @@
                     AND key='logintime')
         """
 
-    # Don't delete a session if it has been used between calculating
-    # the list of sessions to remove and the current iteration.
-    prune_extra_clause = """
-        last_accessed < CURRENT_TIMESTAMP - CAST('1 day' AS interval)
+
+class DuplicateSessionPruner(SessionPruner):
+    """Remove all but the most recent 6 authenticated sessions for a user.
+
+    We sometimes see users with dozens or thousands of authenticated
+    sessions. To limit exposure to replay attacks, we remove all but
+    the most recent 6 of them for a given user.
+    """
+
+    ids_to_prune_query = """
+        SELECT client_id AS id
+        FROM (
+            SELECT
+                sessiondata.client_id,
+                last_accessed,
+                rank() OVER pickle AS rank
+            FROM SessionData, SessionPkgData
+            WHERE
+                SessionData.client_id = SessionPkgData.client_id
+                AND product_id = 'launchpad.authenticateduser'
+                AND key='accountid'
+            WINDOW pickle AS (PARTITION BY pickle ORDER BY last_accessed DESC)
+            ) AS whatever
+        WHERE
+            rank > 6
+            AND last_accessed < CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
+                - CAST('1 hour' AS interval)
         """
 
 
@@ -1042,6 +1054,7 @@
         BugWatchScheduler,
         AntiqueSessionPruner,
         UnusedSessionPruner,
+        DuplicateSessionPruner,
         ]
     experimental_tunable_loops = []
 

=== modified file 'lib/lp/scripts/tests/test_garbo.py'
--- lib/lp/scripts/tests/test_garbo.py	2011-03-04 11:13:32 +0000
+++ lib/lp/scripts/tests/test_garbo.py	2011-03-08 10:11:59 +0000
@@ -74,6 +74,7 @@
     AntiqueSessionPruner,
     BulkPruner,
     DailyDatabaseGarbageCollector,
+    DuplicateSessionPruner,
     HourlyDatabaseGarbageCollector,
     OpenIDConsumerAssociationPruner,
     UnusedSessionPruner,
@@ -205,26 +206,37 @@
         yesterday = recent - timedelta(days=1)
         ancient = recent - timedelta(days=61)
 
-        def make_session(client_id, accessed, authenticated=None):
-            session_data = SessionData()
-            session_data.client_id = client_id
-            session_data.last_accessed = accessed
-            IMasterStore(SessionData).add(session_data)
-
-            if authenticated:
-                session_pkg_data = SessionPkgData()
-                session_pkg_data.client_id = client_id
-                session_pkg_data.product_id = u'launchpad.authenticateduser'
-                session_pkg_data.key = u'logintime'
-                session_pkg_data.pickle = 'value is ignored'
-                IMasterStore(SessionPkgData).add(session_pkg_data)
-
-        make_session(u'recent_auth', recent, True)
-        make_session(u'recent_unauth', recent, False)
-        make_session(u'yesterday_auth', yesterday, True)
-        make_session(u'yesterday_unauth', yesterday, False)
-        make_session(u'ancient_auth', ancient, True)
-        make_session(u'ancient_unauth', ancient, False)
+        self.make_session(u'recent_auth', recent, 'auth1')
+        self.make_session(u'recent_unauth', recent, False)
+        self.make_session(u'yesterday_auth', yesterday, 'auth2')
+        self.make_session(u'yesterday_unauth', yesterday, False)
+        self.make_session(u'ancient_auth', ancient, 'auth3')
+        self.make_session(u'ancient_unauth', ancient, False)
+
+    def make_session(self, client_id, accessed, authenticated=None):
+        session_data = SessionData()
+        session_data.client_id = client_id
+        session_data.last_accessed = accessed
+        IMasterStore(SessionData).add(session_data)
+
+        if authenticated:
+            # Add login time information.
+            session_pkg_data = SessionPkgData()
+            session_pkg_data.client_id = client_id
+            session_pkg_data.product_id = u'launchpad.authenticateduser'
+            session_pkg_data.key = u'logintime'
+            session_pkg_data.pickle = 'value is ignored'
+            IMasterStore(SessionPkgData).add(session_pkg_data)
+
+            # Add authenticated as information.
+            session_pkg_data = SessionPkgData()
+            session_pkg_data.client_id = client_id
+            session_pkg_data.product_id = u'launchpad.authenticateduser'
+            session_pkg_data.key = u'accountid'
+            # Normally Account.id, but the session pruning works
+            # at the SQL level and doesn't unpickle anything.
+            session_pkg_data.pickle = authenticated
+            IMasterStore(SessionPkgData).add(session_pkg_data)
 
     def sessionExists(self, client_id):
         store = IMasterStore(SessionData)
@@ -279,6 +291,51 @@
 
         self.assertEqual(expected_sessions, found_sessions)
 
+    def test_duplicate_session_pruner(self):
+        # None of the sessions created in setUp() are duplicates, so
+        # they will all survive the pruning.
+        expected_sessions = set([
+            u'recent_auth',
+            u'recent_unauth',
+            u'yesterday_auth',
+            u'yesterday_unauth',
+            u'ancient_auth',
+            u'ancient_unauth',
+            ])
+
+        now = datetime.now(UTC)
+
+        # Make some duplicate logins from a few days ago.
+        # Only the most recent 6 will be kept. Oldest is 'old dupe 9',
+        # most recent 'old dupe 1'.
+        for count in range(1, 10):
+            self.make_session(
+                u'old dupe %d' % count,
+                now - timedelta(days=2, seconds=count),
+                'old dupe')
+        for count in range(1, 7):
+            expected_sessions.add(u'old dupe %d' % count)
+
+        # Make some other duplicate logins less than an hour old.
+        # All of these will be kept.
+        for count in range(1, 10):
+            self.make_session(u'new dupe %d' % count, now, 'new dupe')
+            expected_sessions.add(u'new dupe %d' % count)
+
+        chunk_size = 2
+        log = BufferLogger()
+        pruner = DuplicateSessionPruner(log)
+        try:
+            while not pruner.isDone():
+                pruner(chunk_size)
+        finally:
+            pruner.cleanUp()
+
+        found_sessions = set(
+            IMasterStore(SessionData).find(SessionData.client_id))
+
+        self.assertEqual(expected_sessions, found_sessions)
+
 
 class TestGarbo(TestCaseWithFactory):
     layer = LaunchpadZopelessLayer