← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~wgrant/launchpad/scannery-stabbery into lp:launchpad

 

William Grant has proposed merging lp:~wgrant/launchpad/scannery-stabbery into lp:launchpad.

Commit message:
Rewrite Branch.createBranchRevisionsFromIDs to use bulk.create.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~wgrant/launchpad/scannery-stabbery/+merge/295542

Rewrite Branch.createBranchRevisionsFromIDs to use bulk.create. It previously used a temp table and potentially overcomplicated queries in ways that caused postgres to choose a bad plan.

Also log a bit more verbosely in bzrsync, so we can see which bits remain slow.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~wgrant/launchpad/scannery-stabbery into lp:launchpad.
=== modified file 'lib/lp/code/model/branch.py'
--- lib/lp/code/model/branch.py	2016-04-06 10:59:15 +0000
+++ lib/lp/code/model/branch.py	2016-05-24 05:37:38 +0000
@@ -27,7 +27,6 @@
     Coalesce,
     Count,
     Desc,
-    Insert,
     Join,
     NamedFunc,
     Not,
@@ -154,7 +153,7 @@
     )
 from lp.registry.model.teammembership import TeamParticipation
 from lp.services.config import config
-from lp.services.database.bulk import load_related
+from lp.services.database import bulk
 from lp.services.database.constants import (
     DEFAULT,
     UTC_NOW,
@@ -499,7 +498,7 @@
         from lp.code.model.branchcollection import GenericBranchCollection
 
         def eager_load(rows):
-            branches = load_related(
+            branches = bulk.load_related(
                 Branch, rows, ['source_branchID', 'prerequisite_branchID'])
             GenericBranchCollection.preloadVisibleStackedOnBranches(
                 branches, user)
@@ -781,7 +780,8 @@
 
         def eager_load(rows):
             revisions = map(operator.itemgetter(1), rows)
-            load_related(RevisionAuthor, revisions, ['revision_author_id'])
+            bulk.load_related(
+                RevisionAuthor, revisions, ['revision_author_id'])
         return DecoratedResultSet(result, pre_iter_hook=eager_load)
 
     def getRevisionsSince(self, timestamp):
@@ -1094,26 +1094,15 @@
         if not revision_id_sequence_pairs:
             return
         store = Store.of(self)
-        store.execute(
-            """
-            CREATE TEMPORARY TABLE RevidSequence
-            (revision_id text, sequence integer)
-            """)
-        # Force to Unicode or we will end up with bad quoting under
-        # PostgreSQL 9.1.
-        unicode_revid_sequence_pairs = [
-            (a and unicode(a) or None, b and unicode(b) or None)
-                for a, b in revision_id_sequence_pairs]
-        store.execute(Insert(('revision_id', 'sequence'),
-            table=['RevidSequence'], values=unicode_revid_sequence_pairs))
-        store.execute(
-            """
-            INSERT INTO BranchRevision (branch, revision, sequence)
-            SELECT %s, Revision.id, RevidSequence.sequence
-            FROM RevidSequence, Revision
-            WHERE Revision.revision_id = RevidSequence.revision_id
-            """ % sqlvalues(self))
-        store.execute("DROP TABLE RevidSequence")
+        rev_db_ids = dict(store.find(
+            (Revision.revision_id, Revision.id),
+            Revision.revision_id.is_in(
+                (revid for revid, _ in revision_id_sequence_pairs))))
+        bulk.create(
+            (BranchRevision.branch, BranchRevision.revision_id,
+             BranchRevision.sequence),
+            [(self, rev_db_ids[revid], seq)
+             for revid, seq in revision_id_sequence_pairs])
 
     def getTipRevision(self):
         """See `IBranch`."""

=== modified file 'lib/lp/codehosting/scanner/bzrsync.py'
--- lib/lp/codehosting/scanner/bzrsync.py	2015-09-24 13:44:02 +0000
+++ lib/lp/codehosting/scanner/bzrsync.py	2016-05-24 05:37:38 +0000
@@ -111,11 +111,13 @@
         self.insertBranchRevisions(bzr_branch, revids_to_insert)
         transaction.commit()
         # Synchronize the RevisionCache for this branch.
+        self.logger.info("Updating revision cache.")
         getUtility(IRevisionSet).updateRevisionCacheForBranch(self.db_branch)
         transaction.commit()
 
         # Notify any listeners that the tip of the branch has changed, but
         # before we've actually updated the database branch.
+        self.logger.info("Firing tip change event.")
         initial_scan = (len(db_history) == 0)
         notify(events.TipChanged(self.db_branch, bzr_branch, initial_scan))
 
@@ -126,7 +128,9 @@
         # not been updated. Since this has no ill-effect, and can only err on
         # the pessimistic side (tell the user the data has not yet been
         # updated although it has), the race is acceptable.
+        self.logger.info("Updating branch status.")
         self.updateBranchStatus(bzr_history)
+        self.logger.info("Firing scan completion event.")
         notify(
             events.ScanCompleted(
                 self.db_branch, bzr_branch, self.logger, new_ancestry))
@@ -157,6 +161,7 @@
         return bzr_branch.repository.get_graph(PPSource)
 
     def getAncestryDelta(self, bzr_branch):
+        self.logger.info("Calculating ancestry delta.")
         bzr_last = bzr_branch.last_revision()
         db_last = self.db_branch.last_scanned_id
         if db_last is None:
@@ -221,6 +226,7 @@
                 added_history, last_revno, added_ancestry))
         # We must remove any stray BranchRevisions that happen to already be
         # present.
+        self.logger.info("Finding stray BranchRevisions.")
         existing_branchrevisions = Store.of(self.db_branch).find(
             Revision.revision_id, BranchRevision.branch == self.db_branch,
             BranchRevision.revision_id == Revision.id,


Follow ups