launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #20452
[Merge] lp:~wgrant/launchpad/scannery-stabbery into lp:launchpad
William Grant has proposed merging lp:~wgrant/launchpad/scannery-stabbery into lp:launchpad.
Commit message:
Rewrite Branch.createBranchRevisionsFromIDs to use bulk.create.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~wgrant/launchpad/scannery-stabbery/+merge/295542
Rewrite Branch.createBranchRevisionsFromIDs to use bulk.create. It previously used a temp table and potentially overcomplicated queries in ways that caused postgres to choose a bad plan.
Also log a bit more verbosely in bzrsync, so we can see which bits remain slow.
--
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~wgrant/launchpad/scannery-stabbery into lp:launchpad.
=== modified file 'lib/lp/code/model/branch.py'
--- lib/lp/code/model/branch.py 2016-04-06 10:59:15 +0000
+++ lib/lp/code/model/branch.py 2016-05-24 05:37:38 +0000
@@ -27,7 +27,6 @@
Coalesce,
Count,
Desc,
- Insert,
Join,
NamedFunc,
Not,
@@ -154,7 +153,7 @@
)
from lp.registry.model.teammembership import TeamParticipation
from lp.services.config import config
-from lp.services.database.bulk import load_related
+from lp.services.database import bulk
from lp.services.database.constants import (
DEFAULT,
UTC_NOW,
@@ -499,7 +498,7 @@
from lp.code.model.branchcollection import GenericBranchCollection
def eager_load(rows):
- branches = load_related(
+ branches = bulk.load_related(
Branch, rows, ['source_branchID', 'prerequisite_branchID'])
GenericBranchCollection.preloadVisibleStackedOnBranches(
branches, user)
@@ -781,7 +780,8 @@
def eager_load(rows):
revisions = map(operator.itemgetter(1), rows)
- load_related(RevisionAuthor, revisions, ['revision_author_id'])
+ bulk.load_related(
+ RevisionAuthor, revisions, ['revision_author_id'])
return DecoratedResultSet(result, pre_iter_hook=eager_load)
def getRevisionsSince(self, timestamp):
@@ -1094,26 +1094,15 @@
if not revision_id_sequence_pairs:
return
store = Store.of(self)
- store.execute(
- """
- CREATE TEMPORARY TABLE RevidSequence
- (revision_id text, sequence integer)
- """)
- # Force to Unicode or we will end up with bad quoting under
- # PostgreSQL 9.1.
- unicode_revid_sequence_pairs = [
- (a and unicode(a) or None, b and unicode(b) or None)
- for a, b in revision_id_sequence_pairs]
- store.execute(Insert(('revision_id', 'sequence'),
- table=['RevidSequence'], values=unicode_revid_sequence_pairs))
- store.execute(
- """
- INSERT INTO BranchRevision (branch, revision, sequence)
- SELECT %s, Revision.id, RevidSequence.sequence
- FROM RevidSequence, Revision
- WHERE Revision.revision_id = RevidSequence.revision_id
- """ % sqlvalues(self))
- store.execute("DROP TABLE RevidSequence")
+ rev_db_ids = dict(store.find(
+ (Revision.revision_id, Revision.id),
+ Revision.revision_id.is_in(
+ (revid for revid, _ in revision_id_sequence_pairs))))
+ bulk.create(
+ (BranchRevision.branch, BranchRevision.revision_id,
+ BranchRevision.sequence),
+ [(self, rev_db_ids[revid], seq)
+ for revid, seq in revision_id_sequence_pairs])
def getTipRevision(self):
"""See `IBranch`."""
=== modified file 'lib/lp/codehosting/scanner/bzrsync.py'
--- lib/lp/codehosting/scanner/bzrsync.py 2015-09-24 13:44:02 +0000
+++ lib/lp/codehosting/scanner/bzrsync.py 2016-05-24 05:37:38 +0000
@@ -111,11 +111,13 @@
self.insertBranchRevisions(bzr_branch, revids_to_insert)
transaction.commit()
# Synchronize the RevisionCache for this branch.
+ self.logger.info("Updating revision cache.")
getUtility(IRevisionSet).updateRevisionCacheForBranch(self.db_branch)
transaction.commit()
# Notify any listeners that the tip of the branch has changed, but
# before we've actually updated the database branch.
+ self.logger.info("Firing tip change event.")
initial_scan = (len(db_history) == 0)
notify(events.TipChanged(self.db_branch, bzr_branch, initial_scan))
@@ -126,7 +128,9 @@
# not been updated. Since this has no ill-effect, and can only err on
# the pessimistic side (tell the user the data has not yet been
# updated although it has), the race is acceptable.
+ self.logger.info("Updating branch status.")
self.updateBranchStatus(bzr_history)
+ self.logger.info("Firing scan completion event.")
notify(
events.ScanCompleted(
self.db_branch, bzr_branch, self.logger, new_ancestry))
@@ -157,6 +161,7 @@
return bzr_branch.repository.get_graph(PPSource)
def getAncestryDelta(self, bzr_branch):
+ self.logger.info("Calculating ancestry delta.")
bzr_last = bzr_branch.last_revision()
db_last = self.db_branch.last_scanned_id
if db_last is None:
@@ -221,6 +226,7 @@
added_history, last_revno, added_ancestry))
# We must remove any stray BranchRevisions that happen to already be
# present.
+ self.logger.info("Finding stray BranchRevisions.")
existing_branchrevisions = Store.of(self.db_branch).find(
Revision.revision_id, BranchRevision.branch == self.db_branch,
BranchRevision.revision_id == Revision.id,
Follow ups