← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~jelmer/launchpad/no-big-oh-history into lp:launchpad

 

Jelmer Vernooij has proposed merging lp:~jelmer/launchpad/no-big-oh-history into lp:launchpad.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  Bug #903703 in Launchpad itself: "uses deprecated Branch.revision_history call"
  https://bugs.launchpad.net/launchpad/+bug/903703

For more details, see:
https://code.launchpad.net/~jelmer/launchpad/no-big-oh-history/+merge/92562

Avoid Branch.revision_history() and Repository.get_ancestry() in the Launchpad codebase.

This is a prerequisite for deploying a new version of bzr (newer versions deprecate Branch.revision_history() and Repository.get_ancestry()) and it might help towards bug 808930.

A much earlier version of this landed in https://code.launchpad.net/~jelmer/launchpad/avoid-revision-history-2/+merge/85501, but then reverted because it actually caused performance regressions.

Tests
--------

./bin/test lp.codehosting.scanner
./bin/test lp.code.model.tests.test_branchjob

QA
----
1) Push a Linux kernel branch and make sure that it is scanned correctly
2) Push some additional changes to it and make sure they're correctly scanned
3) Overwrite it with a completely different branch, and make sure it scans correctly
-- 
https://code.launchpad.net/~jelmer/launchpad/no-big-oh-history/+merge/92562
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~jelmer/launchpad/no-big-oh-history into lp:launchpad.
=== modified file 'lib/lp/code/interfaces/branch.py'
--- lib/lp/code/interfaces/branch.py	2012-01-30 06:02:10 +0000
+++ lib/lp/code/interfaces/branch.py	2012-02-10 19:30:29 +0000
@@ -889,19 +889,14 @@
         """
 
     def getScannerData():
-        """Retrieve the full ancestry of a branch for the branch scanner.
+        """Retrieve the full history of a branch for the branch scanner.
 
         The branch scanner script is the only place where we need to retrieve
-        all the BranchRevision rows for a branch. Since the ancestry of some
+        all the BranchRevision rows for a branch. Since the history of some
         branches is into the tens of thousands we don't want to materialise
         BranchRevision instances for each of these.
 
-        :return: tuple of three items.
-            1. Ancestry set of bzr revision-ids.
-            2. History list of bzr revision-ids. Similar to the result of
-               bzrlib.Branch.revision_history().
-            3. Dictionnary mapping bzr bzr revision-ids to the database ids of
-               the corresponding BranchRevision rows for this branch.
+        :return: Iterator over bzr revision-ids in history, newest first.
         """
 
     def getInternalBzrUrl():

=== modified file 'lib/lp/code/model/branch.py'
--- lib/lp/code/model/branch.py	2012-02-03 06:28:44 +0000
+++ lib/lp/code/model/branch.py	2012-02-10 19:30:29 +0000
@@ -975,19 +975,12 @@
 
     def getScannerData(self):
         """See `IBranch`."""
-        columns = (BranchRevision.sequence, Revision.revision_id)
         rows = Store.of(self).using(Revision, BranchRevision).find(
-            columns,
+            (BranchRevision.sequence, Revision.revision_id),
             Revision.id == BranchRevision.revision_id,
-            BranchRevision.branch_id == self.id)
-        rows = rows.order_by(BranchRevision.sequence)
-        ancestry = set()
-        history = []
-        for sequence, revision_id in rows:
-            ancestry.add(revision_id)
-            if sequence is not None:
-                history.append(revision_id)
-        return ancestry, history
+            BranchRevision.branch_id == self.id,
+            BranchRevision.sequence != None)
+        return rows.order_by(Desc(BranchRevision.sequence))
 
     def getPullURL(self):
         """See `IBranch`."""

=== modified file 'lib/lp/code/model/branchjob.py'
--- lib/lp/code/model/branchjob.py	2011-12-30 06:14:56 +0000
+++ lib/lp/code/model/branchjob.py	2012-02-10 19:30:29 +0000
@@ -485,16 +485,20 @@
 
     def iterAddedMainline(self):
         """Iterate through revisions added to the mainline."""
-        repository = self.bzr_branch.repository
-        added_revisions = repository.get_graph().find_unique_ancestors(
-            self.last_revision_id, [self.last_scanned_id])
+        graph = self.bzr_branch.repository.get_graph()
+        branch_last_revinfo = self.bzr_branch.last_revision_info()
+        # FInd the revision number matching self.last_revision_id
+        last_revno = graph.find_distance_to_null(
+            self.last_revision_id,
+            [(branch_last_revinfo[1], branch_last_revinfo[0])])
+        added_revisions = graph.find_unique_ancestors(
+            self.last_revision_id, [self.last_scanned_id, NULL_REVISION])
         # Avoid hitting the database since bzrlib makes it easy to check.
-        # There are possibly more efficient ways to get the mainline
-        # revisions, but this is simple and it works.
-        history = self.bzr_branch.revision_history()
-        for num, revid in enumerate(history):
+        history = graph.iter_lefthand_ancestry(
+            self.last_revision_id, [NULL_REVISION, self.last_scanned_id, None])
+        for distance, revid in enumerate(history):
             if revid in added_revisions:
-                yield repository.get_revision(revid), num + 1
+                yield revid, last_revno - distance
 
     def generateDiffs(self):
         """Determine whether to generate diffs."""
@@ -515,8 +519,11 @@
 
         self.bzr_branch.lock_read()
         try:
-            for revision, revno in self.iterAddedMainline():
+            for revision_id, revno in reversed(
+                    list(self.iterAddedMainline())):
                 assert revno is not None
+                revision = self.bzr_branch.repository.get_revision(
+                    revision_id)
                 mailer = self.getMailerForRevision(
                     revision, revno, self.generateDiffs())
                 mailer.sendAll()

=== modified file 'lib/lp/code/model/tests/test_branchjob.py'
--- lib/lp/code/model/tests/test_branchjob.py	2012-01-24 12:36:15 +0000
+++ lib/lp/code/model/tests/test_branchjob.py	2012-02-10 19:30:29 +0000
@@ -378,16 +378,13 @@
         job = RevisionsAddedJob.create(branch, 'rev1', 'rev2', '')
         self.assertEqual([job], list(RevisionsAddedJob.iterReady()))
 
-    def updateDBRevisions(self, branch, bzr_branch, revision_ids=None):
+    def updateDBRevisions(self, branch, bzr_branch, revision_ids):
         """Update the database for the revisions.
 
         :param branch: The database branch associated with the revisions.
         :param bzr_branch: The Bazaar branch associated with the revisions.
-        :param revision_ids: The ids of the revisions to update.  If not
-            supplied, the branch revision history is used.
+        :param revision_ids: The ids of the revisions to update.
         """
-        if revision_ids is None:
-            revision_ids = bzr_branch.revision_history()
         for bzr_revision in bzr_branch.repository.get_revisions(revision_ids):
             existing = branch.getBranchRevision(
                 revision_id=bzr_revision.revision_id)
@@ -433,7 +430,7 @@
         job = RevisionsAddedJob.create(branch, 'rev1', 'rev2', '')
         job.bzr_branch.lock_read()
         self.addCleanup(job.bzr_branch.unlock)
-        [(revision, revno)] = list(job.iterAddedMainline())
+        [(revision_id, revno)] = list(job.iterAddedMainline())
         self.assertEqual(2, revno)
 
     def test_iterAddedNonMainline(self):
@@ -447,24 +444,24 @@
         with override_environ(BZR_EMAIL='me@xxxxxxxxxxx'):
             tree.commit('rev3a', rev_id='rev3a')
         self.updateDBRevisions(branch, tree.branch, ['rev3', 'rev3a'])
-        job = RevisionsAddedJob.create(branch, 'rev1', 'rev3', '')
+        job = RevisionsAddedJob.create(branch, 'rev1', 'rev2', '')
         job.bzr_branch.lock_read()
         self.addCleanup(job.bzr_branch.unlock)
-        out = [x.revision_id for x, y in job.iterAddedMainline()]
+        out = [x for x, y in job.iterAddedMainline()]
         self.assertEqual(['rev2'], out)
 
     def test_iterAddedMainline_order(self):
-        """iterAddedMainline iterates in commit order."""
+        """iterAddedMainline iterates in reverse commit order."""
         self.useBzrBranches(direct_database=True)
         branch, tree = self.create3CommitsBranch()
         job = RevisionsAddedJob.create(branch, 'rev1', 'rev3', '')
         job.bzr_branch.lock_read()
         self.addCleanup(job.bzr_branch.unlock)
         # Since we've gone from rev1 to rev3, we've added rev2 and rev3.
-        [(rev2, revno2), (rev3, revno3)] = list(job.iterAddedMainline())
-        self.assertEqual('rev2', rev2.revision_id)
+        [(rev3, revno3), (rev2, revno2)] = list(job.iterAddedMainline())
+        self.assertEqual('rev2', rev2)
         self.assertEqual(2, revno2)
-        self.assertEqual('rev3', rev3.revision_id)
+        self.assertEqual('rev3', rev3)
         self.assertEqual(3, revno3)
 
     def makeBranchWithCommit(self):
@@ -776,7 +773,8 @@
                 committer="Joe Bloggs <joe@xxxxxxxxxxx>",
                 timestamp=1000100000.0, timezone=0)
         switch_dbuser('branchscanner')
-        self.updateDBRevisions(db_branch, tree.branch)
+        self.updateDBRevisions(
+            db_branch, tree.branch, [first_revision, second_revision])
         expected = (
             u"-" * 60 + '\n'
             "revno: 1" '\n'
@@ -819,7 +817,8 @@
                 committer=u"Non ASCII: \xed", timestamp=1000000000.0,
                 timezone=0)
         switch_dbuser('branchscanner')
-        self.updateDBRevisions(db_branch, tree.branch)
+        self.updateDBRevisions(
+            db_branch, tree.branch, [rev_id])
         job = RevisionsAddedJob.create(db_branch, '', '', '')
         message = job.getRevisionMessage(rev_id, 1)
         # The revision message must be a unicode object.

=== modified file 'lib/lp/codehosting/codeimport/tests/test_worker.py'
--- lib/lp/codehosting/codeimport/tests/test_worker.py	2012-01-01 02:58:52 +0000
+++ lib/lp/codehosting/codeimport/tests/test_worker.py	2012-02-10 19:30:29 +0000
@@ -185,7 +185,7 @@
         store = self.makeBranchStore()
         bzr_branch = store.pull(
             self.arbitrary_branch_id, self.temp_dir, default_format)
-        self.assertEqual([], bzr_branch.revision_history())
+        self.assertEqual((0, 'null:'), bzr_branch.last_revision_info())
 
     def test_getNewBranch_without_tree(self):
         # If pull() with needs_tree=False creates a new branch, it doesn't
@@ -657,7 +657,7 @@
         # import.
         worker = self.makeImportWorker()
         bzr_branch = worker.getBazaarBranch()
-        self.assertEqual([], bzr_branch.revision_history())
+        self.assertEqual((0, 'null:'), bzr_branch.last_revision_info())
 
     def test_bazaarBranchLocation(self):
         # getBazaarBranch makes the working tree under the current working
@@ -844,7 +844,7 @@
         worker.run()
         branch = self.getStoredBazaarBranch(worker)
         self.assertEqual(
-            self.foreign_commit_count, len(branch.revision_history()))
+            self.foreign_commit_count, branch.revno())
 
     def test_sync(self):
         # Do an import.
@@ -854,7 +854,7 @@
         worker.run()
         branch = self.getStoredBazaarBranch(worker)
         self.assertEqual(
-            self.foreign_commit_count, len(branch.revision_history()))
+            self.foreign_commit_count, branch.revno())
 
         # Change the remote branch.
         self.makeForeignCommit(worker.source_details)
@@ -865,7 +865,7 @@
         # Check that the new revisions are in the Bazaar branch.
         branch = self.getStoredBazaarBranch(worker)
         self.assertEqual(
-            self.foreign_commit_count, len(branch.revision_history()))
+            self.foreign_commit_count, branch.revno())
 
     def test_import_script(self):
         # Like test_import, but using the code-import-worker.py script
@@ -902,7 +902,7 @@
         branch = Branch.open(branch_url)
 
         self.assertEqual(
-            self.foreign_commit_count, len(branch.revision_history()))
+            self.foreign_commit_count, branch.revno())
 
     def test_script_exit_codes(self):
         # After a successful import that imports revisions, the worker exits
@@ -1381,8 +1381,7 @@
         self.assertEqual(
             CodeImportWorkerExitCode.SUCCESS, worker.run())
         branch = self.getStoredBazaarBranch(worker)
-        self.assertEqual(
-            1, len(branch.revision_history()))
+        self.assertEqual(1, branch.revno())
         self.assertEqual(
             "Some Random Hacker <jane@xxxxxxxxxxx>",
             branch.repository.get_revision(branch.last_revision()).committer)

=== modified file 'lib/lp/codehosting/codeimport/tests/test_workermonitor.py'
--- lib/lp/codehosting/codeimport/tests/test_workermonitor.py	2012-01-01 02:58:52 +0000
+++ lib/lp/codehosting/codeimport/tests/test_workermonitor.py	2012-02-10 19:30:29 +0000
@@ -803,8 +803,7 @@
         url = get_default_bazaar_branch_store()._getMirrorURL(
             code_import.branch.id)
         branch = Branch.open(url)
-        self.assertEqual(
-            self.foreign_commit_count, len(branch.revision_history()))
+        self.assertEqual(self.foreign_commit_count, branch.revno())
 
     def assertImported(self, ignored, code_import_id):
         """Assert that the `CodeImport` of the given id was imported."""

=== modified file 'lib/lp/codehosting/scanner/bzrsync.py'
--- lib/lp/codehosting/scanner/bzrsync.py	2012-01-01 02:58:52 +0000
+++ lib/lp/codehosting/scanner/bzrsync.py	2012-02-10 19:30:29 +0000
@@ -16,6 +16,7 @@
 
 import logging
 
+from bzrlib.errors import NoSuchRevision
 from bzrlib.graph import DictParentsProvider
 from bzrlib.revision import NULL_REVISION
 import pytz
@@ -84,16 +85,16 @@
         # Get the history and ancestry from the branch first, to fail early
         # if something is wrong with the branch.
         self.logger.info("Retrieving history from bzrlib.")
-        bzr_history = bzr_branch.revision_history()
+        last_revision_info = bzr_branch.last_revision_info()
         # The BranchRevision, Revision and RevisionParent tables are only
         # written to by the branch-scanner, so they are not subject to
         # write-lock contention. Update them all in a single transaction to
         # improve the performance and allow garbage collection in the future.
-        db_ancestry, db_history = self.retrieveDatabaseAncestry()
+        db_history = self.retrieveDatabaseAncestry()
 
         (new_ancestry, branchrevisions_to_delete,
             revids_to_insert) = self.planDatabaseChanges(
-            bzr_branch, bzr_history, db_ancestry, db_history)
+            bzr_branch, last_revision_info, db_history)
         new_db_revs = (
             new_ancestry - getUtility(IRevisionSet).onlyPresent(new_ancestry))
         self.logger.info("Adding %s new revisions.", len(new_db_revs))
@@ -115,7 +116,7 @@
 
         # Notify any listeners that the tip of the branch has changed, but
         # before we've actually updated the database branch.
-        initial_scan = (len(db_history) == 0)
+        initial_scan = (self.db_branch.last_scanned_id is None)
         notify(events.TipChanged(self.db_branch, bzr_branch, initial_scan))
 
         # The Branch table is modified by other systems, including the web UI,
@@ -125,7 +126,7 @@
         # not been updated. Since this has no ill-effect, and can only err on
         # the pessimistic side (tell the user the data has not yet been
         # updated although it has), the race is acceptable.
-        self.updateBranchStatus(bzr_history)
+        self.updateBranchStatus(last_revision_info)
         notify(
             events.ScanCompleted(
                 self.db_branch, bzr_branch, self.logger, new_ancestry))
@@ -134,8 +135,7 @@
     def retrieveDatabaseAncestry(self):
         """Efficiently retrieve ancestry from the database."""
         self.logger.info("Retrieving ancestry from database.")
-        db_ancestry, db_history = self.db_branch.getScannerData()
-        return db_ancestry, db_history
+        return self.db_branch.getScannerData()
 
     def _getRevisionGraph(self, bzr_branch, db_last):
         if bzr_branch.repository.has_revision(db_last):
@@ -155,42 +155,32 @@
 
         return bzr_branch.repository.get_graph(PPSource)
 
-    def getAncestryDelta(self, bzr_branch):
-        bzr_last = bzr_branch.last_revision()
-        db_last = self.db_branch.last_scanned_id
-        if db_last is None:
-            added_ancestry = set(bzr_branch.repository.get_ancestry(bzr_last))
-            added_ancestry.discard(None)
-            removed_ancestry = set()
-        else:
-            graph = self._getRevisionGraph(bzr_branch, db_last)
-            added_ancestry, removed_ancestry = (
-                graph.find_difference(bzr_last, db_last))
-            added_ancestry.discard(NULL_REVISION)
+    def getAncestryDelta(self, bzr_branch, bzr_last_revinfo, graph, db_last):
+        added_ancestry, removed_ancestry = graph.find_difference(
+            bzr_last_revinfo[1], db_last)
+        added_ancestry.discard(NULL_REVISION)
         return added_ancestry, removed_ancestry
 
-    def getHistoryDelta(self, bzr_history, db_history):
+    def getHistoryDelta(self, bzr_branch, bzr_last_revinfo, graph, db_history):
         self.logger.info("Calculating history delta.")
-        common_len = min(len(bzr_history), len(db_history))
-        while common_len > 0:
-            # The outer conditional improves efficiency. Without it, the
-            # algorithm is O(history-size * change-size), which can be
-            # excessive if a long branch is replaced by another long branch
-            # with a distant (or no) common mainline parent. The inner
-            # conditional is needed for correctness with branches where the
-            # history does not follow the line of leftmost parents.
-            if db_history[common_len - 1] == bzr_history[common_len - 1]:
-                if db_history[:common_len] == bzr_history[:common_len]:
+        removed_history = []
+        common_revid = NULL_REVISION
+        for (revno, revid) in db_history:
+            try:
+                if bzr_branch.get_rev_id(revno) == revid:
+                    common_revid = revid
+                    common_len = revno
                     break
-            common_len -= 1
+            except NoSuchRevision:
+                pass
+            removed_history.append(revid)
         # Revision added or removed from the branch's history. These lists may
         # include revisions whose history position has merely changed.
-        removed_history = db_history[common_len:]
-        added_history = bzr_history[common_len:]
+        added_history = list(graph.iter_lefthand_ancestry(bzr_last_revinfo[1],
+            (common_revid, )))
         return added_history, removed_history
 
-    def planDatabaseChanges(self, bzr_branch, bzr_history, db_ancestry,
-                            db_history):
+    def planDatabaseChanges(self, bzr_branch, bzr_last_revinfo, db_history):
         """Plan database changes to synchronize with bzrlib data.
 
         Use the data retrieved by `retrieveDatabaseAncestry` and
@@ -198,9 +188,18 @@
         """
         self.logger.info("Planning changes.")
         # Find the length of the common history.
-        added_history, removed_history = self.getHistoryDelta(
-            bzr_history, db_history)
-        added_ancestry, removed_ancestry = self.getAncestryDelta(bzr_branch)
+        db_last = self.db_branch.last_scanned_id
+        if db_last is None:
+            db_last = NULL_REVISION
+        graph = self._getRevisionGraph(bzr_branch, db_last)
+        bzr_branch.lock_read()
+        try:
+            added_history, removed_history = self.getHistoryDelta(
+                bzr_branch, bzr_last_revinfo, graph, db_history)
+            added_ancestry, removed_ancestry = self.getAncestryDelta(
+                bzr_branch, bzr_last_revinfo, graph, db_last)
+        finally:
+            bzr_branch.unlock()
 
         notify(
             events.RevisionsRemoved(
@@ -215,10 +214,9 @@
 
         # We must insert BranchRevision rows for all revisions which were
         # added to the ancestry or whose sequence value has changed.
-        last_revno = len(bzr_history)
         revids_to_insert = dict(
             self.revisionsToInsert(
-                added_history, last_revno, added_ancestry))
+                added_history, bzr_last_revinfo[0], added_ancestry))
         # We must remove any stray BranchRevisions that happen to already be
         # present.
         existing_branchrevisions = Store.of(self.db_branch).find(
@@ -236,8 +234,8 @@
         :param revisions: the set of Bazaar revision IDs to return bzrlib
             Revision objects for.
         """
-        revisions = bzr_branch.repository.get_parent_map(revisions)
-        return bzr_branch.repository.get_revisions(revisions.keys())
+        revisions = list(bzr_branch.repository.has_revisions(revisions))
+        return bzr_branch.repository.get_revisions(revisions)
 
     def syncOneRevision(self, bzr_branch, bzr_revision, revids_to_insert):
         """Import the revision with the given revision_id.
@@ -263,13 +261,13 @@
         """Calculate the revisions to insert and their revnos.
 
         :param added_history: A list of revision ids added to the revision
-            history in parent-to-child order.
+            history in child-to-parent order.
         :param last_revno: The revno of the last revision.
         :param added_ancestry: A set of revisions that have been added to the
             ancestry of the branch.  May overlap with added_history.
         """
         start_revno = last_revno - len(added_history) + 1
-        for (revno, revision_id) in enumerate(added_history, start_revno):
+        for (revno, revision_id) in enumerate(reversed(added_history), start_revno):
             yield revision_id, revno
         for revision_id in added_ancestry.difference(added_history):
             yield revision_id, None
@@ -296,12 +294,10 @@
         for revid_seq_pair_chunk in iter_list_chunks(revid_seq_pairs, 1000):
             self.db_branch.createBranchRevisionFromIDs(revid_seq_pair_chunk)
 
-    def updateBranchStatus(self, bzr_history):
+    def updateBranchStatus(self, (revision_count, last_revision)):
         """Update the branch-scanner status in the database Branch table."""
         # Record that the branch has been updated.
-        revision_count = len(bzr_history)
         if revision_count > 0:
-            last_revision = bzr_history[-1]
             revision = getUtility(IRevisionSet).getByRevisionId(last_revision)
         else:
             revision = None

=== modified file 'lib/lp/codehosting/scanner/tests/test_bzrsync.py'
--- lib/lp/codehosting/scanner/tests/test_bzrsync.py	2012-01-20 15:42:44 +0000
+++ lib/lp/codehosting/scanner/tests/test_bzrsync.py	2012-02-10 19:30:29 +0000
@@ -17,6 +17,7 @@
 from bzrlib.tests import TestCaseWithTransport
 from bzrlib.uncommit import uncommit
 import pytz
+from storm.expr import Desc
 from storm.locals import Store
 from twisted.python.util import mergeFunctionMetadata
 from zope.component import getUtility
@@ -428,9 +429,10 @@
             else:
                 bzr_branch.set_last_revision_info(revno, bzr_rev)
                 delta_branch = bzr_branch
-            return sync.getAncestryDelta(delta_branch)
+            return sync.getAncestryDelta(
+                delta_branch, delta_branch.last_revision_info(), graph, db_rev)
 
-        added_ancestry, removed_ancestry = get_delta('merge', None)
+        added_ancestry, removed_ancestry = get_delta('merge', NULL_REVISION)
         # All revisions are new for an unscanned branch
         self.assertEqual(
             set(['base', 'trunk', 'branch', 'merge']), added_ancestry)
@@ -471,8 +473,11 @@
         # yield each revision along with a sequence number, starting at 1.
         self.commitRevision(rev_id='rev-1')
         bzrsync = self.makeBzrSync(self.db_branch)
-        bzr_history = self.bzr_branch.revision_history()
-        added_ancestry = bzrsync.getAncestryDelta(self.bzr_branch)[0]
+        bzr_history = ['rev-1']
+        graph = bzrsync._getRevisionGraph(self.bzr_branch, 'rev-1')
+        added_ancestry = bzrsync.getAncestryDelta(
+            self.bzr_branch, self.bzr_branch.last_revision_info(),
+            graph, 'rev-1')[0]
         result = bzrsync.revisionsToInsert(
             bzr_history, self.bzr_branch.revno(), added_ancestry)
         self.assertEqual({'rev-1': 1}, dict(result))
@@ -483,8 +488,12 @@
         (db_branch, bzr_tree), ignored = self.makeBranchWithMerge(
             'base', 'trunk', 'branch', 'merge')
         bzrsync = self.makeBzrSync(db_branch)
-        bzr_history = bzr_tree.branch.revision_history()
-        added_ancestry = bzrsync.getAncestryDelta(bzr_tree.branch)[0]
+        bzr_history = ['merge', 'trunk', 'base']
+        graph = bzrsync._getRevisionGraph(bzr_tree.branch, 'merge')
+        self.addCleanup(bzr_tree.branch.lock_read().unlock)
+        added_ancestry = bzrsync.getAncestryDelta(
+            bzr_tree.branch, bzr_tree.branch.last_revision_info(),
+            graph, NULL_REVISION)[0]
         expected = {'base': 1, 'trunk': 2, 'merge': 3, 'branch': None}
         self.assertEqual(
             expected, dict(bzrsync.revisionsToInsert(bzr_history,
@@ -528,7 +537,7 @@
         self.assertEqual(self.getBranchRevisions(db_trunk), expected)
 
     def test_retrieveDatabaseAncestry(self):
-        # retrieveDatabaseAncestry should set db_ancestry and db_history to
+        # retrieveDatabaseAncestry should set db_history to
         # Launchpad's current understanding of the branch state.
         # db_branch_revision_map should map Bazaar revision_ids to
         # BranchRevision.ids.
@@ -541,19 +550,16 @@
             '~name12/+junk/junk.contrib')
         branch_revisions = IStore(BranchRevision).find(
             BranchRevision, BranchRevision.branch == branch)
-        sampledata = list(branch_revisions.order_by(BranchRevision.sequence))
-        expected_ancestry = set(branch_revision.revision.revision_id
-            for branch_revision in sampledata)
-        expected_history = [branch_revision.revision.revision_id
+        sampledata = list(branch_revisions.order_by(Desc(BranchRevision.sequence)))
+        expected_history = [
+            (branch_revision.sequence, branch_revision.revision.revision_id)
             for branch_revision in sampledata
             if branch_revision.sequence is not None]
 
         self.create_branch_and_tree(db_branch=branch)
 
         bzrsync = self.makeBzrSync(branch)
-        db_ancestry, db_history = (
-            bzrsync.retrieveDatabaseAncestry())
-        self.assertEqual(expected_ancestry, set(db_ancestry))
+        db_history = bzrsync.retrieveDatabaseAncestry()
         self.assertEqual(expected_history, list(db_history))
 
 
@@ -576,9 +582,9 @@
         syncer.syncBranchAndClose(self.bzr_tree.branch)
         self.assertEqual(rev2_id, self.db_branch.last_scanned_id)
         self.db_branch.last_scanned_id = rev1_id
-        db_ancestry, db_history = self.db_branch.getScannerData()
+        db_history = self.db_branch.getScannerData()
         branchrevisions_to_delete = syncer.planDatabaseChanges(
-            self.bzr_branch, [rev1_id, rev2_id], db_ancestry, db_history)[1]
+            self.bzr_branch, (2, rev2_id), db_history)[1]
         self.assertIn(merge_id, branchrevisions_to_delete)
 
 

=== modified file 'lib/lp/codehosting/tests/test_branchdistro.py'
--- lib/lp/codehosting/tests/test_branchdistro.py	2012-01-20 15:42:44 +0000
+++ lib/lp/codehosting/tests/test_branchdistro.py	2012-02-10 19:30:29 +0000
@@ -267,12 +267,9 @@
         self.assertEqual(tip_revision_id, new_branch.last_mirrored_id)
         self.assertEqual(tip_revision_id, new_branch.last_scanned_id)
         # Make sure that the branch revisions have been copied.
-        old_ancestry, old_history = removeSecurityProxy(
-            db_branch).getScannerData()
-        new_ancestry, new_history = removeSecurityProxy(
-            new_branch).getScannerData()
-        self.assertEqual(old_ancestry, new_ancestry)
-        self.assertEqual(old_history, new_history)
+        old_history = removeSecurityProxy(db_branch).getScannerData()
+        new_history = removeSecurityProxy(new_branch).getScannerData()
+        self.assertEqual(list(old_history), list(new_history))
         self.assertFalse(new_branch.pending_writes)
         self.assertIs(None, new_branch.stacked_on)
         self.assertEqual(new_branch, db_branch.stacked_on)