← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~cjwatson/launchpad/bzr-svn-worker-store-cache into lp:launchpad

 

Colin Watson has proposed merging lp:~cjwatson/launchpad/bzr-svn-worker-store-cache into lp:launchpad.

Commit message:
Store bzr-svn's cache in the import data store.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/bzr-svn-worker-store-cache/+merge/362757

We really shouldn't be storing persistent state (even if only caches) on individual importds.  The last time we brought up new importds, they took ages to clear their initial queue because they had to populate ~/.bazaar/svn-cache/ first, unlike bzr-git where we push the cache to the import data store.  This is a bit of a trade-off because it'll mean transferring the full cache for a given repository at the start and end of each import, but I think that's better than sometimes hammering the remote repository if a particular importd happens to be lacking the cache.

The last figure I saw for the total size of the cache across all imports was 42G or so, which isn't quite trivial, but maple seems to have plenty of space so it shouldn't be a problem.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/bzr-svn-worker-store-cache into lp:launchpad.
=== modified file 'lib/lp/codehosting/codeimport/tarball.py'
--- lib/lp/codehosting/codeimport/tarball.py	2009-06-25 04:06:00 +0000
+++ lib/lp/codehosting/codeimport/tarball.py	2019-02-06 00:49:40 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2019 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """Create and extract tarballs."""
@@ -33,19 +33,22 @@
         raise TarError(retcode)
 
 
-def create_tarball(directory, tarball_name):
+def create_tarball(directory, tarball_name, filenames=None):
     """Create a tarball of `directory` called `tarball_name`.
 
     This creates a tarball of `directory` from its parent directory. This
     means that when untarred, it will create a new directory with the same
-    name as `directory`.
+    name as `directory`. If `filenames` is not None, then the tarball will
+    be limited to that list of directory entries under `directory`.
 
     Basically, this is the standard way of making tarballs.
     """
     if not os.path.isdir(directory):
         raise NotADirectory(directory)
+    if filenames is None:
+        filenames = ['.']
     retcode = subprocess.call(
-        ['tar', '-C', directory, '-czf', tarball_name, '.'])
+        ['tar', '-C', directory, '-czf', tarball_name] + filenames)
     _check_tar_retcode(retcode)
 
 

=== modified file 'lib/lp/codehosting/codeimport/tests/test_worker.py'
--- lib/lp/codehosting/codeimport/tests/test_worker.py	2018-08-16 12:37:47 +0000
+++ lib/lp/codehosting/codeimport/tests/test_worker.py	2019-02-06 00:49:40 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2018 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2019 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """Tests for the code import worker."""
@@ -11,6 +11,7 @@
 import subprocess
 import tempfile
 import time
+from uuid import uuid4
 
 from bzrlib import (
     trace,
@@ -1230,6 +1231,50 @@
             self.bazaar_store, logging.getLogger(),
             opener_policy=opener_policy)
 
+    def test_pushBazaarBranch_saves_bzr_svn_cache(self):
+        # BzrSvnImportWorker.pushBazaarBranch saves a tarball of the bzr-svn
+        # cache in the worker's ImportDataStore.
+        from bzrlib.plugins.svn.cache import get_cache
+        worker = self.makeImportWorker(self.makeSourceDetails(
+            'trunk', [('README', 'Original contents')]),
+            opener_policy=AcceptAnythingPolicy())
+        uuid = subvertpy.ra.RemoteAccess(worker.source_details.url).get_uuid()
+        cache_dir = get_cache(uuid).create_cache_dir()
+        cache_dir_contents = os.listdir(cache_dir)
+        self.assertNotEqual([], cache_dir_contents)
+        opener = SafeBranchOpener(worker._opener_policy, worker.probers)
+        remote_branch = opener.open(worker.source_details.url)
+        worker.pushBazaarBranch(
+            self.make_branch('.'), remote_branch=remote_branch)
+        worker.import_data_store.fetch('svn-cache.tar.gz')
+        extract_tarball('svn-cache.tar.gz', '.')
+        self.assertContentEqual(cache_dir_contents, os.listdir(uuid))
+
+    def test_getBazaarBranch_fetches_bzr_svn_cache(self):
+        # BzrSvnImportWorker.getBazaarBranch fetches the tarball of the
+        # bzr-svn cache from the worker's ImportDataStore and expands it
+        # into the appropriate cache directory.
+        from bzrlib.plugins.svn.cache import get_cache
+        worker = self.makeImportWorker(self.makeSourceDetails(
+            'trunk', [('README', 'Original contents')]),
+            opener_policy=AcceptAnythingPolicy())
+        # Store a tarred-up cache in the store.
+        content = self.factory.getUniqueString()
+        uuid = str(uuid4())
+        os.makedirs(os.path.join('cache', uuid))
+        with open(os.path.join('cache', uuid, 'svn-cache'), 'w') as cache_file:
+            cache_file.write(content)
+        create_tarball('cache', 'svn-cache.tar.gz', filenames=[uuid])
+        worker.import_data_store.put('svn-cache.tar.gz')
+        # Make sure there's a Bazaar branch in the branch store.
+        branch = self.make_branch('branch')
+        ToBzrImportWorker.pushBazaarBranch(worker, branch)
+        # Finally, fetching the tree gets the cache too.
+        worker.getBazaarBranch()
+        cache_dir = get_cache(uuid).create_cache_dir()
+        with open(os.path.join(cache_dir, 'svn-cache')) as cache_file:
+            self.assertEqual(content, cache_file.read())
+
 
 class TestBzrImport(WorkerTest, TestActualImportMixin,
                     PullingImportWorkerTests):

=== modified file 'lib/lp/codehosting/codeimport/worker.py'
--- lib/lp/codehosting/codeimport/worker.py	2018-08-16 12:37:47 +0000
+++ lib/lp/codehosting/codeimport/worker.py	2019-02-06 00:49:40 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2018 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2019 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """The code import worker. This imports code from foreign repositories."""
@@ -600,7 +600,7 @@
             self.required_format, self.needs_bzr_tree,
             stacked_on_url=self.source_details.stacked_on_url)
 
-    def pushBazaarBranch(self, bazaar_branch):
+    def pushBazaarBranch(self, bazaar_branch, remote_branch=None):
         """Push the updated Bazaar branch to the server.
 
         :return: True if revisions were transferred.
@@ -778,7 +778,7 @@
                 else:
                     raise
             self._logger.info("Pushing local import branch to central store.")
-            self.pushBazaarBranch(bazaar_branch)
+            self.pushBazaarBranch(bazaar_branch, remote_branch=remote_branch)
             self._logger.info("Job complete.")
             return result
         finally:
@@ -844,7 +844,7 @@
             extract_tarball(local_name, git_db_dir)
         return branch
 
-    def pushBazaarBranch(self, bazaar_branch):
+    def pushBazaarBranch(self, bazaar_branch, remote_branch=None):
         """See `ToBzrImportWorker.pushBazaarBranch`.
 
         In addition to the superclass' behaviour, we store bzr-git's cache
@@ -894,6 +894,42 @@
         from bzrlib.plugins.svn import SvnRemoteProber
         return [SvnRemoteProber]
 
+    def getBazaarBranch(self):
+        """See `ToBzrImportWorker.getBazaarBranch`.
+
+        In addition to the superclass' behaviour, we retrieve bzr-svn's
+        cache from the import data store and put it where bzr-svn will find
+        it.
+        """
+        from bzrlib.plugins.svn.cache import create_cache_dir
+        branch = super(BzrSvnImportWorker, self).getBazaarBranch()
+        local_name = 'svn-cache.tar.gz'
+        if self.import_data_store.fetch(local_name):
+            extract_tarball(local_name, create_cache_dir())
+        return branch
+
+    def pushBazaarBranch(self, bazaar_branch, remote_branch=None):
+        """See `ToBzrImportWorker.pushBazaarBranch`.
+
+        In addition to the superclass' behaviour, we store bzr-svn's cache
+        directory in the import data store.
+        """
+        from bzrlib.plugins.svn.cache import get_cache
+        non_trivial = super(BzrSvnImportWorker, self).pushBazaarBranch(
+            bazaar_branch)
+        if remote_branch is not None:
+            cache = get_cache(remote_branch.repository.uuid)
+            cache_dir = cache.create_cache_dir()
+            local_name = 'svn-cache.tar.gz'
+            create_tarball(
+                os.path.dirname(cache_dir), local_name,
+                filenames=[os.path.basename(cache_dir)])
+            self.import_data_store.put(local_name)
+            # XXX cjwatson 2019-02-06: Once this is behaving well on
+            # production, consider removing the local cache after pushing a
+            # copy of it to the import data store.
+        return non_trivial
+
 
 class BzrImportWorker(PullingImportWorker):
     """An import worker for importing Bazaar branches."""


Follow ups