← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~wgrant/turnip/alternate-madness into lp:turnip

 

William Grant has proposed merging lp:~wgrant/turnip/alternate-madness into lp:turnip.

Commit message:
Fix open_repo's ephemeral case and init_repo's clone-of-a-clone case to cope with turnip-subordinate properly.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  Bug #1454452 in turnip: "spurious alternate object store warning"
  https://bugs.launchpad.net/turnip/+bug/1454452
  Bug #1465502 in turnip: "Ephemeral repos created by open_repo don't include objects from turnip-subordinate"
  https://bugs.launchpad.net/turnip/+bug/1465502

For more details, see:
https://code.launchpad.net/~wgrant/turnip/alternate-madness/+merge/262305

Fix two bugs in our handling of alternates.

open_repo's ephemeral case assumed that alternates of alternates were respected, but neither git nor libgit2 support relative alternates except in the root. We work around this by explicitly including the absolute paths in the root's alternates list.

init_repo with clone_from set to a repo that was itself a clone erroneously failed to include objects from the clone_from repo's turnip-subordinate. git's and libgit2's clone operations preserve alternate relations, which we don't want, and libgit2's buggily fails to map relative paths so they work in the new location. Since there's no built-in way to merge clone_from and its subordinate into a single new subordinate, I create a fresh repo, link in the loose and packed objects, and add the refs.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~wgrant/turnip/alternate-madness into lp:turnip.
=== modified file 'turnip/api/store.py'
--- turnip/api/store.py	2015-06-10 15:20:56 +0000
+++ turnip/api/store.py	2015-06-18 08:35:42 +0000
@@ -7,6 +7,7 @@
     )
 import itertools
 import os
+import re
 import shutil
 import subprocess
 import uuid
@@ -101,6 +102,41 @@
             f.write("{}\n".format(objects_path))
 
 
+object_dir_re = re.compile(r'\A[0-9a-f][0-9a-f]\Z')
+
+
+def import_into_subordinate(sub_root, from_root):
+    """Import all of a repo's objects and refs into another.
+
+    The refs may clobber existing ones."""
+    for dirname in os.listdir(os.path.join(from_root, 'objects')):
+        # We want to hardlink any children of the loose fanout or pack
+        # directories.
+        if (not os.path.isdir(os.path.join(from_root, 'objects', dirname))
+                or (dirname != 'pack' and not object_dir_re.match(dirname))):
+            continue
+
+        sub_dir = os.path.join(sub_root, 'objects', dirname)
+        if not os.path.exists(sub_dir):
+            os.makedirs(sub_dir)
+        for name in os.listdir(os.path.join(from_root, 'objects', dirname)):
+            from_path = os.path.join(from_root, 'objects', dirname, name)
+            sub_path = os.path.join(sub_root, 'objects', dirname, name)
+            if not os.path.isfile(from_path) or os.path.exists(sub_path):
+                continue
+            os.link(from_path, sub_path)
+
+    # Copy over the refs.
+    # TODO: This should ensure that we don't overwrite anything. The
+    # alternate's refs are only used as extra .haves on push, so it
+    # wouldn't hurt to mangle the names.
+    from_repo = Repository(from_root)
+    sub_repo = Repository(sub_root)
+    for ref in from_repo.listall_references():
+        sub_repo.create_reference(
+            ref, from_repo.lookup_reference(ref).target, force=True)
+
+
 def init_repo(repo_path, clone_from=None, clone_refs=False,
               alternate_repo_paths=None, is_bare=True):
     """Initialise a new git repository or clone from existing."""
@@ -113,25 +149,30 @@
         # repo. This lets git-receive-pack expose available commits as
         # extra haves without polluting refs in the real repo.
         sub_path = os.path.join(repo_path, 'turnip-subordinate')
-        clone_repository(clone_from, sub_path, True)
-        assert is_bare
-        alt_path = os.path.join(repo_path, 'objects/info/alternates')
-        with open(alt_path, 'w') as f:
-            f.write('../turnip-subordinate/objects\n')
-
-        if clone_refs:
-            # With the objects all accessible via the subordinate, we
-            # can just copy all refs from the origin. Unlike
-            # pygit2.clone_repository, this won't set up a remote.
-            # TODO: Filter out internal (eg. MP) refs.
-            from_repo = Repository(clone_from)
-            to_repo = Repository(repo_path)
-            for ref in from_repo.listall_references():
-                to_repo.create_reference(
-                    ref, from_repo.lookup_reference(ref).target)
-
+        init_repository(sub_path, True)
+        if os.path.exists(os.path.join(clone_from, 'turnip-subordinate')):
+            import_into_subordinate(
+                sub_path, os.path.join(clone_from, 'turnip-subordinate'))
+        import_into_subordinate(sub_path, clone_from)
+
+    new_alternates = []
     if alternate_repo_paths:
-        write_alternates(repo_path, alternate_repo_paths)
+        new_alternates.extend(alternate_repo_paths)
+    if clone_from:
+        new_alternates.append('../turnip-subordinate')
+    write_alternates(repo_path, new_alternates)
+
+    if clone_from and clone_refs:
+        # With the objects all accessible via the subordinate, we
+        # can just copy all refs from the origin. Unlike
+        # pygit2.clone_repository, this won't set up a remote.
+        # TODO: Filter out internal (eg. MP) refs.
+        from_repo = Repository(clone_from)
+        to_repo = Repository(repo_path)
+        for ref in from_repo.listall_references():
+            to_repo.create_reference(
+                ref, from_repo.lookup_reference(ref).target)
+
     ensure_config(repo_path)  # set repository configuration defaults
     return repo_path
 
@@ -146,9 +187,15 @@
     """
     if ':' in repo_name:
         try:
-            # create ephemeral repo with alternates set from both
-            repos = [os.path.join(repo_store, repo)
-                     for repo in repo_name.split(':')]
+            # Create ephemeral repo with alternates set from both.
+            # Neither git nor libgit2 will respect a relative alternate
+            # path except in the root repo, so we explicitly include the
+            # turnip-subordinate for each repo. If it doesn't exist
+            # it'll just be ignored.
+            repos = list(itertools.chain(*(
+                (os.path.join(repo_store, repo),
+                 os.path.join(repo_store, repo, 'turnip-subordinate'))
+                for repo in repo_name.split(':'))))
             tmp_repo_path = os.path.join(repo_store,
                                          'ephemeral-' + uuid.uuid4().hex)
             ephemeral_repo_path = init_repo(

=== modified file 'turnip/api/tests/test_store.py'
--- turnip/api/tests/test_store.py	2015-06-18 00:05:01 +0000
+++ turnip/api/tests/test_store.py	2015-06-18 08:35:42 +0000
@@ -8,6 +8,7 @@
     )
 
 import os.path
+import re
 import subprocess
 import uuid
 
@@ -32,7 +33,6 @@
         super(InitTestCase, self).setUp()
         self.repo_store = self.useFixture(TempDir()).path
         self.useFixture(EnvironmentVariable("REPO_STORE", self.repo_store))
-        self.repo_path = os.path.join(self.repo_store, uuid.uuid1().hex)
 
     def assertAllLinkCounts(self, link_count, path):
         count = 0
@@ -55,6 +55,19 @@
         for ref in absent:
             self.assertNotIn(absent, out)
 
+    def assertAlternates(self, expected_paths, repo_path):
+        alt_path = store.alternates_path(repo_path)
+        if not os.path.exists(os.path.dirname(alt_path)):
+            raise Exception("No repo at %s." % repo_path)
+        actual_paths = []
+        if os.path.exists(alt_path):
+            with open(alt_path) as altf:
+                actual_paths = [
+                    re.sub('/objects\n$', '', line) for line in altf]
+        self.assertEqual(
+            set([path.rstrip('/') for path in expected_paths]),
+            set(actual_paths))
+
     def makeOrig(self):
         self.orig_path = os.path.join(self.repo_store, 'orig/')
         orig = RepoFactory(
@@ -63,14 +76,6 @@
         self.master_oid = orig.lookup_reference('refs/heads/master').target
         self.orig_objs = os.path.join(self.orig_path, '.git/objects')
 
-    def assert_alternate_exists(self, alternate_path, repo_path):
-        """Assert alternate_path exists in alternates at repo_path."""
-        objects_path = '{}\n'.format(
-            os.path.join(alternate_path, 'objects'))
-        with open(store.alternates_path(repo_path), 'r') as alts:
-            alts_content = alts.read()
-            self.assertIn(objects_path, alts_content)
-
     def test_from_scratch(self):
         path = os.path.join(self.repo_store, 'repo/')
         self.assertEqual(path, store.init_repo(path))
@@ -79,7 +84,7 @@
 
     def test_repo_config(self):
         """Assert repository is initialised with correct config defaults."""
-        repo_path = store.init_repo(self.repo_path)
+        repo_path = store.init_repo(os.path.join(self.repo_store, 'repo'))
         repo_config = pygit2.Repository(repo_path).config
         with open('git.config.yaml') as f:
             yaml_config = yaml.load(f)
@@ -93,23 +98,42 @@
         """Opening a repo where a repo name contains ':' should return
         a new ephemeral repo.
         """
-        repos = [uuid.uuid4().hex, uuid.uuid4().hex]
-        repo_name = '{}:{}'.format(repos[0], repos[1])
-        alt_path = os.path.join(self.repo_store, repos[0])
-        with store.open_repo(self.repo_store, repo_name) as repo:
-            self.assert_alternate_exists(alt_path, repo.path)
+        # Create repos A and B with distinct commits, and C which has no
+        # objects of its own but has a clone of B as its
+        # turnip-subordinate.
+        repos = {}
+        for name in ['A', 'B']:
+            factory = RepoFactory(os.path.join(self.repo_store, name))
+            factory.generate_branches(2, 2)
+            repos[name] = factory.repo
+        repos['C'] = pygit2.Repository(store.init_repo(
+            os.path.join(self.repo_store, 'C'),
+            clone_from=os.path.join(self.repo_store, 'B')))
+
+        # Opening the union of one and three includes the objects from
+        # two, as they're in three's turnip-subordinate.
+        repo_name = 'A:C'
+
+        with store.open_repo(self.repo_store, repo_name) as ephemeral_repo:
+            self.assertAlternates(
+                [repos['A'].path, repos['C'].path,
+                 os.path.join(repos['A'].path, 'turnip-subordinate'),
+                 os.path.join(repos['C'].path, 'turnip-subordinate')],
+                ephemeral_repo.path)
+            self.assertIn(repos['A'].head.target, ephemeral_repo)
+            self.assertIn(repos['B'].head.target, ephemeral_repo)
 
     def test_repo_with_alternates(self):
         """Ensure objects path is defined correctly in repo alternates."""
-        factory = RepoFactory(self.repo_path)
+        factory = RepoFactory(os.path.join(self.repo_store, uuid.uuid1().hex))
         new_repo_path = os.path.join(self.repo_store, uuid.uuid1().hex)
         repo_path_with_alt = store.init_repo(
             new_repo_path, alternate_repo_paths=[factory.repo.path])
-        self.assert_alternate_exists(factory.repo.path, repo_path_with_alt)
+        self.assertAlternates([factory.repo_path], repo_path_with_alt)
 
     def test_repo_alternates_objects_shared(self):
         """Ensure objects are shared from alternate repo."""
-        factory = RepoFactory(self.repo_path)
+        factory = RepoFactory(os.path.join(self.repo_store, uuid.uuid1().hex))
         commit_oid = factory.add_commit('foo', 'foobar.txt')
         new_repo_path = os.path.join(self.repo_store, uuid.uuid4().hex)
         repo_path_with_alt = store.init_repo(
@@ -132,6 +156,7 @@
 
         # Internally, the packs are hardlinked into a subordinate
         # alternate repo, so minimal space is used by the clone.
+        self.assertAlternates(['../turnip-subordinate'], to_path)
         self.assertTrue(
             os.path.exists(
                 os.path.join(to_path, 'turnip-subordinate')))
@@ -156,6 +181,7 @@
 
         # Internally, the packs are hardlinked into a subordinate
         # alternate repo, so minimal space is used by the clone.
+        self.assertAlternates(['../turnip-subordinate'], to_path)
         self.assertTrue(
             os.path.exists(
                 os.path.join(to_path, 'turnip-subordinate')))
@@ -165,3 +191,33 @@
         # refs as extra haves.
         self.assertAdvertisedRefs(
             [('.have', self.master_oid.hex)], ['refs/'], to_path)
+
+    def test_clone_of_clone(self):
+        self.makeOrig()
+        orig_blob = pygit2.Repository(self.orig_path).create_blob(b'orig')
+
+        self.assertAllLinkCounts(1, self.orig_objs)
+        to_path = os.path.join(self.repo_store, 'to/')
+        store.init_repo(to_path, clone_from=self.orig_path)
+        self.assertAllLinkCounts(2, self.orig_objs)
+        to_blob = pygit2.Repository(to_path).create_blob(b'to')
+
+        too_path = os.path.join(self.repo_store, 'too/')
+        store.init_repo(too_path, clone_from=to_path)
+        self.assertAllLinkCounts(3, self.orig_objs)
+        too_blob = pygit2.Repository(too_path).create_blob(b'too')
+
+        # Each clone has just its subordinate as an alternate, and the
+        # subordinate has no alternates of its own.
+        for path in (to_path, too_path):
+            self.assertAlternates(['../turnip-subordinate'], path)
+            self.assertAlternates([], os.path.join(path, 'turnip-subordinate'))
+            self.assertIn(self.master_oid.hex, pygit2.Repository(path))
+            self.assertAdvertisedRefs(
+                [('.have', self.master_oid.hex)], [], path)
+
+        # Objects from all three repos are in the third.
+        too = pygit2.Repository(too_path)
+        self.assertIn(orig_blob, too)
+        self.assertIn(to_blob, too)
+        self.assertIn(too_blob, too)


Follow ups