← Back to team overview

launchpad-reviewers team mailing list archive

Re: [Merge] lp:~wgrant/turnip/subordinate-alternate into lp:turnip

 

Review: Approve

I think we may need to force receive.autogc to false before landing this.  Otherwise, if somebody removes a ref from the cloned-from repository and then pushes to it, my understanding is that receive-pack will run git gc --auto and corrupt the cloned repository.  We're going to need to do garbage collection in a more controlled way.

Diff comments:

> === modified file 'turnip/api/store.py'
> --- turnip/api/store.py	2015-04-24 11:06:04 +0000
> +++ turnip/api/store.py	2015-04-25 18:16:55 +0000
> @@ -3,8 +3,6 @@
>  import itertools
>  import os
>  import shutil
> -import urllib
> -import urlparse
>  
>  from pygit2 import (
>      clone_repository,
> @@ -72,16 +70,34 @@
>      return True
>  
>  
> -def init_repo(repo_path, clone_from=None, is_bare=True):
> +def init_repo(repo_path, clone_from=None, clone_refs=False, is_bare=True):
>      """Initialise a new git repository or clone from existing."""
>      assert is_valid_new_path(repo_path)
> +    init_repository(repo_path, is_bare)
> +
>      if clone_from:
> -        clone_from_url = urlparse.urljoin('file:',
> -                                          urllib.pathname2url(clone_from))
> -        repo = clone_repository(clone_from_url, repo_path, is_bare)
> -    else:
> -        repo = init_repository(repo_path, is_bare)
> -    return repo.path
> +        # The clone_from's objects and refs are in fact cloned into a
> +        # subordinate tree that's then set as an alternate for the real
> +        # repo. This lets git-receive-pack expose available commits as
> +        # extra haves without polluting refs in the real repo.
> +        sub_path = os.path.join(repo_path, 'turnip-subordinate')
> +        clone_repository(clone_from, sub_path, True)
> +        assert is_bare

I'd put assertions at the top, or at least before the clone_repository.

> +        alt_path = os.path.join(repo_path, 'objects/info/alternates')
> +        with open(alt_path, 'w') as f:
> +            f.write('../turnip-subordinate/objects\n')
> +
> +        if clone_refs:
> +            # With the objects all accessible via the subordinate, we
> +            # can just copy all refs from the origin. Unlike
> +            # pygit2.clone_repository, this won't set up a remote.
> +            # TODO: Filter out internal (eg. MP) refs.
> +            from_repo = Repository(clone_from)
> +            to_repo = Repository(repo_path)
> +            for ref in from_repo.listall_references():
> +                to_repo.create_reference(
> +                    ref, from_repo.lookup_reference(ref).target)
> +    return repo_path
>  
>  
>  def open_repo(repo_path):
> 
> === modified file 'turnip/api/tests/test_api.py'
> --- turnip/api/tests/test_api.py	2015-04-24 09:37:22 +0000
> +++ turnip/api/tests/test_api.py	2015-04-25 18:16:55 +0000
> @@ -63,7 +63,8 @@
>          factory.build()
>          new_repo_path = uuid.uuid1().hex
>          resp = self.app.post_json('/repo', {'repo_path': new_repo_path,
> -                                            'clone_from': self.repo_path})
> +                                            'clone_from': self.repo_path,
> +                                            'clone_refs': 'true'})
>          repo1_revlist = get_revlist(factory.repo)
>          clone_from = resp.json['repo_url'].split('/')[-1]
>          repo2 = open_repo(os.path.join(self.repo_root, clone_from))
> 
> === added file 'turnip/api/tests/test_store.py'
> --- turnip/api/tests/test_store.py	1970-01-01 00:00:00 +0000
> +++ turnip/api/tests/test_store.py	2015-04-25 18:16:55 +0000
> @@ -0,0 +1,107 @@
> +# Copyright 2015 Canonical Ltd.  All rights reserved.
> +
> +from __future__ import (
> +    absolute_import,
> +    print_function,
> +    unicode_literals,
> +    )
> +
> +import os.path
> +import subprocess
> +
> +from fixtures import TempDir
> +import pygit2
> +from testtools import TestCase
> +
> +from turnip.api import store
> +from turnip.api.tests.test_helpers import RepoFactory
> +
> +
> +class InitTestCase(TestCase):
> +
> +    def setUp(self):
> +        super(InitTestCase, self).setUp()
> +        self.root = self.useFixture(TempDir()).path
> +
> +    def assertAllLinkCounts(self, link_count, path):
> +        count = 0
> +        for dirpath, _, filenames in os.walk(path):
> +            for filename in filenames:
> +                count += 1
> +                self.assertEqual(
> +                    link_count,
> +                    os.stat(os.path.join(dirpath, filename)).st_nlink)
> +        return count
> +
> +    def assertAdvertisedRefs(self, present, absent, repo_path):
> +        out, err = subprocess.Popen(
> +            ['git', 'receive-pack', '--advertise-refs', repo_path],
> +            stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
> +        self.assertEqual('', err)
> +        for ref, hex in present:
> +            self.assertIn('%s %s' % (hex, ref), out)
> +        for ref in absent:
> +            self.assertNotIn(absent, out)
> +
> +    def makeOrig(self):
> +        self.orig_path = os.path.join(self.root, 'orig/')
> +        orig = RepoFactory(
> +            self.orig_path, num_branches=3, num_commits=2).build()
> +        self.orig_refs = orig.listall_references()
> +        self.master_oid = orig.lookup_reference('refs/heads/master').target
> +        self.orig_objs = os.path.join(self.orig_path, '.git/objects')
> +
> +    def test_from_scratch(self):
> +        path = os.path.join(self.root, 'repo/')
> +        self.assertEqual(path, store.init_repo(path))
> +        r = pygit2.Repository(path)
> +        self.assertEqual([], r.listall_references())
> +
> +    def test_clone_with_refs(self):
> +        self.makeOrig()
> +        self.assertAllLinkCounts(1, self.orig_objs)
> +
> +        # init_repo with clone_from=orig and clone_refs=True creates a
> +        # repo with the same set of refs. And the objects are copied
> +        # too.
> +        to_path = os.path.join(self.root, 'to/')
> +        store.init_repo(to_path, clone_from=self.orig_path, clone_refs=True)
> +        to = pygit2.Repository(to_path)
> +        self.assertIsNot(None, to[self.master_oid])
> +        self.assertEqual(self.orig_refs, to.listall_references())
> +
> +        # Internally, the packs are hardlinked into a subordinate
> +        # alternate repo, so minimal space is used by the clone.
> +        self.assertTrue(
> +            os.path.exists(
> +                os.path.join(to_path, 'turnip-subordinate')))
> +        self.assertAllLinkCounts(2, self.orig_objs)
> +
> +        self.assertAdvertisedRefs(
> +            [('.have', self.master_oid.hex),
> +             ('refs/heads/master', self.master_oid.hex)],
> +            [], to_path)
> +
> +    def test_clone_without_refs(self):
> +        self.makeOrig()
> +        self.assertAllLinkCounts(1, self.orig_objs)
> +
> +        # init_repo with clone_from=orig and clone_refs=False creates a
> +        # repo without any refs, but the objects are copied.
> +        to_path = os.path.join(self.root, 'to/')
> +        store.init_repo(to_path, clone_from=self.orig_path, clone_refs=False)
> +        to = pygit2.Repository(to_path)
> +        self.assertIsNot(None, to[self.master_oid])
> +        self.assertEqual([], to.listall_references())
> +
> +        # Internally, the packs are hardlinked into a subordinate
> +        # alternate repo, so minimal space is used by the clone.
> +        self.assertTrue(
> +            os.path.exists(
> +                os.path.join(to_path, 'turnip-subordinate')))
> +        self.assertAllLinkCounts(2, self.orig_objs)
> +
> +        # No refs exist, but receive-pack advertises the clone_from's
> +        # refs as extra haves.
> +        self.assertAdvertisedRefs(
> +            [('.have', self.master_oid.hex)], ['refs/'], to_path)
> 
> === modified file 'turnip/api/views.py'
> --- turnip/api/views.py	2015-04-16 21:47:08 +0000
> +++ turnip/api/views.py	2015-04-25 18:16:55 +0000
> @@ -55,6 +55,7 @@
>          """Initialise a new git repository, or clone from an existing repo."""
>          repo_path = extract_json_data(self.request).get('repo_path')
>          clone_path = extract_json_data(self.request).get('clone_from')
> +        clone_refs = extract_json_data(self.request).get('clone_refs', 'false')
>  
>          if not repo_path:
>              self.request.errors.add('body', 'repo_path',
> @@ -65,13 +66,21 @@
>              self.request.errors.add('body', 'name', 'invalid path.')
>              raise exc.HTTPNotFound()
>  
> +        if clone_refs.lower() not in ('true', 'false'):
> +            self.request.errors.add(
> +                'body', 'clone_refs', 'clone_refs must be true or false.')
> +            return

What's wrong with JSON booleans?

> +
> +        clone_refs = clone_refs == 'true'
> +
>          if clone_path:
>              repo_clone = os.path.join(self.repo_store, clone_path)
>          else:
>              repo_clone = None
>  
>          try:
> -            new_repo_path = store.init_repo(repo, repo_clone)
> +            new_repo_path = store.init_repo(
> +                repo, clone_from=repo_clone, clone_refs=clone_refs)
>              repo_name = os.path.basename(os.path.normpath(new_repo_path))
>              return {'repo_url': '/'.join([self.request.url, repo_name])}
>          except GitError:
> 


-- 
https://code.launchpad.net/~wgrant/turnip/subordinate-alternate/+merge/257465
Your team Launchpad code reviewers is subscribed to branch lp:turnip.


References