← Back to team overview

launchpad-reviewers team mailing list archive

Re: [Merge] lp:~blr/turnip/repack-api into lp:turnip

 

Review: Approve code

Repack can be a lengthy operation, so running it synchronously within an HTTP request is less than ideal, but it'll do until we have a job system of some kind. I'd like to see how it handles long requests, though -- if the connection dies, does the repack continue?

Diff comments:

> === modified file '.bzrignore'
> --- .bzrignore	2015-02-17 21:32:29 +0000
> +++ .bzrignore	2015-05-21 04:37:40 +0000
> @@ -1,13 +1,13 @@
>  bin
> +build
>  develop-eggs
> +dist
>  download-cache
>  eggs
> +*.egg*
> +*.egg-info
>  .installed.cfg
> +*.log
>  parts
> -*.egg-info
>  tags
> -TAGS
> -build
> -*.egg
> -dist
> -*.log
> \ No newline at end of file
> +TAGS
> \ No newline at end of file
> 
> === added file 'git.config.yaml'
> --- git.config.yaml	1970-01-01 00:00:00 +0000
> +++ git.config.yaml	2015-05-21 04:37:40 +0000
> @@ -0,0 +1,8 @@
> +# Per-repository configuration
> +# See http://git-scm.com/docs/git-config.html for more info
> +
> +core.logallrefupdates: True
> +pack.depth: 100
> +pack.window: 100
> +pack.windowMemory: 2g
> +repack.writeBitmaps: True
> 
> === modified file 'turnip/api/store.py'
> --- turnip/api/store.py	2015-05-19 12:55:00 +0000
> +++ turnip/api/store.py	2015-05-21 04:37:40 +0000
> @@ -7,6 +7,7 @@
>  import itertools
>  import os
>  import shutil
> +import subprocess
>  import uuid
>  
>  from pygit2 import (
> @@ -22,6 +23,8 @@
>      Repository,
>      )
>  
> +from turnip.pack.helpers import ensure_config
> +
>  
>  REF_TYPE_NAME = {
>      GIT_OBJ_COMMIT: 'commit',
> @@ -127,6 +130,7 @@
>  
>      if alternate_repo_paths:
>          write_alternates(repo_path, alternate_repo_paths)
> +    ensure_config(repo_path)  # set repository configuration defaults
>      return repo_path
>  
>  
> @@ -172,6 +176,29 @@
>      shutil.rmtree(repo_path)
>  
>  
> +def repack(repo_path, ignore_alternates=False, single=False,
> +           prune=False, no_reuse_delta=False):
> +    """Repack a repository with git-repack.
> +
> +    :param ignore_alternates: Only repack local refs (git repack --local).
> +    :param single: Create a single packfile (git repack -a).
> +    :param prune: Remove redundant packs. (git repack -d)
> +    :param no_reuse_delta: Force delta recalculation.
> +    """
> +    repack_args = ['git', 'repack']
> +    if ignore_alternates:
> +        repack_args.append('-l')
> +    if no_reuse_delta:
> +        repack_args.append('-f')
> +    if prune:
> +        repack_args.append('-d')
> +    if single:
> +        repack_args.append('-a')
> +    return subprocess.check_call(
> +        repack_args, cwd=repo_path,
> +        stderr=subprocess.PIPE, stdout=subprocess.PIPE)

You need to ensure_config before you perform a write operation on a repo.

We also probably always want -q, as the output isn't going to go anywhere useful.

> +
> +
>  def get_refs(repo_store, repo_name):
>      """Return all refs for a git repository."""
>      with open_repo(repo_store, repo_name) as repo:
> 
> === modified file 'turnip/api/tests/test_api.py'
> --- turnip/api/tests/test_api.py	2015-05-19 12:55:00 +0000
> +++ turnip/api/tests/test_api.py	2015-05-21 04:37:40 +0000
> @@ -2,7 +2,9 @@
>  # -*- coding: utf-8 -*-
>  from __future__ import print_function
>  
> +import fnmatch
>  import os
> +import subprocess
>  from textwrap import dedent
>  import unittest
>  import uuid
> @@ -510,6 +512,20 @@
>          self.assertEqual(5, len(resp.json))
>          self.assertNotIn(excluded_commit, resp.json)
>  
> +    def test_repo_repack_verify_pack(self):
> +        """Ensure commit exists in pack."""
> +        factory = RepoFactory(self.repo_store)
> +        oid = factory.add_commit('foo', 'foobar.txt')
> +        factory.set_head(oid)
> +        resp = self.app.post_json('/repo/{}/repack'.format(self.repo_path),
> +                                  {'prune': True, 'single': True})
> +        for root, dirnames, filenames in os.walk(factory.repo_path):
> +            for filename in fnmatch.filter(filenames, '*.pack'):
> +                pack = os.path.join(root, filename)
> +        out = subprocess.check_output(['git', 'verify-pack', pack, '-v'])
> +        self.assertEqual(200, resp.status_code)
> +        self.assertIn(oid.hex, out)

Can you perhaps create two packs with different commits and then test that we end up with a single pack that contains both?

> +
>  
>  if __name__ == '__main__':
>      unittest.main()
> 
> === modified file 'turnip/api/tests/test_store.py'
> --- turnip/api/tests/test_store.py	2015-04-29 02:43:28 +0000
> +++ turnip/api/tests/test_store.py	2015-05-21 04:37:40 +0000
> @@ -16,6 +16,7 @@
>      )
>  import pygit2
>  from testtools import TestCase
> +import yaml
>  
>  from turnip.api import store
>  from turnip.api.tests.test_helpers import (
> @@ -74,6 +75,17 @@
>          r = pygit2.Repository(path)
>          self.assertEqual([], r.listall_references())
>  
> +    def test_repo_config(self):
> +        """Assert repository is initialised with correct config defaults."""
> +        repo_path = store.init_repo(self.repo_path)
> +        repo_config = pygit2.Repository(repo_path).config
> +        yaml_config = yaml.load(open('git.config.yaml'))
> +
> +        self.assertEqual(bool(yaml_config['core.logallrefupdates']),
> +                         bool(repo_config['core.logallrefupdates']))
> +        self.assertEqual(str(yaml_config['pack.depth']),
> +                         repo_config['pack.depth'])
> +
>      def test_open_ephemeral_repo(self):
>          """Opening a repo where a repo name contains ':' should return
>          a new ephemeral repo.
> 
> === modified file 'turnip/api/views.py'
> --- turnip/api/views.py	2015-05-19 12:55:00 +0000
> +++ turnip/api/views.py	2015-05-21 04:37:40 +0000
> @@ -2,6 +2,7 @@
>  
>  import os
>  import re
> +from subprocess import CalledProcessError
>  
>  from cornice.resource import resource
>  from cornice.util import extract_json_data
> @@ -123,6 +124,33 @@
>              return exc.HTTPNotFound()  # 404
>  
>  
> +@resource(path='/repo/{name}/repack')
> +class RepackAPI(BaseAPI):
> +    """Provides HTTP API for repository repacking."""
> +
> +    def __init__(self, request):
> +        super(RepackAPI, self).__init__()
> +        self.request = request
> +
> +    @validate_path
> +    def post(self, repo_store, repo_name):
> +        repo_path = os.path.join(repo_store, repo_name)
> +
> +        ignore_alternates = extract_json_data(self.request).get(
> +            'ignore_alternates')
> +        no_reuse_delta = extract_json_data(self.request).get('no_reuse_delta')
> +        prune = extract_json_data(self.request).get('prune')
> +        single = extract_json_data(self.request).get('single')

Can you just extract_json_data once?

It'd also be good to have an option to override window/depth.

> +
> +        try:
> +            store.repack(repo_path, single=single, prune=prune,
> +                         no_reuse_delta=no_reuse_delta,
> +                         ignore_alternates=ignore_alternates)
> +        except (CalledProcessError):
> +            return exc.HTTPInternalServerError()
> +        return
> +
> +
>  @resource(collection_path='/repo/{name}/refs',
>            path='/repo/{name}/refs/{ref:.*}')
>  class RefAPI(BaseAPI):
> 
> === modified file 'turnip/pack/helpers.py'
> --- turnip/pack/helpers.py	2015-05-06 11:46:43 +0000
> +++ turnip/pack/helpers.py	2015-05-21 04:37:40 +0000
> @@ -14,6 +14,7 @@
>      )
>  
>  from pygit2 import Repository
> +import yaml
>  
>  import turnip.pack.hooks
>  
> @@ -104,10 +105,11 @@
>      pygit2.Config handles locking itself, so we don't need to think too hard
>      about concurrency.
>      """
> -
> +    config_file = open('git.config.yaml')
> +    git_config_defaults = yaml.load(config_file)
>      config = Repository(repo_root).config
> -    config['core.logallrefupdates'] = True
> -    config['repack.writeBitmaps'] = True
> +    for key, val in git_config_defaults.iteritems():
> +        config[key] = val
>  
>  
>  def ensure_hooks(repo_root):
> 


-- 
https://code.launchpad.net/~blr/turnip/repack-api/+merge/257312
Your team Launchpad code reviewers is subscribed to branch lp:turnip.


References