Re: [Merge] lp:~blr/turnip/repack-api into lp:turnip
Review: Approve code
Repack can be a lengthy operation, so running it synchronously within an HTTP request is less than ideal, but it'll do until we have a job system of some kind. I'd like to see how it handles long requests, though -- if the connection dies, does the repack continue?
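(For what it's worth, since repack() shells out via subprocess.check_call, the git child process isn't tied to the HTTP socket, so I'd expect it to run to completion even if the client goes away -- but worth confirming. If we ever want the request to return immediately before a proper job system exists, a fire-and-forget variant would look roughly like the sketch below; repack_async is purely a hypothetical name, not something in this branch, and it trades away error reporting.)

    import subprocess

    def repack_async(repo_path):
        # Popen returns immediately; the repack keeps running in the
        # background regardless of the HTTP connection, but failures
        # go unreported to the caller.
        return subprocess.Popen(
            ['git', 'repack', '-a', '-d', '-q'], cwd=repo_path)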
Diff comments:
> === modified file '.bzrignore'
> --- .bzrignore 2015-02-17 21:32:29 +0000
> +++ .bzrignore 2015-05-21 04:37:40 +0000
> @@ -1,13 +1,13 @@
> bin
> +build
> develop-eggs
> +dist
> download-cache
> eggs
> +*.egg*
> +*.egg-info
> .installed.cfg
> +*.log
> parts
> -*.egg-info
> tags
> -TAGS
> -build
> -*.egg
> -dist
> -*.log
> \ No newline at end of file
> +TAGS
> \ No newline at end of file
>
> === added file 'git.config.yaml'
> --- git.config.yaml 1970-01-01 00:00:00 +0000
> +++ git.config.yaml 2015-05-21 04:37:40 +0000
> @@ -0,0 +1,8 @@
> +# Per-repository configuration
> +# See http://git-scm.com/docs/git-config.html for more info
> +
> +core.logallrefupdates: True
> +pack.depth: 100
> +pack.window: 100
> +pack.windowMemory: 2g
> +repack.writeBitmaps: True
>
> === modified file 'turnip/api/store.py'
> --- turnip/api/store.py 2015-05-19 12:55:00 +0000
> +++ turnip/api/store.py 2015-05-21 04:37:40 +0000
> @@ -7,6 +7,7 @@
> import itertools
> import os
> import shutil
> +import subprocess
> import uuid
>
> from pygit2 import (
> @@ -22,6 +23,8 @@
> Repository,
> )
>
> +from turnip.pack.helpers import ensure_config
> +
>
> REF_TYPE_NAME = {
> GIT_OBJ_COMMIT: 'commit',
> @@ -127,6 +130,7 @@
>
> if alternate_repo_paths:
> write_alternates(repo_path, alternate_repo_paths)
> + ensure_config(repo_path) # set repository configuration defaults
> return repo_path
>
>
> @@ -172,6 +176,29 @@
> shutil.rmtree(repo_path)
>
>
> +def repack(repo_path, ignore_alternates=False, single=False,
> + prune=False, no_reuse_delta=False):
> + """Repack a repository with git-repack.
> +
> + :param ignore_alternates: Only repack local refs (git repack --local).
> + :param single: Create a single packfile (git repack -a).
> + :param prune: Remove redundant packs. (git repack -d)
> + :param no_reuse_delta: Force delta recalculation.
> + """
> + repack_args = ['git', 'repack']
> + if ignore_alternates:
> + repack_args.append('-l')
> + if no_reuse_delta:
> + repack_args.append('-f')
> + if prune:
> + repack_args.append('-d')
> + if single:
> + repack_args.append('-a')
> + return subprocess.check_call(
> + repack_args, cwd=repo_path,
> + stderr=subprocess.PIPE, stdout=subprocess.PIPE)
You need to call ensure_config before you perform a write operation on a repo.
We also probably always want -q, as the output isn't going to go anywhere useful.
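Something like this is what I have in mind (untested sketch, reusing ensure_config from turnip.pack.helpers as imported above):

    def repack(repo_path, ignore_alternates=False, single=False,
               prune=False, no_reuse_delta=False):
        """Repack a repository with git-repack."""
        # Apply the git.config.yaml defaults before writing to the repo.
        ensure_config(repo_path)
        # Always quiet: the output has nowhere useful to go.
        repack_args = ['git', 'repack', '-q']
        if ignore_alternates:
            repack_args.append('-l')
        if no_reuse_delta:
            repack_args.append('-f')
        if prune:
            repack_args.append('-d')
        if single:
            repack_args.append('-a')
        return subprocess.check_call(
            repack_args, cwd=repo_path,
            stderr=subprocess.PIPE, stdout=subprocess.PIPE)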
> +
> +
> def get_refs(repo_store, repo_name):
> """Return all refs for a git repository."""
> with open_repo(repo_store, repo_name) as repo:
>
> === modified file 'turnip/api/tests/test_api.py'
> --- turnip/api/tests/test_api.py 2015-05-19 12:55:00 +0000
> +++ turnip/api/tests/test_api.py 2015-05-21 04:37:40 +0000
> @@ -2,7 +2,9 @@
> # -*- coding: utf-8 -*-
> from __future__ import print_function
>
> +import fnmatch
> import os
> +import subprocess
> from textwrap import dedent
> import unittest
> import uuid
> @@ -510,6 +512,20 @@
> self.assertEqual(5, len(resp.json))
> self.assertNotIn(excluded_commit, resp.json)
>
> + def test_repo_repack_verify_pack(self):
> + """Ensure commit exists in pack."""
> + factory = RepoFactory(self.repo_store)
> + oid = factory.add_commit('foo', 'foobar.txt')
> + factory.set_head(oid)
> + resp = self.app.post_json('/repo/{}/repack'.format(self.repo_path),
> + {'prune': True, 'single': True})
> + for root, dirnames, filenames in os.walk(factory.repo_path):
> + for filename in fnmatch.filter(filenames, '*.pack'):
> + pack = os.path.join(root, filename)
> + out = subprocess.check_output(['git', 'verify-pack', pack, '-v'])
> + self.assertEqual(200, resp.status_code)
> + self.assertIn(oid.hex, out)
Can you perhaps create two packs with different commits and then test that we end up with a single pack that contains both?
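Rough sketch of what I mean (hypothetical test name; I'm assuming RepoFactory.add_commit chains onto the previous commit so both commits stay reachable from HEAD -- adjust if it needs an explicit parent):

    def test_repo_repack_consolidates_packs(self):
        """Repacking with single=True merges two packs into one."""
        factory = RepoFactory(self.repo_store)
        oid1 = factory.add_commit('foo', 'foobar.txt')
        factory.set_head(oid1)
        # Pack #1: pack the first commit's loose objects.
        subprocess.check_call(['git', 'repack', '-q'], cwd=factory.repo_path)
        oid2 = factory.add_commit('bar', 'foobar.txt')
        factory.set_head(oid2)
        # Pack #2: pack the second commit's loose objects.
        subprocess.check_call(['git', 'repack', '-q'], cwd=factory.repo_path)
        resp = self.app.post_json('/repo/{}/repack'.format(self.repo_path),
                                  {'prune': True, 'single': True})
        self.assertEqual(200, resp.status_code)
        packs = []
        for root, dirnames, filenames in os.walk(factory.repo_path):
            packs.extend(os.path.join(root, name)
                         for name in fnmatch.filter(filenames, '*.pack'))
        self.assertEqual(1, len(packs))
        out = subprocess.check_output(['git', 'verify-pack', '-v', packs[0]])
        self.assertIn(oid1.hex, out)
        self.assertIn(oid2.hex, out)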
> +
>
> if __name__ == '__main__':
> unittest.main()
>
> === modified file 'turnip/api/tests/test_store.py'
> --- turnip/api/tests/test_store.py 2015-04-29 02:43:28 +0000
> +++ turnip/api/tests/test_store.py 2015-05-21 04:37:40 +0000
> @@ -16,6 +16,7 @@
> )
> import pygit2
> from testtools import TestCase
> +import yaml
>
> from turnip.api import store
> from turnip.api.tests.test_helpers import (
> @@ -74,6 +75,17 @@
> r = pygit2.Repository(path)
> self.assertEqual([], r.listall_references())
>
> + def test_repo_config(self):
> + """Assert repository is initialised with correct config defaults."""
> + repo_path = store.init_repo(self.repo_path)
> + repo_config = pygit2.Repository(repo_path).config
> + yaml_config = yaml.load(open('git.config.yaml'))
> +
> + self.assertEqual(bool(yaml_config['core.logallrefupdates']),
> + bool(repo_config['core.logallrefupdates']))
> + self.assertEqual(str(yaml_config['pack.depth']),
> + repo_config['pack.depth'])
> +
> def test_open_ephemeral_repo(self):
> """Opening a repo where a repo name contains ':' should return
> a new ephemeral repo.
>
> === modified file 'turnip/api/views.py'
> --- turnip/api/views.py 2015-05-19 12:55:00 +0000
> +++ turnip/api/views.py 2015-05-21 04:37:40 +0000
> @@ -2,6 +2,7 @@
>
> import os
> import re
> +from subprocess import CalledProcessError
>
> from cornice.resource import resource
> from cornice.util import extract_json_data
> @@ -123,6 +124,33 @@
> return exc.HTTPNotFound() # 404
>
>
> +@resource(path='/repo/{name}/repack')
> +class RepackAPI(BaseAPI):
> + """Provides HTTP API for repository repacking."""
> +
> + def __init__(self, request):
> + super(RepackAPI, self).__init__()
> + self.request = request
> +
> + @validate_path
> + def post(self, repo_store, repo_name):
> + repo_path = os.path.join(repo_store, repo_name)
> +
> + ignore_alternates = extract_json_data(self.request).get(
> + 'ignore_alternates')
> + no_reuse_delta = extract_json_data(self.request).get('no_reuse_delta')
> + prune = extract_json_data(self.request).get('prune')
> + single = extract_json_data(self.request).get('single')
Can you just extract_json_data once?
It'd also be good to have an option to override window/depth.
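i.e. roughly this (sketch only; the window/depth keywords are new and would also need plumbing through store.repack as git repack's --window=<n>/--depth=<n> options):

    data = extract_json_data(self.request)
    try:
        store.repack(
            repo_path,
            ignore_alternates=data.get('ignore_alternates'),
            single=data.get('single'),
            prune=data.get('prune'),
            no_reuse_delta=data.get('no_reuse_delta'),
            # hypothetical overrides, passed through to git repack
            window=data.get('window'),
            depth=data.get('depth'))
    except CalledProcessError:
        return exc.HTTPInternalServerError()

with store.repack appending the corresponding flags when set:

    if window is not None:
        repack_args.append('--window=%s' % window)
    if depth is not None:
        repack_args.append('--depth=%s' % depth)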
> +
> + try:
> + store.repack(repo_path, single=single, prune=prune,
> + no_reuse_delta=no_reuse_delta,
> + ignore_alternates=ignore_alternates)
> + except (CalledProcessError):
> + return exc.HTTPInternalServerError()
> + return
> +
> +
> @resource(collection_path='/repo/{name}/refs',
> path='/repo/{name}/refs/{ref:.*}')
> class RefAPI(BaseAPI):
>
> === modified file 'turnip/pack/helpers.py'
> --- turnip/pack/helpers.py 2015-05-06 11:46:43 +0000
> +++ turnip/pack/helpers.py 2015-05-21 04:37:40 +0000
> @@ -14,6 +14,7 @@
> )
>
> from pygit2 import Repository
> +import yaml
>
> import turnip.pack.hooks
>
> @@ -104,10 +105,11 @@
> pygit2.Config handles locking itself, so we don't need to think too hard
> about concurrency.
> """
> -
> + config_file = open('git.config.yaml')
> + git_config_defaults = yaml.load(config_file)
> config = Repository(repo_root).config
> - config['core.logallrefupdates'] = True
> - config['repack.writeBitmaps'] = True
> + for key, val in git_config_defaults.iteritems():
> + config[key] = val
>
>
> def ensure_hooks(repo_root):
>
--
https://code.launchpad.net/~blr/turnip/repack-api/+merge/257312
Your team Launchpad code reviewers is subscribed to branch lp:turnip.