← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] ~pappacena/turnip:async-repo-creation into turnip:master

 

Thiago F. Pappacena has proposed merging ~pappacena/turnip:async-repo-creation into turnip:master.

Commit message:
Option on POST /repo API call to asynchronously create a repository

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~pappacena/turnip/+git/turnip/+merge/386461

Apart from creating the new POST /repo option to run the repository creation asynchronously, this MP is also adjusting GET /repo/<repo-path> to  return a new key indicating if the repository finished its creation process.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of ~pappacena/turnip:async-repo-creation into turnip:master.
diff --git a/turnip/api/store.py b/turnip/api/store.py
index 5723048..53b75b5 100644
--- a/turnip/api/store.py
+++ b/turnip/api/store.py
@@ -27,6 +27,7 @@ from pygit2 import (
     Repository,
     )
 
+from turnip.enums import BaseEnum
 from turnip.pack.helpers import ensure_config
 
 
@@ -38,6 +39,16 @@ REF_TYPE_NAME = {
     }
 
 
+class RepositoryStatusEnum(BaseEnum):
+    """Possible status of a local git repository."""
+    CREATING = 'creating'
+    AVAILABLE = 'available'
+
+
+# Where to store repository status information inside a repository directory.
+REPOSITORY_STATUS_FILE_NAME = '.turnip-status'
+
+
 def format_ref(ref, git_object):
     """Return a formatted object dict from a ref."""
     return {
@@ -209,6 +220,7 @@ def init_repo(repo_path, clone_from=None, clone_refs=False,
     if os.path.exists(repo_path):
         raise AlreadyExistsError(repo_path)
     init_repository(repo_path, is_bare)
+    set_repository_status(repo_path, RepositoryStatusEnum.CREATING)
 
     if clone_from:
         # The clone_from's objects and refs are in fact cloned into a
@@ -240,6 +252,7 @@ def init_repo(repo_path, clone_from=None, clone_refs=False,
         write_packed_refs(repo_path, packable_refs)
 
     ensure_config(repo_path)  # set repository configuration defaults
+    set_repository_status(repo_path, RepositoryStatusEnum.AVAILABLE)
 
 
 @contextmanager
@@ -287,6 +300,32 @@ def get_default_branch(repo_path):
     return repo.references['HEAD'].target
 
 
+def set_repository_status(repo_path, status):
+    if status not in RepositoryStatusEnum:
+        raise ValueError("%s is not a valid repository status" % status)
+    with open(os.path.join(repo_path, REPOSITORY_STATUS_FILE_NAME), 'w') as fd:
+        fd.write(status)
+        fd.close()
+
+
+def is_repository_available(repo_path):
+    """Checks if the repository is available (that is, if it is not in the
+    middle of a clone or init operation)."""
+    if not os.path.exists(repo_path):
+        return False
+
+    status_file_path = os.path.join(repo_path, REPOSITORY_STATUS_FILE_NAME)
+
+    # Backward compatibility: if the status file is not created,
+    # the repository was created before this convention existed. So,
+    # it should be already available by now.
+    if not os.path.exists(status_file_path):
+        return True
+
+    with open(status_file_path) as fd:
+        return fd.read() == RepositoryStatusEnum.AVAILABLE
+
+
 def set_default_branch(repo_path, target):
     repo = Repository(repo_path)
     repo.set_head(target)
diff --git a/turnip/api/tests/test_api.py b/turnip/api/tests/test_api.py
index 396369b..4f86409 100644
--- a/turnip/api/tests/test_api.py
+++ b/turnip/api/tests/test_api.py
@@ -6,9 +6,11 @@
 from __future__ import print_function
 
 import base64
+from datetime import timedelta, datetime
 import os
 import subprocess
 from textwrap import dedent
+import time
 import unittest
 import uuid
 
@@ -51,6 +53,23 @@ class ApiTestCase(TestCase):
             repo.references[expected].peel().oid,
             repo.references[observed].peel().oid)
 
+    def assertRepositoryCreatedAsynchronously(self, repo_path, timeout_secs=5):
+        """Waits up to `timeout_secs` for a repository to be available."""
+        timeout = timedelta(seconds=timeout_secs)
+        start = datetime.now()
+        while datetime.now() <= (start + timeout):
+            try:
+                resp = self.app.get('/repo/{}'.format(repo_path),
+                                    expect_errors=True)
+                if resp.status_code == 200 and resp.json['is_available']:
+                    return
+            except:
+                pass
+            time.sleep(0.1)
+        self.fail(
+            "Repository %s was not created after %s secs"
+            % (repo_path, timeout_secs))
+
     def get_ref(self, ref):
         resp = self.app.get('/repo/{}/{}'.format(self.repo_path, ref))
         return resp.json
@@ -90,6 +109,27 @@ class ApiTestCase(TestCase):
         self.assertEqual(200, resp.status_code)
         self.assertIn(new_repo_path, resp.json['repo_url'])
 
+    def test_repo_async_init_with_clone(self):
+        """Repo can be initialised with optional clone asynchronously."""
+        factory = RepoFactory(self.repo_store, num_commits=2)
+        factory.build()
+        new_repo_path = uuid.uuid1().hex
+        resp = self.app.post_json('/repo?async=1', {
+            'repo_path': new_repo_path,
+            'clone_from': self.repo_path,
+            'clone_refs': True})
+
+        self.assertRepositoryCreatedAsynchronously(new_repo_path)
+
+        repo1_revlist = get_revlist(factory.repo)
+        clone_from = resp.json['repo_url'].split('/')[-1]
+        repo2 = open_repo(os.path.join(self.repo_root, clone_from))
+        repo2_revlist = get_revlist(repo2)
+
+        self.assertEqual(repo1_revlist, repo2_revlist)
+        self.assertEqual(200, resp.status_code)
+        self.assertIn(new_repo_path, resp.json['repo_url'])
+
     def test_repo_get(self):
         """The GET method on a repository returns its properties."""
         factory = RepoFactory(self.repo_store, num_branches=2, num_commits=1)
@@ -98,7 +138,9 @@ class ApiTestCase(TestCase):
 
         resp = self.app.get('/repo/{}'.format(self.repo_path))
         self.assertEqual(200, resp.status_code)
-        self.assertEqual({'default_branch': 'refs/heads/branch-0'}, resp.json)
+        self.assertEqual({
+            'default_branch': 'refs/heads/branch-0',
+            'is_available': True}, resp.json)
 
     def test_repo_get_default_branch_missing(self):
         """default_branch is returned even if that branch has been deleted."""
@@ -109,7 +151,9 @@ class ApiTestCase(TestCase):
 
         resp = self.app.get('/repo/{}'.format(self.repo_path))
         self.assertEqual(200, resp.status_code)
-        self.assertEqual({'default_branch': 'refs/heads/branch-0'}, resp.json)
+        self.assertEqual({
+            'default_branch': 'refs/heads/branch-0',
+            'is_available': True}, resp.json)
 
     def test_repo_patch_default_branch(self):
         """A repository's default branch ("HEAD") can be changed."""
diff --git a/turnip/api/tests/test_store.py b/turnip/api/tests/test_store.py
index 9276303..3ffebcf 100644
--- a/turnip/api/tests/test_store.py
+++ b/turnip/api/tests/test_store.py
@@ -22,6 +22,7 @@ from testtools import TestCase
 import yaml
 
 from turnip.api import store
+from turnip.api.store import RepositoryStatusEnum
 from turnip.api.tests.test_helpers import (
     open_repo,
     RepoFactory,
@@ -125,6 +126,16 @@ class InitTestCase(TestCase):
         self.assertEqual(str(yaml_config['pack.depth']),
                          repo_config['pack.depth'])
 
+    def test_is_repository_available(self):
+        repo_path = os.path.join(self.repo_store, 'repo/')
+
+        store.init_repository(repo_path, True)
+        store.set_repository_status(repo_path, RepositoryStatusEnum.CREATING)
+        self.assertFalse(store.is_repository_available(repo_path))
+
+        store.set_repository_status(repo_path, RepositoryStatusEnum.AVAILABLE)
+        self.assertTrue(store.is_repository_available(repo_path))
+
     def test_open_ephemeral_repo(self):
         """Opening a repo where a repo name contains ':' should return
         a new ephemeral repo.
@@ -224,7 +235,10 @@ class InitTestCase(TestCase):
         # repo with the same set of refs. And the objects are copied
         # too.
         to_path = os.path.join(self.repo_store, 'to/')
+        self.assertFalse(store.is_repository_available(to_path))
         store.init_repo(to_path, clone_from=self.orig_path, clone_refs=True)
+        self.assertTrue(store.is_repository_available(to_path))
+
         to = pygit2.Repository(to_path)
         self.assertIsNot(None, to[self.master_oid])
         self.assertEqual(
@@ -260,7 +274,10 @@ class InitTestCase(TestCase):
         # init_repo with clone_from=orig and clone_refs=False creates a
         # repo without any refs, but the objects are copied.
         to_path = os.path.join(self.repo_store, 'to/')
+        self.assertFalse(store.is_repository_available(to_path))
         store.init_repo(to_path, clone_from=self.orig_path, clone_refs=False)
+        self.assertTrue(store.is_repository_available(to_path))
+
         to = pygit2.Repository(to_path)
         self.assertIsNot(None, to[self.master_oid])
         self.assertEqual([], to.listall_references())
@@ -286,7 +303,10 @@ class InitTestCase(TestCase):
 
         self.assertAllLinkCounts(1, self.orig_objs)
         to_path = os.path.join(self.repo_store, 'to/')
+        self.assertFalse(store.is_repository_available(to_path))
         store.init_repo(to_path, clone_from=self.orig_path)
+        self.assertTrue(store.is_repository_available(to_path))
+
         self.assertAllLinkCounts(2, self.orig_objs)
         to = pygit2.Repository(to_path)
         to_blob = to.create_blob(b'to')
diff --git a/turnip/api/views.py b/turnip/api/views.py
index 2e8a082..4aa59cb 100644
--- a/turnip/api/views.py
+++ b/turnip/api/views.py
@@ -3,6 +3,7 @@
 
 import os
 import re
+from multiprocessing import Pool
 from subprocess import CalledProcessError
 
 from cornice.resource import resource
@@ -13,6 +14,16 @@ import pyramid.httpexceptions as exc
 from turnip.config import config
 from turnip.api import store
 
+# Process pool for async execution (like repository creation).
+processing_pool = None
+
+
+def run_in_background(f, *args, **kwargs):
+    global processing_pool
+    if processing_pool is None:
+        processing_pool = Pool(maxtasksperchild=10)
+    return processing_pool.apply_async(f, args=args, kwds=kwargs)
+
 
 def is_valid_path(repo_store, repo_path):
     """Ensure path in within repo root and has not been subverted."""
@@ -53,9 +64,11 @@ class RepoAPI(BaseAPI):
 
     def collection_post(self):
         """Initialise a new git repository, or clone from an existing repo."""
-        repo_path = extract_json_data(self.request).get('repo_path')
-        clone_path = extract_json_data(self.request).get('clone_from')
-        clone_refs = extract_json_data(self.request).get('clone_refs', False)
+        json_data = extract_json_data(self.request)
+        repo_path = json_data.get('repo_path')
+        clone_path = json_data.get('clone_from')
+        clone_refs = json_data.get('clone_refs', False)
+        async_run = self.request.params.get('async')
 
         if not repo_path:
             self.request.errors.add('body', 'repo_path',
@@ -72,7 +85,12 @@ class RepoAPI(BaseAPI):
             repo_clone = None
 
         try:
-            store.init_repo(repo, clone_from=repo_clone, clone_refs=clone_refs)
+            kwargs = dict(
+                repo_path=repo, clone_from=repo_clone, clone_refs=clone_refs)
+            if async_run:
+                run_in_background(store.init_repo, **kwargs)
+            else:
+                store.init_repo(**kwargs)
             repo_name = os.path.basename(os.path.normpath(repo))
             return {'repo_url': '/'.join([self.request.url, repo_name])}
         except GitError:
@@ -88,6 +106,7 @@ class RepoAPI(BaseAPI):
             raise exc.HTTPNotFound()
         return {
             'default_branch': store.get_default_branch(repo_path),
+            'is_available': store.is_repository_available(repo_path)
             }
 
     def _patch_default_branch(self, repo_path, value):
diff --git a/turnip/enums.py b/turnip/enums.py
new file mode 100644
index 0000000..d84ba80
--- /dev/null
+++ b/turnip/enums.py
@@ -0,0 +1,25 @@
+# Copyright 2020 Canonical Ltd.  This software is licensed under the
+# GNU Affero General Public License version 3 (see the file LICENSE).
+
+from __future__ import (
+    absolute_import,
+    print_function,
+    unicode_literals,
+    )
+
+from six import with_metaclass
+
+
+__all__ = [
+    "BaseEnum",
+    ]
+
+
+class MetaEnum(type):
+    def __contains__(cls, x):
+        values = [getattr(cls, i) for i in dir(cls) if not i.startswith("_")]
+        return x in values
+
+
+class BaseEnum(with_metaclass(MetaEnum)):
+    pass