← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~cjwatson/launchpad/codeimport-git-worker-sync-head into lp:launchpad

 

Colin Watson has proposed merging lp:~cjwatson/launchpad/codeimport-git-worker-sync-head into lp:launchpad with lp:~cjwatson/launchpad/codeimport-git-dulwich-web as a prerequisite.

Commit message:
Fix the Git-to-Git import worker to synchronise HEAD if possible.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  Bug #1469459 in Launchpad itself: "import external code into a LP git repo (natively)"
  https://bugs.launchpad.net/launchpad/+bug/1469459

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/codeimport-git-worker-sync-head/+merge/309988

We can only synchronise HEAD if the server advertises its target, but git 1.8.4.3 or newer does so, which covers a lot of ground (notably, GitHub has a more than sufficient version).

This needs a couple of other bits:

 * https://code.launchpad.net/~cjwatson/turnip/+git/turnip/+merge/309702
 * git 1:1.7.9.5-1ubuntu0.3ppa2 from https://launchpad.net/~cjwatson/+archive/ubuntu/launchpad/+packages to make upload-pack advertise the target of HEAD (only needed in tests, so this just needs to be in the Launchpad PPA).
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/codeimport-git-worker-sync-head into lp:launchpad.
=== modified file 'lib/lp/codehosting/codeimport/tests/servers.py'
--- lib/lp/codehosting/codeimport/tests/servers.py	2016-11-03 17:40:01 +0000
+++ lib/lp/codehosting/codeimport/tests/servers.py	2016-11-03 17:40:01 +0000
@@ -15,6 +15,7 @@
 from cStringIO import StringIO
 import errno
 import os
+import re
 import shutil
 import signal
 import stat
@@ -42,9 +43,13 @@
 import dulwich.index
 from dulwich.objects import Blob
 from dulwich.repo import Repo as GitRepo
-from dulwich.server import Backend
+from dulwich.server import (
+    Backend,
+    Handler,
+    )
 from dulwich.web import (
     GunzipFilter,
+    handle_service_request,
     HTTPGitApplication,
     LimitedInputFilter,
     WSGIRequestHandlerLogger,
@@ -240,13 +245,55 @@
         return GitRepo(full_path)
 
 
+class TurnipSetSymbolicRefHandler(Handler):
+    """Dulwich protocol handler for setting a symbolic ref.
+
+    Transcribed from turnip.pack.git.PackBackendProtocol.
+    """
+
+    def __init__(self, backend, args, proto, http_req=None):
+        super(TurnipSetSymbolicRefHandler, self).__init__(
+            backend, proto, http_req=http_req)
+        self.repo = backend.open_repository(args[0])
+
+    def handle(self):
+        line = self.proto.read_pkt_line()
+        if line is None:
+            self.proto.write_pkt_line(b"ERR Invalid set-symbolic-ref-line\n")
+            self.proto.write_pkt_line(None)
+            return
+        name, target = line.split(b" ", 1)
+        if name != b"HEAD":
+            self.proto.write_pkt_line(
+                b'ERR Symbolic ref name must be "HEAD"\n')
+            self.proto.write_pkt_line(None)
+            return
+        if target.startswith(b"-"):
+            self.proto.write_pkt_line(
+                b'ERR Symbolic ref target may not start with "-"\n')
+            self.proto.write_pkt_line(None)
+            return
+        try:
+            self.repo.refs.set_symbolic_ref(name, target)
+        except Exception as e:
+            self.proto.write_pkt_line(b'ERR %s\n' % e)
+        else:
+            self.proto.write_pkt_line(b'ACK %s' % name)
+        self.proto.write_pkt_line(None)
+
+
 class HTTPGitServerThread(threading.Thread):
     """Thread that runs an HTTP Git server."""
 
     def __init__(self, backend, address, port=None):
         super(HTTPGitServerThread, self).__init__()
         self.setName("HTTP Git server on %s:%s" % (address, port))
-        app = GunzipFilter(LimitedInputFilter(HTTPGitApplication(backend)))
+        app = HTTPGitApplication(
+            backend,
+            handlers={'turnip-set-symbolic-ref': TurnipSetSymbolicRefHandler})
+        app.services[('POST', re.compile('/turnip-set-symbolic-ref$'))] = (
+            handle_service_request)
+        app = GunzipFilter(LimitedInputFilter(app))
         self.server = make_server(
             address, port, app, handler_class=WSGIRequestHandlerLogger,
             server_class=WSGIServerLogger)
@@ -297,7 +344,7 @@
 
     def makeRepo(self, repository_name, tree_contents):
         repository_path = os.path.join(self.repository_store, repository_name)
-        os.mkdir(repository_path)
+        os.makedirs(repository_path)
         self.createRepository(repository_path, bare=self._use_server)
         repo = GitRepo(repository_path)
         blobs = [

=== modified file 'lib/lp/codehosting/codeimport/tests/test_workermonitor.py'
--- lib/lp/codehosting/codeimport/tests/test_workermonitor.py	2016-11-03 17:40:01 +0000
+++ lib/lp/codehosting/codeimport/tests/test_workermonitor.py	2016-11-03 17:40:01 +0000
@@ -20,6 +20,7 @@
     TestCase as BzrTestCase,
     TestCaseInTempDir,
     )
+from dulwich.repo import Repo as GitRepo
 import oops_twisted
 from testtools.deferredruntest import (
     assert_fails_with,
@@ -775,6 +776,27 @@
             self.addCleanup(clean_up_default_stores_for_import, source_details)
         return job
 
+    def makeTargetGitServer(self):
+        """Set up a target Git server that can receive imports."""
+        self.target_store = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, self.target_store)
+        self.target_git_server = GitServer(self.target_store, use_server=True)
+        self.target_git_server.start_server()
+        self.addCleanup(self.target_git_server.stop_server)
+        config_name = self.getUniqueString()
+        config_fixture = self.useFixture(ConfigFixture(
+            config_name, self.layer.config_fixture.instance_name))
+        setting_lines = [
+            "[codehosting]",
+            "git_browse_root: %s" % self.target_git_server.get_url(""),
+            "",
+            "[codeimport]",
+            "macaroon_secret_key: some-secret",
+            ]
+        config_fixture.add_section("\n" + "\n".join(setting_lines))
+        self.useFixture(ConfigUseFixture(config_name))
+        self.useFixture(GitHostingFixture())
+
     def assertCodeImportResultCreated(self, code_import):
         """Assert that a `CodeImportResult` was created for `code_import`."""
         self.assertEqual(len(list(code_import.results)), 1)
@@ -796,7 +818,7 @@
                 cwd=repo_path, universal_newlines=True))
         self.assertEqual(self.foreign_commit_count, commit_count)
 
-    def assertImported(self, ignored, code_import_id):
+    def assertImported(self, code_import_id):
         """Assert that the `CodeImport` of the given id was imported."""
         # In the in-memory tests, check that resetTimeout on the
         # CodeImportWorkerMonitorProtocol was called at least once.
@@ -832,53 +854,40 @@
 
         return deferred.addBoth(save_protocol_object)
 
+    @defer.inlineCallbacks
     def test_import_cvs(self):
         # Create a CVS CodeImport and import it.
         job = self.getStartedJobForImport(self.makeCVSCodeImport())
         code_import_id = job.code_import.id
         job_id = job.id
         self.layer.txn.commit()
-        result = self.performImport(job_id)
-        return result.addCallback(self.assertImported, code_import_id)
+        yield self.performImport(job_id)
+        self.assertImported(code_import_id)
 
+    @defer.inlineCallbacks
     def test_import_subversion(self):
         # Create a Subversion CodeImport and import it.
         job = self.getStartedJobForImport(self.makeSVNCodeImport())
         code_import_id = job.code_import.id
         job_id = job.id
         self.layer.txn.commit()
-        result = self.performImport(job_id)
-        return result.addCallback(self.assertImported, code_import_id)
+        yield self.performImport(job_id)
+        self.assertImported(code_import_id)
 
+    @defer.inlineCallbacks
     def test_import_git(self):
         # Create a Git CodeImport and import it.
         job = self.getStartedJobForImport(self.makeGitCodeImport())
         code_import_id = job.code_import.id
         job_id = job.id
         self.layer.txn.commit()
-        result = self.performImport(job_id)
-        return result.addCallback(self.assertImported, code_import_id)
+        yield self.performImport(job_id)
+        self.assertImported(code_import_id)
 
+    @defer.inlineCallbacks
     def test_import_git_to_git(self):
         # Create a Git-to-Git CodeImport and import it.
-        self.target_store = tempfile.mkdtemp()
-        self.addCleanup(shutil.rmtree, self.target_store)
-        target_git_server = GitServer(self.target_store, use_server=True)
-        target_git_server.start_server()
-        self.addCleanup(target_git_server.stop_server)
-        config_name = self.getUniqueString()
-        config_fixture = self.useFixture(ConfigFixture(
-            config_name, self.layer.config_fixture.instance_name))
-        setting_lines = [
-            "[codehosting]",
-            "git_browse_root: %s" % target_git_server.get_url(""),
-            "",
-            "[codeimport]",
-            "macaroon_secret_key: some-secret",
-            ]
-        config_fixture.add_section("\n" + "\n".join(setting_lines))
-        self.useFixture(ConfigUseFixture(config_name))
-        self.useFixture(GitHostingFixture())
+        self.makeTargetGitServer()
         job = self.getStartedJobForImport(self.makeGitCodeImport(
             target_rcs_type=TargetRevisionControlSystems.GIT))
         code_import_id = job.code_import.id
@@ -887,27 +896,63 @@
         target_repo_path = os.path.join(
             self.target_store, job.code_import.target.unique_name)
         os.makedirs(target_repo_path)
-        target_git_server.createRepository(target_repo_path, bare=True)
-        result = self.performImport(job_id)
-        return result.addCallback(self.assertImported, code_import_id)
-
+        self.target_git_server.createRepository(target_repo_path, bare=True)
+        yield self.performImport(job_id)
+        self.assertImported(code_import_id)
+        target_repo = GitRepo(target_repo_path)
+        self.assertContentEqual(
+            ["heads/master"], target_repo.refs.keys(base="refs"))
+        self.assertEqual(
+            "ref: refs/heads/master", target_repo.refs.read_ref("HEAD"))
+
+    @defer.inlineCallbacks
+    def test_import_git_to_git_refs_changed(self):
+        # Create a Git-to-Git CodeImport and import it incrementally with
+        # ref and HEAD changes.
+        self.makeTargetGitServer()
+        job = self.getStartedJobForImport(self.makeGitCodeImport(
+            target_rcs_type=TargetRevisionControlSystems.GIT))
+        code_import_id = job.code_import.id
+        job_id = job.id
+        self.layer.txn.commit()
+        source_repo = GitRepo(os.path.join(self.repo_path, "source"))
+        commit = source_repo.refs["refs/heads/master"]
+        source_repo.refs["refs/heads/one"] = commit
+        source_repo.refs["refs/heads/two"] = commit
+        source_repo.refs.set_symbolic_ref("HEAD", "refs/heads/one")
+        del source_repo.refs["refs/heads/master"]
+        target_repo_path = os.path.join(
+            self.target_store, job.code_import.target.unique_name)
+        self.target_git_server.makeRepo(
+            job.code_import.target.unique_name, [("NEWS", "contents")])
+        yield self.performImport(job_id)
+        self.assertImported(code_import_id)
+        target_repo = GitRepo(target_repo_path)
+        self.assertContentEqual(
+            ["heads/one", "heads/two"], target_repo.refs.keys(base="refs"))
+        self.assertEqual(
+            "ref: refs/heads/one",
+            GitRepo(target_repo_path).refs.read_ref("HEAD"))
+
+    @defer.inlineCallbacks
     def test_import_bzr(self):
         # Create a Bazaar CodeImport and import it.
         job = self.getStartedJobForImport(self.makeBzrCodeImport())
         code_import_id = job.code_import.id
         job_id = job.id
         self.layer.txn.commit()
-        result = self.performImport(job_id)
-        return result.addCallback(self.assertImported, code_import_id)
+        yield self.performImport(job_id)
+        self.assertImported(code_import_id)
 
+    @defer.inlineCallbacks
     def test_import_bzrsvn(self):
         # Create a Subversion-via-bzr-svn CodeImport and import it.
         job = self.getStartedJobForImport(self.makeBzrSvnCodeImport())
         code_import_id = job.code_import.id
         job_id = job.id
         self.layer.txn.commit()
-        result = self.performImport(job_id)
-        return result.addCallback(self.assertImported, code_import_id)
+        yield self.performImport(job_id)
+        self.assertImported(code_import_id)
 
 
 class DeferredOnExit(protocol.ProcessProtocol):

=== modified file 'lib/lp/codehosting/codeimport/worker.py'
--- lib/lp/codehosting/codeimport/worker.py	2016-11-03 17:40:01 +0000
+++ lib/lp/codehosting/codeimport/worker.py	2016-11-03 17:40:01 +0000
@@ -20,6 +20,7 @@
     ]
 
 
+import io
 import os
 import shutil
 import subprocess
@@ -61,6 +62,11 @@
 import cscvs
 from cscvs.cmds import totla
 import CVS
+from dulwich.errors import GitProtocolError
+from dulwich.protocol import (
+    pkt_line,
+    Protocol,
+    )
 from lazr.uri import (
     InvalidURIError,
     URI,
@@ -93,6 +99,7 @@
 from lp.services.config import config
 from lp.services.macaroons.interfaces import IMacaroonIssuer
 from lp.services.propertycache import cachedproperty
+from lp.services.timeout import urlfetch
 from lp.services.utils import sanitise_urls
 
 
@@ -1000,6 +1007,62 @@
         if retcode:
             raise subprocess.CalledProcessError(retcode, cmd)
 
+    def _getHead(self, repository, remote_name):
+        """Get HEAD from a configured remote in a local repository.
+
+        The returned ref name will be adjusted in such a way that it can be
+        passed to `_setHead` (e.g. refs/remotes/origin/master ->
+        refs/heads/master).
+        """
+        # This is a bit weird, but set-head will bail out if the target
+        # doesn't exist in the correct remotes namespace.  git 2.8.0 has
+        # "git ls-remote --symref <repository> HEAD" which would involve
+        # less juggling.
+        self._runGit(
+            "fetch", "-q", ".", "refs/heads/*:refs/remotes/%s/*" % remote_name,
+            cwd=repository)
+        self._runGit(
+            "remote", "set-head", remote_name, "--auto", cwd=repository)
+        ref_prefix = "refs/remotes/%s/" % remote_name
+        target_ref = subprocess.check_output(
+            ["git", "symbolic-ref", ref_prefix + "HEAD"],
+            cwd=repository, universal_newlines=True).rstrip("\n")
+        if not target_ref.startswith(ref_prefix):
+            raise GitProtocolError(
+                "'git remote set-head %s --auto' did not leave remote HEAD "
+                "under %s" % (remote_name, ref_prefix))
+        return "refs/heads/" + target_ref[len(ref_prefix):]
+
+    def _setHead(self, target_url, target_ref):
+        """Set HEAD on a remote repository.
+
+        This relies on the turnip-set-symbolic-ref extension.
+        """
+        service = "turnip-set-symbolic-ref"
+        url = urljoin(target_url, service)
+        headers = {
+            "Content-Type": "application/x-%s-request" % service,
+            }
+        body = pkt_line("HEAD %s" % target_ref) + pkt_line(None)
+        try:
+            response = urlfetch(url, method="POST", headers=headers, data=body)
+            response.raise_for_status()
+        except Exception as e:
+            raise GitProtocolError(str(e))
+        content_type = response.headers.get("Content-Type")
+        if content_type != ("application/x-%s-result" % service):
+            raise GitProtocolError(
+                "Invalid Content-Type from server: %s" % content_type)
+        content = io.BytesIO(response.content)
+        proto = Protocol(content.read, None)
+        pkts = list(proto.read_pkt_seq())
+        if pkts == [b"ACK HEAD"]:
+            pass
+        elif pkts and pkts[0].startswith(b"ERR "):
+            raise GitProtocolError(pkts[0][len(b"ERR "):])
+        else:
+            raise GitProtocolError("Unexpected packets %r from server" % pkts)
+
     def _doImport(self):
         self._logger.info("Starting job.")
         try:
@@ -1031,20 +1094,38 @@
         try:
             self._runGit("config", "gc.auto", "0", cwd="repository")
             self._runGit(
-                "fetch", "--prune", self.source_details.url, "+refs/*:refs/*",
-                cwd="repository")
-            # We should sync the remote HEAD as well.  This involves
-            # considerable gymnastics.  "git remote set-head --auto" does it
-            # if we can arrange a suitable temporary remote, or git 2.8.0
-            # has "git ls-remote --symref <repository> HEAD".  We then also
-            # need to set Launchpad's idea of HEAD, which is fiddly from an
-            # import worker.  For now we leave it at the default and trust
-            # that we'll be able to fix things up later.
+                "remote", "add", "source", self.source_details.url,
+                cwd="repository")
+            self._runGit(
+                "fetch", "--prune", "source", "+refs/*:refs/*",
+                cwd="repository")
+            try:
+                target_ref = self._getHead("repository", "source")
+            except (subprocess.CalledProcessError, GitProtocolError) as e2:
+                self._logger.info("Unable to fetch default branch: %s" % e2)
+                target_ref = None
+            self._runGit("remote", "rm", "source", cwd="repository")
+            # XXX cjwatson 2016-11-03: For some reason "git remote rm"
+            # doesn't actually remove the refs.
+            remote_refs = os.path.join(
+                "repository", "refs", "remotes", "source")
+            if os.path.isdir(remote_refs):
+                shutil.rmtree(remote_refs)
         except subprocess.CalledProcessError as e:
             self._logger.info("Unable to fetch remote repository: %s" % e)
             return CodeImportWorkerExitCode.FAILURE_INVALID
         self._logger.info("Pushing repository to hosting service.")
         try:
+            if target_ref is not None:
+                # Push the target of HEAD first to ensure that it is always
+                # available.
+                self._runGit(
+                    "push", target_url, "+%s:%s" % (target_ref, target_ref),
+                    cwd="repository")
+                try:
+                    self._setHead(target_url, target_ref)
+                except GitProtocolError as e:
+                    self._logger.info("Unable to set default branch: %s" % e)
             self._runGit("push", "--mirror", target_url, cwd="repository")
         except subprocess.CalledProcessError as e:
             self._logger.info(

=== modified file 'scripts/code-import-worker.py'
--- scripts/code-import-worker.py	2016-10-11 15:28:25 +0000
+++ scripts/code-import-worker.py	2016-11-03 17:40:01 +0000
@@ -35,6 +35,7 @@
 from lp.codehosting.safe_open import AcceptAnythingPolicy
 from lp.services import scripts
 from lp.services.config import config
+from lp.services.timeout import set_default_timeout_function
 
 
 opener_policies = {
@@ -71,6 +72,7 @@
 
     def main(self):
         force_bzr_to_use_urllib()
+        set_default_timeout_function(lambda: 60.0)
         source_details = CodeImportSourceDetails.fromArguments(self.args)
         if source_details.rcstype == 'git':
             if source_details.target_rcstype == 'bzr':


Follow ups