launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #21158
[Merge] lp:~cjwatson/launchpad/codeimport-git-worker-sync-head into lp:launchpad
Colin Watson has proposed merging lp:~cjwatson/launchpad/codeimport-git-worker-sync-head into lp:launchpad with lp:~cjwatson/launchpad/codeimport-git-dulwich-web as a prerequisite.
Commit message:
Fix the Git-to-Git import worker to synchronise HEAD if possible.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
Related bugs:
Bug #1469459 in Launchpad itself: "import external code into a LP git repo (natively)"
https://bugs.launchpad.net/launchpad/+bug/1469459
For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/codeimport-git-worker-sync-head/+merge/309988
We can only synchronise HEAD if the server advertises its target, but git 1.8.4.3 or newer does so, which covers a lot of ground (notably, GitHub has a more than sufficient version).
This needs a couple of other bits:
* https://code.launchpad.net/~cjwatson/turnip/+git/turnip/+merge/309702
* git 1:1.7.9.5-1ubuntu0.3ppa2 from https://launchpad.net/~cjwatson/+archive/ubuntu/launchpad/+packages to make upload-pack advertise the target of HEAD (only needed in tests, so this just needs to be in the Launchpad PPA).
--
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/codeimport-git-worker-sync-head into lp:launchpad.
=== modified file 'lib/lp/codehosting/codeimport/tests/servers.py'
--- lib/lp/codehosting/codeimport/tests/servers.py 2016-11-03 17:40:01 +0000
+++ lib/lp/codehosting/codeimport/tests/servers.py 2016-11-03 17:40:01 +0000
@@ -15,6 +15,7 @@
from cStringIO import StringIO
import errno
import os
+import re
import shutil
import signal
import stat
@@ -42,9 +43,13 @@
import dulwich.index
from dulwich.objects import Blob
from dulwich.repo import Repo as GitRepo
-from dulwich.server import Backend
+from dulwich.server import (
+ Backend,
+ Handler,
+ )
from dulwich.web import (
GunzipFilter,
+ handle_service_request,
HTTPGitApplication,
LimitedInputFilter,
WSGIRequestHandlerLogger,
@@ -240,13 +245,55 @@
return GitRepo(full_path)
+class TurnipSetSymbolicRefHandler(Handler):
+ """Dulwich protocol handler for setting a symbolic ref.
+
+ Transcribed from turnip.pack.git.PackBackendProtocol.
+ """
+
+ def __init__(self, backend, args, proto, http_req=None):
+ super(TurnipSetSymbolicRefHandler, self).__init__(
+ backend, proto, http_req=http_req)
+ self.repo = backend.open_repository(args[0])
+
+ def handle(self):
+ line = self.proto.read_pkt_line()
+ if line is None:
+ self.proto.write_pkt_line(b"ERR Invalid set-symbolic-ref-line\n")
+ self.proto.write_pkt_line(None)
+ return
+ name, target = line.split(b" ", 1)
+ if name != b"HEAD":
+ self.proto.write_pkt_line(
+ b'ERR Symbolic ref name must be "HEAD"\n')
+ self.proto.write_pkt_line(None)
+ return
+ if target.startswith(b"-"):
+ self.proto.write_pkt_line(
+ b'ERR Symbolic ref target may not start with "-"\n')
+ self.proto.write_pkt_line(None)
+ return
+ try:
+ self.repo.refs.set_symbolic_ref(name, target)
+ except Exception as e:
+ self.proto.write_pkt_line(b'ERR %s\n' % e)
+ else:
+ self.proto.write_pkt_line(b'ACK %s' % name)
+ self.proto.write_pkt_line(None)
+
+
class HTTPGitServerThread(threading.Thread):
"""Thread that runs an HTTP Git server."""
def __init__(self, backend, address, port=None):
super(HTTPGitServerThread, self).__init__()
self.setName("HTTP Git server on %s:%s" % (address, port))
- app = GunzipFilter(LimitedInputFilter(HTTPGitApplication(backend)))
+ app = HTTPGitApplication(
+ backend,
+ handlers={'turnip-set-symbolic-ref': TurnipSetSymbolicRefHandler})
+ app.services[('POST', re.compile('/turnip-set-symbolic-ref$'))] = (
+ handle_service_request)
+ app = GunzipFilter(LimitedInputFilter(app))
self.server = make_server(
address, port, app, handler_class=WSGIRequestHandlerLogger,
server_class=WSGIServerLogger)
@@ -297,7 +344,7 @@
def makeRepo(self, repository_name, tree_contents):
repository_path = os.path.join(self.repository_store, repository_name)
- os.mkdir(repository_path)
+ os.makedirs(repository_path)
self.createRepository(repository_path, bare=self._use_server)
repo = GitRepo(repository_path)
blobs = [
=== modified file 'lib/lp/codehosting/codeimport/tests/test_workermonitor.py'
--- lib/lp/codehosting/codeimport/tests/test_workermonitor.py 2016-11-03 17:40:01 +0000
+++ lib/lp/codehosting/codeimport/tests/test_workermonitor.py 2016-11-03 17:40:01 +0000
@@ -20,6 +20,7 @@
TestCase as BzrTestCase,
TestCaseInTempDir,
)
+from dulwich.repo import Repo as GitRepo
import oops_twisted
from testtools.deferredruntest import (
assert_fails_with,
@@ -775,6 +776,27 @@
self.addCleanup(clean_up_default_stores_for_import, source_details)
return job
+ def makeTargetGitServer(self):
+ """Set up a target Git server that can receive imports."""
+ self.target_store = tempfile.mkdtemp()
+ self.addCleanup(shutil.rmtree, self.target_store)
+ self.target_git_server = GitServer(self.target_store, use_server=True)
+ self.target_git_server.start_server()
+ self.addCleanup(self.target_git_server.stop_server)
+ config_name = self.getUniqueString()
+ config_fixture = self.useFixture(ConfigFixture(
+ config_name, self.layer.config_fixture.instance_name))
+ setting_lines = [
+ "[codehosting]",
+ "git_browse_root: %s" % self.target_git_server.get_url(""),
+ "",
+ "[codeimport]",
+ "macaroon_secret_key: some-secret",
+ ]
+ config_fixture.add_section("\n" + "\n".join(setting_lines))
+ self.useFixture(ConfigUseFixture(config_name))
+ self.useFixture(GitHostingFixture())
+
def assertCodeImportResultCreated(self, code_import):
"""Assert that a `CodeImportResult` was created for `code_import`."""
self.assertEqual(len(list(code_import.results)), 1)
@@ -796,7 +818,7 @@
cwd=repo_path, universal_newlines=True))
self.assertEqual(self.foreign_commit_count, commit_count)
- def assertImported(self, ignored, code_import_id):
+ def assertImported(self, code_import_id):
"""Assert that the `CodeImport` of the given id was imported."""
# In the in-memory tests, check that resetTimeout on the
# CodeImportWorkerMonitorProtocol was called at least once.
@@ -832,53 +854,40 @@
return deferred.addBoth(save_protocol_object)
+ @defer.inlineCallbacks
def test_import_cvs(self):
# Create a CVS CodeImport and import it.
job = self.getStartedJobForImport(self.makeCVSCodeImport())
code_import_id = job.code_import.id
job_id = job.id
self.layer.txn.commit()
- result = self.performImport(job_id)
- return result.addCallback(self.assertImported, code_import_id)
+ yield self.performImport(job_id)
+ self.assertImported(code_import_id)
+ @defer.inlineCallbacks
def test_import_subversion(self):
# Create a Subversion CodeImport and import it.
job = self.getStartedJobForImport(self.makeSVNCodeImport())
code_import_id = job.code_import.id
job_id = job.id
self.layer.txn.commit()
- result = self.performImport(job_id)
- return result.addCallback(self.assertImported, code_import_id)
+ yield self.performImport(job_id)
+ self.assertImported(code_import_id)
+ @defer.inlineCallbacks
def test_import_git(self):
# Create a Git CodeImport and import it.
job = self.getStartedJobForImport(self.makeGitCodeImport())
code_import_id = job.code_import.id
job_id = job.id
self.layer.txn.commit()
- result = self.performImport(job_id)
- return result.addCallback(self.assertImported, code_import_id)
+ yield self.performImport(job_id)
+ self.assertImported(code_import_id)
+ @defer.inlineCallbacks
def test_import_git_to_git(self):
# Create a Git-to-Git CodeImport and import it.
- self.target_store = tempfile.mkdtemp()
- self.addCleanup(shutil.rmtree, self.target_store)
- target_git_server = GitServer(self.target_store, use_server=True)
- target_git_server.start_server()
- self.addCleanup(target_git_server.stop_server)
- config_name = self.getUniqueString()
- config_fixture = self.useFixture(ConfigFixture(
- config_name, self.layer.config_fixture.instance_name))
- setting_lines = [
- "[codehosting]",
- "git_browse_root: %s" % target_git_server.get_url(""),
- "",
- "[codeimport]",
- "macaroon_secret_key: some-secret",
- ]
- config_fixture.add_section("\n" + "\n".join(setting_lines))
- self.useFixture(ConfigUseFixture(config_name))
- self.useFixture(GitHostingFixture())
+ self.makeTargetGitServer()
job = self.getStartedJobForImport(self.makeGitCodeImport(
target_rcs_type=TargetRevisionControlSystems.GIT))
code_import_id = job.code_import.id
@@ -887,27 +896,63 @@
target_repo_path = os.path.join(
self.target_store, job.code_import.target.unique_name)
os.makedirs(target_repo_path)
- target_git_server.createRepository(target_repo_path, bare=True)
- result = self.performImport(job_id)
- return result.addCallback(self.assertImported, code_import_id)
-
+ self.target_git_server.createRepository(target_repo_path, bare=True)
+ yield self.performImport(job_id)
+ self.assertImported(code_import_id)
+ target_repo = GitRepo(target_repo_path)
+ self.assertContentEqual(
+ ["heads/master"], target_repo.refs.keys(base="refs"))
+ self.assertEqual(
+ "ref: refs/heads/master", target_repo.refs.read_ref("HEAD"))
+
+ @defer.inlineCallbacks
+ def test_import_git_to_git_refs_changed(self):
+ # Create a Git-to-Git CodeImport and import it incrementally with
+ # ref and HEAD changes.
+ self.makeTargetGitServer()
+ job = self.getStartedJobForImport(self.makeGitCodeImport(
+ target_rcs_type=TargetRevisionControlSystems.GIT))
+ code_import_id = job.code_import.id
+ job_id = job.id
+ self.layer.txn.commit()
+ source_repo = GitRepo(os.path.join(self.repo_path, "source"))
+ commit = source_repo.refs["refs/heads/master"]
+ source_repo.refs["refs/heads/one"] = commit
+ source_repo.refs["refs/heads/two"] = commit
+ source_repo.refs.set_symbolic_ref("HEAD", "refs/heads/one")
+ del source_repo.refs["refs/heads/master"]
+ target_repo_path = os.path.join(
+ self.target_store, job.code_import.target.unique_name)
+ self.target_git_server.makeRepo(
+ job.code_import.target.unique_name, [("NEWS", "contents")])
+ yield self.performImport(job_id)
+ self.assertImported(code_import_id)
+ target_repo = GitRepo(target_repo_path)
+ self.assertContentEqual(
+ ["heads/one", "heads/two"], target_repo.refs.keys(base="refs"))
+ self.assertEqual(
+ "ref: refs/heads/one",
+ GitRepo(target_repo_path).refs.read_ref("HEAD"))
+
+ @defer.inlineCallbacks
def test_import_bzr(self):
# Create a Bazaar CodeImport and import it.
job = self.getStartedJobForImport(self.makeBzrCodeImport())
code_import_id = job.code_import.id
job_id = job.id
self.layer.txn.commit()
- result = self.performImport(job_id)
- return result.addCallback(self.assertImported, code_import_id)
+ yield self.performImport(job_id)
+ self.assertImported(code_import_id)
+ @defer.inlineCallbacks
def test_import_bzrsvn(self):
# Create a Subversion-via-bzr-svn CodeImport and import it.
job = self.getStartedJobForImport(self.makeBzrSvnCodeImport())
code_import_id = job.code_import.id
job_id = job.id
self.layer.txn.commit()
- result = self.performImport(job_id)
- return result.addCallback(self.assertImported, code_import_id)
+ yield self.performImport(job_id)
+ self.assertImported(code_import_id)
class DeferredOnExit(protocol.ProcessProtocol):
=== modified file 'lib/lp/codehosting/codeimport/worker.py'
--- lib/lp/codehosting/codeimport/worker.py 2016-11-03 17:40:01 +0000
+++ lib/lp/codehosting/codeimport/worker.py 2016-11-03 17:40:01 +0000
@@ -20,6 +20,7 @@
]
+import io
import os
import shutil
import subprocess
@@ -61,6 +62,11 @@
import cscvs
from cscvs.cmds import totla
import CVS
+from dulwich.errors import GitProtocolError
+from dulwich.protocol import (
+ pkt_line,
+ Protocol,
+ )
from lazr.uri import (
InvalidURIError,
URI,
@@ -93,6 +99,7 @@
from lp.services.config import config
from lp.services.macaroons.interfaces import IMacaroonIssuer
from lp.services.propertycache import cachedproperty
+from lp.services.timeout import urlfetch
from lp.services.utils import sanitise_urls
@@ -1000,6 +1007,62 @@
if retcode:
raise subprocess.CalledProcessError(retcode, cmd)
+ def _getHead(self, repository, remote_name):
+ """Get HEAD from a configured remote in a local repository.
+
+ The returned ref name will be adjusted in such a way that it can be
+ passed to `_setHead` (e.g. refs/remotes/origin/master ->
+ refs/heads/master).
+ """
+ # This is a bit weird, but set-head will bail out if the target
+ # doesn't exist in the correct remotes namespace. git 2.8.0 has
+ # "git ls-remote --symref <repository> HEAD" which would involve
+ # less juggling.
+ self._runGit(
+ "fetch", "-q", ".", "refs/heads/*:refs/remotes/%s/*" % remote_name,
+ cwd=repository)
+ self._runGit(
+ "remote", "set-head", remote_name, "--auto", cwd=repository)
+ ref_prefix = "refs/remotes/%s/" % remote_name
+ target_ref = subprocess.check_output(
+ ["git", "symbolic-ref", ref_prefix + "HEAD"],
+ cwd=repository, universal_newlines=True).rstrip("\n")
+ if not target_ref.startswith(ref_prefix):
+ raise GitProtocolError(
+ "'git remote set-head %s --auto' did not leave remote HEAD "
+ "under %s" % (remote_name, ref_prefix))
+ return "refs/heads/" + target_ref[len(ref_prefix):]
+
+ def _setHead(self, target_url, target_ref):
+ """Set HEAD on a remote repository.
+
+ This relies on the turnip-set-symbolic-ref extension.
+ """
+ service = "turnip-set-symbolic-ref"
+ url = urljoin(target_url, service)
+ headers = {
+ "Content-Type": "application/x-%s-request" % service,
+ }
+ body = pkt_line("HEAD %s" % target_ref) + pkt_line(None)
+ try:
+ response = urlfetch(url, method="POST", headers=headers, data=body)
+ response.raise_for_status()
+ except Exception as e:
+ raise GitProtocolError(str(e))
+ content_type = response.headers.get("Content-Type")
+ if content_type != ("application/x-%s-result" % service):
+ raise GitProtocolError(
+ "Invalid Content-Type from server: %s" % content_type)
+ content = io.BytesIO(response.content)
+ proto = Protocol(content.read, None)
+ pkts = list(proto.read_pkt_seq())
+ if pkts == [b"ACK HEAD"]:
+ pass
+ elif pkts and pkts[0].startswith(b"ERR "):
+ raise GitProtocolError(pkts[0][len(b"ERR "):])
+ else:
+ raise GitProtocolError("Unexpected packets %r from server" % pkts)
+
def _doImport(self):
self._logger.info("Starting job.")
try:
@@ -1031,20 +1094,38 @@
try:
self._runGit("config", "gc.auto", "0", cwd="repository")
self._runGit(
- "fetch", "--prune", self.source_details.url, "+refs/*:refs/*",
- cwd="repository")
- # We should sync the remote HEAD as well. This involves
- # considerable gymnastics. "git remote set-head --auto" does it
- # if we can arrange a suitable temporary remote, or git 2.8.0
- # has "git ls-remote --symref <repository> HEAD". We then also
- # need to set Launchpad's idea of HEAD, which is fiddly from an
- # import worker. For now we leave it at the default and trust
- # that we'll be able to fix things up later.
+ "remote", "add", "source", self.source_details.url,
+ cwd="repository")
+ self._runGit(
+ "fetch", "--prune", "source", "+refs/*:refs/*",
+ cwd="repository")
+ try:
+ target_ref = self._getHead("repository", "source")
+ except (subprocess.CalledProcessError, GitProtocolError) as e2:
+ self._logger.info("Unable to fetch default branch: %s" % e2)
+ target_ref = None
+ self._runGit("remote", "rm", "source", cwd="repository")
+ # XXX cjwatson 2016-11-03: For some reason "git remote rm"
+ # doesn't actually remove the refs.
+ remote_refs = os.path.join(
+ "repository", "refs", "remotes", "source")
+ if os.path.isdir(remote_refs):
+ shutil.rmtree(remote_refs)
except subprocess.CalledProcessError as e:
self._logger.info("Unable to fetch remote repository: %s" % e)
return CodeImportWorkerExitCode.FAILURE_INVALID
self._logger.info("Pushing repository to hosting service.")
try:
+ if target_ref is not None:
+ # Push the target of HEAD first to ensure that it is always
+ # available.
+ self._runGit(
+ "push", target_url, "+%s:%s" % (target_ref, target_ref),
+ cwd="repository")
+ try:
+ self._setHead(target_url, target_ref)
+ except GitProtocolError as e:
+ self._logger.info("Unable to set default branch: %s" % e)
self._runGit("push", "--mirror", target_url, cwd="repository")
except subprocess.CalledProcessError as e:
self._logger.info(
=== modified file 'scripts/code-import-worker.py'
--- scripts/code-import-worker.py 2016-10-11 15:28:25 +0000
+++ scripts/code-import-worker.py 2016-11-03 17:40:01 +0000
@@ -35,6 +35,7 @@
from lp.codehosting.safe_open import AcceptAnythingPolicy
from lp.services import scripts
from lp.services.config import config
+from lp.services.timeout import set_default_timeout_function
opener_policies = {
@@ -71,6 +72,7 @@
def main(self):
force_bzr_to_use_urllib()
+ set_default_timeout_function(lambda: 60.0)
source_details = CodeImportSourceDetails.fromArguments(self.args)
if source_details.rcstype == 'git':
if source_details.target_rcstype == 'bzr':
Follow ups