launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #21173
Re: [Merge] lp:~cjwatson/launchpad/codeimport-git-worker-sync-head into lp:launchpad
Review: Approve code
Diff comments:
>
> === modified file 'lib/lp/codehosting/codeimport/worker.py'
> --- lib/lp/codehosting/codeimport/worker.py 2016-11-05 11:12:36 +0000
> +++ lib/lp/codehosting/codeimport/worker.py 2016-11-05 11:12:36 +0000
> @@ -1000,6 +1007,63 @@
> if retcode:
> raise subprocess.CalledProcessError(retcode, cmd)
>
> + def _getHead(self, repository, remote_name):
> + """Get HEAD from a configured remote in a local repository.
> +
> + The returned ref name will be adjusted in such a way that it can be
> + passed to `_setHead` (e.g. refs/remotes/origin/master ->
> + refs/heads/master).
> + """
> + # This is a bit weird, but set-head will bail out if the target
> + # doesn't exist in the correct remotes namespace. git 2.8.0 has
> + # "git ls-remote --symref <repository> HEAD" which would involve
> + # less juggling.
Did we decide that these horrors were less bad than backporting ls-remote --symref?
> + self._runGit(
> + "fetch", "-q", ".", "refs/heads/*:refs/remotes/%s/*" % remote_name,
> + cwd=repository)
> + self._runGit(
> + "remote", "set-head", remote_name, "--auto", cwd=repository)
> + ref_prefix = "refs/remotes/%s/" % remote_name
> + target_ref = subprocess.check_output(
> + ["git", "symbolic-ref", ref_prefix + "HEAD"],
> + cwd=repository, universal_newlines=True).rstrip("\n")
> + if not target_ref.startswith(ref_prefix):
> + raise GitProtocolError(
> + "'git remote set-head %s --auto' did not leave remote HEAD "
> + "under %s" % (remote_name, ref_prefix))
> + return "refs/heads/" + target_ref[len(ref_prefix):]
Do I want to know what happens if HEAD doesn't point somewhere under refs/heads?
> +
> + def _setHead(self, target_url, target_ref):
> + """Set HEAD on a remote repository.
> +
> + This relies on the turnip-set-symbolic-ref extension.
> + """
> + service = "turnip-set-symbolic-ref"
> + url = urljoin(target_url, service)
> + headers = {
> + "Content-Type": "application/x-%s-request" % service,
> + }
> + body = pkt_line("HEAD %s" % target_ref) + pkt_line(None)
> + try:
> + response = urlfetch(url, method="POST", headers=headers, data=body)
> + response.raise_for_status()
> + except Exception as e:
> + raise GitProtocolError(str(e))
> + content_type = response.headers.get("Content-Type")
> + if content_type != ("application/x-%s-result" % service):
> + raise GitProtocolError(
> + "Invalid Content-Type from server: %s" % content_type)
> + content = io.BytesIO(response.content)
> + proto = Protocol(content.read, None)
> + pkts = list(proto.read_pkt_seq())
> + if len(pkts) == 1 and pkts[0].rstrip(b"\n") == b"ACK HEAD":
> + pass
> + elif pkts and pkts[0].startswith(b"ERR "):
> + raise GitProtocolError(
> + pkts[0][len(b"ERR "):].rstrip(b"\n").decode("UTF-8"))
> + else:
> + raise GitProtocolError("Unexpected packets %r from server" % pkts)
> +
> def _doImport(self):
> self._logger.info("Starting job.")
> try:
> @@ -1031,20 +1095,38 @@
> try:
> self._runGit("config", "gc.auto", "0", cwd="repository")
> self._runGit(
> - "fetch", "--prune", self.source_details.url, "+refs/*:refs/*",
> - cwd="repository")
> - # We should sync the remote HEAD as well. This involves
> - # considerable gymnastics. "git remote set-head --auto" does it
> - # if we can arrange a suitable temporary remote, or git 2.8.0
> - # has "git ls-remote --symref <repository> HEAD". We then also
> - # need to set Launchpad's idea of HEAD, which is fiddly from an
> - # import worker. For now we leave it at the default and trust
> - # that we'll be able to fix things up later.
> + "remote", "add", "source", self.source_details.url,
> + cwd="repository")
> + self._runGit(
> + "fetch", "--prune", "source", "+refs/*:refs/*",
> + cwd="repository")
> + try:
> + target_ref = self._getHead("repository", "source")
s/target_ref/new_head/?
> + except (subprocess.CalledProcessError, GitProtocolError) as e2:
> + self._logger.info("Unable to fetch default branch: %s" % e2)
> + target_ref = None
> + self._runGit("remote", "rm", "source", cwd="repository")
> + # XXX cjwatson 2016-11-03: For some reason "git remote rm"
> + # doesn't actually remove the refs.
> + remote_refs = os.path.join(
> + "repository", "refs", "remotes", "source")
> + if os.path.isdir(remote_refs):
> + shutil.rmtree(remote_refs)
Hum, really? I'd expect it to remove the refs, but maybe not the containing dirs. You're also in trouble here if they ever end up in packed-refs.
> except subprocess.CalledProcessError as e:
> self._logger.info("Unable to fetch remote repository: %s" % e)
> return CodeImportWorkerExitCode.FAILURE_INVALID
> self._logger.info("Pushing repository to hosting service.")
> try:
> + if target_ref is not None:
> + # Push the target of HEAD first to ensure that it is always
> + # available.
> + self._runGit(
> + "push", target_url, "+%s:%s" % (target_ref, target_ref),
Is the ref safe to use in this context, even in the face of a malicious server?
> + cwd="repository")
> + try:
> + self._setHead(target_url, target_ref)
> + except GitProtocolError as e:
> + self._logger.info("Unable to set default branch: %s" % e)
> self._runGit("push", "--mirror", target_url, cwd="repository")
> except subprocess.CalledProcessError as e:
> self._logger.info(
--
https://code.launchpad.net/~cjwatson/launchpad/codeimport-git-worker-sync-head/+merge/309988
Your team Launchpad code reviewers is subscribed to branch lp:launchpad.
References