← Back to team overview

launchpad-reviewers team mailing list archive

Re: [Merge] lp:~cjwatson/launchpad/codeimport-git-worker-sync-head into lp:launchpad

 

Review: Approve code



Diff comments:

> 
> === modified file 'lib/lp/codehosting/codeimport/worker.py'
> --- lib/lp/codehosting/codeimport/worker.py	2016-11-05 11:12:36 +0000
> +++ lib/lp/codehosting/codeimport/worker.py	2016-11-05 11:12:36 +0000
> @@ -1000,6 +1007,63 @@
>          if retcode:
>              raise subprocess.CalledProcessError(retcode, cmd)
>  
> +    def _getHead(self, repository, remote_name):
> +        """Get HEAD from a configured remote in a local repository.
> +
> +        The returned ref name will be adjusted in such a way that it can be
> +        passed to `_setHead` (e.g. refs/remotes/origin/master ->
> +        refs/heads/master).
> +        """
> +        # This is a bit weird, but set-head will bail out if the target
> +        # doesn't exist in the correct remotes namespace.  git 2.8.0 has
> +        # "git ls-remote --symref <repository> HEAD" which would involve
> +        # less juggling.

Did we decide that these horrors were less bad than backporting ls-remote --symref?

> +        self._runGit(
> +            "fetch", "-q", ".", "refs/heads/*:refs/remotes/%s/*" % remote_name,
> +            cwd=repository)
> +        self._runGit(
> +            "remote", "set-head", remote_name, "--auto", cwd=repository)
> +        ref_prefix = "refs/remotes/%s/" % remote_name
> +        target_ref = subprocess.check_output(
> +            ["git", "symbolic-ref", ref_prefix + "HEAD"],
> +            cwd=repository, universal_newlines=True).rstrip("\n")
> +        if not target_ref.startswith(ref_prefix):
> +            raise GitProtocolError(
> +                "'git remote set-head %s --auto' did not leave remote HEAD "
> +                "under %s" % (remote_name, ref_prefix))
> +        return "refs/heads/" + target_ref[len(ref_prefix):]

Do I want to know what happens if HEAD doesn't point somewhere under refs/heads?

> +
> +    def _setHead(self, target_url, target_ref):
> +        """Set HEAD on a remote repository.
> +
> +        This relies on the turnip-set-symbolic-ref extension.
> +        """
> +        service = "turnip-set-symbolic-ref"
> +        url = urljoin(target_url, service)
> +        headers = {
> +            "Content-Type": "application/x-%s-request" % service,
> +            }
> +        body = pkt_line("HEAD %s" % target_ref) + pkt_line(None)
> +        try:
> +            response = urlfetch(url, method="POST", headers=headers, data=body)
> +            response.raise_for_status()
> +        except Exception as e:
> +            raise GitProtocolError(str(e))
> +        content_type = response.headers.get("Content-Type")
> +        if content_type != ("application/x-%s-result" % service):
> +            raise GitProtocolError(
> +                "Invalid Content-Type from server: %s" % content_type)
> +        content = io.BytesIO(response.content)
> +        proto = Protocol(content.read, None)
> +        pkts = list(proto.read_pkt_seq())
> +        if len(pkts) == 1 and pkts[0].rstrip(b"\n") == b"ACK HEAD":
> +            pass
> +        elif pkts and pkts[0].startswith(b"ERR "):
> +            raise GitProtocolError(
> +                pkts[0][len(b"ERR "):].rstrip(b"\n").decode("UTF-8"))
> +        else:
> +            raise GitProtocolError("Unexpected packets %r from server" % pkts)
> +
>      def _doImport(self):
>          self._logger.info("Starting job.")
>          try:
> @@ -1031,20 +1095,38 @@
>          try:
>              self._runGit("config", "gc.auto", "0", cwd="repository")
>              self._runGit(
> -                "fetch", "--prune", self.source_details.url, "+refs/*:refs/*",
> -                cwd="repository")
> -            # We should sync the remote HEAD as well.  This involves
> -            # considerable gymnastics.  "git remote set-head --auto" does it
> -            # if we can arrange a suitable temporary remote, or git 2.8.0
> -            # has "git ls-remote --symref <repository> HEAD".  We then also
> -            # need to set Launchpad's idea of HEAD, which is fiddly from an
> -            # import worker.  For now we leave it at the default and trust
> -            # that we'll be able to fix things up later.
> +                "remote", "add", "source", self.source_details.url,
> +                cwd="repository")
> +            self._runGit(
> +                "fetch", "--prune", "source", "+refs/*:refs/*",
> +                cwd="repository")
> +            try:
> +                target_ref = self._getHead("repository", "source")

s/target_ref/new_head/?

> +            except (subprocess.CalledProcessError, GitProtocolError) as e2:
> +                self._logger.info("Unable to fetch default branch: %s" % e2)
> +                target_ref = None
> +            self._runGit("remote", "rm", "source", cwd="repository")
> +            # XXX cjwatson 2016-11-03: For some reason "git remote rm"
> +            # doesn't actually remove the refs.
> +            remote_refs = os.path.join(
> +                "repository", "refs", "remotes", "source")
> +            if os.path.isdir(remote_refs):
> +                shutil.rmtree(remote_refs)

Hum, really? I'd expect it to remove the refs, but maybe not the containing dirs. You're also in trouble here if they ever end up in packed-refs.

>          except subprocess.CalledProcessError as e:
>              self._logger.info("Unable to fetch remote repository: %s" % e)
>              return CodeImportWorkerExitCode.FAILURE_INVALID
>          self._logger.info("Pushing repository to hosting service.")
>          try:
> +            if target_ref is not None:
> +                # Push the target of HEAD first to ensure that it is always
> +                # available.
> +                self._runGit(
> +                    "push", target_url, "+%s:%s" % (target_ref, target_ref),

Is the ref safe to use in this context, even in the face of a malicious server?

> +                    cwd="repository")
> +                try:
> +                    self._setHead(target_url, target_ref)
> +                except GitProtocolError as e:
> +                    self._logger.info("Unable to set default branch: %s" % e)
>              self._runGit("push", "--mirror", target_url, cwd="repository")
>          except subprocess.CalledProcessError as e:
>              self._logger.info(


-- 
https://code.launchpad.net/~cjwatson/launchpad/codeimport-git-worker-sync-head/+merge/309988
Your team Launchpad code reviewers is subscribed to branch lp:launchpad.


References