← Back to team overview

launchpad-reviewers team mailing list archive

Re: [Merge] lp:~cjwatson/launchpad/git-ref-scanner-commits into lp:launchpad

 

Review: Approve code



Diff comments:

> === modified file 'lib/lp/code/errors.py'
> --- lib/lp/code/errors.py	2015-03-12 15:21:27 +0000
> +++ lib/lp/code/errors.py	2015-03-19 15:42:21 +0000
> @@ -36,7 +36,7 @@
>      'GitRepositoryCreatorNotMemberOfOwnerTeam',
>      'GitRepositoryCreatorNotOwner',
>      'GitRepositoryExists',
> -    'GitRepositoryRefScanFault',
> +    'GitRepositoryScanFault',
>      'GitTargetError',
>      'InvalidBranchMergeProposal',
>      'InvalidMergeQueueConfig',
> @@ -382,8 +382,8 @@
>      """Raised when there is a hosting fault creating a Git repository."""
>  
>  
> -class GitRepositoryRefScanFault(Exception):
> -    """Raised when there is a fault getting the refs for a repository."""
> +class GitRepositoryScanFault(Exception):
> +    """Raised when there is a fault scanning a repository."""
>  
>  
>  class GitTargetError(Exception):
> 
> === modified file 'lib/lp/code/githosting.py'
> --- lib/lp/code/githosting.py	2015-03-12 15:21:27 +0000
> +++ lib/lp/code/githosting.py	2015-03-19 15:42:21 +0000
> @@ -15,7 +15,7 @@
>  
>  from lp.code.errors import (
>      GitRepositoryCreationFault,
> -    GitRepositoryRefScanFault,
> +    GitRepositoryScanFault,
>      )
>  
>  
> @@ -60,13 +60,39 @@
>                  urlutils.join(self.endpoint, "repo", path, "refs"),
>                  timeout=self.timeout)
>          except Exception as e:
> -            raise GitRepositoryRefScanFault(
> +            raise GitRepositoryScanFault(
>                  "Failed to get refs from Git repository: %s" % unicode(e))
>          if response.status_code != 200:
> -            raise GitRepositoryRefScanFault(
> +            raise GitRepositoryScanFault(
>                  "Failed to get refs from Git repository: %s" % response.text)
>          try:
>              return response.json()
>          except ValueError as e:
> -            raise GitRepositoryRefScanFault(
> +            raise GitRepositoryScanFault(
>                  "Failed to decode ref-scan response: %s" % unicode(e))
> +
> +    def get_commits(self, path, commit_oids, logger=None):

I don't object to PEP 8 naming, but there's already a _makeSession in this class.

> +        try:
> +            # XXX cjwatson 2015-03-01: Once we're on requests >= 2.4.2, we
> +            # should just use post(json=) and drop the explicit Content-Type
> +            # header.
> +            if logger is not None:
> +                logger.info("Requesting commit details for %s" % commit_oids)

I'd cast commit_oids to a list at the top of method for consistency and to make generators work.

> +            response = self._makeSession().post(
> +                urlutils.join(self.endpoint, "repo", path, "commits"),
> +                headers={"Content-Type": "application/json"},
> +                data=json.dumps({"commits": commit_oids}),
> +                timeout=self.timeout)
> +        except Exception as e:
> +            raise GitRepositoryScanFault(
> +                "Failed to get commit details from Git repository: %s" %
> +                unicode(e))
> +        if response.status_code != 200:
> +            raise GitRepositoryScanFault(
> +                "Failed to get commit details from Git repository: %s" %
> +                response.text)
> +        try:
> +            return response.json()
> +        except ValueError as e:
> +            raise GitRepositoryScanFault(
> +                "Failed to decode commit-scan response: %s" % unicode(e))
> 
> === modified file 'lib/lp/code/interfaces/gitref.py'
> --- lib/lp/code/interfaces/gitref.py	2015-03-13 14:15:24 +0000
> +++ lib/lp/code/interfaces/gitref.py	2015-03-19 15:42:21 +0000
> @@ -15,6 +15,8 @@
>      )
>  from zope.schema import (
>      Choice,
> +    Datetime,
> +    Text,
>      TextLine,
>      )
>  
> @@ -42,6 +44,24 @@
>          title=_("Object type"), required=True, readonly=True,
>          vocabulary=GitObjectType)
>  
> +    author = Attribute(
> +        "The author of the commit pointed to by this reference.")
> +    author_date = Datetime(
> +        title=_("The author date of the commit pointed to by this reference."),
> +        required=False, readonly=True)
> +
> +    committer = Attribute(
> +        "The committer of the commit pointed to by this reference.")
> +    committer_date = Datetime(
> +        title=_(
> +            "The committer date of the commit pointed to by this reference."),
> +        required=False, readonly=True)
> +
> +    commit_message = Text(
> +        title=_(
> +            "The commit message of the commit pointed to by this reference."),
> +        required=False, readonly=True)
> +
>      display_name = TextLine(
>          title=_("Display name"), required=True, readonly=True,
>          description=_("Display name of the reference."))
> 
> === modified file 'lib/lp/code/interfaces/gitrepository.py'
> --- lib/lp/code/interfaces/gitrepository.py	2015-03-17 10:42:24 +0000
> +++ lib/lp/code/interfaces/gitrepository.py	2015-03-19 15:42:21 +0000
> @@ -213,12 +213,21 @@
>          :params paths: An iterable of paths.
>          """
>  
> -    def synchroniseRefs(hosting_refs, logger=None):
> +    def planRefChanges(hosting_client, hosting_path, logger=None):
> +        """Plan ref changes based on information from the hosting service.
> +
> +        :param hosting_client: A `GitHostingClient`.
> +        :param hosting_path: A path on the hosting service.
> +        :param logger: An optional logger.
> +        """

I'd document the return value.

> +
> +    def synchroniseRefs(refs_to_upsert, refs_to_remove):
>          """Synchronise references with those from the hosting service.
>  
> -        :param hosting_refs: A dictionary of reference information returned
> -            from the hosting service's `/repo/PATH/refs` collection.
> -        :param logger: An optional logger.
> +        :param refs_to_upsert: A dictionary mapping ref paths to
> +            dictionaries of their fields; these refs will be created or
> +            updated as appropriate.
> +        :param refs_to_remove: A set of ref paths to remove.
>          """
>  
>      def setOwnerDefault(value):
> 
> === modified file 'lib/lp/code/model/gitjob.py'
> --- lib/lp/code/model/gitjob.py	2015-03-17 10:42:24 +0000
> +++ lib/lp/code/model/gitjob.py	2015-03-19 15:42:21 +0000
> @@ -189,8 +189,10 @@
>                      LockType.GIT_REF_SCAN, self.repository.id,
>                      Store.of(self.repository)):
>                  hosting_path = self.repository.getInternalPath()
> -                self.repository.synchroniseRefs(
> -                    self._hosting_client.get_refs(hosting_path), logger=log)
> +                refs_to_upsert, refs_to_remove = (
> +                    self.repository.planRefChanges(
> +                        self._hosting_client, hosting_path, logger=log))
> +                self.repository.synchroniseRefs(refs_to_upsert, refs_to_remove)
>          except LostObjectError:
>              log.info(
>                  "Skipping repository %s because it has been deleted." %
> 
> === modified file 'lib/lp/code/model/gitref.py'
> --- lib/lp/code/model/gitref.py	2015-03-13 14:15:24 +0000
> +++ lib/lp/code/model/gitref.py	2015-03-19 15:42:21 +0000
> @@ -6,7 +6,9 @@
>      'GitRef',
>      ]
>  
> +import pytz
>  from storm.locals import (
> +    DateTime,
>      Int,
>      Reference,
>      Unicode,
> @@ -36,6 +38,18 @@
>  
>      object_type = EnumCol(enum=GitObjectType, notNull=True)
>  
> +    author_id = Int(name='author', allow_none=True)
> +    author = Reference(author_id, 'RevisionAuthor.id')
> +    author_date = DateTime(
> +        name='author_date', tzinfo=pytz.UTC, allow_none=True)
> +
> +    committer_id = Int(name='committer', allow_none=True)
> +    committer = Reference(committer_id, 'RevisionAuthor.id')
> +    committer_date = DateTime(
> +        name='committer_date', tzinfo=pytz.UTC, allow_none=True)
> +
> +    commit_message = Unicode(name='commit_message', allow_none=True)
> +
>      @property
>      def display_name(self):
>          return self.path.split("/", 2)[-1]
> 
> === modified file 'lib/lp/code/model/gitrepository.py'
> --- lib/lp/code/model/gitrepository.py	2015-03-17 10:42:24 +0000
> +++ lib/lp/code/model/gitrepository.py	2015-03-19 15:42:21 +0000
> @@ -8,6 +8,8 @@
>      'GitRepositorySet',
>      ]
>  
> +from datetime import datetime
> +import email
>  from itertools import chain
>  
>  from bzrlib import urlutils
> @@ -68,6 +70,7 @@
>      IGitRepositorySet,
>      user_has_special_git_repository_access,
>      )
> +from lp.code.interfaces.revision import IRevisionSet
>  from lp.code.model.gitref import GitRef
>  from lp.registry.enums import PersonVisibility
>  from lp.registry.errors import CannotChangeInformationType
> @@ -358,16 +361,28 @@
>          store.flush()
>  
>          # Try a bulk update first.
> -        column_names = ["repository_id", "path", "commit_sha1", "object_type"]
> +        column_names = [
> +            "repository_id", "path", "commit_sha1", "object_type",
> +            "author_id", "author_date", "committer_id", "committer_date",
> +            "commit_message",
> +            ]
>          column_types = [
>              ("repository", "integer"),
>              ("path", "text"),
>              ("commit_sha1", "character(40)"),
>              ("object_type", "integer"),
> +            ("author", "integer"),
> +            ("author_date", "timestamp without time zone"),
> +            ("committer", "integer"),
> +            ("committer_date", "timestamp without time zone"),
> +            ("commit_message", "text"),
>              ]
>          columns = [getattr(GitRef, name) for name in column_names]
>          values = [
> -            (self.id, path, info["sha1"], info["type"])
> +            (self.id, path, info["sha1"], info["type"],
> +             info.get("author"), info.get("author_date"),
> +             info.get("committer"), info.get("committer_date"),
> +             info.get("commit_message"))
>              for path, info in refs_info.items()]
>          db_values = dbify_values(values)
>          new_refs_expr = Values("new_refs", column_types, db_values)
> @@ -409,23 +424,86 @@
>              GitRef.repository == self, GitRef.path.is_in(paths)).remove()
>          del get_property_cache(self).refs
>  
> -    def synchroniseRefs(self, hosting_refs, logger=None):
> +    def planRefChanges(self, hosting_client, hosting_path, logger=None):
>          """See `IGitRepository`."""
>          new_refs = {}
> -        for path, info in hosting_refs.items():
> +        for path, info in hosting_client.get_refs(hosting_path).items():
>              try:
>                  new_refs[path] = self._convertRefInfo(info)
>              except ValueError as e:
> -                logger.warning("Unconvertible ref %s %s: %s" % (path, info, e))
> +                if logger is not None:
> +                    logger.warning(
> +                        "Unconvertible ref %s %s: %s" % (path, info, e))
> +
> +        # Plan the changes.
>          current_refs = {ref.path: ref for ref in self.refs}
>          refs_to_upsert = {}
>          for path, info in new_refs.items():
>              current_ref = current_refs.get(path)
>              if (current_ref is None or
>                  info["sha1"] != current_ref.commit_sha1 or
> -                info["type"] != current_ref.object_type):
> +                info["type"] != current_ref.object_type or
> +                current_ref.author_id is None or
> +                current_ref.author_date is None or
> +                current_ref.committer_id is None or
> +                current_ref.committer_date is None or
> +                current_ref.commit_message is None):

This will detect any ref that points to a non-commit as updatable.

>                  refs_to_upsert[path] = info
> +        oids_to_upsert = sorted(set(
> +            info["sha1"] for info in refs_to_upsert.values()))
>          refs_to_remove = set(current_refs) - set(new_refs)
> +
> +        # Fetch any required commit information, and fill it into the
> +        # appropriate info dictionaries.
> +        commits = {
> +            commit.get("sha1"): commit
> +            for commit in hosting_client.get_commits(
> +                hosting_path, oids_to_upsert, logger=logger)}
> +        authors_to_acquire = []
> +        committers_to_acquire = []
> +        for info in refs_to_upsert.values():
> +            commit = commits.get(info["sha1"])
> +            if commit is None:
> +                continue
> +            author = commit.get("author")
> +            if author is not None:
> +                if "time" in author:
> +                    info["author_date"] = datetime.fromtimestamp(
> +                        author["time"], tz=pytz.UTC)
> +                if "name" in author and "email" in author:
> +                    author_addr = email.utils.formataddr(
> +                        (author["name"], author["email"]))
> +                    info["author_addr"] = author_addr
> +                    authors_to_acquire.append(author_addr)
> +            committer = commit.get("committer")
> +            if committer is not None:
> +                if "time" in committer:
> +                    info["committer_date"] = datetime.fromtimestamp(
> +                        committer["time"], tz=pytz.UTC)
> +                if "name" in committer and "email" in committer:
> +                    committer_addr = email.utils.formataddr(
> +                        (committer["name"], committer["email"]))
> +                    info["committer_addr"] = committer_addr
> +                    committers_to_acquire.append(committer_addr)
> +            if "message" in commit:
> +                info["commit_message"] = commit["message"]

This method is quite long and complex, so I'd consider splitting this block out into a separate function that augments the info dicts (or an info dict) with detailed commit metadata. It in fact needn't even be in this method at all; it could be applied to refs_to_upsert after the value is returned, simplifying the tests for both sides considerably.

> +        revision_authors = getUtility(IRevisionSet).acquireRevisionAuthors(
> +            authors_to_acquire + committers_to_acquire)
> +        for info in refs_to_upsert.values():
> +            # Removing the security proxy here is safe because
> +            # RevisionAuthors are always public.  We need to do this for the
> +            # sake of dbify_value later.

Worth just adding id to the interface instead?

> +            author = revision_authors.get(info.get("author_addr"))
> +            if author is not None:
> +                info["author"] = removeSecurityProxy(author).id
> +            committer = revision_authors.get(info.get("committer_addr"))
> +            if committer is not None:
> +                info["committer"] = removeSecurityProxy(committer).id
> +
> +        return refs_to_upsert, refs_to_remove
> +
> +    def synchroniseRefs(self, refs_to_upsert, refs_to_remove):
> +        """See `IGitRepository`."""
>          if refs_to_upsert:
>              self.createOrUpdateRefs(refs_to_upsert)
>          if refs_to_remove:
> 
> === modified file 'lib/lp/code/model/tests/test_gitjob.py'
> --- lib/lp/code/model/tests/test_gitjob.py	2015-03-17 10:51:15 +0000
> +++ lib/lp/code/model/tests/test_gitjob.py	2015-03-19 15:42:21 +0000
> @@ -5,8 +5,13 @@
>  
>  __metaclass__ = type
>  
> +from datetime import (
> +    datetime,
> +    timedelta,
> +    )
>  import hashlib
>  
> +import pytz
>  from testtools.matchers import (
>      MatchesSetwise,
>      MatchesStructure,
> @@ -25,7 +30,10 @@
>      GitRefScanJob,
>      )
>  from lp.services.features.testing import FeatureFixture
> -from lp.testing import TestCaseWithFactory
> +from lp.testing import (
> +    TestCaseWithFactory,
> +    time_counter,
> +    )
>  from lp.testing.dbuser import dbuser
>  from lp.testing.fakemethod import FakeMethod
>  from lp.testing.layers import (
> @@ -72,6 +80,27 @@
>                  }}
>              for path in paths}
>  
> +    @staticmethod
> +    def makeFakeCommits(author, author_date_gen, paths):
> +        epoch = datetime.fromtimestamp(0, tz=pytz.UTC)
> +        dates = {path: next(author_date_gen) for path in paths}
> +        return [{
> +            "sha1": hashlib.sha1(path).hexdigest(),
> +            "message": "tip of %s" % path,
> +            "author": {
> +                "name": author.displayname,
> +                "email": author.preferredemail.email,
> +                "time": int((dates[path] - epoch).total_seconds()),
> +                },
> +            "committer": {
> +                "name": author.displayname,
> +                "email": author.preferredemail.email,
> +                "time": int((dates[path] - epoch).total_seconds()),
> +                },
> +            "parents": [],
> +            "tree": hashlib.sha1("").hexdigest(),
> +            } for path in paths]
> +
>      def assertRefsMatch(self, refs, repository, paths):
>          matchers = [
>              MatchesStructure.byEquality(
> @@ -104,6 +133,11 @@
>          paths = (u"refs/heads/master", u"refs/tags/1.0")
>          job._hosting_client.get_refs = FakeMethod(
>              result=self.makeFakeRefs(paths))
> +        author = repository.owner
> +        author_date_start = datetime(2015, 01, 01, tzinfo=pytz.UTC)
> +        author_date_gen = time_counter(author_date_start, timedelta(days=1))
> +        job._hosting_client.get_commits = FakeMethod(
> +            result=self.makeFakeCommits(author, author_date_gen, paths))
>          with dbuser("branchscanner"):
>              job.run()
>          self.assertRefsMatch(repository.refs, repository, paths)
> @@ -113,6 +147,7 @@
>          job = GitRefScanJob.create(repository)
>          job._hosting_client.get_refs = FakeMethod(
>              result={u"refs/heads/master": {}})
> +        job._hosting_client.get_commits = FakeMethod(result=[])
>          expected_message = (
>              'Unconvertible ref refs/heads/master {}: '
>              'ref info does not contain "object" key')
> 
> === modified file 'lib/lp/code/model/tests/test_gitrepository.py'
> --- lib/lp/code/model/tests/test_gitrepository.py	2015-03-17 10:47:59 +0000
> +++ lib/lp/code/model/tests/test_gitrepository.py	2015-03-19 15:42:21 +0000
> @@ -43,6 +43,7 @@
>      IGitRepository,
>      IGitRepositorySet,
>      )
> +from lp.code.interfaces.revision import IRevisionSet
>  from lp.code.model.gitrepository import GitRepository
>  from lp.registry.enums import (
>      BranchSharingPolicy,
> @@ -72,6 +73,7 @@
>      TestCaseWithFactory,
>      verifyObject,
>      )
> +from lp.testing.fakemethod import FakeMethod
>  from lp.testing.layers import (
>      DatabaseFunctionalLayer,
>      ZopelessDatabaseLayer,
> @@ -535,6 +537,97 @@
>                  object_type=GitObjectType.BLOB,
>                  ))
>  
> +    def test_planRefChanges(self):
> +        # planRefChanges copes with planning changes to refs in a repository
> +        # where some refs have been created, some deleted, and some changed.
> +        repository = self.factory.makeGitRepository()
> +        paths = (u"refs/heads/master", u"refs/heads/foo", u"refs/heads/bar")
> +        self.factory.makeGitRefs(repository=repository, paths=paths)
> +        self.assertRefsMatch(repository.refs, repository, paths)
> +        master_sha1 = repository.getRefByPath(u"refs/heads/master").commit_sha1
> +        foo_sha1 = repository.getRefByPath(u"refs/heads/foo").commit_sha1
> +        author = self.factory.makePerson()
> +        with person_logged_in(author):
> +            author_email = author.preferredemail.email
> +        epoch = datetime.fromtimestamp(0, tz=pytz.UTC)
> +        author_date = datetime(2015, 1, 1, tzinfo=pytz.UTC)
> +        committer_date = datetime(2015, 1, 2, tzinfo=pytz.UTC)
> +        hosting_client = FakeMethod()
> +        hosting_client.get_refs = FakeMethod(result={
> +            u"refs/heads/master": {
> +                u"object": {
> +                    u"sha1": u"1111111111111111111111111111111111111111",
> +                    u"type": u"commit",
> +                    },
> +                },
> +            u"refs/heads/foo": {
> +                u"object": {
> +                    u"sha1": foo_sha1,
> +                    u"type": u"commit",
> +                    },
> +                },
> +            u"refs/tags/1.0": {
> +                u"object": {
> +                    u"sha1": master_sha1,
> +                    u"type": u"commit",
> +                    },
> +                },
> +            })
> +        hosting_client.get_commits = FakeMethod(result=[
> +            {
> +                u"sha1": u"1111111111111111111111111111111111111111",
> +                u"message": u"tip of master",
> +                u"author": {
> +                    u"name": author.displayname,
> +                    u"email": author_email,
> +                    u"time": int((author_date - epoch).total_seconds()),
> +                    },
> +                u"committer": {
> +                    u"name": u"New Person",
> +                    u"email": u"new-person@xxxxxxxxxxx",
> +                    u"time": int((committer_date - epoch).total_seconds()),
> +                    },
> +                u"parents": [],
> +                u"tree": unicode(hashlib.sha1("").hexdigest()),
> +                }])
> +        refs_to_upsert, refs_to_remove = repository.planRefChanges(
> +            hosting_client, "dummy")
> +
> +        expected_oids = [
> +            u"1111111111111111111111111111111111111111", foo_sha1, master_sha1]
> +        [(_, observed_oids)] = hosting_client.get_commits.extract_args()
> +        self.assertContentEqual(expected_oids, observed_oids)
> +        expected_author_addr = u"%s <%s>" % (author.displayname, author_email)
> +        [expected_author] = getUtility(IRevisionSet).acquireRevisionAuthors(
> +            [expected_author_addr]).values()
> +        expected_committer_addr = u"New Person <new-person@xxxxxxxxxxx>"
> +        [expected_committer] = getUtility(IRevisionSet).acquireRevisionAuthors(
> +            [expected_committer_addr]).values()
> +        expected_upsert = {
> +            u"refs/heads/master": {
> +                u"sha1": u"1111111111111111111111111111111111111111",
> +                u"type": GitObjectType.COMMIT,
> +                u"author": removeSecurityProxy(expected_author).id,
> +                u"author_addr": expected_author_addr,
> +                u"author_date": author_date,
> +                u"committer": removeSecurityProxy(expected_committer).id,
> +                u"committer_addr": expected_committer_addr,
> +                u"committer_date": committer_date,
> +                u"commit_message": u"tip of master",
> +                },
> +            u"refs/heads/foo": {
> +                u"sha1": unicode(hashlib.sha1(u"refs/heads/foo").hexdigest()),
> +                u"type": GitObjectType.COMMIT,
> +                },
> +            u"refs/tags/1.0": {
> +                u"sha1": unicode(
> +                    hashlib.sha1(u"refs/heads/master").hexdigest()),
> +                u"type": GitObjectType.COMMIT,
> +                },
> +            }
> +        self.assertEqual(expected_upsert, refs_to_upsert)
> +        self.assertEqual(set([u"refs/heads/bar"]), refs_to_remove)
> +
>      def test_synchroniseRefs(self):
>          # synchroniseRefs copes with synchronising a repository where some
>          # refs have been created, some deleted, and some changed.
> @@ -542,28 +635,24 @@
>          paths = (u"refs/heads/master", u"refs/heads/foo", u"refs/heads/bar")
>          self.factory.makeGitRefs(repository=repository, paths=paths)
>          self.assertRefsMatch(repository.refs, repository, paths)
> -        repository.synchroniseRefs({
> +        refs_to_upsert = {
>              u"refs/heads/master": {
> -                u"object": {
> -                    u"sha1": u"1111111111111111111111111111111111111111",
> -                    u"type": u"commit",
> -                    },
> +                u"sha1": u"1111111111111111111111111111111111111111",
> +                u"type": GitObjectType.COMMIT,
>                  },
>              u"refs/heads/foo": {
> -                u"object": {
> -                    u"sha1": repository.getRefByPath(
> -                        u"refs/heads/foo").commit_sha1,
> -                    u"type": u"commit",
> -                    },
> +                u"sha1": repository.getRefByPath(
> +                    u"refs/heads/foo").commit_sha1,
> +                u"type": GitObjectType.COMMIT,
>                  },
>              u"refs/tags/1.0": {
> -                u"object": {
> -                    u"sha1": repository.getRefByPath(
> -                        u"refs/heads/master").commit_sha1,
> -                    u"type": u"commit",
> -                    },
> +                u"sha1": repository.getRefByPath(
> +                    u"refs/heads/master").commit_sha1,
> +                u"type": GitObjectType.COMMIT,
>                  },
> -            })
> +            }
> +        refs_to_remove = set([u"refs/heads/bar"])
> +        repository.synchroniseRefs(refs_to_upsert, refs_to_remove)
>          expected_sha1s = [
>              (u"refs/heads/master",
>               u"1111111111111111111111111111111111111111"),
> 


-- 
https://code.launchpad.net/~cjwatson/launchpad/git-ref-scanner-commits/+merge/253501
Your team Launchpad code reviewers is subscribed to branch lp:launchpad.


References