launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #18178
Re: [Merge] lp:~cjwatson/launchpad/git-ref-scanner-commits into lp:launchpad
Review: Approve code
Diff comments:
> === modified file 'lib/lp/code/errors.py'
> --- lib/lp/code/errors.py 2015-03-12 15:21:27 +0000
> +++ lib/lp/code/errors.py 2015-03-19 15:42:21 +0000
> @@ -36,7 +36,7 @@
> 'GitRepositoryCreatorNotMemberOfOwnerTeam',
> 'GitRepositoryCreatorNotOwner',
> 'GitRepositoryExists',
> - 'GitRepositoryRefScanFault',
> + 'GitRepositoryScanFault',
> 'GitTargetError',
> 'InvalidBranchMergeProposal',
> 'InvalidMergeQueueConfig',
> @@ -382,8 +382,8 @@
> """Raised when there is a hosting fault creating a Git repository."""
>
>
> -class GitRepositoryRefScanFault(Exception):
> - """Raised when there is a fault getting the refs for a repository."""
> +class GitRepositoryScanFault(Exception):
> + """Raised when there is a fault scanning a repository."""
>
>
> class GitTargetError(Exception):
>
> === modified file 'lib/lp/code/githosting.py'
> --- lib/lp/code/githosting.py 2015-03-12 15:21:27 +0000
> +++ lib/lp/code/githosting.py 2015-03-19 15:42:21 +0000
> @@ -15,7 +15,7 @@
>
> from lp.code.errors import (
> GitRepositoryCreationFault,
> - GitRepositoryRefScanFault,
> + GitRepositoryScanFault,
> )
>
>
> @@ -60,13 +60,39 @@
> urlutils.join(self.endpoint, "repo", path, "refs"),
> timeout=self.timeout)
> except Exception as e:
> - raise GitRepositoryRefScanFault(
> + raise GitRepositoryScanFault(
> "Failed to get refs from Git repository: %s" % unicode(e))
> if response.status_code != 200:
> - raise GitRepositoryRefScanFault(
> + raise GitRepositoryScanFault(
> "Failed to get refs from Git repository: %s" % response.text)
> try:
> return response.json()
> except ValueError as e:
> - raise GitRepositoryRefScanFault(
> + raise GitRepositoryScanFault(
> "Failed to decode ref-scan response: %s" % unicode(e))
> +
> + def get_commits(self, path, commit_oids, logger=None):
I don't object to PEP 8 naming, but there's already a _makeSession in this class.
> + try:
> + # XXX cjwatson 2015-03-01: Once we're on requests >= 2.4.2, we
> + # should just use post(json=) and drop the explicit Content-Type
> + # header.
> + if logger is not None:
> + logger.info("Requesting commit details for %s" % commit_oids)
I'd cast commit_oids to a list at the top of method for consistency and to make generators work.
> + response = self._makeSession().post(
> + urlutils.join(self.endpoint, "repo", path, "commits"),
> + headers={"Content-Type": "application/json"},
> + data=json.dumps({"commits": commit_oids}),
> + timeout=self.timeout)
> + except Exception as e:
> + raise GitRepositoryScanFault(
> + "Failed to get commit details from Git repository: %s" %
> + unicode(e))
> + if response.status_code != 200:
> + raise GitRepositoryScanFault(
> + "Failed to get commit details from Git repository: %s" %
> + response.text)
> + try:
> + return response.json()
> + except ValueError as e:
> + raise GitRepositoryScanFault(
> + "Failed to decode commit-scan response: %s" % unicode(e))
>
> === modified file 'lib/lp/code/interfaces/gitref.py'
> --- lib/lp/code/interfaces/gitref.py 2015-03-13 14:15:24 +0000
> +++ lib/lp/code/interfaces/gitref.py 2015-03-19 15:42:21 +0000
> @@ -15,6 +15,8 @@
> )
> from zope.schema import (
> Choice,
> + Datetime,
> + Text,
> TextLine,
> )
>
> @@ -42,6 +44,24 @@
> title=_("Object type"), required=True, readonly=True,
> vocabulary=GitObjectType)
>
> + author = Attribute(
> + "The author of the commit pointed to by this reference.")
> + author_date = Datetime(
> + title=_("The author date of the commit pointed to by this reference."),
> + required=False, readonly=True)
> +
> + committer = Attribute(
> + "The committer of the commit pointed to by this reference.")
> + committer_date = Datetime(
> + title=_(
> + "The committer date of the commit pointed to by this reference."),
> + required=False, readonly=True)
> +
> + commit_message = Text(
> + title=_(
> + "The commit message of the commit pointed to by this reference."),
> + required=False, readonly=True)
> +
> display_name = TextLine(
> title=_("Display name"), required=True, readonly=True,
> description=_("Display name of the reference."))
>
> === modified file 'lib/lp/code/interfaces/gitrepository.py'
> --- lib/lp/code/interfaces/gitrepository.py 2015-03-17 10:42:24 +0000
> +++ lib/lp/code/interfaces/gitrepository.py 2015-03-19 15:42:21 +0000
> @@ -213,12 +213,21 @@
> :params paths: An iterable of paths.
> """
>
> - def synchroniseRefs(hosting_refs, logger=None):
> + def planRefChanges(hosting_client, hosting_path, logger=None):
> + """Plan ref changes based on information from the hosting service.
> +
> + :param hosting_client: A `GitHostingClient`.
> + :param hosting_path: A path on the hosting service.
> + :param logger: An optional logger.
> + """
I'd document the return value.
> +
> + def synchroniseRefs(refs_to_upsert, refs_to_remove):
> """Synchronise references with those from the hosting service.
>
> - :param hosting_refs: A dictionary of reference information returned
> - from the hosting service's `/repo/PATH/refs` collection.
> - :param logger: An optional logger.
> + :param refs_to_upsert: A dictionary mapping ref paths to
> + dictionaries of their fields; these refs will be created or
> + updated as appropriate.
> + :param refs_to_remove: A set of ref paths to remove.
> """
>
> def setOwnerDefault(value):
>
> === modified file 'lib/lp/code/model/gitjob.py'
> --- lib/lp/code/model/gitjob.py 2015-03-17 10:42:24 +0000
> +++ lib/lp/code/model/gitjob.py 2015-03-19 15:42:21 +0000
> @@ -189,8 +189,10 @@
> LockType.GIT_REF_SCAN, self.repository.id,
> Store.of(self.repository)):
> hosting_path = self.repository.getInternalPath()
> - self.repository.synchroniseRefs(
> - self._hosting_client.get_refs(hosting_path), logger=log)
> + refs_to_upsert, refs_to_remove = (
> + self.repository.planRefChanges(
> + self._hosting_client, hosting_path, logger=log))
> + self.repository.synchroniseRefs(refs_to_upsert, refs_to_remove)
> except LostObjectError:
> log.info(
> "Skipping repository %s because it has been deleted." %
>
> === modified file 'lib/lp/code/model/gitref.py'
> --- lib/lp/code/model/gitref.py 2015-03-13 14:15:24 +0000
> +++ lib/lp/code/model/gitref.py 2015-03-19 15:42:21 +0000
> @@ -6,7 +6,9 @@
> 'GitRef',
> ]
>
> +import pytz
> from storm.locals import (
> + DateTime,
> Int,
> Reference,
> Unicode,
> @@ -36,6 +38,18 @@
>
> object_type = EnumCol(enum=GitObjectType, notNull=True)
>
> + author_id = Int(name='author', allow_none=True)
> + author = Reference(author_id, 'RevisionAuthor.id')
> + author_date = DateTime(
> + name='author_date', tzinfo=pytz.UTC, allow_none=True)
> +
> + committer_id = Int(name='committer', allow_none=True)
> + committer = Reference(committer_id, 'RevisionAuthor.id')
> + committer_date = DateTime(
> + name='committer_date', tzinfo=pytz.UTC, allow_none=True)
> +
> + commit_message = Unicode(name='commit_message', allow_none=True)
> +
> @property
> def display_name(self):
> return self.path.split("/", 2)[-1]
>
> === modified file 'lib/lp/code/model/gitrepository.py'
> --- lib/lp/code/model/gitrepository.py 2015-03-17 10:42:24 +0000
> +++ lib/lp/code/model/gitrepository.py 2015-03-19 15:42:21 +0000
> @@ -8,6 +8,8 @@
> 'GitRepositorySet',
> ]
>
> +from datetime import datetime
> +import email
> from itertools import chain
>
> from bzrlib import urlutils
> @@ -68,6 +70,7 @@
> IGitRepositorySet,
> user_has_special_git_repository_access,
> )
> +from lp.code.interfaces.revision import IRevisionSet
> from lp.code.model.gitref import GitRef
> from lp.registry.enums import PersonVisibility
> from lp.registry.errors import CannotChangeInformationType
> @@ -358,16 +361,28 @@
> store.flush()
>
> # Try a bulk update first.
> - column_names = ["repository_id", "path", "commit_sha1", "object_type"]
> + column_names = [
> + "repository_id", "path", "commit_sha1", "object_type",
> + "author_id", "author_date", "committer_id", "committer_date",
> + "commit_message",
> + ]
> column_types = [
> ("repository", "integer"),
> ("path", "text"),
> ("commit_sha1", "character(40)"),
> ("object_type", "integer"),
> + ("author", "integer"),
> + ("author_date", "timestamp without time zone"),
> + ("committer", "integer"),
> + ("committer_date", "timestamp without time zone"),
> + ("commit_message", "text"),
> ]
> columns = [getattr(GitRef, name) for name in column_names]
> values = [
> - (self.id, path, info["sha1"], info["type"])
> + (self.id, path, info["sha1"], info["type"],
> + info.get("author"), info.get("author_date"),
> + info.get("committer"), info.get("committer_date"),
> + info.get("commit_message"))
> for path, info in refs_info.items()]
> db_values = dbify_values(values)
> new_refs_expr = Values("new_refs", column_types, db_values)
> @@ -409,23 +424,86 @@
> GitRef.repository == self, GitRef.path.is_in(paths)).remove()
> del get_property_cache(self).refs
>
> - def synchroniseRefs(self, hosting_refs, logger=None):
> + def planRefChanges(self, hosting_client, hosting_path, logger=None):
> """See `IGitRepository`."""
> new_refs = {}
> - for path, info in hosting_refs.items():
> + for path, info in hosting_client.get_refs(hosting_path).items():
> try:
> new_refs[path] = self._convertRefInfo(info)
> except ValueError as e:
> - logger.warning("Unconvertible ref %s %s: %s" % (path, info, e))
> + if logger is not None:
> + logger.warning(
> + "Unconvertible ref %s %s: %s" % (path, info, e))
> +
> + # Plan the changes.
> current_refs = {ref.path: ref for ref in self.refs}
> refs_to_upsert = {}
> for path, info in new_refs.items():
> current_ref = current_refs.get(path)
> if (current_ref is None or
> info["sha1"] != current_ref.commit_sha1 or
> - info["type"] != current_ref.object_type):
> + info["type"] != current_ref.object_type or
> + current_ref.author_id is None or
> + current_ref.author_date is None or
> + current_ref.committer_id is None or
> + current_ref.committer_date is None or
> + current_ref.commit_message is None):
This will detect any ref that points to a non-commit as updatable.
> refs_to_upsert[path] = info
> + oids_to_upsert = sorted(set(
> + info["sha1"] for info in refs_to_upsert.values()))
> refs_to_remove = set(current_refs) - set(new_refs)
> +
> + # Fetch any required commit information, and fill it into the
> + # appropriate info dictionaries.
> + commits = {
> + commit.get("sha1"): commit
> + for commit in hosting_client.get_commits(
> + hosting_path, oids_to_upsert, logger=logger)}
> + authors_to_acquire = []
> + committers_to_acquire = []
> + for info in refs_to_upsert.values():
> + commit = commits.get(info["sha1"])
> + if commit is None:
> + continue
> + author = commit.get("author")
> + if author is not None:
> + if "time" in author:
> + info["author_date"] = datetime.fromtimestamp(
> + author["time"], tz=pytz.UTC)
> + if "name" in author and "email" in author:
> + author_addr = email.utils.formataddr(
> + (author["name"], author["email"]))
> + info["author_addr"] = author_addr
> + authors_to_acquire.append(author_addr)
> + committer = commit.get("committer")
> + if committer is not None:
> + if "time" in committer:
> + info["committer_date"] = datetime.fromtimestamp(
> + committer["time"], tz=pytz.UTC)
> + if "name" in committer and "email" in committer:
> + committer_addr = email.utils.formataddr(
> + (committer["name"], committer["email"]))
> + info["committer_addr"] = committer_addr
> + committers_to_acquire.append(committer_addr)
> + if "message" in commit:
> + info["commit_message"] = commit["message"]
This method is quite long and complex, so I'd consider splitting this block out into a separate function that augments the info dicts (or an info dict) with detailed commit metadata. It in fact needn't even be in this method at all; it could be applied to refs_to_upsert after the value is returned, simplifying the tests for both sides considerably.
> + revision_authors = getUtility(IRevisionSet).acquireRevisionAuthors(
> + authors_to_acquire + committers_to_acquire)
> + for info in refs_to_upsert.values():
> + # Removing the security proxy here is safe because
> + # RevisionAuthors are always public. We need to do this for the
> + # sake of dbify_value later.
Worth just adding id to the interface instead?
> + author = revision_authors.get(info.get("author_addr"))
> + if author is not None:
> + info["author"] = removeSecurityProxy(author).id
> + committer = revision_authors.get(info.get("committer_addr"))
> + if committer is not None:
> + info["committer"] = removeSecurityProxy(committer).id
> +
> + return refs_to_upsert, refs_to_remove
> +
> + def synchroniseRefs(self, refs_to_upsert, refs_to_remove):
> + """See `IGitRepository`."""
> if refs_to_upsert:
> self.createOrUpdateRefs(refs_to_upsert)
> if refs_to_remove:
>
> === modified file 'lib/lp/code/model/tests/test_gitjob.py'
> --- lib/lp/code/model/tests/test_gitjob.py 2015-03-17 10:51:15 +0000
> +++ lib/lp/code/model/tests/test_gitjob.py 2015-03-19 15:42:21 +0000
> @@ -5,8 +5,13 @@
>
> __metaclass__ = type
>
> +from datetime import (
> + datetime,
> + timedelta,
> + )
> import hashlib
>
> +import pytz
> from testtools.matchers import (
> MatchesSetwise,
> MatchesStructure,
> @@ -25,7 +30,10 @@
> GitRefScanJob,
> )
> from lp.services.features.testing import FeatureFixture
> -from lp.testing import TestCaseWithFactory
> +from lp.testing import (
> + TestCaseWithFactory,
> + time_counter,
> + )
> from lp.testing.dbuser import dbuser
> from lp.testing.fakemethod import FakeMethod
> from lp.testing.layers import (
> @@ -72,6 +80,27 @@
> }}
> for path in paths}
>
> + @staticmethod
> + def makeFakeCommits(author, author_date_gen, paths):
> + epoch = datetime.fromtimestamp(0, tz=pytz.UTC)
> + dates = {path: next(author_date_gen) for path in paths}
> + return [{
> + "sha1": hashlib.sha1(path).hexdigest(),
> + "message": "tip of %s" % path,
> + "author": {
> + "name": author.displayname,
> + "email": author.preferredemail.email,
> + "time": int((dates[path] - epoch).total_seconds()),
> + },
> + "committer": {
> + "name": author.displayname,
> + "email": author.preferredemail.email,
> + "time": int((dates[path] - epoch).total_seconds()),
> + },
> + "parents": [],
> + "tree": hashlib.sha1("").hexdigest(),
> + } for path in paths]
> +
> def assertRefsMatch(self, refs, repository, paths):
> matchers = [
> MatchesStructure.byEquality(
> @@ -104,6 +133,11 @@
> paths = (u"refs/heads/master", u"refs/tags/1.0")
> job._hosting_client.get_refs = FakeMethod(
> result=self.makeFakeRefs(paths))
> + author = repository.owner
> + author_date_start = datetime(2015, 01, 01, tzinfo=pytz.UTC)
> + author_date_gen = time_counter(author_date_start, timedelta(days=1))
> + job._hosting_client.get_commits = FakeMethod(
> + result=self.makeFakeCommits(author, author_date_gen, paths))
> with dbuser("branchscanner"):
> job.run()
> self.assertRefsMatch(repository.refs, repository, paths)
> @@ -113,6 +147,7 @@
> job = GitRefScanJob.create(repository)
> job._hosting_client.get_refs = FakeMethod(
> result={u"refs/heads/master": {}})
> + job._hosting_client.get_commits = FakeMethod(result=[])
> expected_message = (
> 'Unconvertible ref refs/heads/master {}: '
> 'ref info does not contain "object" key')
>
> === modified file 'lib/lp/code/model/tests/test_gitrepository.py'
> --- lib/lp/code/model/tests/test_gitrepository.py 2015-03-17 10:47:59 +0000
> +++ lib/lp/code/model/tests/test_gitrepository.py 2015-03-19 15:42:21 +0000
> @@ -43,6 +43,7 @@
> IGitRepository,
> IGitRepositorySet,
> )
> +from lp.code.interfaces.revision import IRevisionSet
> from lp.code.model.gitrepository import GitRepository
> from lp.registry.enums import (
> BranchSharingPolicy,
> @@ -72,6 +73,7 @@
> TestCaseWithFactory,
> verifyObject,
> )
> +from lp.testing.fakemethod import FakeMethod
> from lp.testing.layers import (
> DatabaseFunctionalLayer,
> ZopelessDatabaseLayer,
> @@ -535,6 +537,97 @@
> object_type=GitObjectType.BLOB,
> ))
>
> + def test_planRefChanges(self):
> + # planRefChanges copes with planning changes to refs in a repository
> + # where some refs have been created, some deleted, and some changed.
> + repository = self.factory.makeGitRepository()
> + paths = (u"refs/heads/master", u"refs/heads/foo", u"refs/heads/bar")
> + self.factory.makeGitRefs(repository=repository, paths=paths)
> + self.assertRefsMatch(repository.refs, repository, paths)
> + master_sha1 = repository.getRefByPath(u"refs/heads/master").commit_sha1
> + foo_sha1 = repository.getRefByPath(u"refs/heads/foo").commit_sha1
> + author = self.factory.makePerson()
> + with person_logged_in(author):
> + author_email = author.preferredemail.email
> + epoch = datetime.fromtimestamp(0, tz=pytz.UTC)
> + author_date = datetime(2015, 1, 1, tzinfo=pytz.UTC)
> + committer_date = datetime(2015, 1, 2, tzinfo=pytz.UTC)
> + hosting_client = FakeMethod()
> + hosting_client.get_refs = FakeMethod(result={
> + u"refs/heads/master": {
> + u"object": {
> + u"sha1": u"1111111111111111111111111111111111111111",
> + u"type": u"commit",
> + },
> + },
> + u"refs/heads/foo": {
> + u"object": {
> + u"sha1": foo_sha1,
> + u"type": u"commit",
> + },
> + },
> + u"refs/tags/1.0": {
> + u"object": {
> + u"sha1": master_sha1,
> + u"type": u"commit",
> + },
> + },
> + })
> + hosting_client.get_commits = FakeMethod(result=[
> + {
> + u"sha1": u"1111111111111111111111111111111111111111",
> + u"message": u"tip of master",
> + u"author": {
> + u"name": author.displayname,
> + u"email": author_email,
> + u"time": int((author_date - epoch).total_seconds()),
> + },
> + u"committer": {
> + u"name": u"New Person",
> + u"email": u"new-person@xxxxxxxxxxx",
> + u"time": int((committer_date - epoch).total_seconds()),
> + },
> + u"parents": [],
> + u"tree": unicode(hashlib.sha1("").hexdigest()),
> + }])
> + refs_to_upsert, refs_to_remove = repository.planRefChanges(
> + hosting_client, "dummy")
> +
> + expected_oids = [
> + u"1111111111111111111111111111111111111111", foo_sha1, master_sha1]
> + [(_, observed_oids)] = hosting_client.get_commits.extract_args()
> + self.assertContentEqual(expected_oids, observed_oids)
> + expected_author_addr = u"%s <%s>" % (author.displayname, author_email)
> + [expected_author] = getUtility(IRevisionSet).acquireRevisionAuthors(
> + [expected_author_addr]).values()
> + expected_committer_addr = u"New Person <new-person@xxxxxxxxxxx>"
> + [expected_committer] = getUtility(IRevisionSet).acquireRevisionAuthors(
> + [expected_committer_addr]).values()
> + expected_upsert = {
> + u"refs/heads/master": {
> + u"sha1": u"1111111111111111111111111111111111111111",
> + u"type": GitObjectType.COMMIT,
> + u"author": removeSecurityProxy(expected_author).id,
> + u"author_addr": expected_author_addr,
> + u"author_date": author_date,
> + u"committer": removeSecurityProxy(expected_committer).id,
> + u"committer_addr": expected_committer_addr,
> + u"committer_date": committer_date,
> + u"commit_message": u"tip of master",
> + },
> + u"refs/heads/foo": {
> + u"sha1": unicode(hashlib.sha1(u"refs/heads/foo").hexdigest()),
> + u"type": GitObjectType.COMMIT,
> + },
> + u"refs/tags/1.0": {
> + u"sha1": unicode(
> + hashlib.sha1(u"refs/heads/master").hexdigest()),
> + u"type": GitObjectType.COMMIT,
> + },
> + }
> + self.assertEqual(expected_upsert, refs_to_upsert)
> + self.assertEqual(set([u"refs/heads/bar"]), refs_to_remove)
> +
> def test_synchroniseRefs(self):
> # synchroniseRefs copes with synchronising a repository where some
> # refs have been created, some deleted, and some changed.
> @@ -542,28 +635,24 @@
> paths = (u"refs/heads/master", u"refs/heads/foo", u"refs/heads/bar")
> self.factory.makeGitRefs(repository=repository, paths=paths)
> self.assertRefsMatch(repository.refs, repository, paths)
> - repository.synchroniseRefs({
> + refs_to_upsert = {
> u"refs/heads/master": {
> - u"object": {
> - u"sha1": u"1111111111111111111111111111111111111111",
> - u"type": u"commit",
> - },
> + u"sha1": u"1111111111111111111111111111111111111111",
> + u"type": GitObjectType.COMMIT,
> },
> u"refs/heads/foo": {
> - u"object": {
> - u"sha1": repository.getRefByPath(
> - u"refs/heads/foo").commit_sha1,
> - u"type": u"commit",
> - },
> + u"sha1": repository.getRefByPath(
> + u"refs/heads/foo").commit_sha1,
> + u"type": GitObjectType.COMMIT,
> },
> u"refs/tags/1.0": {
> - u"object": {
> - u"sha1": repository.getRefByPath(
> - u"refs/heads/master").commit_sha1,
> - u"type": u"commit",
> - },
> + u"sha1": repository.getRefByPath(
> + u"refs/heads/master").commit_sha1,
> + u"type": GitObjectType.COMMIT,
> },
> - })
> + }
> + refs_to_remove = set([u"refs/heads/bar"])
> + repository.synchroniseRefs(refs_to_upsert, refs_to_remove)
> expected_sha1s = [
> (u"refs/heads/master",
> u"1111111111111111111111111111111111111111"),
>
--
https://code.launchpad.net/~cjwatson/launchpad/git-ref-scanner-commits/+merge/253501
Your team Launchpad code reviewers is subscribed to branch lp:launchpad.
References