launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #18167
[Merge] lp:~cjwatson/launchpad/git-ref-scanner-commits into lp:launchpad
Colin Watson has proposed merging lp:~cjwatson/launchpad/git-ref-scanner-commits into lp:launchpad.
Commit message:
Scan author/committer/commit-message information from the tip commit of Git references.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
Related bugs:
Bug #1032731 in Launchpad itself: "Support for Launchpad-hosted Git repositories"
https://bugs.launchpad.net/launchpad/+bug/1032731
For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/git-ref-scanner-commits/+merge/253501
Scan author/committer/commit-message information from the tip commit of Git references.
I ended up refactoring GitRepository.synchroniseRefs a bit in the process; it now has a separate planning stage that takes a GitHostingClient so that it can go off and fetch commit information. The method split is mainly to make it easier to test.
--
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/git-ref-scanner-commits into lp:launchpad.
=== modified file 'lib/lp/code/errors.py'
--- lib/lp/code/errors.py 2015-03-12 15:21:27 +0000
+++ lib/lp/code/errors.py 2015-03-19 11:31:11 +0000
@@ -36,7 +36,7 @@
'GitRepositoryCreatorNotMemberOfOwnerTeam',
'GitRepositoryCreatorNotOwner',
'GitRepositoryExists',
- 'GitRepositoryRefScanFault',
+ 'GitRepositoryScanFault',
'GitTargetError',
'InvalidBranchMergeProposal',
'InvalidMergeQueueConfig',
@@ -382,8 +382,8 @@
"""Raised when there is a hosting fault creating a Git repository."""
-class GitRepositoryRefScanFault(Exception):
- """Raised when there is a fault getting the refs for a repository."""
+class GitRepositoryScanFault(Exception):
+ """Raised when there is a fault scanning a repository."""
class GitTargetError(Exception):
=== modified file 'lib/lp/code/githosting.py'
--- lib/lp/code/githosting.py 2015-03-12 15:21:27 +0000
+++ lib/lp/code/githosting.py 2015-03-19 11:31:11 +0000
@@ -15,7 +15,7 @@
from lp.code.errors import (
GitRepositoryCreationFault,
- GitRepositoryRefScanFault,
+ GitRepositoryScanFault,
)
@@ -60,13 +60,37 @@
urlutils.join(self.endpoint, "repo", path, "refs"),
timeout=self.timeout)
except Exception as e:
- raise GitRepositoryRefScanFault(
+ raise GitRepositoryScanFault(
"Failed to get refs from Git repository: %s" % unicode(e))
if response.status_code != 200:
- raise GitRepositoryRefScanFault(
+ raise GitRepositoryScanFault(
"Failed to get refs from Git repository: %s" % response.text)
try:
return response.json()
except ValueError as e:
- raise GitRepositoryRefScanFault(
+ raise GitRepositoryScanFault(
"Failed to decode ref-scan response: %s" % unicode(e))
+
+ def get_commits(self, path, commit_oids):
+ try:
+ # XXX cjwatson 2015-03-01: Once we're on requests >= 2.4.2, we
+ # should just use post(json=) and drop the explicit Content-Type
+ # header.
+ response = self._makeSession().post(
+ urlutils.join(self.endpoint, "repo", path, "commits"),
+ headers={"Content-Type": "application/json"},
+ data=json.dumps(commit_oids),
+ timeout=self.timeout)
+ except Exception as e:
+ raise GitRepositoryScanFault(
+ "Failed to get commit details from Git repository: %s" %
+ unicode(e))
+ if response.status_code != 200:
+ raise GitRepositoryScanFault(
+ "Failed to get commit details from Git repository: %s" %
+ response.text)
+ try:
+ return response.json()
+ except ValueError as e:
+ raise GitRepositoryScanFault(
+ "Failed to decode commit-scan response: %s" % unicode(e))
=== modified file 'lib/lp/code/interfaces/gitref.py'
--- lib/lp/code/interfaces/gitref.py 2015-03-13 14:15:24 +0000
+++ lib/lp/code/interfaces/gitref.py 2015-03-19 11:31:11 +0000
@@ -15,6 +15,8 @@
)
from zope.schema import (
Choice,
+ Datetime,
+ Text,
TextLine,
)
@@ -42,6 +44,24 @@
title=_("Object type"), required=True, readonly=True,
vocabulary=GitObjectType)
+ author = Attribute(
+ "The author of the commit pointed to by this reference.")
+ author_date = Datetime(
+ title=_("The author date of the commit pointed to by this reference."),
+ required=False, readonly=True)
+
+ committer = Attribute(
+ "The committer of the commit pointed to by this reference.")
+ committer_date = Datetime(
+ title=_(
+ "The committer date of the commit pointed to by this reference."),
+ required=False, readonly=True)
+
+ commit_message = Text(
+ title=_(
+ "The commit message of the commit pointed to by this reference."),
+ required=False, readonly=True)
+
display_name = TextLine(
title=_("Display name"), required=True, readonly=True,
description=_("Display name of the reference."))
=== modified file 'lib/lp/code/interfaces/gitrepository.py'
--- lib/lp/code/interfaces/gitrepository.py 2015-03-17 10:42:24 +0000
+++ lib/lp/code/interfaces/gitrepository.py 2015-03-19 11:31:11 +0000
@@ -213,12 +213,21 @@
:params paths: An iterable of paths.
"""
- def synchroniseRefs(hosting_refs, logger=None):
+ def planRefChanges(hosting_client, hosting_path, logger=None):
+ """Plan ref changes based on information from the hosting service.
+
+ :param hosting_client: A `GitHostingClient`.
+ :param hosting_path: A path on the hosting service.
+ :param logger: An optional logger.
+ """
+
+ def synchroniseRefs(refs_to_upsert, refs_to_remove):
"""Synchronise references with those from the hosting service.
- :param hosting_refs: A dictionary of reference information returned
- from the hosting service's `/repo/PATH/refs` collection.
- :param logger: An optional logger.
+ :param refs_to_upsert: A dictionary mapping ref paths to
+ dictionaries of their fields; these refs will be created or
+ updated as appropriate.
+ :param refs_to_remove: A set of ref paths to remove.
"""
def setOwnerDefault(value):
=== modified file 'lib/lp/code/model/gitjob.py'
--- lib/lp/code/model/gitjob.py 2015-03-17 10:42:24 +0000
+++ lib/lp/code/model/gitjob.py 2015-03-19 11:31:11 +0000
@@ -189,8 +189,10 @@
LockType.GIT_REF_SCAN, self.repository.id,
Store.of(self.repository)):
hosting_path = self.repository.getInternalPath()
- self.repository.synchroniseRefs(
- self._hosting_client.get_refs(hosting_path), logger=log)
+ refs_to_upsert, refs_to_remove = (
+ self.repository.planRefChanges(
+ self._hosting_client, hosting_path, logger=log))
+ self.repository.synchroniseRefs(refs_to_upsert, refs_to_remove)
except LostObjectError:
log.info(
"Skipping repository %s because it has been deleted." %
=== modified file 'lib/lp/code/model/gitref.py'
--- lib/lp/code/model/gitref.py 2015-03-13 14:15:24 +0000
+++ lib/lp/code/model/gitref.py 2015-03-19 11:31:11 +0000
@@ -6,7 +6,9 @@
'GitRef',
]
+import pytz
from storm.locals import (
+ DateTime,
Int,
Reference,
Unicode,
@@ -36,6 +38,18 @@
object_type = EnumCol(enum=GitObjectType, notNull=True)
+ author_id = Int(name='author', allow_none=True)
+ author = Reference(author_id, 'RevisionAuthor.id')
+ author_date = DateTime(
+ name='author_date', tzinfo=pytz.UTC, allow_none=True)
+
+ committer_id = Int(name='committer', allow_none=True)
+ committer = Reference(committer_id, 'RevisionAuthor.id')
+ committer_date = DateTime(
+ name='committer_date', tzinfo=pytz.UTC, allow_none=True)
+
+ commit_message = Unicode(name='commit_message', allow_none=True)
+
@property
def display_name(self):
return self.path.split("/", 2)[-1]
=== modified file 'lib/lp/code/model/gitrepository.py'
--- lib/lp/code/model/gitrepository.py 2015-03-17 10:42:24 +0000
+++ lib/lp/code/model/gitrepository.py 2015-03-19 11:31:11 +0000
@@ -8,6 +8,8 @@
'GitRepositorySet',
]
+from datetime import datetime
+import email
from itertools import chain
from bzrlib import urlutils
@@ -68,6 +70,7 @@
IGitRepositorySet,
user_has_special_git_repository_access,
)
+from lp.code.interfaces.revision import IRevisionSet
from lp.code.model.gitref import GitRef
from lp.registry.enums import PersonVisibility
from lp.registry.errors import CannotChangeInformationType
@@ -358,16 +361,28 @@
store.flush()
# Try a bulk update first.
- column_names = ["repository_id", "path", "commit_sha1", "object_type"]
+ column_names = [
+ "repository_id", "path", "commit_sha1", "object_type",
+ "author_id", "author_date", "committer_id", "committer_date",
+ "commit_message",
+ ]
column_types = [
("repository", "integer"),
("path", "text"),
("commit_sha1", "character(40)"),
("object_type", "integer"),
+ ("author", "integer"),
+ ("author_date", "timestamp without time zone"),
+ ("committer", "integer"),
+ ("committer_date", "timestamp without time zone"),
+ ("commit_message", "text"),
]
columns = [getattr(GitRef, name) for name in column_names]
values = [
- (self.id, path, info["sha1"], info["type"])
+ (self.id, path, info["sha1"], info["type"],
+ info.get("author"), info.get("author_date"),
+ info.get("committer"), info.get("committer_date"),
+ info.get("commit_message"))
for path, info in refs_info.items()]
db_values = dbify_values(values)
new_refs_expr = Values("new_refs", column_types, db_values)
@@ -409,23 +424,84 @@
GitRef.repository == self, GitRef.path.is_in(paths)).remove()
del get_property_cache(self).refs
- def synchroniseRefs(self, hosting_refs, logger=None):
+ def planRefChanges(self, hosting_client, hosting_path, logger=None):
"""See `IGitRepository`."""
new_refs = {}
- for path, info in hosting_refs.items():
+ for path, info in hosting_client.get_refs(hosting_path).items():
try:
new_refs[path] = self._convertRefInfo(info)
except ValueError as e:
logger.warning("Unconvertible ref %s %s: %s" % (path, info, e))
+
+ # Plan the changes.
current_refs = {ref.path: ref for ref in self.refs}
refs_to_upsert = {}
for path, info in new_refs.items():
current_ref = current_refs.get(path)
if (current_ref is None or
info["sha1"] != current_ref.commit_sha1 or
- info["type"] != current_ref.object_type):
+ info["type"] != current_ref.object_type or
+ current_ref.author_id is None or
+ current_ref.author is None or
+ current_ref.committer_id is None or
+ current_ref.committer is None or
+ current_ref.commit_message is None):
refs_to_upsert[path] = info
+ oids_to_upsert = {
+ info["sha1"]: path for path, info in refs_to_upsert.items()}
refs_to_remove = set(current_refs) - set(new_refs)
+
+ # Fetch any required commit information, and fill it into the
+ # appropriate info dictionaries.
+ commits = hosting_client.get_commits(
+ hosting_path, list(oids_to_upsert))
+ authors_to_acquire = {}
+ committers_to_acquire = {}
+ for commit in commits:
+ oid = commit["sha1"]
+ if oid in oids_to_upsert:
+ info = refs_to_upsert[oids_to_upsert[oid]]
+ else:
+ # We got a stray commit from the hosting service for some
+ # reason. Just ignore it.
+ continue
+ author = commit.get("author")
+ if author is not None:
+ if "time" in author:
+ info["author_date"] = datetime.fromtimestamp(
+ author["time"], tz=pytz.UTC)
+ if "name" in author and "email" in author:
+ author_addr = email.utils.formataddr(
+ (author["name"], author["email"]))
+ authors_to_acquire[author_addr] = info
+ committer = commit.get("committer")
+ if committer is not None:
+ if "time" in committer:
+ info["committer_date"] = datetime.fromtimestamp(
+ committer["time"], tz=pytz.UTC)
+ if "name" in committer and "email" in committer:
+ committer_addr = email.utils.formataddr(
+ (committer["name"], committer["email"]))
+ committers_to_acquire[committer_addr] = info
+ if "message" in commit:
+ info["commit_message"] = commit["message"]
+ revision_authors = getUtility(IRevisionSet).acquireRevisionAuthors(
+ list(authors_to_acquire) + list(committers_to_acquire))
+ for addr, revision_author in revision_authors.items():
+ # Removing the security proxy here is safe because
+ # RevisionAuthors are always public. We need to do this for the
+ # sake of dbify_value later.
+ if addr in authors_to_acquire:
+ authors_to_acquire[addr]["author"] = removeSecurityProxy(
+ revision_author).id
+ if addr in committers_to_acquire:
+ committers_to_acquire[addr]["committer"] = removeSecurityProxy(
+ revision_author).id
+
+ return refs_to_upsert, refs_to_remove
+
+ def synchroniseRefs(self, refs_to_upsert, refs_to_remove):
+ """See `IGitRepository`."""
if refs_to_upsert:
self.createOrUpdateRefs(refs_to_upsert)
if refs_to_remove:
=== modified file 'lib/lp/code/model/tests/test_gitjob.py'
--- lib/lp/code/model/tests/test_gitjob.py 2015-03-17 10:51:15 +0000
+++ lib/lp/code/model/tests/test_gitjob.py 2015-03-19 11:31:11 +0000
@@ -5,8 +5,13 @@
__metaclass__ = type
+from datetime import (
+ datetime,
+ timedelta,
+ )
import hashlib
+import pytz
from testtools.matchers import (
MatchesSetwise,
MatchesStructure,
@@ -25,7 +30,10 @@
GitRefScanJob,
)
from lp.services.features.testing import FeatureFixture
-from lp.testing import TestCaseWithFactory
+from lp.testing import (
+ TestCaseWithFactory,
+ time_counter,
+ )
from lp.testing.dbuser import dbuser
from lp.testing.fakemethod import FakeMethod
from lp.testing.layers import (
@@ -72,6 +80,27 @@
}}
for path in paths}
+ @staticmethod
+ def makeFakeCommits(author, author_date_gen, paths):
+ epoch = datetime.fromtimestamp(0, tz=pytz.UTC)
+ dates = {path: next(author_date_gen) for path in paths}
+ return [{
+ "sha1": hashlib.sha1(path).hexdigest(),
+ "message": "tip of %s" % path,
+ "author": {
+ "name": author.displayname,
+ "email": author.preferredemail.email,
+ "time": int((dates[path] - epoch).total_seconds()),
+ },
+ "committer": {
+ "name": author.displayname,
+ "email": author.preferredemail.email,
+ "time": int((dates[path] - epoch).total_seconds()),
+ },
+ "parents": [],
+ "tree": hashlib.sha1("").hexdigest(),
+ } for path in paths]
+
def assertRefsMatch(self, refs, repository, paths):
matchers = [
MatchesStructure.byEquality(
@@ -104,6 +133,11 @@
paths = (u"refs/heads/master", u"refs/tags/1.0")
job._hosting_client.get_refs = FakeMethod(
result=self.makeFakeRefs(paths))
+ author = repository.owner
+ author_date_start = datetime(2015, 01, 01, tzinfo=pytz.UTC)
+ author_date_gen = time_counter(author_date_start, timedelta(days=1))
+ job._hosting_client.get_commits = FakeMethod(
+ result=self.makeFakeCommits(author, author_date_gen, paths))
with dbuser("branchscanner"):
job.run()
self.assertRefsMatch(repository.refs, repository, paths)
@@ -113,6 +147,7 @@
job = GitRefScanJob.create(repository)
job._hosting_client.get_refs = FakeMethod(
result={u"refs/heads/master": {}})
+ job._hosting_client.get_commits = FakeMethod(result=[])
expected_message = (
'Unconvertible ref refs/heads/master {}: '
'ref info does not contain "object" key')
=== modified file 'lib/lp/code/model/tests/test_gitrepository.py'
--- lib/lp/code/model/tests/test_gitrepository.py 2015-03-17 10:47:59 +0000
+++ lib/lp/code/model/tests/test_gitrepository.py 2015-03-19 11:31:11 +0000
@@ -43,6 +43,7 @@
IGitRepository,
IGitRepositorySet,
)
+from lp.code.interfaces.revision import IRevisionSet
from lp.code.model.gitrepository import GitRepository
from lp.registry.enums import (
BranchSharingPolicy,
@@ -72,6 +73,7 @@
TestCaseWithFactory,
verifyObject,
)
+from lp.testing.fakemethod import FakeMethod
from lp.testing.layers import (
DatabaseFunctionalLayer,
ZopelessDatabaseLayer,
@@ -535,6 +537,93 @@
object_type=GitObjectType.BLOB,
))
+ def test_planRefChanges(self):
+ # planRefChanges copes with planning changes to refs in a repository
+ # where some refs have been created, some deleted, and some changed.
+ repository = self.factory.makeGitRepository()
+ paths = (u"refs/heads/master", u"refs/heads/foo", u"refs/heads/bar")
+ self.factory.makeGitRefs(repository=repository, paths=paths)
+ self.assertRefsMatch(repository.refs, repository, paths)
+ master_sha1 = repository.getRefByPath(u"refs/heads/master").commit_sha1
+ foo_sha1 = repository.getRefByPath(u"refs/heads/foo").commit_sha1
+ author = self.factory.makePerson()
+ with person_logged_in(author):
+ author_email = author.preferredemail.email
+ epoch = datetime.fromtimestamp(0, tz=pytz.UTC)
+ author_date = datetime(2015, 1, 1, tzinfo=pytz.UTC)
+ committer_date = datetime(2015, 1, 2, tzinfo=pytz.UTC)
+ hosting_client = FakeMethod()
+ hosting_client.get_refs = FakeMethod(result={
+ u"refs/heads/master": {
+ u"object": {
+ u"sha1": u"1111111111111111111111111111111111111111",
+ u"type": u"commit",
+ },
+ },
+ u"refs/heads/foo": {
+ u"object": {
+ u"sha1": foo_sha1,
+ u"type": u"commit",
+ },
+ },
+ u"refs/tags/1.0": {
+ u"object": {
+ u"sha1": master_sha1,
+ u"type": u"commit",
+ },
+ },
+ })
+ hosting_client.get_commits = FakeMethod(result=[
+ {
+ u"sha1": u"1111111111111111111111111111111111111111",
+ u"message": u"tip of master",
+ u"author": {
+ u"name": author.displayname,
+ u"email": author_email,
+ u"time": int((author_date - epoch).total_seconds()),
+ },
+ u"committer": {
+ u"name": u"New Person",
+ u"email": u"new-person@xxxxxxxxxxx",
+ u"time": int((committer_date - epoch).total_seconds()),
+ },
+ u"parents": [],
+ u"tree": unicode(hashlib.sha1("").hexdigest()),
+ }])
+ refs_to_upsert, refs_to_remove = repository.planRefChanges(
+ hosting_client, "dummy")
+
+ expected_oids = [
+ u"1111111111111111111111111111111111111111", foo_sha1, master_sha1]
+ [(_, observed_oids)] = hosting_client.get_commits.extract_args()
+ self.assertContentEqual(expected_oids, observed_oids)
+ [expected_author] = getUtility(IRevisionSet).acquireRevisionAuthors(
+ ["%s <%s>" % (author.displayname, author_email)]).values()
+ [expected_committer] = getUtility(IRevisionSet).acquireRevisionAuthors(
+ ["New Person <new-person@xxxxxxxxxxx>"]).values()
+ expected_upsert = {
+ u"refs/heads/master": {
+ u"sha1": u"1111111111111111111111111111111111111111",
+ u"type": GitObjectType.COMMIT,
+ u"author": removeSecurityProxy(expected_author).id,
+ u"author_date": author_date,
+ u"committer": removeSecurityProxy(expected_committer).id,
+ u"committer_date": committer_date,
+ u"commit_message": u"tip of master",
+ },
+ u"refs/heads/foo": {
+ u"sha1": unicode(hashlib.sha1(u"refs/heads/foo").hexdigest()),
+ u"type": GitObjectType.COMMIT,
+ },
+ u"refs/tags/1.0": {
+ u"sha1": unicode(
+ hashlib.sha1(u"refs/heads/master").hexdigest()),
+ u"type": GitObjectType.COMMIT,
+ },
+ }
+ self.assertEqual(expected_upsert, refs_to_upsert)
+ self.assertEqual(set([u"refs/heads/bar"]), refs_to_remove)
+
def test_synchroniseRefs(self):
# synchroniseRefs copes with synchronising a repository where some
# refs have been created, some deleted, and some changed.
@@ -542,28 +631,24 @@
paths = (u"refs/heads/master", u"refs/heads/foo", u"refs/heads/bar")
self.factory.makeGitRefs(repository=repository, paths=paths)
self.assertRefsMatch(repository.refs, repository, paths)
- repository.synchroniseRefs({
+ refs_to_upsert = {
u"refs/heads/master": {
- u"object": {
- u"sha1": u"1111111111111111111111111111111111111111",
- u"type": u"commit",
- },
+ u"sha1": u"1111111111111111111111111111111111111111",
+ u"type": GitObjectType.COMMIT,
},
u"refs/heads/foo": {
- u"object": {
- u"sha1": repository.getRefByPath(
- u"refs/heads/foo").commit_sha1,
- u"type": u"commit",
- },
+ u"sha1": repository.getRefByPath(
+ u"refs/heads/foo").commit_sha1,
+ u"type": GitObjectType.COMMIT,
},
u"refs/tags/1.0": {
- u"object": {
- u"sha1": repository.getRefByPath(
- u"refs/heads/master").commit_sha1,
- u"type": u"commit",
- },
+ u"sha1": repository.getRefByPath(
+ u"refs/heads/master").commit_sha1,
+ u"type": GitObjectType.COMMIT,
},
- })
+ }
+ refs_to_remove = set([u"refs/heads/bar"])
+ repository.synchroniseRefs(refs_to_upsert, refs_to_remove)
expected_sha1s = [
(u"refs/heads/master",
u"1111111111111111111111111111111111111111"),
Follow ups