← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~cjwatson/launchpad/git-ref-scanner into lp:launchpad

 

Colin Watson has proposed merging lp:~cjwatson/launchpad/git-ref-scanner into lp:launchpad.

Commit message:
Add a ref scanning job for Git repositories.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  Bug #1032731 in Launchpad itself: "Support for Launchpad-hosted Git repositories"
  https://bugs.launchpad.net/launchpad/+bug/1032731

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/git-ref-scanner/+merge/252763

Add a ref scanning job for Git repositories.

This needs https://code.launchpad.net/~cjwatson/launchpad/db-git-more/+merge/252672 for the GitJob table and the GitRef.object_type column.  I haven't filled in the other (nullable) GitRef columns here yet; that can wait for a later branch.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/git-ref-scanner into lp:launchpad.
=== modified file 'lib/lp/code/configure.zcml'
--- lib/lp/code/configure.zcml	2015-03-06 16:31:30 +0000
+++ lib/lp/code/configure.zcml	2015-03-12 15:36:07 +0000
@@ -859,6 +859,14 @@
     <allow interface="lp.code.interfaces.gitnamespace.IGitNamespaceSet" />
   </securedutility>
 
+  <!-- GitRef -->
+
+  <class class="lp.code.model.gitref.GitRef">
+    <require
+        permission="launchpad.View"
+        interface="lp.code.interfaces.gitref.IGitRef" />
+  </class>
+
   <!-- GitCollection -->
 
   <class class="lp.code.model.gitcollection.GenericGitCollection">
@@ -933,6 +941,20 @@
   <adapter factory="lp.code.model.gitlookup.DistributionGitTraversable" />
   <adapter factory="lp.code.model.gitlookup.DistributionSourcePackageGitTraversable" />
 
+  <!-- Git-related jobs -->
+  <class class="lp.code.model.gitjob.GitJob">
+    <allow interface="lp.code.interfaces.gitjob.IGitJob" />
+  </class>
+  <securedutility
+      component="lp.code.model.gitjob.GitRefScanJob"
+      provides="lp.code.interfaces.gitjob.IGitRefScanJobSource">
+    <allow interface="lp.code.interfaces.gitjob.IGitRefScanJobSource" />
+  </securedutility>
+  <class class="lp.code.model.gitjob.GitRefScanJob">
+    <allow interface="lp.code.interfaces.gitjob.IGitJob" />
+    <allow interface="lp.code.interfaces.gitjob.IGitRefScanJob" />
+  </class>
+
   <lp:help-folder folder="help" name="+help-code" />
 
   <!-- Diffs -->

=== modified file 'lib/lp/code/enums.py'
--- lib/lp/code/enums.py	2014-02-24 07:19:52 +0000
+++ lib/lp/code/enums.py	2015-03-12 15:36:07 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2011 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2015 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """Enumerations used in the lp/code modules."""
@@ -20,6 +20,7 @@
     'CodeImportReviewStatus',
     'CodeReviewNotificationLevel',
     'CodeReviewVote',
+    'GitObjectType',
     'NON_CVS_RCS_TYPES',
     'RevisionControlSystems',
     'UICreatableBranchType',
@@ -115,6 +116,37 @@
     use_template(BranchType, exclude='IMPORTED')
 
 
+class GitObjectType(DBEnumeratedType):
+    """Git Object Type
+
+    Keep these in sync with the concrete GIT_OBJ_* enum values in libgit2.
+    """
+
+    COMMIT = DBItem(1, """
+        Commit
+
+        A commit object.
+        """)
+
+    TREE = DBItem(2, """
+        Tree
+
+        A tree (directory listing) object.
+        """)
+
+    BLOB = DBItem(3, """
+        Blob
+
+        A file revision object.
+        """)
+
+    TAG = DBItem(4, """
+        Tag
+
+        An annotated tag object.
+        """)
+
+
 class BranchLifecycleStatusFilter(EnumeratedType):
     """Branch Lifecycle Status Filter
 

=== modified file 'lib/lp/code/errors.py'
--- lib/lp/code/errors.py	2015-03-04 18:27:40 +0000
+++ lib/lp/code/errors.py	2015-03-12 15:36:07 +0000
@@ -36,6 +36,7 @@
     'GitRepositoryCreatorNotMemberOfOwnerTeam',
     'GitRepositoryCreatorNotOwner',
     'GitRepositoryExists',
+    'GitRepositoryRefScanFault',
     'GitTargetError',
     'InvalidBranchMergeProposal',
     'InvalidMergeQueueConfig',
@@ -381,6 +382,10 @@
     """Raised when there is a hosting fault creating a Git repository."""
 
 
+class GitRepositoryRefScanFault(Exception):
+    """Raised when there is a fault getting the refs for a repository."""
+
+
 class GitTargetError(Exception):
     """Raised when there is an error determining a Git repository target."""
 

=== modified file 'lib/lp/code/githosting.py'
--- lib/lp/code/githosting.py	2015-03-03 14:51:10 +0000
+++ lib/lp/code/githosting.py	2015-03-12 15:36:07 +0000
@@ -9,11 +9,14 @@
     ]
 
 import json
-from urlparse import urljoin
 
+from bzrlib import urlutils
 import requests
 
-from lp.code.errors import GitRepositoryCreationFault
+from lp.code.errors import (
+    GitRepositoryCreationFault,
+    GitRepositoryRefScanFault,
+    )
 
 
 class GitHostingClient:
@@ -40,7 +43,7 @@
             # should just use post(json=) and drop the explicit Content-Type
             # header.
             response = self._makeSession().post(
-                urljoin(self.endpoint, "repo"),
+                urlutils.join(self.endpoint, "repo"),
                 headers={"Content-Type": "application/json"},
                 data=json.dumps({"repo_path": path, "bare_repo": True}),
                 timeout=self.timeout)
@@ -50,3 +53,20 @@
         if response.status_code != 200:
             raise GitRepositoryCreationFault(
                 "Failed to create Git repository: %s" % response.text)
+
+    def get_refs(self, path):
+        try:
+            response = self._makeSession().get(
+                urlutils.join(self.endpoint, "repo", path, "refs"),
+                timeout=self.timeout)
+        except Exception as e:
+            raise GitRepositoryRefScanFault(
+                "Failed to get refs from Git repository: %s" % unicode(e))
+        if response.status_code != 200:
+            raise GitRepositoryRefScanFault(
+                "Failed to get refs from Git repository: %s" % response.text)
+        try:
+            return response.json()
+        except ValueError as e:
+            raise GitRepositoryRefScanFault(
+                "Failed to decode ref-scan response: %s" % unicode(e))

=== modified file 'lib/lp/code/interfaces/gitapi.py'
--- lib/lp/code/interfaces/gitapi.py	2015-03-04 11:12:06 +0000
+++ lib/lp/code/interfaces/gitapi.py	2015-03-12 15:36:07 +0000
@@ -49,3 +49,11 @@
                 "writable", whose value is True if the requester can push to
                 this repository, otherwise False.
         """
+
+    def notify(translated_path):
+        """Notify of a change to the repository at 'translated_path'.
+
+        :param translated_path: The translated path to the repository.  (We
+            use translated paths here in order to avoid problems with
+            repository names etc. being changed during a push.
+        """

=== added file 'lib/lp/code/interfaces/gitjob.py'
--- lib/lp/code/interfaces/gitjob.py	1970-01-01 00:00:00 +0000
+++ lib/lp/code/interfaces/gitjob.py	2015-03-12 15:36:07 +0000
@@ -0,0 +1,53 @@
+# Copyright 2015 Canonical Ltd.  This software is licensed under the
+# GNU Affero General Public License version 3 (see the file LICENSE).
+
+"""GitJob interfaces."""
+
+__metaclass__ = type
+
+__all__ = [
+    'IGitJob',
+    'IGitRefScanJob',
+    'IGitRefScanJobSource',
+    ]
+
+from lazr.restful.fields import Reference
+from zope.interface import (
+    Attribute,
+    Interface,
+    )
+
+from lp import _
+from lp.code.interfaces.gitrepository import IGitRepository
+from lp.services.job.interfaces.job import (
+    IJob,
+    IJobSource,
+    IRunnableJob,
+    )
+
+
+class IGitJob(Interface):
+    """A job related to a Git repository."""
+
+    job = Reference(
+        title=_("The common Job attributes."), schema=IJob,
+        required=True, readonly=True)
+
+    repository = Reference(
+        title=_("The Git repository to use for this job."),
+        schema=IGitRepository, required=True, readonly=True)
+
+    metadata = Attribute(_("A dict of data about the job."))
+
+
+class IGitRefScanJob(IRunnableJob):
+    """A Job that scans a Git repository for its current list of references."""
+
+
+class IGitRefScanJobSource(IJobSource):
+
+    def create(repository):
+        """Scan a repository for refs.
+
+        :param repository: The database repository to scan.
+        """

=== modified file 'lib/lp/code/interfaces/gitlookup.py'
--- lib/lp/code/interfaces/gitlookup.py	2015-03-05 11:39:06 +0000
+++ lib/lp/code/interfaces/gitlookup.py	2015-03-12 15:36:07 +0000
@@ -90,6 +90,12 @@
         Return the default value if there is no such repository.
         """
 
+    def getByHostingPath(path):
+        """Get information about a given path on the hosting backend.
+
+        :return: An `IGitRepository`, or None.
+        """
+
     def getByUniqueName(unique_name):
         """Find a repository by its unique name.
 

=== added file 'lib/lp/code/interfaces/gitref.py'
--- lib/lp/code/interfaces/gitref.py	1970-01-01 00:00:00 +0000
+++ lib/lp/code/interfaces/gitref.py	2015-03-12 15:36:07 +0000
@@ -0,0 +1,43 @@
+# Copyright 2015 Canonical Ltd.  This software is licensed under the
+# GNU Affero General Public License version 3 (see the file LICENSE).
+
+"""Git reference ("ref") interfaces."""
+
+__metaclass__ = type
+
+__all__ = [
+    'IGitRef',
+    ]
+
+from zope.interface import (
+    Attribute,
+    Interface,
+    )
+from zope.schema import (
+    Choice,
+    TextLine,
+    )
+
+from lp import _
+from lp.code.enums import GitObjectType
+
+
+class IGitRef(Interface):
+    """A reference in a Git repository."""
+
+    repository = Attribute("The Git repository containing this reference.")
+
+    path = TextLine(
+        title=_("Path"), required=True, readonly=True,
+        description=_(
+            "The full path of this reference, e.g. refs/heads/master."))
+
+    commit_sha1 = TextLine(
+        title=_("Commit SHA-1"), required=True, readonly=True,
+        description=_(
+            "The full SHA-1 object name of the commit object referenced by "
+            "this reference."))
+
+    object_type = Choice(
+        title=_("Object type"), required=True, readonly=True,
+        vocabulary=GitObjectType)

=== modified file 'lib/lp/code/interfaces/gitrepository.py'
--- lib/lp/code/interfaces/gitrepository.py	2015-03-06 16:31:30 +0000
+++ lib/lp/code/interfaces/gitrepository.py	2015-03-12 15:36:07 +0000
@@ -186,6 +186,38 @@
             "'lp:' plus a shortcut version of the path via that target.  "
             "Otherwise it is simply 'lp:' plus the unique name.")))
 
+    refs = Attribute("The references present in this repository.")
+
+    def convertRefInfo(info):
+        """Validate and canonicalise ref info from the hosting service.
+
+        :param info: A dict of {"object":
+            {"sha1": sha1, "type": "commit"/"tree"/"blob"/"tag"}}.
+
+        :raises ValueError: if the dict is malformed.
+        :return: A dict of {"sha1": sha1, "type": `GitObjectType`}.
+        """
+
+    def createRefs(refs_info):
+        """Create a set of references in this repository.
+
+        :param refs_info: A dict mapping ref paths to
+            {"sha1": sha1, "type": `GitObjectType`}.
+        """
+
+    def removeRefs(paths):
+        """Remove a set of references in this repository.
+
+        :params paths: An iterable of paths.
+        """
+
+    def updateRef(ref, info):
+        """Update a single reference in this repository.
+
+        :param ref: An `IGitRef`.
+        :param info: {"sha1": sha1, "type": `GitObjectType`}
+        """
+
     def setOwnerDefault(value):
         """Set whether this repository is the default for its owner-target.
 

=== added file 'lib/lp/code/model/gitjob.py'
--- lib/lp/code/model/gitjob.py	1970-01-01 00:00:00 +0000
+++ lib/lp/code/model/gitjob.py	2015-03-12 15:36:07 +0000
@@ -0,0 +1,200 @@
+# Copyright 2015 Canonical Ltd.  This software is licensed under the
+# GNU Affero General Public License version 3 (see the file LICENSE).
+
+__metaclass__ = type
+
+__all__ = [
+    'GitJob',
+    'GitRefScanJob',
+    ]
+
+from lazr.delegates import delegates
+from lazr.enum import (
+    DBEnumeratedType,
+    DBItem,
+    )
+from storm.exceptions import LostObjectError
+from storm.locals import (
+    Int,
+    JSON,
+    Reference,
+    )
+from zope.interface import (
+    classProvides,
+    implements,
+    )
+
+from lp.app.errors import NotFoundError
+from lp.code.githosting import GitHostingClient
+from lp.code.interfaces.gitjob import (
+    IGitJob,
+    IGitRefScanJob,
+    IGitRefScanJobSource,
+    )
+from lp.services.config import config
+from lp.services.database.enumcol import EnumCol
+from lp.services.database.interfaces import (
+    IMasterStore,
+    IStore,
+    )
+from lp.services.database.stormbase import StormBase
+from lp.services.job.model.job import (
+    EnumeratedSubclass,
+    Job,
+    )
+from lp.services.job.runner import BaseRunnableJob
+from lp.services.mail.sendmail import format_address_for_person
+from lp.services.scripts import log
+
+
+class GitJobType(DBEnumeratedType):
+    """Values that `IGitJob.job_type` can take."""
+
+    REF_SCAN = DBItem(0, """
+        Ref scan
+
+        This job scans a repository for its current list of references.
+        """)
+
+
+class GitJob(StormBase):
+    """See `IGitJob`."""
+
+    __storm_table__ = 'GitJob'
+
+    implements(IGitJob)
+
+    job_id = Int(name='job', primary=True, allow_none=False)
+    job = Reference(job_id, 'Job.id')
+
+    repository_id = Int(name='repository', allow_none=False)
+    repository = Reference(repository_id, 'GitRepository.id')
+
+    job_type = EnumCol(enum=GitJobType, notNull=True)
+
+    metadata = JSON('json_data')
+
+    def __init__(self, repository, job_type, metadata, **job_args):
+        """Constructor.
+
+        Extra keyword arguments are used to construct the underlying Job
+        object.
+
+        :param repository: The database repository this job relates to.
+        :param job_type: The `GitJobType` of this job.
+        :param metadata: The type-specific variables, as a JSON-compatible
+            dict.
+        """
+        super(GitJob, self).__init__()
+        self.job = Job(**job_args)
+        self.repository = repository
+        self.job_type = job_type
+        self.metadata = metadata
+
+    def makeDerived(self):
+        return GitJobDerived.makeSubclass(self)
+
+
+class GitJobDerived(BaseRunnableJob):
+
+    __metaclass__ = EnumeratedSubclass
+
+    delegates(IGitJob)
+
+    def __init__(self, git_job):
+        self.context = git_job
+
+    @classmethod
+    def get(cls, job_id):
+        """Get a job by id.
+
+        :return: The `GitJob` with the specified id, as the current
+            `GitJobDerived` subclass.
+        :raises: `NotFoundError` if there is no job with the specified id,
+            or its `job_type` does not match the desired subclass.
+        """
+        git_job = IStore(GitJob).get(GitJob, job_id)
+        if git_job.job_type != cls.class_job_type:
+            raise NotFoundError(
+                "No object found with id %d and type %s" %
+                (job_id, cls.class_job_type.title))
+        return cls(git_job)
+
+    @classmethod
+    def iterReady(cls):
+        """See `IJobSource`."""
+        jobs = IMasterStore(GitJob).find(
+            GitJob,
+            GitJob.job_type == cls.class_job_type,
+            GitJob.job == Job.id,
+            Job.id.is_in(Job.ready_jobs))
+        return (cls(job) for job in jobs)
+
+    def getOopsVars(self):
+        """See `IRunnableJob`."""
+        oops_vars = super(GitJobDerived, self).getOopsVars()
+        oops_vars.extend([
+            ('git_job_id', self.context.job.id),
+            ('git_job_type', self.context.job_type.title),
+            ('git_repository_name', self.context.repository.unique_name)])
+        return oops_vars
+
+    def getErrorRecipients(self):
+        if self.requester is None:
+            return []
+        return [format_address_for_person(self.requester)]
+
+
+class GitRefScanJob(GitJobDerived):
+    """A Job that scans a Git repository for its current list of references."""
+
+    implements(IGitRefScanJob)
+
+    classProvides(IGitRefScanJobSource)
+    class_job_type = GitJobType.REF_SCAN
+
+    config = config.IGitRefScanJobSource
+
+    @classmethod
+    def create(cls, repository):
+        """See `IGitRefScanJobSource`."""
+        git_job = GitJob(
+            repository, cls.class_job_type,
+            {"repository_name": repository.unique_name})
+        job = cls(git_job)
+        job.celeryRunOnCommit()
+        return job
+
+    def __init__(self, git_job):
+        super(GitRefScanJob, self).__init__(git_job)
+        self._cached_repository_name = self.metadata["repository_name"]
+        self._hosting_client = GitHostingClient(
+            config.codehosting.internal_git_api_endpoint)
+
+    def run(self):
+        """See `IGitRefScanJob`."""
+        try:
+            hosting_path = self.repository.getInternalPath()
+        except LostObjectError:
+            log.warning(
+                "Skipping repository %s because it has been deleted." %
+                self._cached_repository_name)
+            return
+        new_refs = {}
+        for path, info in self._hosting_client.get_refs(hosting_path).items():
+            try:
+                new_refs[path] = self.repository.convertRefInfo(info)
+            except ValueError:
+                pass
+        current_refs = dict((ref.path, ref) for ref in self.repository.refs)
+        refs_to_insert = dict(
+            (path, info) for path, info in new_refs.items()
+            if path not in current_refs)
+        self.repository.createRefs(refs_to_insert)
+        self.repository.removeRefs(set(current_refs) - set(new_refs))
+        for path in set(new_refs) & set(current_refs):
+            new_ref = new_refs[path]
+            current_ref = current_refs[path]
+            if (new_ref["sha1"] != current_ref.commit_sha1 or
+                new_ref["type"] != current_ref.object_type):
+                self.repository.updateRef(current_ref, new_ref)

=== modified file 'lib/lp/code/model/gitlookup.py'
--- lib/lp/code/model/gitlookup.py	2015-03-05 11:39:06 +0000
+++ lib/lp/code/model/gitlookup.py	2015-03-12 15:36:07 +0000
@@ -297,6 +297,16 @@
             return default
         return repository
 
+    def getByHostingPath(self, path):
+        """See `IGitLookup`."""
+        # This may need to change later to improve support for sharding.
+        # See also `IGitRepository.getInternalPath`.
+        try:
+            repository_id = int(path)
+        except ValueError:
+            return None
+        return self.get(repository_id)
+
     @staticmethod
     def uriToPath(uri):
         """See `IGitLookup`."""

=== added file 'lib/lp/code/model/gitref.py'
--- lib/lp/code/model/gitref.py	1970-01-01 00:00:00 +0000
+++ lib/lp/code/model/gitref.py	2015-03-12 15:36:07 +0000
@@ -0,0 +1,37 @@
+# Copyright 2015 Canonical Ltd.  This software is licensed under the
+# GNU Affero General Public License version 3 (see the file LICENSE).
+
+__metaclass__ = type
+__all__ = [
+    'GitRef',
+    ]
+
+from storm.locals import (
+    Int,
+    Reference,
+    Unicode,
+    )
+from zope.interface import implements
+
+from lp.code.enums import GitObjectType
+from lp.code.interfaces.gitref import IGitRef
+from lp.services.database.enumcol import EnumCol
+from lp.services.database.stormbase import StormBase
+
+
+class GitRef(StormBase):
+    """See `IGitRef`."""
+
+    __storm_table__ = 'GitRef'
+    __storm_primary__ = ('repository_id', 'path')
+
+    implements(IGitRef)
+
+    repository_id = Int(name='repository', allow_none=False)
+    repository = Reference(repository_id, 'GitRepository.id')
+
+    path = Unicode(name='path', allow_none=False)
+
+    commit_sha1 = Unicode(name='commit_sha1', allow_none=False)
+
+    object_type = EnumCol(enum=GitObjectType, notNull=True)

=== modified file 'lib/lp/code/model/gitrepository.py'
--- lib/lp/code/model/gitrepository.py	2015-03-05 14:13:16 +0000
+++ lib/lp/code/model/gitrepository.py	2015-03-12 15:36:07 +0000
@@ -24,6 +24,7 @@
     Reference,
     Unicode,
     )
+from storm.store import Store
 from zope.component import getUtility
 from zope.interface import implements
 from zope.security.proxy import removeSecurityProxy
@@ -36,6 +37,7 @@
 from lp.app.interfaces.informationtype import IInformationType
 from lp.app.interfaces.launchpad import IPrivacy
 from lp.app.interfaces.services import IService
+from lp.code.enums import GitObjectType
 from lp.code.errors import (
     GitDefaultConflict,
     GitFeatureDisabled,
@@ -57,6 +59,7 @@
     IGitRepositorySet,
     user_has_special_git_repository_access,
     )
+from lp.code.model.gitref import GitRef
 from lp.registry.enums import PersonVisibility
 from lp.registry.errors import CannotChangeInformationType
 from lp.registry.interfaces.accesspolicy import (
@@ -78,6 +81,7 @@
     )
 from lp.registry.model.teammembership import TeamParticipation
 from lp.services.config import config
+from lp.services.database import bulk
 from lp.services.database.constants import (
     DEFAULT,
     UTC_NOW,
@@ -91,10 +95,21 @@
     ArrayIntersects,
     )
 from lp.services.features import getFeatureFlag
-from lp.services.propertycache import cachedproperty
+from lp.services.propertycache import (
+    cachedproperty,
+    get_property_cache,
+    )
 from lp.services.webapp.authorization import available_with_permission
 
 
+object_type_map = {
+    "commit": GitObjectType.COMMIT,
+    "tree": GitObjectType.TREE,
+    "blob": GitObjectType.BLOB,
+    "tag": GitObjectType.TAG,
+    }
+
+
 def git_repository_modified(repository, event):
     """Update the date_last_modified property when a GitRepository is modified.
 
@@ -243,6 +258,7 @@
     def getInternalPath(self):
         """See `IGitRepository`."""
         # This may need to change later to improve support for sharding.
+        # See also `IGitLookup.getByHostingPath`.
         return str(self.id)
 
     def getCodebrowseUrl(self):
@@ -279,6 +295,53 @@
             self, self.information_type, pillars, wanted_links)
 
     @cachedproperty
+    def refs(self):
+        """See `IGitRepository`."""
+        return list(Store.of(self).find(
+            GitRef, GitRef.repository_id == self.id).order_by(GitRef.path))
+
+    @staticmethod
+    def convertRefInfo(info):
+        """See `IGitRepository`."""
+        if "object" not in info:
+            raise ValueError('ref info does not contain "object" key')
+        obj = info["object"]
+        if "sha1" not in obj:
+            raise ValueError('ref info object does not contain "sha1" key')
+        if "type" not in obj:
+            raise ValueError('ref info object does not contain "type" key')
+        if not isinstance(obj["sha1"], basestring) or len(obj["sha1"]) != 40:
+            raise ValueError('ref info sha1 is not a 40-character string')
+        if obj["type"] not in object_type_map:
+            raise ValueError('ref info type is not a recognised object type')
+        sha1 = obj["sha1"]
+        if isinstance(sha1, bytes):
+            sha1 = sha1.decode("US-ASCII")
+        return {"sha1": sha1, "type": object_type_map[obj["type"]]}
+
+    def createRefs(self, refs_info):
+        """See `IGitRepository`."""
+        bulk.create(
+            (GitRef.repository, GitRef.path, GitRef.commit_sha1,
+             GitRef.object_type),
+            [(self, path, info["sha1"], info["type"])
+             for path, info in refs_info.items()])
+        del get_property_cache(self).refs
+
+    def removeRefs(self, paths):
+        """See `IGitRepository`."""
+        Store.of(self).find(
+            GitRef,
+            GitRef.repository == self, GitRef.path.is_in(paths)).remove()
+        del get_property_cache(self).refs
+
+    def updateRef(self, ref, info):
+        """See `IGitRepository`."""
+        naked_ref = removeSecurityProxy(ref)
+        naked_ref.commit_sha1 = info["sha1"]
+        naked_ref.object_type = info["type"]
+
+    @cachedproperty
     def _known_viewers(self):
         """A set of known persons able to view this repository.
 

=== added file 'lib/lp/code/model/tests/test_gitjob.py'
--- lib/lp/code/model/tests/test_gitjob.py	1970-01-01 00:00:00 +0000
+++ lib/lp/code/model/tests/test_gitjob.py	2015-03-12 15:36:07 +0000
@@ -0,0 +1,113 @@
+# Copyright 2015 Canonical Ltd.  This software is licensed under the
+# GNU Affero General Public License version 3 (see the file LICENSE).
+
+"""Tests for `GitJob`s."""
+
+__metaclass__ = type
+
+import hashlib
+
+from testtools.matchers import (
+    MatchesSetwise,
+    MatchesStructure,
+    )
+
+from lp.code.enums import GitObjectType
+from lp.code.interfaces.gitjob import (
+    IGitJob,
+    IGitRefScanJob,
+    )
+from lp.code.interfaces.gitrepository import GIT_FEATURE_FLAG
+from lp.code.model.gitjob import (
+    GitJob,
+    GitJobDerived,
+    GitJobType,
+    GitRefScanJob,
+    )
+from lp.services.features.testing import FeatureFixture
+from lp.testing import TestCaseWithFactory
+from lp.testing.dbuser import dbuser
+from lp.testing.fakemethod import FakeMethod
+from lp.testing.layers import (
+    DatabaseFunctionalLayer,
+    LaunchpadZopelessLayer,
+    )
+
+
+class TestGitJob(TestCaseWithFactory):
+    """Tests for `GitJob`."""
+
+    layer = DatabaseFunctionalLayer
+
+    def test_provides_interface(self):
+        # `GitJob` objects provide `IGitJob`.
+        self.useFixture(FeatureFixture({GIT_FEATURE_FLAG: u"on"}))
+        repository = self.factory.makeGitRepository()
+        self.assertProvides(
+            GitJob(repository, GitJobType.REF_SCAN, {}), IGitJob)
+
+
+class TestGitJobDerived(TestCaseWithFactory):
+    """Tests for `GitJobDerived`."""
+
+    layer = LaunchpadZopelessLayer
+
+    def test_getOopsMailController(self):
+        """By default, no mail is sent about failed BranchJobs."""
+        self.useFixture(FeatureFixture({GIT_FEATURE_FLAG: u"on"}))
+        repository = self.factory.makeGitRepository()
+        job = GitJob(repository, GitJobType.REF_SCAN, {})
+        derived = GitJobDerived(job)
+        self.assertIsNone(derived.getOopsMailController("x"))
+
+
+class TestGitRefScanJobMixin:
+
+    @staticmethod
+    def makeFakeRefs(paths):
+        return dict(
+            (path, {"object": {
+                "sha1": hashlib.sha1(path).hexdigest(),
+                "type": "commit",
+                }})
+            for path in paths)
+
+    def assertRefsMatch(self, refs, repository, paths):
+        matchers = [
+            MatchesStructure.byEquality(
+                repository=repository,
+                path=path,
+                commit_sha1=unicode(hashlib.sha1(path).hexdigest()),
+                object_type=GitObjectType.COMMIT)
+            for path in paths]
+        self.assertThat(refs, MatchesSetwise(*matchers))
+
+
+class TestGitRefScanJob(TestGitRefScanJobMixin, TestCaseWithFactory):
+    """Tests for `GitRefScanJob`."""
+
+    layer = LaunchpadZopelessLayer
+
+    def setUp(self):
+        super(TestGitRefScanJob, self).setUp()
+        self.useFixture(FeatureFixture({GIT_FEATURE_FLAG: u"on"}))
+
+    def test_provides_interface(self):
+        # `GitRefScanJob` objects provide `IGitRefScanJob`.
+        repository = self.factory.makeGitRepository()
+        self.assertProvides(GitRefScanJob.create(repository), IGitRefScanJob)
+
+    def test_run(self):
+        # Ensure the job scans the repository.
+        repository = self.factory.makeGitRepository()
+        job = GitRefScanJob.create(repository)
+        paths = (u"refs/heads/master", u"refs/tags/1.0")
+        job._hosting_client.get_refs = FakeMethod(
+            result=self.makeFakeRefs(paths))
+        with dbuser("branchscanner"):
+            job.run()
+        self.assertRefsMatch(repository.refs, repository, paths)
+
+
+# XXX cjwatson 2015-03-12: We should test that the job works via Celery too,
+# but that isn't feasible until we have a proper turnip fixture.

=== modified file 'lib/lp/code/model/tests/test_gitlookup.py'
--- lib/lp/code/model/tests/test_gitlookup.py	2015-03-05 14:13:16 +0000
+++ lib/lp/code/model/tests/test_gitlookup.py	2015-03-12 15:36:07 +0000
@@ -35,6 +35,26 @@
 from lp.testing.layers import DatabaseFunctionalLayer
 
 
+class TestGetByHostingPath(TestCaseWithFactory):
+    """Test `IGitLookup.getByHostingPath`."""
+
+    layer = DatabaseFunctionalLayer
+
+    def setUp(self):
+        super(TestGetByHostingPath, self).setUp()
+        self.useFixture(FeatureFixture({GIT_FEATURE_FLAG: u"on"}))
+        self.lookup = getUtility(IGitLookup)
+
+    def test_exists(self):
+        repository = self.factory.makeGitRepository()
+        self.assertEqual(
+            repository,
+            self.lookup.getByHostingPath(repository.getInternalPath()))
+
+    def test_missing(self):
+        self.assertIsNone(self.lookup.getByHostingPath("nonexistent"))
+
+
 class TestGetByUniqueName(TestCaseWithFactory):
     """Tests for `IGitLookup.getByUniqueName`."""
 

=== modified file 'lib/lp/code/model/tests/test_gitrepository.py'
--- lib/lp/code/model/tests/test_gitrepository.py	2015-03-06 16:31:30 +0000
+++ lib/lp/code/model/tests/test_gitrepository.py	2015-03-12 15:36:07 +0000
@@ -7,10 +7,15 @@
 
 from datetime import datetime
 from functools import partial
+import hashlib
 import json
 
 from lazr.lifecycle.event import ObjectModifiedEvent
 import pytz
+from testtools.matchers import (
+    MatchesSetwise,
+    MatchesStructure,
+    )
 from zope.component import getUtility
 from zope.event import notify
 from zope.security.proxy import removeSecurityProxy
@@ -21,6 +26,7 @@
     PUBLIC_INFORMATION_TYPES,
     )
 from lp.app.interfaces.launchpad import ILaunchpadCelebrities
+from lp.code.enums import GitObjectType
 from lp.code.errors import (
     GitFeatureDisabled,
     GitRepositoryCreatorNotMemberOfOwnerTeam,
@@ -37,6 +43,7 @@
     IGitRepository,
     IGitRepositorySet,
     )
+from lp.code.model.gitrepository import GitRepository
 from lp.registry.enums import (
     BranchSharingPolicy,
     PersonVisibility,
@@ -410,6 +417,116 @@
             get_policies_for_artifact(repository))
 
 
+class TestGitRepositoryRefs(TestCaseWithFactory):
+    """Tests for ref handling."""
+
+    layer = DatabaseFunctionalLayer
+
+    def setUp(self):
+        super(TestGitRepositoryRefs, self).setUp()
+        self.useFixture(FeatureFixture({GIT_FEATURE_FLAG: u"on"}))
+
+    def test_convertRefInfo(self):
+        # convertRefInfo converts a valid info dictionary.
+        sha1 = unicode(hashlib.sha1("").hexdigest())
+        info = {"object": {"sha1": sha1, "type": u"commit"}}
+        expected_info = {"sha1": sha1, "type": GitObjectType.COMMIT}
+        self.assertEqual(expected_info, GitRepository.convertRefInfo(info))
+
+    def test_convertRefInfo_requires_object(self):
+        self.assertRaisesWithContent(
+            ValueError, 'ref info does not contain "object" key',
+            GitRepository.convertRefInfo, {})
+
+    def test_convertRefInfo_requires_object_sha1(self):
+        self.assertRaisesWithContent(
+            ValueError, 'ref info object does not contain "sha1" key',
+            GitRepository.convertRefInfo, {"object": {}})
+
+    def test_convertRefInfo_requires_object_type(self):
+        info = {
+            "object": {"sha1": u"0000000000000000000000000000000000000000"},
+            }
+        self.assertRaisesWithContent(
+            ValueError, 'ref info object does not contain "type" key',
+            GitRepository.convertRefInfo, info)
+
+    def test_convertRefInfo_bad_sha1(self):
+        info = {"object": {"sha1": "x", "type": "commit"}}
+        self.assertRaisesWithContent(
+            ValueError, 'ref info sha1 is not a 40-character string',
+            GitRepository.convertRefInfo, info)
+
+    def test_convertRefInfo_bad_type(self):
+        info = {
+            "object": {
+                "sha1": u"0000000000000000000000000000000000000000",
+                "type": u"nonsense",
+                },
+            }
+        self.assertRaisesWithContent(
+            ValueError, 'ref info type is not a recognised object type',
+            GitRepository.convertRefInfo, info)
+
+    @staticmethod
+    def makeFakeRefs(paths):
+        return dict(
+            (path, {
+                "sha1": unicode(hashlib.sha1(path).hexdigest()),
+                "type": GitObjectType.COMMIT,
+                })
+            for path in paths)
+
+    def assertRefsMatch(self, refs, repository, paths):
+        matchers = [
+            MatchesStructure.byEquality(
+                repository=repository,
+                path=path,
+                commit_sha1=unicode(hashlib.sha1(path).hexdigest()),
+                object_type=GitObjectType.COMMIT)
+            for path in paths]
+        self.assertThat(refs, MatchesSetwise(*matchers))
+
+    def test_createRefs(self):
+        repository = self.factory.makeGitRepository()
+        self.assertEqual([], repository.refs)
+        paths = (u"refs/heads/master", u"refs/tags/1.0")
+        repository.createRefs(self.makeFakeRefs(paths))
+        self.assertRefsMatch(repository.refs, repository, paths)
+
+    def test_removeRefs(self):
+        repository = self.factory.makeGitRepository()
+        paths = (u"refs/heads/master", u"refs/heads/branch", u"refs/tags/1.0")
+        repository.createRefs(self.makeFakeRefs(paths))
+        self.assertRefsMatch(repository.refs, repository, paths)
+        repository.removeRefs([u"refs/heads/branch", u"refs/tags/1.0"])
+        self.assertRefsMatch(
+            repository.refs, repository, [u"refs/heads/master"])
+
+    def test_updateRef(self):
+        repository = self.factory.makeGitRepository()
+        paths = (u"refs/heads/master", u"refs/tags/1.0")
+        repository.createRefs(self.makeFakeRefs(paths))
+        self.assertRefsMatch(repository.refs, repository, paths)
+        [tag_ref] = [
+            ref for ref in repository.refs if ref.path == u"refs/tags/1.0"]
+        new_info = {
+            "sha1": u"0000000000000000000000000000000000000000",
+            "type": GitObjectType.BLOB,
+            }
+        repository.updateRef(tag_ref, new_info)
+        self.assertRefsMatch(
+            [ref for ref in repository.refs if ref.path != u"refs/tags/1.0"],
+            repository, [u"refs/heads/master"])
+        self.assertThat(
+            tag_ref, MatchesStructure.byEquality(
+                repository=repository,
+                path=u"refs/tags/1.0",
+                commit_sha1=u"0000000000000000000000000000000000000000",
+                object_type=GitObjectType.BLOB,
+                ))
+
+
 class TestGitRepositoryGetAllowedInformationTypes(TestCaseWithFactory):
     """Test `IGitRepository.getAllowedInformationTypes`."""
 

=== modified file 'lib/lp/code/xmlrpc/git.py'
--- lib/lp/code/xmlrpc/git.py	2015-03-03 17:09:33 +0000
+++ lib/lp/code/xmlrpc/git.py	2015-03-12 15:36:07 +0000
@@ -38,6 +38,7 @@
     split_git_unique_name,
     )
 from lp.code.interfaces.gitrepository import IGitRepositorySet
+from lp.code.interfaces.gitjob import IGitRefScanJobSource
 from lp.code.xmlrpc.codehosting import run_with_login
 from lp.registry.errors import (
     InvalidName,
@@ -232,3 +233,10 @@
         return run_with_login(
             requester_id, self._translatePath,
             path.strip("/"), permission, can_authenticate)
+
+    @return_fault
+    def notify(self, translated_path):
+        """See `IGitAPI`."""
+        repository = getUtility(IGitLookup).getByHostingPath(translated_path)
+        job = getUtility(IGitRefScanJobSource).create(repository)
+        job.celeryRunOnCommit()

=== modified file 'lib/lp/code/xmlrpc/tests/test_git.py'
--- lib/lp/code/xmlrpc/tests/test_git.py	2015-03-04 18:27:40 +0000
+++ lib/lp/code/xmlrpc/tests/test_git.py	2015-03-12 15:36:07 +0000
@@ -15,6 +15,7 @@
     LAUNCHPAD_SERVICES,
     )
 from lp.code.interfaces.gitcollection import IAllGitRepositories
+from lp.code.interfaces.gitjob import IGitRefScanJobSource
 from lp.code.interfaces.gitrepository import (
     GIT_FEATURE_FLAG,
     GIT_REPOSITORY_NAME_VALIDATION_ERROR_MESSAGE,
@@ -621,6 +622,14 @@
             "GitRepositoryCreationFault: nothing here",
             self.oopses[0]["tb_text"])
 
+    def test_notify(self):
+        # The notify call creates a GitRefScanJob.
+        repository = self.factory.makeGitRepository()
+        self.assertIsNone(self.git_api.notify(repository.getInternalPath()))
+        job_source = getUtility(IGitRefScanJobSource)
+        [job] = list(job_source.iterReady())
+        self.assertEqual(repository, job.repository)
+
 
 class TestGitAPISecurity(TestGitAPIMixin, TestCaseWithFactory):
     """Slow tests for `IGitAPI`.

=== modified file 'lib/lp/scripts/garbo.py'
--- lib/lp/scripts/garbo.py	2014-11-06 02:22:57 +0000
+++ lib/lp/scripts/garbo.py	2015-03-12 15:36:07 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2014 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2015 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """Database garbage collection."""
@@ -1092,6 +1092,23 @@
         """
 
 
+class GitJobPruner(BulkPruner):
+    """Prune `GitJob`s that are in a final state and more than a month old.
+
+    When a GitJob is completed, it gets set to a final state.  These jobs
+    should be pruned from the database after a month.
+    """
+    target_table_class = Job
+    ids_to_prune_query = """
+        SELECT DISTINCT Job.id
+        FROM Job, GitJob
+        WHERE
+            Job.id = GitJob.job
+            AND Job.date_finished < CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
+                - CAST('30 days' AS interval)
+        """
+
+
 class BugHeatUpdater(TunableLoop):
     """A `TunableLoop` for bug heat calculations."""
 
@@ -1645,6 +1662,7 @@
         BugWatchActivityPruner,
         CodeImportEventPruner,
         CodeImportResultPruner,
+        GitJobPruner,
         HWSubmissionEmailLinker,
         LiveFSFilePruner,
         LoginTokenPruner,

=== modified file 'lib/lp/scripts/tests/test_garbo.py'
--- lib/lp/scripts/tests/test_garbo.py	2015-01-07 00:35:41 +0000
+++ lib/lp/scripts/tests/test_garbo.py	2015-03-12 15:36:07 +0000
@@ -1,4 +1,4 @@
-# Copyright 2009-2014 Canonical Ltd.  This software is licensed under the
+# Copyright 2009-2015 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 """Test the database garbage collector."""
@@ -49,6 +49,7 @@
     )
 from lp.code.enums import CodeImportResultStatus
 from lp.code.interfaces.codeimportevent import ICodeImportEventSet
+from lp.code.interfaces.gitrepository import GIT_FEATURE_FLAG
 from lp.code.model.branchjob import (
     BranchJob,
     BranchUpgradeJob,
@@ -56,6 +57,10 @@
 from lp.code.model.codeimportevent import CodeImportEvent
 from lp.code.model.codeimportresult import CodeImportResult
 from lp.code.model.diff import Diff
+from lp.code.model.gitjob import (
+    GitJob,
+    GitRefScanJob,
+    )
 from lp.registry.enums import (
     BranchSharingPolicy,
     BugSharingPolicy,
@@ -930,6 +935,48 @@
         switch_dbuser('testadmin')
         self.assertEqual(store.find(BranchJob).count(), 1)
 
+    def test_GitJobPruner(self):
+        # Garbo should remove jobs completed over 30 days ago.
+        self.useFixture(FeatureFixture({GIT_FEATURE_FLAG: u'on'}))
+        switch_dbuser('testadmin')
+        store = IMasterStore(Job)
+
+        db_repository = self.factory.makeGitRepository()
+        Store.of(db_repository).flush()
+        git_job = GitRefScanJob.create(db_repository)
+        git_job.job.date_finished = THIRTY_DAYS_AGO
+
+        self.assertEqual(
+            1,
+            store.find(GitJob, GitJob.repository == db_repository.id).count())
+
+        self.runDaily()
+
+        switch_dbuser('testadmin')
+        self.assertEqual(
+            0,
+            store.find(GitJob, GitJob.repository == db_repository.id).count())
+
+    def test_GitJobPruner_doesnt_prune_recent_jobs(self):
+        # Check to make sure the garbo doesn't remove jobs that aren't more
+        # than thirty days old.
+        self.useFixture(FeatureFixture({GIT_FEATURE_FLAG: u'on'}))
+        switch_dbuser('testadmin')
+        store = IMasterStore(Job)
+
+        db_repository = self.factory.makeGitRepository()
+
+        git_job = GitRefScanJob.create(db_repository)
+        git_job.job.date_finished = THIRTY_DAYS_AGO
+
+        db_repository2 = self.factory.makeGitRepository()
+        GitRefScanJob.create(db_repository2)
+
+        self.runDaily()
+
+        switch_dbuser('testadmin')
+        self.assertEqual(1, store.find(GitJob).count())
+
     def test_ObsoleteBugAttachmentPruner(self):
         # Bug attachments without a LibraryFileContent record are removed.
 

=== modified file 'lib/lp/security.py'
--- lib/lp/security.py	2015-03-06 10:22:08 +0000
+++ lib/lp/security.py	2015-03-12 15:36:07 +0000
@@ -84,6 +84,7 @@
 from lp.code.interfaces.codereviewvote import ICodeReviewVoteReference
 from lp.code.interfaces.diff import IPreviewDiff
 from lp.code.interfaces.gitcollection import IGitCollection
+from lp.code.interfaces.gitref import IGitRef
 from lp.code.interfaces.gitrepository import (
     IGitRepository,
     user_has_special_git_repository_access,
@@ -2260,6 +2261,15 @@
     usedfor = IGitRepository
 
 
+class ViewGitRef(DelegatedAuthorization):
+    """Anyone who can see a Git repository can see references within it."""
+    permission = 'launchpad.View'
+    usedfor = IGitRef
+
+    def __init__(self, obj):
+        super(ViewGitRef, self).__init__(obj, obj.repository)
+
+
 class AdminDistroSeriesTranslations(AuthorizationBase):
     permission = 'launchpad.TranslationsAdmin'
     usedfor = IDistroSeries

=== modified file 'lib/lp/services/config/schema-lazr.conf'
--- lib/lp/services/config/schema-lazr.conf	2015-02-19 17:33:35 +0000
+++ lib/lp/services/config/schema-lazr.conf	2015-03-12 15:36:07 +0000
@@ -1750,6 +1750,10 @@
 module: lp.soyuz.interfaces.distributionjob
 dbuser: distroseriesdifferencejob
 
+[IGitRefScanJobSource]
+module: lp.code.interfaces.gitjob
+dbuser: branchscanner
+
 [IInitializeDistroSeriesJobSource]
 module: lp.soyuz.interfaces.distributionjob
 dbuser: initializedistroseries


Follow ups