← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] ~ilasc/launchpad:git-repack-cron-script into launchpad:master

 

Ioana Lasc has proposed merging ~ilasc/launchpad:git-repack-cron-script into launchpad:master.

Commit message:
Add cron script for git repack

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~ilasc/launchpad/+git/launchpad/+merge/398963
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of ~ilasc/launchpad:git-repack-cron-script into launchpad:master.
diff --git a/cronscripts/request_git_repack.py b/cronscripts/request_git_repack.py
new file mode 100755
index 0000000..b01272b
--- /dev/null
+++ b/cronscripts/request_git_repack.py
@@ -0,0 +1,52 @@
+#!/usr/bin/python2 -S
+#
+# Copyright 2010-2021 Canonical Ltd.  This software is licensed under the
+# GNU Affero General Public License version 3 (see the file LICENSE).
+
+"""Request git repack for git repositories."""
+
+__metaclass__ = type
+
+import _pythonpath
+
+import transaction
+
+from zope.component import getUtility
+
+from lp.code.interfaces.gitrepository import IGitRepository, IGitRepositorySet
+
+from lp.services.config import config
+from lp.services.database.constants import UTC_NOW
+from lp.services.scripts.base import LaunchpadCronScript
+from lp.services.timeout import set_default_timeout_function
+from lp.services.webapp.errorlog import globalErrorUtility
+
+
+class RequestGitRepack(LaunchpadCronScript):
+    """Run git repository repack job."""
+
+    def __init__(self):
+        name = 'request_git_repack'
+        dbuser = config.request_git_repack.dbuser
+        LaunchpadCronScript.__init__(self, name, dbuser)
+
+    def main(self):
+        globalErrorUtility.configure(self.name)
+        self.logger.info(
+            'Requesting automatic git repository repack.')
+        set_default_timeout_function(
+            lambda: config.request_git_repack.timeout)
+        repackable_repos = getUtility(IGitRepository).getRepositoriesForRepack()
+
+        for repo in repackable_repos:
+            repo.repackRepository(self.logger)
+            repo.date_last_repacked = UTC_NOW
+
+        self.logger.info(
+            'Requested %d automatic git repository repack.' % len(repackable_repos))
+
+        transaction.commit()
+
+        if __name__ == '__main__':
+            script = RequestGitRepack()
+            script.lock_and_run()
diff --git a/database/schema/security.cfg b/database/schema/security.cfg
index 7b5876e..737fb84 100644
--- a/database/schema/security.cfg
+++ b/database/schema/security.cfg
@@ -848,6 +848,12 @@ public.webhook                                  = SELECT
 public.webhookjob                               = SELECT, INSERT
 type=user
 
+[request_git_repack]
+groups=script
+public.gitrepository                            = SELECT, UPDATE
+public.job                                      = SELECT, INSERT
+type=user
+
 [revisionkarma]
 groups=script
 public.branch                                   = SELECT
diff --git a/lib/lp/code/interfaces/gitcollection.py b/lib/lp/code/interfaces/gitcollection.py
index b6a8d0a..9135471 100644
--- a/lib/lp/code/interfaces/gitcollection.py
+++ b/lib/lp/code/interfaces/gitcollection.py
@@ -152,6 +152,9 @@ class IGitCollection(Interface):
     def modifiedSince(date):
         """Restrict the collection to repositories modified since `date`."""
 
+    def qualifiesForRepack():
+        """Restrict the collection to repositories that qualify for repack."""
+
     def ownedBy(person):
         """Restrict the collection to repositories owned by 'person'."""
 
diff --git a/lib/lp/code/interfaces/gitrepository.py b/lib/lp/code/interfaces/gitrepository.py
index 5b3cc61..e9c0095 100644
--- a/lib/lp/code/interfaces/gitrepository.py
+++ b/lib/lp/code/interfaces/gitrepository.py
@@ -749,7 +749,7 @@ class IGitRepositoryExpensiveRequest(Interface):
 
     @export_write_operation()
     @operation_for_version("devel")
-    def repackRepository():
+    def repackRepository(logger=None):
         """Trigger a repack repository operation.
 
         Raises Unauthorized if the repack was attempted by a person
@@ -900,13 +900,10 @@ class IGitRepositoryEdit(IWebhookTarget):
             effective permissions on each of the requested references.
         """
 
-    def setRepackData(loose_object_count, pack_count):
-        """Sets the repack parameters received from Turnip.
+    def getRepositoriesForRepack():
+        """Get all repositories that need a repack.
 
-        :param loose_object_count: The number of loose objects that
-            this repository currently has.
-        :param pack_count: The number of packs that
-            this repository currently has.
+        :return: A collection of `IGitRepository` objects.
         """
 
     @operation_parameters(
@@ -1071,6 +1068,15 @@ class IGitRepositorySet(Interface):
         :return: A collection of `IGitRepository` objects.
         """
 
+    # @operation_returns_collection_of(IGitRepository)
+    # @export_read_operation()
+    # @operation_for_version("devel")
+    # def getRepositoriesForRepack():
+    #     """Get all repositories that need a repack.
+    #
+    #     :return: A collection of `IGitRepository` objects.
+    #     """
+
     @call_with(user=REQUEST_USER)
     @operation_parameters(
         person=Reference(
diff --git a/lib/lp/code/model/gitcollection.py b/lib/lp/code/model/gitcollection.py
index 37cf78d..b56ff00 100644
--- a/lib/lp/code/model/gitcollection.py
+++ b/lib/lp/code/model/gitcollection.py
@@ -26,8 +26,8 @@ from storm.expr import (
     LeftJoin,
     Select,
     SQL,
-    With,
-    )
+    With, Or,
+)
 from storm.info import ClassAlias
 from storm.store import EmptyResultSet
 from zope.component import getUtility
@@ -61,6 +61,7 @@ from lp.registry.model.person import Person
 from lp.registry.model.product import Product
 from lp.registry.model.sourcepackagename import SourcePackageName
 from lp.registry.model.teammembership import TeamParticipation
+from lp.services.config import config
 from lp.services.database.bulk import load_related
 from lp.services.database.decoratedresultset import DecoratedResultSet
 from lp.services.database.interfaces import IStore
@@ -515,6 +516,14 @@ class GenericGitCollection:
         return self._filterBy(
             [GitRepository.date_last_modified > date], symmetric=False)
 
+    def qualifiesForRepack(self):
+        """See `IGitCollection`."""
+        return self._filterBy(
+            [Or(GitRepository.loose_object_count >=
+                config.codehosting.loose_objects_threshold,
+                GitRepository.pack_count >=
+                config.codehosting.packs_threshold)], symmetric=False)
+
     def ownedBy(self, person):
         """See `IGitCollection`."""
         return self._filterBy([GitRepository.owner == person], symmetric=False)
diff --git a/lib/lp/code/model/gitrepository.py b/lib/lp/code/model/gitrepository.py
index 74d94ec..eb64f6e 100644
--- a/lib/lp/code/model/gitrepository.py
+++ b/lib/lp/code/model/gitrepository.py
@@ -487,7 +487,7 @@ class GitRepository(StormBase, WebhookTargetMixin, GitIdentityMixin):
         namespace.moveRepository(self, user, rename_if_necessary=True)
         self._reconcileAccess()
 
-    def repackRepository(self):
+    def repackRepository(self, logger=None):
         getUtility(IGitHostingClient).repackRepository(self.getInternalPath())
 
     @property
@@ -1730,6 +1730,12 @@ class GitRepository(StormBase, WebhookTargetMixin, GitIdentityMixin):
             getUtility(IReclaimGitRepositorySpaceJobSource).create(
                 repository_name, repository_path)
 
+    def getRepositoriesForRepack(self):
+        """See `IGitRepositorySet`."""
+        collection = getUtility(IAllGitRepositories)
+        collection = collection.qualifiesForRepack()
+        return collection.getRepositories(eager_load=True)
+
 
 class DeletionOperation:
     """Represent an operation to perform as part of branch deletion."""
@@ -1847,6 +1853,12 @@ class GitRepositorySet:
             collection = collection.modifiedSince(modified_since_date)
         return collection.getRepositories(eager_load=True, sort_by=order_by)
 
+    # def getRepositoriesForRepack(self):
+    #     """See `IGitRepositorySet`."""
+    #     collection = getUtility(IAllGitRepositories)
+    #     collection = collection.qualifiesForRepack()
+    #     return collection.getRepositories(eager_load=True)
+
     def getRepositoryVisibilityInfo(self, user, person, repository_names):
         """See `IGitRepositorySet`."""
         if user is None:
diff --git a/lib/lp/code/model/tests/test_gitrepository.py b/lib/lp/code/model/tests/test_gitrepository.py
index c8e5bcf..00071db 100644
--- a/lib/lp/code/model/tests/test_gitrepository.py
+++ b/lib/lp/code/model/tests/test_gitrepository.py
@@ -569,6 +569,41 @@ class TestGitRepository(TestCaseWithFactory):
         self.assertThat(recorder2, HasQueryCount.byEquality(recorder1))
         self.assertEqual(7, recorder1.count)
 
+    def test_getRepositoriesForRepack(self):
+        # The values for the 2 thresholds are defined
+        # under config.codehosting.loose_objects_threshold = 4350
+        # and config.codehosting.packs_threshold = 30
+        repository = self.factory.makeGitRepository()
+        repository = removeSecurityProxy(repository)
+        repository.loose_object_count = 3
+        repository.pack_count = 4
+        self.assertEqual(0, len(list(repository.getRepositoriesForRepack())))
+
+        # If either one of the 2 repack indicators are above
+        # allowed thresholds the repository becomes
+        # a candidate for an auto repack
+        repository.loose_object_count = 4350
+        repository.pack_count = 4
+        self.assertEqual(1, len(list(repository.getRepositoriesForRepack())))
+
+        repository.loose_object_count = 3
+        repository.pack_count = 31
+        self.assertEqual(1, len(list(repository.getRepositoriesForRepack())))
+
+        # adding a second repo with values under admissible thresholds
+        # does not modify the number of candidate repos
+        repository2 = self.factory.makeGitRepository()
+        repository2 = removeSecurityProxy(repository2)
+        repository2.loose_object_count = 3
+        repository2.pack_count = 4
+        self.assertEqual(1, len(list(repository2.getRepositoriesForRepack())))
+
+        # second repo has one metric above threshold
+        # number of candidates increases by 1
+        repository2.loose_object_count = 3
+        repository2.pack_count = 99
+        self.assertEqual(2, len(list(repository.getRepositoriesForRepack())))
+
 
 class TestGitIdentityMixin(TestCaseWithFactory):
     """Test the defaults and identities provided by GitIdentityMixin."""
diff --git a/lib/lp/code/scripts/tests/test_request_git_repack.py b/lib/lp/code/scripts/tests/test_request_git_repack.py
new file mode 100644
index 0000000..abb0630
--- /dev/null
+++ b/lib/lp/code/scripts/tests/test_request_git_repack.py
@@ -0,0 +1,123 @@
+# Copyright 2010-2021 Canonical Ltd.  This software is licensed under the
+# GNU Affero General Public License version 3 (see the file LICENSE).
+
+"""Test the request_git_repack script."""
+import base64
+from collections import defaultdict
+import json
+import threading
+from wsgiref.simple_server import (
+    make_server,
+    WSGIRequestHandler,
+    )
+import transaction
+from zope.security.proxy import removeSecurityProxy
+
+from lp.code.interfaces.codehosting import BRANCH_ID_ALIAS_PREFIX
+from lp.services.config import config
+from lp.services.config.fixture import ConfigFixture, ConfigUseFixture
+from lp.testing import TestCaseWithFactory
+from lp.testing.layers import ZopelessAppServerLayer
+from lp.services.scripts.tests import run_script
+
+
+class SilentWSGIRequestHandler(WSGIRequestHandler):
+    """A request handler that doesn't log requests."""
+
+    def log_message(self, fmt, *args):
+        pass
+
+
+class FakeTurnipApplication:
+    """A WSGI application that provides some fake turnip endpoints."""
+
+    def __init__(self):
+        self.contents = defaultdict(dict)
+
+    def _not_found(self, start_response):
+        start_response('404 Not Found', [('Content-Type', 'text/plain')])
+        return [b'']
+
+    def __call__(self, environ, start_response):
+        segments = environ['PATH_INFO'].lstrip('/').split('/')
+        if (len(segments) < 4 or
+                segments[0] != 'repo' or segments[2] != 'blob'):
+            return self._not_found(start_response)
+        repository_id = segments[1]
+        if repository_id not in self.contents:
+            return self._not_found(start_response)
+        filename = '/'.join(segments[3:])
+        if filename not in self.contents[repository_id]:
+            return self._not_found(start_response)
+        blob = self.contents[repository_id][filename]
+        response = {'size': len(blob), 'data': base64.b64encode(blob)}
+        start_response(
+            '200 OK', [('Content-Type', 'application/octet-stream')])
+        return [json.dumps(response).encode('UTF-8')]
+
+    def addBlob(self, repository, filename, contents):
+        self.contents[repository.getInternalPath()][filename] = contents
+
+
+class FakeTurnipServer(threading.Thread):
+    """Thread that runs a fake turnip server."""
+
+    def __init__(self):
+        super(FakeTurnipServer, self).__init__()
+        self.app = FakeTurnipApplication()
+        self.server = make_server(
+            'localhost', 0, self.app, handler_class=SilentWSGIRequestHandler)
+
+    def run(self):
+        self.server.serve_forever()
+
+    def getURL(self):
+        host, port = self.server.server_address
+        return 'http://%s:%d/' % (host, port)
+
+    def addBlob(self, repository_id, filename, contents):
+        self.app.addBlob(repository_id, filename, contents)
+
+    def stop(self):
+        self.server.shutdown()
+
+
+class TestRequestGitRepack(TestCaseWithFactory):
+
+    layer = ZopelessAppServerLayer
+
+    def setUp(self):
+        super(TestRequestGitRepack, self).setUp()
+
+    def makeTurnipServer(self):
+        turnip_server = FakeTurnipServer()
+        config_name = self.factory.getUniqueString()
+        config_fixture = self.useFixture(ConfigFixture(
+            config_name, config.instance_name))
+        setting_lines = [
+            '[codehosting]',
+            'internal_git_api_endpoint: %s' % turnip_server.getURL(),
+            ]
+        config_fixture.add_section('\n' + '\n'.join(setting_lines))
+        self.useFixture(ConfigUseFixture(config_name))
+        turnip_server.start()
+        self.addCleanup(turnip_server.stop)
+        return turnip_server
+
+    def test_request_git_repack(self):
+        """Ensure the request_git_repack script requests the repacks."""
+        repo = self.factory.makeGitRepository()
+        repo = removeSecurityProxy(repo)
+        repo.loose_object_count = 7000
+        repo.pack_count = 43
+        transaction.commit()
+
+        turnip_server = self.makeTurnipServer()
+        turnip_server.addBlob(
+            repo, 'repo', b'name: prod_repo')
+
+        retcode, stdout, stderr = run_script(
+            'cronscripts/request_git_repack.py', [])
+
+        self.assertIsNotNone(repo.date_last_repacked)
+        self.assertIn('Requested 0 automatic git repository repack.', stderr)
diff --git a/lib/lp/services/config/schema-lazr.conf b/lib/lp/services/config/schema-lazr.conf
index 7b35f3a..d048462 100644
--- a/lib/lp/services/config/schema-lazr.conf
+++ b/lib/lp/services/config/schema-lazr.conf
@@ -1491,6 +1491,11 @@ dbuser: request-daily-builds
 # datatype: integer
 timeout: 60
 
+[request_git_repack]
+dbuser: request_git_repack
+
+# datatype: integer
+timeout: 60
 
 [revisionkarma]
 # The database user which will be used by this process.

Follow ups