launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #26496
[Merge] ~ilasc/launchpad:git-repack-cron-script into launchpad:master
Ioana Lasc has proposed merging ~ilasc/launchpad:git-repack-cron-script into launchpad:master.
Commit message:
Add cron script for git repack
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~ilasc/launchpad/+git/launchpad/+merge/398963
--
Your team Launchpad code reviewers is requested to review the proposed merge of ~ilasc/launchpad:git-repack-cron-script into launchpad:master.
diff --git a/cronscripts/request_git_repack.py b/cronscripts/request_git_repack.py
new file mode 100755
index 0000000..b01272b
--- /dev/null
+++ b/cronscripts/request_git_repack.py
@@ -0,0 +1,52 @@
+#!/usr/bin/python2 -S
+#
+# Copyright 2010-2021 Canonical Ltd. This software is licensed under the
+# GNU Affero General Public License version 3 (see the file LICENSE).
+
+"""Request git repack for git repositories."""
+
+__metaclass__ = type
+
+import _pythonpath
+
+import transaction
+
+from zope.component import getUtility
+
+from lp.code.interfaces.gitrepository import IGitRepository, IGitRepositorySet
+
+from lp.services.config import config
+from lp.services.database.constants import UTC_NOW
+from lp.services.scripts.base import LaunchpadCronScript
+from lp.services.timeout import set_default_timeout_function
+from lp.services.webapp.errorlog import globalErrorUtility
+
+
+class RequestGitRepack(LaunchpadCronScript):
+ """Run git repository repack job."""
+
+ def __init__(self):
+ name = 'request_git_repack'
+ dbuser = config.request_git_repack.dbuser
+ LaunchpadCronScript.__init__(self, name, dbuser)
+
+ def main(self):
+ globalErrorUtility.configure(self.name)
+ self.logger.info(
+ 'Requesting automatic git repository repack.')
+ set_default_timeout_function(
+ lambda: config.request_git_repack.timeout)
+ repackable_repos = getUtility(IGitRepository).getRepositoriesForRepack()
+
+ for repo in repackable_repos:
+ repo.repackRepository(self.logger)
+ repo.date_last_repacked = UTC_NOW
+
+ self.logger.info(
+ 'Requested %d automatic git repository repack.' % len(repackable_repos))
+
+ transaction.commit()
+
+ if __name__ == '__main__':
+ script = RequestGitRepack()
+ script.lock_and_run()
diff --git a/database/schema/security.cfg b/database/schema/security.cfg
index 7b5876e..737fb84 100644
--- a/database/schema/security.cfg
+++ b/database/schema/security.cfg
@@ -848,6 +848,12 @@ public.webhook = SELECT
public.webhookjob = SELECT, INSERT
type=user
+[request_git_repack]
+groups=script
+public.gitrepository = SELECT, UPDATE
+public.job = SELECT, INSERT
+type=user
+
[revisionkarma]
groups=script
public.branch = SELECT
diff --git a/lib/lp/code/interfaces/gitcollection.py b/lib/lp/code/interfaces/gitcollection.py
index b6a8d0a..9135471 100644
--- a/lib/lp/code/interfaces/gitcollection.py
+++ b/lib/lp/code/interfaces/gitcollection.py
@@ -152,6 +152,9 @@ class IGitCollection(Interface):
def modifiedSince(date):
"""Restrict the collection to repositories modified since `date`."""
+ def qualifiesForRepack():
+ """Restrict the collection to repositories that qualify for repack."""
+
def ownedBy(person):
"""Restrict the collection to repositories owned by 'person'."""
diff --git a/lib/lp/code/interfaces/gitrepository.py b/lib/lp/code/interfaces/gitrepository.py
index 5b3cc61..e9c0095 100644
--- a/lib/lp/code/interfaces/gitrepository.py
+++ b/lib/lp/code/interfaces/gitrepository.py
@@ -749,7 +749,7 @@ class IGitRepositoryExpensiveRequest(Interface):
@export_write_operation()
@operation_for_version("devel")
- def repackRepository():
+ def repackRepository(logger=None):
"""Trigger a repack repository operation.
Raises Unauthorized if the repack was attempted by a person
@@ -900,13 +900,10 @@ class IGitRepositoryEdit(IWebhookTarget):
effective permissions on each of the requested references.
"""
- def setRepackData(loose_object_count, pack_count):
- """Sets the repack parameters received from Turnip.
+ def getRepositoriesForRepack():
+ """Get all repositories that need a repack.
- :param loose_object_count: The number of loose objects that
- this repository currently has.
- :param pack_count: The number of packs that
- this repository currently has.
+ :return: A collection of `IGitRepository` objects.
"""
@operation_parameters(
@@ -1071,6 +1068,15 @@ class IGitRepositorySet(Interface):
:return: A collection of `IGitRepository` objects.
"""
+ # @operation_returns_collection_of(IGitRepository)
+ # @export_read_operation()
+ # @operation_for_version("devel")
+ # def getRepositoriesForRepack():
+ # """Get all repositories that need a repack.
+ #
+ # :return: A collection of `IGitRepository` objects.
+ # """
+
@call_with(user=REQUEST_USER)
@operation_parameters(
person=Reference(
diff --git a/lib/lp/code/model/gitcollection.py b/lib/lp/code/model/gitcollection.py
index 37cf78d..b56ff00 100644
--- a/lib/lp/code/model/gitcollection.py
+++ b/lib/lp/code/model/gitcollection.py
@@ -26,8 +26,8 @@ from storm.expr import (
LeftJoin,
Select,
SQL,
- With,
- )
+ With, Or,
+)
from storm.info import ClassAlias
from storm.store import EmptyResultSet
from zope.component import getUtility
@@ -61,6 +61,7 @@ from lp.registry.model.person import Person
from lp.registry.model.product import Product
from lp.registry.model.sourcepackagename import SourcePackageName
from lp.registry.model.teammembership import TeamParticipation
+from lp.services.config import config
from lp.services.database.bulk import load_related
from lp.services.database.decoratedresultset import DecoratedResultSet
from lp.services.database.interfaces import IStore
@@ -515,6 +516,14 @@ class GenericGitCollection:
return self._filterBy(
[GitRepository.date_last_modified > date], symmetric=False)
+ def qualifiesForRepack(self):
+ """See `IGitCollection`."""
+ return self._filterBy(
+ [Or(GitRepository.loose_object_count >=
+ config.codehosting.loose_objects_threshold,
+ GitRepository.pack_count >=
+ config.codehosting.packs_threshold)], symmetric=False)
+
def ownedBy(self, person):
"""See `IGitCollection`."""
return self._filterBy([GitRepository.owner == person], symmetric=False)
diff --git a/lib/lp/code/model/gitrepository.py b/lib/lp/code/model/gitrepository.py
index 74d94ec..eb64f6e 100644
--- a/lib/lp/code/model/gitrepository.py
+++ b/lib/lp/code/model/gitrepository.py
@@ -487,7 +487,7 @@ class GitRepository(StormBase, WebhookTargetMixin, GitIdentityMixin):
namespace.moveRepository(self, user, rename_if_necessary=True)
self._reconcileAccess()
- def repackRepository(self):
+ def repackRepository(self, logger=None):
getUtility(IGitHostingClient).repackRepository(self.getInternalPath())
@property
@@ -1730,6 +1730,12 @@ class GitRepository(StormBase, WebhookTargetMixin, GitIdentityMixin):
getUtility(IReclaimGitRepositorySpaceJobSource).create(
repository_name, repository_path)
+ def getRepositoriesForRepack(self):
+ """See `IGitRepositorySet`."""
+ collection = getUtility(IAllGitRepositories)
+ collection = collection.qualifiesForRepack()
+ return collection.getRepositories(eager_load=True)
+
class DeletionOperation:
"""Represent an operation to perform as part of branch deletion."""
@@ -1847,6 +1853,12 @@ class GitRepositorySet:
collection = collection.modifiedSince(modified_since_date)
return collection.getRepositories(eager_load=True, sort_by=order_by)
+ # def getRepositoriesForRepack(self):
+ # """See `IGitRepositorySet`."""
+ # collection = getUtility(IAllGitRepositories)
+ # collection = collection.qualifiesForRepack()
+ # return collection.getRepositories(eager_load=True)
+
def getRepositoryVisibilityInfo(self, user, person, repository_names):
"""See `IGitRepositorySet`."""
if user is None:
diff --git a/lib/lp/code/model/tests/test_gitrepository.py b/lib/lp/code/model/tests/test_gitrepository.py
index c8e5bcf..00071db 100644
--- a/lib/lp/code/model/tests/test_gitrepository.py
+++ b/lib/lp/code/model/tests/test_gitrepository.py
@@ -569,6 +569,41 @@ class TestGitRepository(TestCaseWithFactory):
self.assertThat(recorder2, HasQueryCount.byEquality(recorder1))
self.assertEqual(7, recorder1.count)
+ def test_getRepositoriesForRepack(self):
+ # The values for the 2 thresholds are defined
+ # under config.codehosting.loose_objects_threshold = 4350
+ # and config.codehosting.packs_threshold = 30
+ repository = self.factory.makeGitRepository()
+ repository = removeSecurityProxy(repository)
+ repository.loose_object_count = 3
+ repository.pack_count = 4
+ self.assertEqual(0, len(list(repository.getRepositoriesForRepack())))
+
+ # If either one of the 2 repack indicators are above
+ # allowed thresholds the repository becomes
+ # a candidate for an auto repack
+ repository.loose_object_count = 4350
+ repository.pack_count = 4
+ self.assertEqual(1, len(list(repository.getRepositoriesForRepack())))
+
+ repository.loose_object_count = 3
+ repository.pack_count = 31
+ self.assertEqual(1, len(list(repository.getRepositoriesForRepack())))
+
+ # adding a second repo with values under admissible thresholds
+ # does not modify the number of candidate repos
+ repository2 = self.factory.makeGitRepository()
+ repository2 = removeSecurityProxy(repository2)
+ repository2.loose_object_count = 3
+ repository2.pack_count = 4
+ self.assertEqual(1, len(list(repository2.getRepositoriesForRepack())))
+
+ # second repo has one metric above threshold
+ # number of candidates increases by 1
+ repository2.loose_object_count = 3
+ repository2.pack_count = 99
+ self.assertEqual(2, len(list(repository.getRepositoriesForRepack())))
+
class TestGitIdentityMixin(TestCaseWithFactory):
"""Test the defaults and identities provided by GitIdentityMixin."""
diff --git a/lib/lp/code/scripts/tests/test_request_git_repack.py b/lib/lp/code/scripts/tests/test_request_git_repack.py
new file mode 100644
index 0000000..abb0630
--- /dev/null
+++ b/lib/lp/code/scripts/tests/test_request_git_repack.py
@@ -0,0 +1,123 @@
+# Copyright 2010-2021 Canonical Ltd. This software is licensed under the
+# GNU Affero General Public License version 3 (see the file LICENSE).
+
+"""Test the request_git_repack script."""
+import base64
+from collections import defaultdict
+import json
+import threading
+from wsgiref.simple_server import (
+ make_server,
+ WSGIRequestHandler,
+ )
+import transaction
+from zope.security.proxy import removeSecurityProxy
+
+from lp.code.interfaces.codehosting import BRANCH_ID_ALIAS_PREFIX
+from lp.services.config import config
+from lp.services.config.fixture import ConfigFixture, ConfigUseFixture
+from lp.testing import TestCaseWithFactory
+from lp.testing.layers import ZopelessAppServerLayer
+from lp.services.scripts.tests import run_script
+
+
+class SilentWSGIRequestHandler(WSGIRequestHandler):
+ """A request handler that doesn't log requests."""
+
+ def log_message(self, fmt, *args):
+ pass
+
+
+class FakeTurnipApplication:
+ """A WSGI application that provides some fake turnip endpoints."""
+
+ def __init__(self):
+ self.contents = defaultdict(dict)
+
+ def _not_found(self, start_response):
+ start_response('404 Not Found', [('Content-Type', 'text/plain')])
+ return [b'']
+
+ def __call__(self, environ, start_response):
+ segments = environ['PATH_INFO'].lstrip('/').split('/')
+ if (len(segments) < 4 or
+ segments[0] != 'repo' or segments[2] != 'blob'):
+ return self._not_found(start_response)
+ repository_id = segments[1]
+ if repository_id not in self.contents:
+ return self._not_found(start_response)
+ filename = '/'.join(segments[3:])
+ if filename not in self.contents[repository_id]:
+ return self._not_found(start_response)
+ blob = self.contents[repository_id][filename]
+ response = {'size': len(blob), 'data': base64.b64encode(blob)}
+ start_response(
+ '200 OK', [('Content-Type', 'application/octet-stream')])
+ return [json.dumps(response).encode('UTF-8')]
+
+ def addBlob(self, repository, filename, contents):
+ self.contents[repository.getInternalPath()][filename] = contents
+
+
+class FakeTurnipServer(threading.Thread):
+ """Thread that runs a fake turnip server."""
+
+ def __init__(self):
+ super(FakeTurnipServer, self).__init__()
+ self.app = FakeTurnipApplication()
+ self.server = make_server(
+ 'localhost', 0, self.app, handler_class=SilentWSGIRequestHandler)
+
+ def run(self):
+ self.server.serve_forever()
+
+ def getURL(self):
+ host, port = self.server.server_address
+ return 'http://%s:%d/' % (host, port)
+
+ def addBlob(self, repository_id, filename, contents):
+ self.app.addBlob(repository_id, filename, contents)
+
+ def stop(self):
+ self.server.shutdown()
+
+
+class TestRequestGitRepack(TestCaseWithFactory):
+
+ layer = ZopelessAppServerLayer
+
+ def setUp(self):
+ super(TestRequestGitRepack, self).setUp()
+
+ def makeTurnipServer(self):
+ turnip_server = FakeTurnipServer()
+ config_name = self.factory.getUniqueString()
+ config_fixture = self.useFixture(ConfigFixture(
+ config_name, config.instance_name))
+ setting_lines = [
+ '[codehosting]',
+ 'internal_git_api_endpoint: %s' % turnip_server.getURL(),
+ ]
+ config_fixture.add_section('\n' + '\n'.join(setting_lines))
+ self.useFixture(ConfigUseFixture(config_name))
+ turnip_server.start()
+ self.addCleanup(turnip_server.stop)
+ return turnip_server
+
+ def test_request_git_repack(self):
+ """Ensure the request_git_repack script requests the repacks."""
+ repo = self.factory.makeGitRepository()
+ repo = removeSecurityProxy(repo)
+ repo.loose_object_count = 7000
+ repo.pack_count = 43
+ transaction.commit()
+
+ turnip_server = self.makeTurnipServer()
+ turnip_server.addBlob(
+ repo, 'repo', b'name: prod_repo')
+
+ retcode, stdout, stderr = run_script(
+ 'cronscripts/request_git_repack.py', [])
+
+ self.assertIsNotNone(repo.date_last_repacked)
+ self.assertIn('Requested 0 automatic git repository repack.', stderr)
diff --git a/lib/lp/services/config/schema-lazr.conf b/lib/lp/services/config/schema-lazr.conf
index 7b35f3a..d048462 100644
--- a/lib/lp/services/config/schema-lazr.conf
+++ b/lib/lp/services/config/schema-lazr.conf
@@ -1491,6 +1491,11 @@ dbuser: request-daily-builds
# datatype: integer
timeout: 60
+[request_git_repack]
+dbuser: request_git_repack
+
+# datatype: integer
+timeout: 60
[revisionkarma]
# The database user which will be used by this process.
Follow ups