← Back to team overview

launchpad-reviewers team mailing list archive

Re: [Merge] ~ilasc/launchpad:git-repack-cron-script into launchpad:master

 


Diff comments:

> diff --git a/cronscripts/request_git_repack.py b/cronscripts/request_git_repack.py

This isn't really requesting anything, except maybe in a technical sense in that it's making HTTP requests.  Perhaps "repack_git_repositories.py"?

> new file mode 100755
> index 0000000..10def8e
> --- /dev/null
> +++ b/cronscripts/request_git_repack.py
> @@ -0,0 +1,52 @@
> +#!/usr/bin/python2 -S
> +#
> +# Copyright 2010-2021 Canonical Ltd.  This software is licensed under the
> +# GNU Affero General Public License version 3 (see the file LICENSE).
> +
> +"""Request git repack for git repositories."""
> +
> +__metaclass__ = type
> +
> +import _pythonpath
> +
> +import transaction
> +from zope.component import getUtility
> +
> +from lp.code.interfaces.gitrepository import IGitRepository
> +from lp.services.config import config
> +from lp.services.database.constants import UTC_NOW
> +from lp.services.scripts.base import LaunchpadCronScript
> +from lp.services.timeout import set_default_timeout_function
> +from lp.services.webapp.errorlog import globalErrorUtility
> +
> +
> +class RequestGitRepack(LaunchpadCronScript):

Normally we'd put the implementation class somewhere more like lp.code.scripts.repack_git_repositories, so that it can more easily be imported by unit tests.  Only the `if __name__ == '__main__':` bit would remain in the top-level executable.  We can then write most of our script tests as in-process tests against the implementation class (which is much faster, and allows for more precise tests), and just have one or two smoke-tests that run the script out-of-process.

(There are certainly scripts that violate this, but it's the practice we generally follow for new scripts.)

> +    """Run git repository repack job."""
> +
> +    def __init__(self):
> +        name = 'request_git_repack'
> +        dbuser = config.request_git_repack.dbuser
> +        LaunchpadCronScript.__init__(self, name, dbuser)
> +
> +    def main(self):
> +        globalErrorUtility.configure(self.name)
> +        self.logger.info(
> +            'Requesting automatic git repository repack.')
> +        set_default_timeout_function(
> +            lambda: config.request_git_repack.timeout)
> +        repackable_repos = getUtility(
> +            IGitRepository).getRepositoriesForRepack()
> +
> +        for repo in repackable_repos:
> +            repo.repackRepository(self.logger)
> +            repo.date_last_repacked = UTC_NOW
> +
> +        self.logger.info(
> +            'Requested %d automatic git repository repack.'
> +            % len(repackable_repos))
> +
> +        transaction.commit()
> +
> +        if __name__ == '__main__':
> +            script = RequestGitRepack()
> +            script.lock_and_run()

This `if __name__ == '__main__':` block is wrongly indented, which is probably why you're finding that nothing works in unit tests.  It should start at the leftmost column.

> diff --git a/database/schema/security.cfg b/database/schema/security.cfg
> index 7b5876e..737fb84 100644
> --- a/database/schema/security.cfg
> +++ b/database/schema/security.cfg
> @@ -848,6 +848,12 @@ public.webhook                                  = SELECT
>  public.webhookjob                               = SELECT, INSERT
>  type=user
>  
> +[request_git_repack]

We can certainly add a new DB user if we need to, but it's a bit of a deployment hassle.  Could you reasonably run this script using an existing DB user instead?  Perhaps branchscanner would do.

> +groups=script
> +public.gitrepository                            = SELECT, UPDATE
> +public.job                                      = SELECT, INSERT
> +type=user
> +
>  [revisionkarma]
>  groups=script
>  public.branch                                   = SELECT
> diff --git a/lib/lp/code/interfaces/gitrepository.py b/lib/lp/code/interfaces/gitrepository.py
> index 5b3cc61..e9c0095 100644
> --- a/lib/lp/code/interfaces/gitrepository.py
> +++ b/lib/lp/code/interfaces/gitrepository.py
> @@ -900,13 +900,10 @@ class IGitRepositoryEdit(IWebhookTarget):
>              effective permissions on each of the requested references.
>          """
>  
> -    def setRepackData(loose_object_count, pack_count):

Removing this from the interface seems strange and perhaps unintentional.

> -        """Sets the repack parameters received from Turnip.
> +    def getRepositoriesForRepack():
> +        """Get all repositories that need a repack.
>  
> -        :param loose_object_count: The number of loose objects that
> -            this repository currently has.
> -        :param pack_count: The number of packs that
> -            this repository currently has.
> +        :return: A collection of `IGitRepository` objects.
>          """
>  
>      @operation_parameters(
> diff --git a/lib/lp/code/scripts/tests/test_request_git_repack.py b/lib/lp/code/scripts/tests/test_request_git_repack.py
> new file mode 100644
> index 0000000..6643aa4
> --- /dev/null
> +++ b/lib/lp/code/scripts/tests/test_request_git_repack.py
> @@ -0,0 +1,126 @@
> +# Copyright 2010-2021 Canonical Ltd.  This software is licensed under the
> +# GNU Affero General Public License version 3 (see the file LICENSE).
> +
> +"""Test the request_git_repack script."""
> +import base64
> +from collections import defaultdict
> +import json
> +import threading
> +from wsgiref.simple_server import (
> +    make_server,
> +    WSGIRequestHandler,
> +    )
> +
> +import transaction
> +from zope.security.proxy import removeSecurityProxy
> +
> +from lp.services.config import config
> +from lp.services.config.fixture import (
> +    ConfigFixture,
> +    ConfigUseFixture,
> +    )
> +from lp.services.scripts.tests import run_script
> +from lp.testing import TestCaseWithFactory
> +from lp.testing.layers import ZopelessAppServerLayer
> +
> +
> +class SilentWSGIRequestHandler(WSGIRequestHandler):
> +    """A request handler that doesn't log requests."""
> +
> +    def log_message(self, fmt, *args):
> +        pass
> +
> +
> +class FakeTurnipApplication:
> +    """A WSGI application that provides some fake turnip endpoints."""
> +
> +    def __init__(self):
> +        self.contents = defaultdict(dict)
> +
> +    def _not_found(self, start_response):
> +        start_response('404 Not Found', [('Content-Type', 'text/plain')])
> +        return [b'']
> +
> +    def __call__(self, environ, start_response):
> +        segments = environ['PATH_INFO'].lstrip('/').split('/')
> +        if (len(segments) < 4 or
> +                segments[0] != 'repo' or segments[2] != 'blob'):
> +            return self._not_found(start_response)
> +        repository_id = segments[1]
> +        if repository_id not in self.contents:
> +            return self._not_found(start_response)
> +        filename = '/'.join(segments[3:])
> +        if filename not in self.contents[repository_id]:
> +            return self._not_found(start_response)
> +        blob = self.contents[repository_id][filename]
> +        response = {'size': len(blob), 'data': base64.b64encode(blob)}
> +        start_response(
> +            '200 OK', [('Content-Type', 'application/octet-stream')])
> +        return [json.dumps(response).encode('UTF-8')]
> +
> +    def addBlob(self, repository, filename, contents):
> +        self.contents[repository.getInternalPath()][filename] = contents
> +
> +
> +class FakeTurnipServer(threading.Thread):
> +    """Thread that runs a fake turnip server."""
> +
> +    def __init__(self):
> +        super(FakeTurnipServer, self).__init__()
> +        self.app = FakeTurnipApplication()
> +        self.server = make_server(
> +            'localhost', 0, self.app, handler_class=SilentWSGIRequestHandler)
> +
> +    def run(self):
> +        self.server.serve_forever()
> +
> +    def getURL(self):
> +        host, port = self.server.server_address
> +        return 'http://%s:%d/' % (host, port)
> +
> +    def addBlob(self, repository_id, filename, contents):
> +        self.app.addBlob(repository_id, filename, contents)
> +
> +    def stop(self):
> +        self.server.shutdown()

There's a lot of duplication between the test fixtures here and in lp.code.scripts.tests.test_request_daily_builds.  Consider putting them somewhere common.

> +
> +
> +class TestRequestGitRepack(TestCaseWithFactory):
> +
> +    layer = ZopelessAppServerLayer
> +
> +    def setUp(self):
> +        super(TestRequestGitRepack, self).setUp()
> +
> +    def makeTurnipServer(self):
> +        turnip_server = FakeTurnipServer()
> +        config_name = self.factory.getUniqueString()
> +        config_fixture = self.useFixture(ConfigFixture(
> +            config_name, config.instance_name))
> +        setting_lines = [
> +            '[codehosting]',
> +            'internal_git_api_endpoint: %s' % turnip_server.getURL(),
> +            ]
> +        config_fixture.add_section('\n' + '\n'.join(setting_lines))
> +        self.useFixture(ConfigUseFixture(config_name))
> +        turnip_server.start()
> +        self.addCleanup(turnip_server.stop)
> +        return turnip_server
> +
> +    def test_request_git_repack(self):
> +        """Ensure the request_git_repack script requests the repacks."""
> +        repo = self.factory.makeGitRepository()
> +        repo = removeSecurityProxy(repo)
> +        repo.loose_object_count = 7000
> +        repo.pack_count = 43
> +        transaction.commit()
> +
> +        turnip_server = self.makeTurnipServer()
> +        turnip_server.addBlob(
> +            repo, 'repo', b'name: prod_repo')
> +
> +        retcode, stdout, stderr = run_script(
> +            'cronscripts/request_git_repack.py', [])
> +
> +        self.assertIsNotNone(repo.date_last_repacked)
> +        self.assertIn('Requested 0 automatic git repository repack.', stderr)


-- 
https://code.launchpad.net/~ilasc/launchpad/+git/launchpad/+merge/398963
Your team Launchpad code reviewers is requested to review the proposed merge of ~ilasc/launchpad:git-repack-cron-script into launchpad:master.


References