← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~abentley/launchpad/memlimit-sendbranchmail into lp:launchpad

 

Aaron Bentley has proposed merging lp:~abentley/launchpad/memlimit-sendbranchmail into lp:launchpad.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  Bug #585126 in Launchpad itself: "sendbranchmail with lp:~vcs-imports/linux/trunk is eating memory"
  https://bugs.launchpad.net/launchpad/+bug/585126

For more details, see:
https://code.launchpad.net/~abentley/launchpad/memlimit-sendbranchmail/+merge/65689

= Summary =
Fix bug #585126: sendbranchmail with lp:~vcs-imports/linux/trunk is eating memory 

== Proposed fix ==
Introduce a memory limit to branch mail jobs.

== Pre-implementation notes ==
None

== Implementation details ==
The TwistedJobRunner now supports memory limits for jobs, which means that each job runs in a separate process, and raises a MemoryError if it exceeds allowable memory use.  This branch uses that mechanism for the sendbranchmail jobs.

In order to use the memory limit, we must switch to the TwistedJobRunner.  In order to use the TwistedJobRunner, we need a suitable job source, so BranchMailJobSource was implemented.  In order to implement BranchMailJobSource, we changed BranchJobDerived.get so that it will return instances of any given type.  In order to make BranchJobDerived.get to return instances of any given type, we used RegisteredSubclass as its metaclass and provided _subclass and _register_subclass.

There were several drivebys as well:
AsyncVirtualServer can now behave as a ContextManager, meaning that the following works:
with get_ro_server():
    server_ops

Old bzr workarounds that are no longer needed were deleted.

Lots of lint was fixed


== Tests ==

bin/test -vt sendbranchmail -t branchjob

== Demo and Q/A ==
Not sure whether this is QAable.

= Launchpad lint =

Checking for conflicts and issues in changed files.

Linting changed files:
  lib/lp/translations/tests/test_translationtemplatesbuildjob.py
  cronscripts/sendbranchmail.py
  lib/lp/registry/model/packagingjob.py
  lib/lp/codehosting/vfs/transport.py
  lib/lp/code/scripts/tests/test_sendbranchmail.py
  lib/lp/code/model/branchjob.py
  lib/lp/services/utils.py
-- 
https://code.launchpad.net/~abentley/launchpad/memlimit-sendbranchmail/+merge/65689
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~abentley/launchpad/memlimit-sendbranchmail into lp:launchpad.
=== modified file 'cronscripts/sendbranchmail.py'
--- cronscripts/sendbranchmail.py	2010-05-19 18:07:56 +0000
+++ cronscripts/sendbranchmail.py	2011-06-23 16:00:30 +0000
@@ -1,6 +1,6 @@
 #!/usr/bin/python -S
 #
-# Copyright 2009 Canonical Ltd.  This software is licensed under the
+# Copyright 2008-2011 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
 # pylint: disable-msg=W0403
@@ -12,14 +12,18 @@
 
 __metaclass__ = type
 
+
+import logging
+
 import _pythonpath
-from zope.component import getUtility
 
 from canonical.config import config
-from lp.codehosting.vfs import get_ro_server
-from lp.services.job.runner import JobRunner
-from lp.code.interfaces.branchjob import (
-    IRevisionMailJobSource, IRevisionsAddedJobSource)
+from lp.services.job.runner import (
+    TwistedJobRunner,
+    )
+from lp.code.model.branchjob import (
+    BranchMailJobSource,
+    )
 from lp.services.scripts.base import LaunchpadCronScript
 from canonical.launchpad.webapp.errorlog import globalErrorUtility
 
@@ -29,15 +33,8 @@
 
     def main(self):
         globalErrorUtility.configure('sendbranchmail')
-        jobs = list(getUtility(IRevisionMailJobSource).iterReady())
-        jobs.extend(getUtility(IRevisionsAddedJobSource).iterReady())
-        runner = JobRunner(jobs, self.logger)
-        server = get_ro_server()
-        server.start_server()
-        try:
-            runner.runAll()
-        finally:
-            server.stop_server()
+        runner = TwistedJobRunner.runFromSource(
+            BranchMailJobSource, 'send-branch-mail', logging.getLogger())
         self.logger.info(
             'Ran %d RevisionMailJobs.' % len(runner.completed_jobs))
 

=== modified file 'lib/lp/code/model/branchjob.py'
--- lib/lp/code/model/branchjob.py	2011-06-20 20:07:33 +0000
+++ lib/lp/code/model/branchjob.py	2011-06-23 16:00:30 +0000
@@ -13,6 +13,7 @@
 ]
 
 import contextlib
+from itertools import chain
 import operator
 import os
 import shutil
@@ -106,7 +107,11 @@
 from lp.scripts.helpers import TransactionFreeOperation
 from lp.services.job.interfaces.job import JobStatus
 from lp.services.job.model.job import Job
-from lp.services.job.runner import BaseRunnableJob
+from lp.services.job.runner import (
+    BaseRunnableJob,
+    BaseRunnableJobSource,
+    )
+from lp.services.utils import RegisteredSubclass
 from lp.translations.interfaces.translationimportqueue import (
     ITranslationImportQueue,
     )
@@ -217,6 +222,23 @@
 
 class BranchJobDerived(BaseRunnableJob):
 
+    __metaclass__ = RegisteredSubclass
+    _subclass = {}
+
+    @staticmethod
+    def _register_subclass(cls):
+        """Register this class with its enumeration."""
+        # This would be a classmethod, except that subclasses (e.g.
+        # TranslationPackagingJob) need to be able to override it and call
+        # into it, and there's no syntax to call a base class's version of a
+        # classmethod with the subclass as the first parameter.
+        job_type = getattr(cls, 'class_job_type', None)
+        if job_type is not None:
+            value = cls._subclass.setdefault(job_type, cls)
+            assert value is cls, (
+                '%s already registered to %s.' % (
+                    job_type.name, value.__name__))
+
     delegates(IBranchJob)
 
     def __init__(self, branch_job):
@@ -251,19 +273,29 @@
             And(BranchJob.job_type == cls.class_job_type,
                 BranchJob.job == Job.id,
                 Job.id.is_in(Job.ready_jobs)))
-        return (cls(job) for job in jobs)
-
-    @classmethod
-    def get(cls, key):
-        """Return the instance of this class whose key is supplied.
+        return (cls._getInstance(job) for job in jobs)
+
+    @classmethod
+    def _getInstance(cls, job):
+        return cls._subclass[job.job_type](job)
+
+    @classmethod
+    def get(cls, key, desired_classes=None):
+        """Return the instance of this class (or a subclass) whose key
+        is supplied.  Calling this method on a subclass returns only values
+        for that subclass.
 
         :raises: SQLObjectNotFound
         """
-        instance = IStore(BranchJob).get(BranchJob, key)
-        if instance is None or instance.job_type != cls.class_job_type:
-            raise SQLObjectNotFound(
-                'No occurrence of %s has key %s' % (cls.__name__, key))
-        return cls(instance)
+        if desired_classes is None:
+            desired_classes = cls
+        branchjob = IStore(BranchJob).get(BranchJob, key)
+        if branchjob is not None:
+            job = cls._getInstance(branchjob)
+            if isinstance(job, cls):
+                return job
+        raise SQLObjectNotFound(
+            'No occurrence of %s has key %s' % (cls.__name__, key))
 
     def getOopsVars(self):
         """See `IRunnableJob`."""
@@ -764,6 +796,25 @@
         return outf.getvalue()
 
 
+class BranchMailJobSource(BaseRunnableJobSource):
+    """Source of jobs that send mail about branches."""
+
+    memory_limit = 2 * (1024 ** 3)
+
+    @staticmethod
+    def contextManager():
+        return get_ro_server()
+
+    @staticmethod
+    def iterReady():
+        return chain(
+            RevisionMailJob.iterReady(), RevisionsAddedJob.iterReady())
+
+    @staticmethod
+    def get(key):
+        return BranchJobDerived.get(key, (RevisionMailJob, RevisionsAddedJob))
+
+
 class RosettaUploadJob(BranchJobDerived):
     """A Job that uploads translation files to Rosetta."""
 

=== modified file 'lib/lp/code/scripts/tests/test_sendbranchmail.py'
--- lib/lp/code/scripts/tests/test_sendbranchmail.py	2010-10-26 15:47:24 +0000
+++ lib/lp/code/scripts/tests/test_sendbranchmail.py	2011-06-23 16:00:30 +0000
@@ -20,7 +20,6 @@
     RevisionMailJob,
     RevisionsAddedJob,
     )
-from lp.services.osutils import override_environ
 from lp.testing import TestCaseWithFactory
 
 
@@ -41,8 +40,7 @@
         tree.add('foo')
         # XXX: AaronBentley 2010-08-06 bug=614404: a bzr username is
         # required to generate the revision-id.
-        with override_environ(BZR_EMAIL='me@xxxxxxxxxxx'):
-            tree.commit('Added foo.', rev_id='rev1')
+        tree.commit('Added foo.', rev_id='rev1', committer='me@xxxxxxxxxxx')
         return branch, tree
 
     def test_sendbranchmail(self):
@@ -55,7 +53,9 @@
         retcode, stdout, stderr = run_script(
             'cronscripts/sendbranchmail.py', [])
         self.assertEqual(
-            'INFO    Creating lockfile: /var/lock/launchpad-sendbranchmail.lock\n'
+            'INFO    Creating lockfile: '
+                '/var/lock/launchpad-sendbranchmail.lock\n'
+            'INFO    Running through Twisted.\n'
             'INFO    Ran 1 RevisionMailJobs.\n', stderr)
         self.assertEqual('', stdout)
         self.assertEqual(0, retcode)
@@ -70,7 +70,8 @@
         retcode, stdout, stderr = run_script(
             'cronscripts/sendbranchmail.py', [])
         self.assertIn(
-            'INFO    Creating lockfile: /var/lock/launchpad-sendbranchmail.lock\n',
+            'INFO    Creating lockfile: '
+                '/var/lock/launchpad-sendbranchmail.lock\n',
             stderr)
         self.assertIn('INFO    Job resulted in OOPS:', stderr)
         self.assertIn('INFO    Ran 0 RevisionMailJobs.\n', stderr)
@@ -82,17 +83,16 @@
         self.useBzrBranches()
         branch, tree = self.createBranch()
         tree.bzrdir.root_transport.put_bytes('foo', 'baz')
-        # XXX: AaronBentley 2010-08-06 bug=614404: a bzr username is
-        # required to generate the revision-id.
-        with override_environ(BZR_EMAIL='me@xxxxxxxxxxx'):
-            tree.commit('Added foo.', rev_id='rev2')
+        tree.commit('Added foo.', rev_id='rev2', committer='me@xxxxxxxxxxx')
         RevisionsAddedJob.create(
             branch, 'rev1', 'rev2', 'from@xxxxxxxxxxx')
         transaction.commit()
         retcode, stdout, stderr = run_script(
             'cronscripts/sendbranchmail.py', [])
         self.assertEqual(
-            'INFO    Creating lockfile: /var/lock/launchpad-sendbranchmail.lock\n'
+            'INFO    Creating lockfile:'
+                ' /var/lock/launchpad-sendbranchmail.lock\n'
+            'INFO    Running through Twisted.\n'
             'INFO    Ran 1 RevisionMailJobs.\n',
             stderr)
         self.assertEqual('', stdout)

=== modified file 'lib/lp/codehosting/vfs/transport.py'
--- lib/lp/codehosting/vfs/transport.py	2011-03-29 03:24:05 +0000
+++ lib/lp/codehosting/vfs/transport.py	2011-06-23 16:00:30 +0000
@@ -102,7 +102,7 @@
         """Return the absolute, escaped path to `relpath` without the schema.
         """
         return urlutils.joinpath(
-            self.base[len(self.server.get_url())-1:], relpath)
+            self.base[len(self.server.get_url()) - 1:], relpath)
 
     def _getUnderylingTransportAndPath(self, relpath):
         """Return the underlying transport and path for `relpath`."""
@@ -178,6 +178,7 @@
 
     def iter_files_recursive(self):
         deferred = self._getUnderylingTransportAndPath('.')
+
         @no_traceback_failures
         def iter_files((transport, path)):
             return transport.clone(path).iter_files_recursive()
@@ -186,6 +187,7 @@
 
     def listable(self):
         deferred = self._getUnderylingTransportAndPath('.')
+
         @no_traceback_failures
         def listable((transport, path)):
             return transport.listable()
@@ -427,3 +429,10 @@
             return
         self._is_started = False
         unregister_transport(self.get_url(), self._transportFactory)
+
+    def __enter__(self):
+        self.start_server()
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.stop_server()

=== modified file 'lib/lp/registry/model/packagingjob.py'
--- lib/lp/registry/model/packagingjob.py	2011-03-23 16:28:51 +0000
+++ lib/lp/registry/model/packagingjob.py	2011-06-23 16:00:30 +0000
@@ -35,6 +35,7 @@
     JobStatus,
     )
 from lp.services.job.model.job import Job
+from lp.services.utils import RegisteredSubclass
 
 
 class PackagingJobType(DBEnumeratedType):
@@ -96,13 +97,6 @@
         self.productseries = productseries
 
 
-class RegisteredSubclass(type):
-    """Metaclass for when subclasses should be registered."""
-
-    def __init__(cls, name, bases, dict_):
-        cls._register_subclass(cls)
-
-
 class PackagingJobDerived:
     """Base class for specialized Packaging Job types."""
 

=== modified file 'lib/lp/services/utils.py'
--- lib/lp/services/utils.py	2011-04-01 16:30:57 +0000
+++ lib/lp/services/utils.py	2011-06-23 16:00:30 +0000
@@ -18,6 +18,7 @@
     'file_exists',
     'iter_list_chunks',
     'iter_split',
+    'RegisteredSubclass',
     'run_capturing_output',
     'synchronize',
     'text_delta',
@@ -129,7 +130,7 @@
     I'm amazed this isn't in itertools (mwhudson).
     """
     for i in range(0, len(a_list), size):
-        yield a_list[i:i+size]
+        yield a_list[i:i + size]
 
 
 def synchronize(source, target, add, remove):
@@ -239,7 +240,7 @@
     then reassemble.
     """
     # Make sure there is at least one newline so the split works.
-    first, rest = (s+'\n').split('\n', 1)
+    first, rest = (s + '\n').split('\n', 1)
     return (first + '\n' + dedent(rest)).strip()
 
 
@@ -285,3 +286,10 @@
     variables, and helps to avoid typos.
     """
     sys._getframe(1).f_locals["__traceback_info__"] = info
+
+
+class RegisteredSubclass(type):
+    """Metaclass for when subclasses should be registered."""
+
+    def __init__(cls, name, bases, dict_):
+        cls._register_subclass(cls)

=== modified file 'lib/lp/translations/tests/test_translationtemplatesbuildjob.py'
--- lib/lp/translations/tests/test_translationtemplatesbuildjob.py	2011-05-27 21:12:25 +0000
+++ lib/lp/translations/tests/test_translationtemplatesbuildjob.py	2011-06-23 16:00:30 +0000
@@ -156,6 +156,8 @@
     behavior by setting an attribute of the class (not an object!) at
     the beginning of every test.
     """
+    class_job_type = None
+
     # Fake _hasPotteryCompatibleSetup, and if so, make it give what
     # answer?
     fake_pottery_compatibility = None