← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] ~ilkeremrekoc/launchpad:buildbot-memory-fix into launchpad:master

 

İlker Emre Koç has proposed merging ~ilkeremrekoc/launchpad:buildbot-memory-fix into launchpad:master.

Commit message:
Add rlimit preservation context manager and apply

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~ilkeremrekoc/launchpad/+git/launchpad/+merge/492061

Jobs have a memory limit baked in that is set with a `setrlimit()`
call. Normally this would only be called within Celery tasks, but due
to the way we are doing tests in buildbot, the `setrlimit` gets called
within the main testing process of the buildbot worker when called
with the `runAll` method. This causes the memory's of the tests to be
limited and result in failures for certain tests due to Memory and
thread creation errors.

As a result, I added a context manager that preserves and restores
the rlimit in question whenever `runAll` is called in the tests
in question.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of ~ilkeremrekoc/launchpad:buildbot-memory-fix into launchpad:master.
diff --git a/lib/lp/crafts/tests/test_craftrecipebuildjob.py b/lib/lp/crafts/tests/test_craftrecipebuildjob.py
index ac3c5ef..ebddc16 100644
--- a/lib/lp/crafts/tests/test_craftrecipebuildjob.py
+++ b/lib/lp/crafts/tests/test_craftrecipebuildjob.py
@@ -8,6 +8,7 @@ import os
 import tarfile
 import tempfile
 from pathlib import Path
+from resource import RLIMIT_AS
 
 from artifactory import ArtifactoryPath
 from fixtures import FakeLogger
@@ -34,6 +35,7 @@ from lp.services.job.interfaces.job import JobStatus
 from lp.services.job.runner import JobRunner
 from lp.services.librarian.interfaces import ILibraryFileAliasSet
 from lp.services.librarian.utils import copy_and_close
+from lp.services.osutils import preserve_rlimit
 from lp.testing import TestCaseWithFactory
 from lp.testing.layers import CeleryJobLayer, ZopelessDatabaseLayer
 
@@ -95,7 +97,12 @@ class TestCraftPublishingJob(TestCaseWithFactory):
     def run_job(self, job):
         """Helper to run a job and return the result."""
         job = getUtility(ICraftPublishingJobSource).create(self.build)
-        JobRunner([job]).runAll()
+
+        # Preserve RLIMIT_AS resource limit because runAll changes it which
+        # causes the whole test worker process to have limited memory
+        with preserve_rlimit(RLIMIT_AS):
+            JobRunner([job]).runAll()
+
         job = removeSecurityProxy(job)
         return job
 
@@ -852,7 +859,10 @@ class TestCraftPublishingJob(TestCaseWithFactory):
             lambda self: "https://example.com/repo.git";,
         )
 
-        JobRunner([job]).runAll()
+        # Preserve RLIMIT_AS resource limit because runAll changes it which
+        # causes the whole test worker process to have limited memory
+        with preserve_rlimit(RLIMIT_AS):
+            JobRunner([job]).runAll()
 
         artifact = self._artifactory_search("repository", "artifact.file")
         self.assertEqual(artifact["properties"]["soss.license"], "unknown")
@@ -894,7 +904,10 @@ class TestCraftPublishingJob(TestCaseWithFactory):
             lambda self: "https://example.com/repo.git";,
         )
 
-        JobRunner([job]).runAll()
+        # Preserve RLIMIT_AS resource limit because runAll changes it which
+        # causes the whole test worker process to have limited memory
+        with preserve_rlimit(RLIMIT_AS):
+            JobRunner([job]).runAll()
 
         artifact = self._artifactory_search("repository", "artifact.file")
         self.assertEqual(artifact["properties"]["soss.license"], "unknown")
@@ -937,7 +950,10 @@ class TestCraftPublishingJob(TestCaseWithFactory):
             lambda self: "https://example.com/repo.git";,
         )
 
-        JobRunner([job]).runAll()
+        # Preserve RLIMIT_AS resource limit because runAll changes it which
+        # causes the whole test worker process to have limited memory
+        with preserve_rlimit(RLIMIT_AS):
+            JobRunner([job]).runAll()
 
         artifact = self._artifactory_search("repository", "artifact.file")
         self.assertEqual(artifact["properties"]["soss.license"], license_value)
diff --git a/lib/lp/services/osutils.py b/lib/lp/services/osutils.py
index 5472b79..3234842 100644
--- a/lib/lp/services/osutils.py
+++ b/lib/lp/services/osutils.py
@@ -20,7 +20,9 @@ import os.path
 import shutil
 import time
 from contextlib import contextmanager
+from resource import getrlimit, setrlimit
 from signal import SIGKILL, SIGTERM
+from typing import Tuple
 
 
 def remove_tree(path):
@@ -184,3 +186,19 @@ def process_exists(pid):
         # All is well - the process doesn't exist.
         return False
     return True
+
+
+@contextmanager
+def preserve_rlimit(resource_type: int):
+    """
+    Context manager to preserve and restore a specific resource limit.
+    """
+    saved_limits: Tuple[int, int] = getrlimit(resource_type)
+    try:
+        yield
+    finally:
+        try:
+            setrlimit(resource_type, saved_limits)
+        except Exception:
+            # Ignore restoration errors
+            pass