sts-sponsors team mailing list archive
-
sts-sponsors team
-
Mailing list archive
-
Message #05906
[Merge] ~bjornt/maas:bug-2003310-3.3 into maas:3.3
Björn Tillenius has proposed merging ~bjornt/maas:bug-2003310-3.3 into maas:3.3.
Commit message:
Bug #2003310: Refresh scripts are not re-run if they pass, but fail to report the results to the region
j
While refreshing the hardware information for controllers,
the scripts might all pass, but when reporting the results
back to the metadata server, something went wrong.
The exception was swallowed and ignored, though, so the
networking service didn't noticed it and assumed that
all scripts had passed.
This changes things so that the signal exception is caught
in the networking service itself, so it's aware that something
went wrong.
(cherry picked from commit 42cdcb554f65574d216aaff06ad592675920922a)
Requested reviews:
MAAS Lander (maas-lander): unittests
Björn Tillenius (bjornt)
For more details, see:
https://code.launchpad.net/~bjornt/maas/+git/maas/+merge/438759
--
Your team MAAS Committers is subscribed to branch maas:3.3.
diff --git a/src/provisioningserver/refresh/__init__.py b/src/provisioningserver/refresh/__init__.py
index 04c19cd..09a75b4 100644
--- a/src/provisioningserver/refresh/__init__.py
+++ b/src/provisioningserver/refresh/__init__.py
@@ -15,7 +15,6 @@ from provisioningserver.refresh.maas_api_helper import (
Credentials,
MD_VERSION,
signal,
- SignalException,
)
from provisioningserver.refresh.node_info_scripts import NODE_INFO_SCRIPTS
from provisioningserver.utils.snap import running_in_snap
@@ -24,14 +23,6 @@ from provisioningserver.utils.twisted import synchronous
maaslog = get_maas_logger("refresh")
-def signal_wrapper(*args, **kwargs):
- """Wrapper to capture and log any SignalException from signal."""
- try:
- signal(*args, **kwargs)
- except SignalException as e:
- maaslog.error("Error during controller refresh: %s" % e)
-
-
@synchronous
def refresh(
system_id,
@@ -72,11 +63,11 @@ def refresh(
)
if failed_scripts:
- signal_wrapper(
+ signal(
url, creds, "FAILED", f"Failed refreshing {system_id}", retry=False
)
else:
- signal_wrapper(
+ signal(
url, creds, "OK", f"Finished refreshing {system_id}", retry=False
)
@@ -104,7 +95,7 @@ def runscripts(
(parsed_url.scheme, parsed_url.netloc, "", "", "", "")
)
for script_name in sorted(scripts.keys()):
- signal_wrapper(
+ signal(
url,
creds,
"WORKING",
@@ -149,7 +140,7 @@ def runscripts(
if result == b"":
result = b"Unable to execute script"
files = {script_name: result, stderr_name: result}
- signal_wrapper(
+ signal(
url,
creds,
"WORKING",
@@ -165,7 +156,7 @@ def runscripts(
stdout_name: open(stdout_path, "rb").read(),
stderr_name: open(stderr_path, "rb").read(),
}
- signal_wrapper(
+ signal(
url,
creds,
"TIMEDOUT",
@@ -186,7 +177,7 @@ def runscripts(
}
if os.path.exists(result_path):
files[result_name] = open(result_path, "rb").read()
- signal_wrapper(
+ signal(
url,
creds,
"WORKING",
diff --git a/src/provisioningserver/refresh/tests/test_refresh.py b/src/provisioningserver/refresh/tests/test_refresh.py
index 66678bd..85efdfa 100644
--- a/src/provisioningserver/refresh/tests/test_refresh.py
+++ b/src/provisioningserver/refresh/tests/test_refresh.py
@@ -10,14 +10,12 @@ from unittest.mock import ANY, patch, sentinel
from maastesting.factory import factory
from maastesting.fixtures import TempDirectory
-from maastesting.matchers import MockAnyCall
from maastesting.testcase import MAASTestCase
from provisioningserver import refresh
from provisioningserver.path import get_maas_data_path
from provisioningserver.refresh import maas_api_helper
from provisioningserver.refresh.maas_api_helper import (
MD_VERSION,
- SignalException,
TimeoutExpired,
)
from provisioningserver.refresh.node_info_scripts import (
@@ -403,28 +401,6 @@ class TestRefresh(MAASTestCase):
self.assertIn(tmpdir_during, tmpdirs_during)
self.assertNotIn(tmpdir_during, tmpdirs_after)
- def test_refresh_logs_error(self):
- signal = self.patch(refresh, "signal")
- maaslog = self.patch(refresh.maaslog, "error")
- error = factory.make_string()
- signal.side_effect = SignalException(error)
- info_scripts = self.create_scripts_failure()
-
- system_id = factory.make_name("system_id")
- consumer_key = factory.make_name("consumer_key")
- token_key = factory.make_name("token_key")
- token_secret = factory.make_name("token_secret")
- url = factory.make_url()
-
- with patch.dict(refresh.NODE_INFO_SCRIPTS, info_scripts, clear=True):
- refresh.refresh(
- system_id, consumer_key, token_key, token_secret, url
- )
-
- self.assertThat(
- maaslog, MockAnyCall("Error during controller refresh: %s" % error)
- )
-
def test_refresh_runs_script_no_sudo_snap(self):
mock_popen = self.patch(refresh, "Popen")
mock_popen.side_effect = OSError()
diff --git a/src/provisioningserver/utils/services.py b/src/provisioningserver/utils/services.py
index 4433fe4..92fdf08 100644
--- a/src/provisioningserver/utils/services.py
+++ b/src/provisioningserver/utils/services.py
@@ -32,6 +32,7 @@ from zope.interface.verify import verifyObject
from provisioningserver.config import is_dev_environment
from provisioningserver.logger import get_maas_logger, LegacyLogger
from provisioningserver.refresh import refresh
+from provisioningserver.refresh.maas_api_helper import SignalException
from provisioningserver.refresh.node_info_scripts import (
COMMISSIONING_OUTPUT_NAME,
)
@@ -1171,17 +1172,23 @@ class NetworksMonitoringService(SingleInstanceService):
yield pause(3.0)
hints = self.beaconing_protocol.getJSONTopologyHints()
maas_url, system_id, credentials = yield self.getRefreshDetails()
- yield self._run_refresh(
- maas_url,
- system_id,
- credentials,
- interfaces,
- hints,
- )
- # Note: _interfacesRecorded() will reconfigure discovery after
- # recording the interfaces, so there is no need to call
- # _configureNetworkDiscovery() here.
- self._interfacesRecorded(interfaces)
+ try:
+ yield self._run_refresh(
+ maas_url,
+ system_id,
+ credentials,
+ interfaces,
+ hints,
+ )
+ except SignalException as exc:
+ # Most likely a transient networking error. We'll retry the
+ # next time the service runs the scripts.
+ log.warn(f"Couldn't report test results: {exc}")
+ else:
+ # Note: _interfacesRecorded() will reconfigure discovery after
+ # recording the interfaces, so there is no need to call
+ # _configureNetworkDiscovery() here.
+ self._interfacesRecorded(interfaces)
else:
# Send out beacons unsolicited once every 30 seconds. (Use
# solicitations so that replies will be received, that way peers
diff --git a/src/provisioningserver/utils/tests/test_services.py b/src/provisioningserver/utils/tests/test_services.py
index 5afbbce..e74c442 100644
--- a/src/provisioningserver/utils/tests/test_services.py
+++ b/src/provisioningserver/utils/tests/test_services.py
@@ -28,6 +28,7 @@ from twisted.internet.error import (
ProcessTerminated,
)
from twisted.internet.task import Clock
+from twisted.logger import formatEvent, LogLevel
from twisted.python import threadable
from twisted.python.failure import Failure
@@ -36,6 +37,7 @@ from maastesting.factory import factory
from maastesting.testcase import MAASTestCase, MAASTwistedRunTest
from maastesting.twisted import TwistedLoggerFixture
from provisioningserver import refresh as refresh_module
+from provisioningserver.refresh.maas_api_helper import SignalException
from provisioningserver.refresh.node_info_scripts import (
COMMISSIONING_OUTPUT_NAME,
)
@@ -531,6 +533,41 @@ class TestNetworksMonitoringService(MAASTestCase):
get_interfaces.assert_has_calls(calls=[call(), call()])
@inlineCallbacks
+ def test_recordInterfaces_not_called_after_signal_error(self):
+ get_interfaces = self.patch(services, "get_all_interfaces_definition")
+ my_interfaces = {"foo": {"parents": [], "enabled": True}}
+ get_interfaces.return_value = my_interfaces
+
+ service = self.makeService()
+ service.running = 1
+ self.assertEqual(service.interfaces, [])
+
+ with TwistedLoggerFixture() as logger:
+ self.fake_refresher.mock_signal.side_effect = SignalException(
+ "boom"
+ )
+ yield service._do_action()
+ self.assertEqual(service.interfaces, [])
+ self.assertEqual(
+ ["Couldn't report test results: boom"],
+ [
+ formatEvent(event)
+ for event in logger.events
+ if event.get("log_level") == LogLevel.warn
+ ],
+ )
+
+ # The next time it's run, and no signal error occurs, the interfaces get recorded.
+ self.fake_refresher.reset()
+ self.fake_refresher.mock_signal.side_effect = (
+ self.fake_refresher.fake_signal
+ )
+ yield service._do_action()
+ self.assertEqual(service.interfaces, [my_interfaces])
+
+ get_interfaces.assert_has_calls(calls=[call(), call()])
+
+ @inlineCallbacks
def test_recordInterfaces_not_called_when_interfaces_not_changed(self):
get_interfaces = self.patch(services, "get_all_interfaces_definition")
# Configuration does NOT change between the first and second call.
@@ -547,7 +584,7 @@ class TestNetworksMonitoringService(MAASTestCase):
get_interfaces.assert_has_calls(calls=[call(), call()])
@inlineCallbacks
- def test_recordInterfaces_called_after_failure(self):
+ def test_recordInterfaces_called_after_unexpected_failure(self):
get_interfaces = self.patch(services, "get_all_interfaces_definition")
get_interfaces.return_value = {}
References