cloud-init-dev team mailing list archive
-
cloud-init-dev team
-
Mailing list archive
-
Message #04670
[Merge] ~mgerdts/cloud-init:lp1746605 into cloud-init:master
Mike Gerdts has proposed merging ~mgerdts/cloud-init:lp1746605 into cloud-init:master.
Commit message:
DataSourceSmartOS: add locking
cloud-init and mdata-get each have their own implementation of the
SmartOS metadata protocol. If cloud-init and other services that call
mdata-get are run concurrently, crosstalk on the serial port can cause
them both to become confused.
This change makes it so that cloud-init uses the same cooperative
locking scheme that's used by mdata-get, thus preventing cross-talk
between mdata-get and cloud-init.
For testing, a VM running on a SmartOS host and pyserial are required.
If the tests are run on a platform other than SmartOS, those that use a
real serial port are skipped. pyserial remains commented in
requirements.txt because most testers will not be running atop SmartOS.
LP: #1746605
Requested reviews:
cloud-init commiters (cloud-init-dev)
Related bugs:
Bug #1746605 in cloud-init: "DataSourceSmartOS needs locking"
https://bugs.launchpad.net/cloud-init/+bug/1746605
For more details, see:
https://code.launchpad.net/~mgerdts/cloud-init/+git/cloud-init/+merge/343576
--
Your team cloud-init commiters is requested to review the proposed merge of ~mgerdts/cloud-init:lp1746605 into cloud-init:master.
diff --git a/cloudinit/sources/DataSourceSmartOS.py b/cloudinit/sources/DataSourceSmartOS.py
index c8998b4..7df44f8 100644
--- a/cloudinit/sources/DataSourceSmartOS.py
+++ b/cloudinit/sources/DataSourceSmartOS.py
@@ -23,6 +23,7 @@
import base64
import binascii
import errno
+import fcntl
import json
import os
import random
@@ -520,6 +521,7 @@ class JoyentMetadataSerialClient(JoyentMetadataClient):
if not ser.isOpen():
raise SystemError("Unable to open %s" % self.device)
self.fp = ser
+ fcntl.lockf(ser, fcntl.LOCK_EX)
self._flush()
self._negotiate()
diff --git a/tests/unittests/test_datasource/test_smartos.py b/tests/unittests/test_datasource/test_smartos.py
index 2bea7a1..0be6171 100644
--- a/tests/unittests/test_datasource/test_smartos.py
+++ b/tests/unittests/test_datasource/test_smartos.py
@@ -16,23 +16,27 @@ from __future__ import print_function
from binascii import crc32
import json
+import multiprocessing
import os
import os.path
import re
import shutil
+import signal
import stat
import tempfile
+import unittest2
import uuid
from cloudinit import serial
from cloudinit.sources import DataSourceSmartOS
from cloudinit.sources.DataSourceSmartOS import (
- convert_smartos_network_data as convert_net)
+ convert_smartos_network_data as convert_net,
+ SMARTOS_ENV_KVM, SERIAL_DEVICE, get_smartos_environ)
import six
from cloudinit import helpers as c_helpers
-from cloudinit.util import b64e
+from cloudinit.util import (b64e, subp)
from cloudinit.tests.helpers import mock, FilesystemMockingTestCase, TestCase
@@ -973,4 +977,63 @@ class TestNetworkConversion(TestCase):
found = convert_net(SDC_NICS_SINGLE_GATEWAY)
self.assertEqual(expected, found)
+
+@unittest2.skipUnless(get_smartos_environ() == SMARTOS_ENV_KVM,
+ "Only supported on KVM and bhyve guests under SmartOS")
+@unittest2.skipUnless(os.access(SERIAL_DEVICE, os.W_OK),
+ "Requires write access to " + SERIAL_DEVICE)
+class TestSerialConcurrency(TestCase):
+ """
+ This class tests locking on an actual serial port, and as such can only
+ be run in a kvm or bhyve guest running on a SmartOS host. A test run on
+ a metadata socket will not be valid because a metadata socket ensures
+ there is only one session over a connection. In contrast, in the
+ absence of proper locking multiple processes opening the same serial
+ port can corrupt each others' exchanges with the metadata server.
+ """
+ def setUp(self):
+ self.mdata_proc = multiprocessing.Process(target=self.start_mdata_loop)
+ self.mdata_proc.start()
+ super(TestSerialConcurrency, self).setUp()
+
+ def tearDown(self):
+ # os.kill() rather than mdata_proc.terminate() to avoid console spam.
+ os.kill(self.mdata_proc.pid, signal.SIGKILL)
+ self.mdata_proc.join()
+ super(TestSerialConcurrency, self).tearDown()
+
+ def start_mdata_loop(self):
+ """
+ The mdata-get command is repeatedly run in a separate process so
+ that it may try to race with metadata operations performed in the
+ main test process. Use of mdata-get is better than two processes
+ using the protocol implementation in DataSourceSmartOS because we
+ are testing to be sure that cloud-init and mdata-get respect each
+ others locks.
+ """
+ rcs = list(range(0, 256))
+ while True:
+ subp(['mdata-get', 'sdc:routes'], rcs=rcs)
+
+ def test_all_keys(self):
+ self.assertIsNotNone(self.mdata_proc.pid)
+ ds = DataSourceSmartOS
+ keys = [tup[0] for tup in ds.SMARTOS_ATTRIB_MAP.values()]
+ keys.extend(ds.SMARTOS_ATTRIB_JSON.values())
+
+ client = ds.jmc_client_factory()
+ self.assertIsNotNone(client)
+
+ # The behavior that we are testing for was observed mdata-get running
+ # 10 times at roughly the same time as cloud-init fetched each key
+ # once. cloud-init would regularly see failures before making it
+ # through all keys once.
+ for it in range(0, 3):
+ for key in keys:
+ # We don't care about the return value, just that it doesn't
+ # thrown any exceptions.
+ client.get(key)
+
+ self.assertIsNone(self.mdata_proc.exitcode)
+
# vi: ts=4 expandtab
Follow ups