cloud-init-dev team mailing list archive
-
cloud-init-dev team
-
Mailing list archive
-
Message #06714
[Merge] ~raharper/cloud-init:feature/cloudinit-clean-from-write-log into cloud-init:master
Ryan Harper has proposed merging ~raharper/cloud-init:feature/cloudinit-clean-from-write-log into cloud-init:master.
Commit message:
clean: add a write_file audit log and use it to clean artifacts
Cloud-init uses util.write_file() in many places to write files
during cloud-init execution. This branch will create an audit log
in /var/lib/cloud/instance/write_file.log which contains JSON entries
which captures information about each write.
As a user of this newly created log, cloud-init clean subcommand
reads this data (if present) and augments the list of artifacts to
clean.
Requested reviews:
cloud-init Commiters (cloud-init-dev)
For more details, see:
https://code.launchpad.net/~raharper/cloud-init/+git/cloud-init/+merge/372946
--
Your team cloud-init Commiters is requested to review the proposed merge of ~raharper/cloud-init:feature/cloudinit-clean-from-write-log into cloud-init:master.
diff --git a/cloudinit/cmd/clean.py b/cloudinit/cmd/clean.py
index 30e49de..06f7daa 100644
--- a/cloudinit/cmd/clean.py
+++ b/cloudinit/cmd/clean.py
@@ -12,7 +12,7 @@ import sys
from cloudinit.stages import Init
from cloudinit.util import (
ProcessExecutionError, del_dir, del_file, get_config_logfiles,
- is_link, subp)
+ is_link, subp, write_file_log_read)
def error(msg):
@@ -45,7 +45,20 @@ def get_parser(parser=None):
return parser
-def remove_artifacts(remove_logs, remove_seed=False):
+def get_written_files(filterfn=None):
+ if filterfn is None:
+ def _exists(fn):
+ return os.path.exists(fn)
+ filterfn = _exists
+
+ for wf in write_file_log_read():
+ fn = wf.get('filename')
+ if fn:
+ if filterfn(fn):
+ yield fn
+
+
+def remove_artifacts(remove_logs=False, remove_seed=False):
"""Helper which removes artifacts dir and optionally log files.
@param: remove_logs: Boolean. Set True to delete the cloud_dir path. False
@@ -56,14 +69,24 @@ def remove_artifacts(remove_logs, remove_seed=False):
"""
init = Init(ds_deps=[])
init.read_cfg()
+
+ logfiles = get_config_logfiles(init.cfg)
if remove_logs:
- for log_file in get_config_logfiles(init.cfg):
+ for log_file in logfiles:
del_file(log_file)
if not os.path.isdir(init.paths.cloud_dir):
return 0 # Artifacts dir already cleaned
seed_path = os.path.join(init.paths.cloud_dir, 'seed')
- for path in glob.glob('%s/*' % init.paths.cloud_dir):
+
+ def _written_filter(fn):
+ if fn not in logfiles:
+ if not fn.startswith(init.paths.cloud_dir):
+ if os.path.exists(fn):
+ return fn
+
+ files_written = list(get_written_files(filterfn=_written_filter))
+ for path in glob.glob('%s/*' % init.paths.cloud_dir) + files_written:
if path == seed_path and not remove_seed:
continue
try:
diff --git a/cloudinit/cmd/tests/test_clean.py b/cloudinit/cmd/tests/test_clean.py
index f092ab3..143d955 100644
--- a/cloudinit/cmd/tests/test_clean.py
+++ b/cloudinit/cmd/tests/test_clean.py
@@ -2,6 +2,7 @@
from cloudinit.cmd import clean
from cloudinit.util import ensure_dir, sym_link, write_file
+from cloudinit import util
from cloudinit.tests.helpers import CiTestCase, wrap_and_call, mock
from collections import namedtuple
import os
@@ -18,6 +19,10 @@ class TestClean(CiTestCase):
self.artifact_dir = self.tmp_path('artifacts', self.new_root)
self.log1 = self.tmp_path('cloud-init.log', self.new_root)
self.log2 = self.tmp_path('cloud-init-output.log', self.new_root)
+ self.wflog = self.tmp_path('write_file.log', self.artifact_dir)
+ os.makedirs(os.path.dirname(self.wflog))
+ util.WRITE_FILE_LOG = self.wflog
+ util._ENABLE_WRITE_FILE_LOG = True
class FakeInit(object):
cfg = {'def_log_file': self.log1,
@@ -37,21 +42,31 @@ class TestClean(CiTestCase):
"""remove_artifacts removes logs when remove_logs is True."""
write_file(self.log1, 'cloud-init-log')
write_file(self.log2, 'cloud-init-output-log')
+ netplan = self.tmp_path('50-cloud-init.yaml', self.artifact_dir)
+ write_file(netplan, 'netplan content')
+ # read the write log before it gets cleaned
+ with open(self.wflog) as fh:
+ contents = fh.readlines()
+ self.assertEqual(3, len(contents))
- self.assertFalse(
- os.path.exists(self.artifact_dir), 'Unexpected artifacts dir')
retcode = wrap_and_call(
'cloudinit.cmd.clean',
{'Init': {'side_effect': self.init_class}},
clean.remove_artifacts, remove_logs=True)
self.assertFalse(os.path.exists(self.log1), 'Unexpected file')
self.assertFalse(os.path.exists(self.log2), 'Unexpected file')
+ self.assertFalse(os.path.exists(netplan), 'Unexpected file')
self.assertEqual(0, retcode)
+ self.assertFalse(os.path.exists(self.wflog))
def test_remove_artifacts_preserves_logs(self):
"""remove_artifacts leaves logs when remove_logs is False."""
write_file(self.log1, 'cloud-init-log')
write_file(self.log2, 'cloud-init-output-log')
+ # read the write log before it gets cleaned
+ with open(self.wflog) as fh:
+ contents = fh.readlines()
+ self.assertEqual(2, len(contents))
retcode = wrap_and_call(
'cloudinit.cmd.clean',
@@ -60,6 +75,7 @@ class TestClean(CiTestCase):
self.assertTrue(os.path.exists(self.log1), 'Missing expected file')
self.assertTrue(os.path.exists(self.log2), 'Missing expected file')
self.assertEqual(0, retcode)
+ self.assertFalse(os.path.exists(self.wflog))
def test_remove_artifacts_removes_unlinks_symlinks(self):
"""remove_artifacts cleans artifacts dir unlinking any symlinks."""
@@ -77,6 +93,7 @@ class TestClean(CiTestCase):
self.assertFalse(
os.path.exists(path),
'Unexpected {0} dir'.format(path))
+ self.assertFalse(os.path.exists(self.wflog))
def test_remove_artifacts_removes_artifacts_skipping_seed(self):
"""remove_artifacts cleans artifacts dir with exception of seed dir."""
@@ -101,6 +118,7 @@ class TestClean(CiTestCase):
self.assertFalse(
os.path.exists(deleted_dir),
'Unexpected {0} dir'.format(deleted_dir))
+ self.assertFalse(os.path.exists(self.wflog))
def test_remove_artifacts_removes_artifacts_removes_seed(self):
"""remove_artifacts removes seed dir when remove_seed is True."""
@@ -124,6 +142,8 @@ class TestClean(CiTestCase):
os.path.exists(deleted_dir),
'Unexpected {0} dir'.format(deleted_dir))
+ self.assertFalse(os.path.exists(self.wflog))
+
def test_remove_artifacts_returns_one_on_errors(self):
"""remove_artifacts returns non-zero on failure and prints an error."""
ensure_dir(self.artifact_dir)
@@ -139,6 +159,7 @@ class TestClean(CiTestCase):
self.assertEqual(
'ERROR: Could not remove %s/dir1: oops\n' % self.artifact_dir,
m_stderr.getvalue())
+ self.assertFalse(os.path.exists(self.wflog))
def test_handle_clean_args_reboots(self):
"""handle_clean_args_reboots when reboot arg is provided."""
@@ -159,10 +180,15 @@ class TestClean(CiTestCase):
self.assertEqual(0, retcode)
self.assertEqual(
[(['shutdown', '-r', 'now'], False)], called_cmds)
+ self.assertFalse(os.path.exists(self.wflog))
def test_status_main(self):
'''clean.main can be run as a standalone script.'''
write_file(self.log1, 'cloud-init-log')
+ # read the write log before it gets cleaned
+ with open(self.wflog) as fh:
+ contents = fh.readlines()
+ self.assertEqual(1, len(contents))
with self.assertRaises(SystemExit) as context_manager:
wrap_and_call(
'cloudinit.cmd.clean',
@@ -175,5 +201,4 @@ class TestClean(CiTestCase):
self.assertFalse(
os.path.exists(self.log1), 'Unexpected log {0}'.format(self.log1))
-
# vi: ts=4 expandtab syntax=python
diff --git a/cloudinit/tests/helpers.py b/cloudinit/tests/helpers.py
index 4dad2af..c994a3b 100644
--- a/cloudinit/tests/helpers.py
+++ b/cloudinit/tests/helpers.py
@@ -96,6 +96,8 @@ class TestCase(unittest2.TestCase):
util.PROC_CMDLINE = None
util._DNS_REDIRECT_IP = None
util._LSB_RELEASE = {}
+ util._ENABLE_WRITE_FILE_LOG = False
+ util.WRITE_LOG_FILE = '/var/lib/cloud/instance/write_file.log'
def setUp(self):
super(TestCase, self).setUp()
diff --git a/cloudinit/util.py b/cloudinit/util.py
index aa23b3f..99ec940 100644
--- a/cloudinit/util.py
+++ b/cloudinit/util.py
@@ -70,6 +70,8 @@ CONTAINER_TESTS = (['systemd-detect-virt', '--quiet', '--container'],
['lxc-is-container'])
PROC_CMDLINE = None
+WRITE_FILE_LOG = '/var/lib/cloud/instance/write_file.log'
+_ENABLE_WRITE_FILE_LOG = True
_LSB_RELEASE = {}
@@ -1841,6 +1843,52 @@ def chmod(path, mode):
os.chmod(path, real_mode)
+def write_file_log_append(filename, omode, target=None):
+ """ Create an audit log of files that cloud-init has written.
+
+ The log is located at: /var/lib/cloud/instance/write_file.log
+
+ The format is JSON dict, one per line
+ {'timestamp': time.time(), 'path': filename, 'mode': omode}
+
+ Example entries:
+ {'filename': '/etc/apt/sources.list', 'mode': 'wb', 'timestamp': ts}
+
+ """
+ global WRITE_FILE_LOG
+ global _ENABLE_WRITE_FILE_LOG
+
+ if not _ENABLE_WRITE_FILE_LOG:
+ return
+
+ log_file = target_path(target, path=WRITE_FILE_LOG)
+ if not os.path.exists(os.path.dirname(log_file)):
+ return
+
+ record = {'timestamp': time.time(), 'filename': filename, 'mode': omode}
+ content = json.dumps(record, default=json_serialize_default)
+ with open(log_file, "ab") as wfl:
+ wfl.write((content + '\n').encode('utf-8'))
+ wfl.flush()
+
+
+def write_file_log_read(target=None):
+ """ Read the WRITE_FILE_LOG and yield the contents.
+
+ :returns a list of record dicts
+ """
+ global WRITE_FILE_LOG
+
+ log_file = target_path(target, path=WRITE_FILE_LOG)
+ if os.path.exists(log_file):
+ with open(log_file, "rb") as wfl:
+ contents = wfl.read()
+ for line in contents.splitlines():
+ record = load_json(line)
+ if record:
+ yield record
+
+
def write_file(filename, content, mode=0o644, omode="wb", copy_mode=False):
"""
Writes a file with the given content and sets the file mode as specified.
@@ -1872,6 +1920,7 @@ def write_file(filename, content, mode=0o644, omode="wb", copy_mode=False):
mode_r = "%r" % mode
LOG.debug("Writing to %s - %s: [%s] %s %s",
filename, omode, mode_r, len(content), write_type)
+ write_file_log_append(filename, omode)
with SeLinuxGuard(path=filename):
with open(filename, omode) as fh:
fh.write(content)
diff --git a/tests/unittests/test_util.py b/tests/unittests/test_util.py
index 0e71db8..82909a7 100644
--- a/tests/unittests/test_util.py
+++ b/tests/unittests/test_util.py
@@ -186,6 +186,51 @@ class TestWriteFile(helpers.TestCase):
mockobj.assert_called_once_with('selinux')
+class TestWriteFileLogAppend(helpers.CiTestCase):
+
+ idir = 'var/lib/cloud/instance'
+
+ def test_write_file_log_append(self):
+ root_d = self.tmp_dir()
+ log_path = os.path.join(root_d, self.idir, 'write_file.log')
+ util._ENABLE_WRITE_FILE_LOG = True
+ util.WRITE_FILE_LOG = log_path
+ helpers.populate_dir(root_d,
+ {os.path.join(self.idir, 'dummy'): 'i-foobar'})
+
+ content = self.random_string()
+ fname = os.path.join(root_d, self.random_string())
+ util.write_file(fname, content, omode='wb')
+
+ self.assertTrue(os.path.exists(log_path))
+ contents = util.load_file(log_path)
+ self.assertEqual(1, len(contents.splitlines()))
+ for line in contents.splitlines():
+ record = util.load_json(line)
+ self.assertEqual(fname, record['filename'])
+ self.assertEqual('wb', record['mode'])
+ self.assertIn('timestamp', record)
+
+ def test_write_file_log_read(self):
+ root_d = self.tmp_dir()
+ log_path = os.path.join(root_d, self.idir, 'write_file.log')
+ util.WRITE_FILE_LOG = log_path
+ helpers.populate_dir(root_d,
+ {os.path.join(self.idir, 'dummy'): 'i-foobar'})
+
+ fname = os.path.join(root_d, self.random_string())
+
+ expected_record = {'timestamp': 0, 'filename': fname, 'mode': 'wb'}
+ with open(log_path, 'ab') as wfl:
+ record = (json.dumps(expected_record) + '\n').encode('utf-8')
+ wfl.write(record)
+ wfl.flush()
+
+ records = list(util.write_file_log_read())
+ self.assertEqual(1, len(records))
+ self.assertEqual(expected_record, records[0])
+
+
class TestDeleteDirContents(helpers.TestCase):
def setUp(self):
super(TestDeleteDirContents, self).setUp()
Follow ups