← Back to team overview

cloud-init-dev team mailing list archive

[Merge] ~raharper/cloud-init:feature/cloudinit-clean-from-write-log into cloud-init:master

 

Ryan Harper has proposed merging ~raharper/cloud-init:feature/cloudinit-clean-from-write-log into cloud-init:master.

Commit message:
clean: add a write_file audit log and use it to clean artifacts

Cloud-init uses util.write_file() in many places to write files
during cloud-init execution.  This branch will create an audit log
in /var/lib/cloud/instance/write_file.log which contains JSON entries
which captures information about each write.

As a user of this newly created log, cloud-init clean subcommand
reads this data (if present) and augments the list of artifacts to
clean.

Requested reviews:
  cloud-init Commiters (cloud-init-dev)

For more details, see:
https://code.launchpad.net/~raharper/cloud-init/+git/cloud-init/+merge/372946
-- 
Your team cloud-init Commiters is requested to review the proposed merge of ~raharper/cloud-init:feature/cloudinit-clean-from-write-log into cloud-init:master.
diff --git a/cloudinit/cmd/clean.py b/cloudinit/cmd/clean.py
index 30e49de..06f7daa 100644
--- a/cloudinit/cmd/clean.py
+++ b/cloudinit/cmd/clean.py
@@ -12,7 +12,7 @@ import sys
 from cloudinit.stages import Init
 from cloudinit.util import (
     ProcessExecutionError, del_dir, del_file, get_config_logfiles,
-    is_link, subp)
+    is_link, subp, write_file_log_read)
 
 
 def error(msg):
@@ -45,7 +45,20 @@ def get_parser(parser=None):
     return parser
 
 
-def remove_artifacts(remove_logs, remove_seed=False):
+def get_written_files(filterfn=None):
+    if filterfn is None:
+        def _exists(fn):
+            return os.path.exists(fn)
+        filterfn = _exists
+
+    for wf in write_file_log_read():
+        fn = wf.get('filename')
+        if fn:
+            if filterfn(fn):
+                yield fn
+
+
+def remove_artifacts(remove_logs=False, remove_seed=False):
     """Helper which removes artifacts dir and optionally log files.
 
     @param: remove_logs: Boolean. Set True to delete the cloud_dir path. False
@@ -56,14 +69,24 @@ def remove_artifacts(remove_logs, remove_seed=False):
     """
     init = Init(ds_deps=[])
     init.read_cfg()
+
+    logfiles = get_config_logfiles(init.cfg)
     if remove_logs:
-        for log_file in get_config_logfiles(init.cfg):
+        for log_file in logfiles:
             del_file(log_file)
 
     if not os.path.isdir(init.paths.cloud_dir):
         return 0  # Artifacts dir already cleaned
     seed_path = os.path.join(init.paths.cloud_dir, 'seed')
-    for path in glob.glob('%s/*' % init.paths.cloud_dir):
+
+    def _written_filter(fn):
+        if fn not in logfiles:
+            if not fn.startswith(init.paths.cloud_dir):
+                if os.path.exists(fn):
+                    return fn
+
+    files_written = list(get_written_files(filterfn=_written_filter))
+    for path in glob.glob('%s/*' % init.paths.cloud_dir) + files_written:
         if path == seed_path and not remove_seed:
             continue
         try:
diff --git a/cloudinit/cmd/tests/test_clean.py b/cloudinit/cmd/tests/test_clean.py
index f092ab3..143d955 100644
--- a/cloudinit/cmd/tests/test_clean.py
+++ b/cloudinit/cmd/tests/test_clean.py
@@ -2,6 +2,7 @@
 
 from cloudinit.cmd import clean
 from cloudinit.util import ensure_dir, sym_link, write_file
+from cloudinit import util
 from cloudinit.tests.helpers import CiTestCase, wrap_and_call, mock
 from collections import namedtuple
 import os
@@ -18,6 +19,10 @@ class TestClean(CiTestCase):
         self.artifact_dir = self.tmp_path('artifacts', self.new_root)
         self.log1 = self.tmp_path('cloud-init.log', self.new_root)
         self.log2 = self.tmp_path('cloud-init-output.log', self.new_root)
+        self.wflog = self.tmp_path('write_file.log', self.artifact_dir)
+        os.makedirs(os.path.dirname(self.wflog))
+        util.WRITE_FILE_LOG = self.wflog
+        util._ENABLE_WRITE_FILE_LOG = True
 
         class FakeInit(object):
             cfg = {'def_log_file': self.log1,
@@ -37,21 +42,31 @@ class TestClean(CiTestCase):
         """remove_artifacts removes logs when remove_logs is True."""
         write_file(self.log1, 'cloud-init-log')
         write_file(self.log2, 'cloud-init-output-log')
+        netplan = self.tmp_path('50-cloud-init.yaml', self.artifact_dir)
+        write_file(netplan, 'netplan content')
+        # read the write log before it gets cleaned
+        with open(self.wflog) as fh:
+            contents = fh.readlines()
+        self.assertEqual(3, len(contents))
 
-        self.assertFalse(
-            os.path.exists(self.artifact_dir), 'Unexpected artifacts dir')
         retcode = wrap_and_call(
             'cloudinit.cmd.clean',
             {'Init': {'side_effect': self.init_class}},
             clean.remove_artifacts, remove_logs=True)
         self.assertFalse(os.path.exists(self.log1), 'Unexpected file')
         self.assertFalse(os.path.exists(self.log2), 'Unexpected file')
+        self.assertFalse(os.path.exists(netplan), 'Unexpected file')
         self.assertEqual(0, retcode)
+        self.assertFalse(os.path.exists(self.wflog))
 
     def test_remove_artifacts_preserves_logs(self):
         """remove_artifacts leaves logs when remove_logs is False."""
         write_file(self.log1, 'cloud-init-log')
         write_file(self.log2, 'cloud-init-output-log')
+        # read the write log before it gets cleaned
+        with open(self.wflog) as fh:
+            contents = fh.readlines()
+        self.assertEqual(2, len(contents))
 
         retcode = wrap_and_call(
             'cloudinit.cmd.clean',
@@ -60,6 +75,7 @@ class TestClean(CiTestCase):
         self.assertTrue(os.path.exists(self.log1), 'Missing expected file')
         self.assertTrue(os.path.exists(self.log2), 'Missing expected file')
         self.assertEqual(0, retcode)
+        self.assertFalse(os.path.exists(self.wflog))
 
     def test_remove_artifacts_removes_unlinks_symlinks(self):
         """remove_artifacts cleans artifacts dir unlinking any symlinks."""
@@ -77,6 +93,7 @@ class TestClean(CiTestCase):
             self.assertFalse(
                 os.path.exists(path),
                 'Unexpected {0} dir'.format(path))
+        self.assertFalse(os.path.exists(self.wflog))
 
     def test_remove_artifacts_removes_artifacts_skipping_seed(self):
         """remove_artifacts cleans artifacts dir with exception of seed dir."""
@@ -101,6 +118,7 @@ class TestClean(CiTestCase):
             self.assertFalse(
                 os.path.exists(deleted_dir),
                 'Unexpected {0} dir'.format(deleted_dir))
+        self.assertFalse(os.path.exists(self.wflog))
 
     def test_remove_artifacts_removes_artifacts_removes_seed(self):
         """remove_artifacts removes seed dir when remove_seed is True."""
@@ -124,6 +142,8 @@ class TestClean(CiTestCase):
                 os.path.exists(deleted_dir),
                 'Unexpected {0} dir'.format(deleted_dir))
 
+        self.assertFalse(os.path.exists(self.wflog))
+
     def test_remove_artifacts_returns_one_on_errors(self):
         """remove_artifacts returns non-zero on failure and prints an error."""
         ensure_dir(self.artifact_dir)
@@ -139,6 +159,7 @@ class TestClean(CiTestCase):
         self.assertEqual(
             'ERROR: Could not remove %s/dir1: oops\n' % self.artifact_dir,
             m_stderr.getvalue())
+        self.assertFalse(os.path.exists(self.wflog))
 
     def test_handle_clean_args_reboots(self):
         """handle_clean_args_reboots when reboot arg is provided."""
@@ -159,10 +180,15 @@ class TestClean(CiTestCase):
         self.assertEqual(0, retcode)
         self.assertEqual(
             [(['shutdown', '-r', 'now'], False)], called_cmds)
+        self.assertFalse(os.path.exists(self.wflog))
 
     def test_status_main(self):
         '''clean.main can be run as a standalone script.'''
         write_file(self.log1, 'cloud-init-log')
+        # read the write log before it gets cleaned
+        with open(self.wflog) as fh:
+            contents = fh.readlines()
+        self.assertEqual(1, len(contents))
         with self.assertRaises(SystemExit) as context_manager:
             wrap_and_call(
                 'cloudinit.cmd.clean',
@@ -175,5 +201,4 @@ class TestClean(CiTestCase):
         self.assertFalse(
             os.path.exists(self.log1), 'Unexpected log {0}'.format(self.log1))
 
-
 # vi: ts=4 expandtab syntax=python
diff --git a/cloudinit/tests/helpers.py b/cloudinit/tests/helpers.py
index 4dad2af..c994a3b 100644
--- a/cloudinit/tests/helpers.py
+++ b/cloudinit/tests/helpers.py
@@ -96,6 +96,8 @@ class TestCase(unittest2.TestCase):
         util.PROC_CMDLINE = None
         util._DNS_REDIRECT_IP = None
         util._LSB_RELEASE = {}
+        util._ENABLE_WRITE_FILE_LOG = False
+        util.WRITE_LOG_FILE = '/var/lib/cloud/instance/write_file.log'
 
     def setUp(self):
         super(TestCase, self).setUp()
diff --git a/cloudinit/util.py b/cloudinit/util.py
index aa23b3f..99ec940 100644
--- a/cloudinit/util.py
+++ b/cloudinit/util.py
@@ -70,6 +70,8 @@ CONTAINER_TESTS = (['systemd-detect-virt', '--quiet', '--container'],
                    ['lxc-is-container'])
 
 PROC_CMDLINE = None
+WRITE_FILE_LOG = '/var/lib/cloud/instance/write_file.log'
+_ENABLE_WRITE_FILE_LOG = True
 
 _LSB_RELEASE = {}
 
@@ -1841,6 +1843,52 @@ def chmod(path, mode):
             os.chmod(path, real_mode)
 
 
+def write_file_log_append(filename, omode, target=None):
+    """ Create an audit log of files that cloud-init has written.
+
+    The log is located at: /var/lib/cloud/instance/write_file.log
+
+    The format is JSON dict, one per line
+      {'timestamp': time.time(), 'path': filename, 'mode': omode}
+
+    Example entries:
+      {'filename': '/etc/apt/sources.list', 'mode': 'wb', 'timestamp': ts}
+
+    """
+    global WRITE_FILE_LOG
+    global _ENABLE_WRITE_FILE_LOG
+
+    if not _ENABLE_WRITE_FILE_LOG:
+        return
+
+    log_file = target_path(target, path=WRITE_FILE_LOG)
+    if not os.path.exists(os.path.dirname(log_file)):
+        return
+
+    record = {'timestamp': time.time(), 'filename': filename, 'mode': omode}
+    content = json.dumps(record, default=json_serialize_default)
+    with open(log_file, "ab") as wfl:
+        wfl.write((content + '\n').encode('utf-8'))
+        wfl.flush()
+
+
+def write_file_log_read(target=None):
+    """ Read the WRITE_FILE_LOG and yield the contents.
+
+    :returns a list of record dicts
+    """
+    global WRITE_FILE_LOG
+
+    log_file = target_path(target, path=WRITE_FILE_LOG)
+    if os.path.exists(log_file):
+        with open(log_file, "rb") as wfl:
+            contents = wfl.read()
+        for line in contents.splitlines():
+            record = load_json(line)
+            if record:
+                yield record
+
+
 def write_file(filename, content, mode=0o644, omode="wb", copy_mode=False):
     """
     Writes a file with the given content and sets the file mode as specified.
@@ -1872,6 +1920,7 @@ def write_file(filename, content, mode=0o644, omode="wb", copy_mode=False):
         mode_r = "%r" % mode
     LOG.debug("Writing to %s - %s: [%s] %s %s",
               filename, omode, mode_r, len(content), write_type)
+    write_file_log_append(filename, omode)
     with SeLinuxGuard(path=filename):
         with open(filename, omode) as fh:
             fh.write(content)
diff --git a/tests/unittests/test_util.py b/tests/unittests/test_util.py
index 0e71db8..82909a7 100644
--- a/tests/unittests/test_util.py
+++ b/tests/unittests/test_util.py
@@ -186,6 +186,51 @@ class TestWriteFile(helpers.TestCase):
         mockobj.assert_called_once_with('selinux')
 
 
+class TestWriteFileLogAppend(helpers.CiTestCase):
+
+    idir = 'var/lib/cloud/instance'
+
+    def test_write_file_log_append(self):
+        root_d = self.tmp_dir()
+        log_path = os.path.join(root_d, self.idir, 'write_file.log')
+        util._ENABLE_WRITE_FILE_LOG = True
+        util.WRITE_FILE_LOG = log_path
+        helpers.populate_dir(root_d,
+                             {os.path.join(self.idir, 'dummy'): 'i-foobar'})
+
+        content = self.random_string()
+        fname = os.path.join(root_d, self.random_string())
+        util.write_file(fname, content, omode='wb')
+
+        self.assertTrue(os.path.exists(log_path))
+        contents = util.load_file(log_path)
+        self.assertEqual(1, len(contents.splitlines()))
+        for line in contents.splitlines():
+            record = util.load_json(line)
+            self.assertEqual(fname, record['filename'])
+            self.assertEqual('wb', record['mode'])
+            self.assertIn('timestamp', record)
+
+    def test_write_file_log_read(self):
+        root_d = self.tmp_dir()
+        log_path = os.path.join(root_d, self.idir, 'write_file.log')
+        util.WRITE_FILE_LOG = log_path
+        helpers.populate_dir(root_d,
+                             {os.path.join(self.idir, 'dummy'): 'i-foobar'})
+
+        fname = os.path.join(root_d, self.random_string())
+
+        expected_record = {'timestamp': 0, 'filename': fname, 'mode': 'wb'}
+        with open(log_path, 'ab') as wfl:
+            record = (json.dumps(expected_record) + '\n').encode('utf-8')
+            wfl.write(record)
+            wfl.flush()
+
+        records = list(util.write_file_log_read())
+        self.assertEqual(1, len(records))
+        self.assertEqual(expected_record, records[0])
+
+
 class TestDeleteDirContents(helpers.TestCase):
     def setUp(self):
         super(TestDeleteDirContents, self).setUp()

Follow ups