← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] ~ruinedyourlife/launchpad-buildd:redact-secrets into launchpad-buildd:master

 

RuinedYourLife has proposed merging ~ruinedyourlife/launchpad-buildd:redact-secrets into launchpad-buildd:master.

Commit message:
Redact secrets in logs

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~ruinedyourlife/launchpad-buildd/+git/launchpad-buildd/+merge/491155

The new build types have added new form of secrets that can appear in the logs.
We wish to redact these, the same way the URLs are.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of ~ruinedyourlife/launchpad-buildd:redact-secrets into launchpad-buildd:master.
diff --git a/lpbuildd/builder.py b/lpbuildd/builder.py
index 42e49bb..90fb821 100644
--- a/lpbuildd/builder.py
+++ b/lpbuildd/builder.py
@@ -36,14 +36,16 @@ devnull = open("/dev/null")
 
 
 # coverity[COV_PY_SIGMA.hardcoded_secret_pattern_low:SUPPRESS] doc example
-def _sanitizeURLs(bytes_seq):
-    """A generator that deletes URL passwords from a bytes sequence.
+def _sanitize_log_lines(bytes_seq):
+    """A generator that sanitizes sensitive data from log line bytes.
 
-    This generator removes user/password data from URLs if embedded
-    in the latter as follows: scheme://user:passwd@netloc/path.
+    Removes user/password data embedded in URLs (scheme://user:passwd@host),
+    strips builder proxy auth parameters, and redacts common secret-bearing
+    patterns such as Authorization headers and secret-like environment
+    variable assignments.
 
-    :param bytes_seq: A sequence of byte strings (that may contain URLs).
-    :return: A (sanitized) line stripped of authentication credentials.
+    :param bytes_seq: A sequence of byte strings representing log lines.
+    :return: Sanitized lines with credentials and secrets removed or masked.
     """
     # This regular expression will be used to remove authentication
     # credentials from URLs.
@@ -51,9 +53,35 @@ def _sanitizeURLs(bytes_seq):
     # Builder proxy passwords are UUIDs.
     proxy_auth_re = re.compile(rb",proxyauth=[^:]+:[A-Za-z0-9-]+")
 
+    # Additional redaction patterns for common secret-bearing content.
+    # Redact environment assignments for keys that likely contain secrets.
+    # Matches KEY=VALUE where KEY ends with sensitive strings.
+    env_secret_re = re.compile(
+        rb"(\b(?:[A-Z][A-Z0-9_]*?(?:PASSWORD|TOKEN|SECRET|API(?:_|)?KEY|READ_AUTH)\b)\s*=\s*)(?:\"[^\"]*\"|'[^']*'|\S+)"
+    )
+    # Handle env assignments of the form KEY=Bearer <token> (value with a space).
+    env_bearer_pair_re = re.compile(
+        rb"(\b(?:[A-Z][A-Z0-9_]*?(?:PASSWORD|TOKEN|SECRET|API(?:_|)?KEY|READ_AUTH)\b)\s*=\s*)Bearer\s+\S+"
+    )
+    # Redact Authorization headers.
+    authorization_header_re = re.compile(rb"\bAuthorization:\s*\S.*")
+    # Redact Bearer tokens that might appear outside of Authorization headers.
+    bearer_re = re.compile(rb"(\bBearer\s+)([A-Za-z0-9._-]+)")
+    # Redact value passed to the fetch-service MITM certificate flag.
+    mitm_cert_flag_re = re.compile(
+        rb"(--fetch-service-mitm-certificate\s+)(?:\"[^\"]*\"|'[^']*'|\S+)"
+    )
+
     for line in bytes_seq:
         sanitized_line = password_re.sub(rb"://\2", line)
         sanitized_line = proxy_auth_re.sub(b"", sanitized_line)
+        sanitized_line = env_bearer_pair_re.sub(rb"\1REDACTED", sanitized_line)
+        sanitized_line = env_secret_re.sub(rb"\1REDACTED", sanitized_line)
+        sanitized_line = authorization_header_re.sub(
+            b"Authorization: REDACTED", sanitized_line
+        )
+        sanitized_line = bearer_re.sub(rb"\1REDACTED", sanitized_line)
+        sanitized_line = mitm_cert_flag_re.sub(rb"\1<redacted>", sanitized_line)
         yield sanitized_line
 
 
@@ -607,7 +635,7 @@ class Builder:
         When the 'buildlog' is present it returns up to 2 KiB bytes of the
         end of the file.
 
-        The returned content will be 'sanitized', see `_sanitizeURLs` for
+        The returned content will be 'sanitized', see `_sanitize_log_lines` for
         further information.
         """
         if self._log is None:
@@ -642,7 +670,7 @@ class Builder:
             # Please note: we are throwing away the first line (of the
             # excerpt to be scrubbed) because it may be cut off thus
             # thwarting the detection of embedded passwords.
-            clean_content_iter = _sanitizeURLs(log_lines[1:])
+            clean_content_iter = _sanitize_log_lines(log_lines[1:])
             ret = b"\n".join(clean_content_iter)
 
         return ret
@@ -759,7 +787,7 @@ class Builder:
             sanitized_file = open(log_path, "wb")
 
             # Scrub the buildlog file line by line
-            clean_content_iter = _sanitizeURLs(unsanitized_file)
+            clean_content_iter = _sanitize_log_lines(unsanitized_file)
             for line in clean_content_iter:
                 sanitized_file.write(line)
         finally:
diff --git a/lpbuildd/tests/test_builder.py b/lpbuildd/tests/test_builder.py
index b43e7e1..cd5b8e3 100644
--- a/lpbuildd/tests/test_builder.py
+++ b/lpbuildd/tests/test_builder.py
@@ -17,12 +17,12 @@ from twisted.internet import defer
 from twisted.logger import FileLogObserver, formatEvent, globalLogPublisher
 from unittest import mock
 
-from lpbuildd.builder import Builder, BuildManager, _sanitizeURLs
+from lpbuildd.builder import Builder, BuildManager, _sanitize_log_lines
 from lpbuildd.tests.fakebuilder import FakeConfig
 
 
 class TestSanitizeURLs(TestCase):
-    """Unit-test URL sanitization.
+    """Unit-test log sanitization (URLs, env secrets, headers).
 
     `lpbuildd.tests.test_buildd.LaunchpadBuilddTests` also covers some of
     this, but at a higher level.
@@ -30,18 +30,18 @@ class TestSanitizeURLs(TestCase):
 
     def test_non_urls(self):
         lines = [b"not a URL", b"still not a URL"]
-        self.assertEqual(lines, list(_sanitizeURLs(lines)))
+        self.assertEqual(lines, list(_sanitize_log_lines(lines)))
 
     def test_url_without_credentials(self):
         lines = [b"Get:1 http://ftpmaster.internal focal InRelease"]
-        self.assertEqual(lines, list(_sanitizeURLs(lines)))
+        self.assertEqual(lines, list(_sanitize_log_lines(lines)))
 
     def test_url_with_credentials(self):
         lines = [
             b"Get:1 http://buildd:secret@ftpmaster.internal focal InRelease",
         ]
         expected_lines = [b"Get:1 http://ftpmaster.internal focal InRelease"]
-        self.assertEqual(expected_lines, list(_sanitizeURLs(lines)))
+        self.assertEqual(expected_lines, list(_sanitize_log_lines(lines)))
 
     def test_multiple_urls(self):
         lines = [
@@ -52,7 +52,7 @@ class TestSanitizeURLs(TestCase):
             b"http_proxy=http://squid.internal:3128/ "
             b"GOPROXY=http://example.com/goproxy";,
         ]
-        self.assertEqual(expected_lines, list(_sanitizeURLs(lines)))
+        self.assertEqual(expected_lines, list(_sanitize_log_lines(lines)))
 
     def test_proxyauth(self):
         lines = [
@@ -63,7 +63,36 @@ class TestSanitizeURLs(TestCase):
             b"socat STDIO PROXY:builder-proxy.launchpad.dev:github.com:443,"
             b"proxyport=3128",
         ]
-        self.assertEqual(expected_lines, list(_sanitizeURLs(lines)))
+        self.assertEqual(expected_lines, list(_sanitize_log_lines(lines)))
+
+    def test_cargo_read_auth_env_is_redacted(self):
+        lines = [b"CARGO_ARTIFACTORY1_READ_AUTH=user:token123"]
+        expected_lines = [b"CARGO_ARTIFACTORY1_READ_AUTH=REDACTED"]
+        self.assertEqual(expected_lines, list(_sanitize_log_lines(lines)))
+
+    def test_cargo_token_env_is_redacted(self):
+        lines = [b"CARGO_REGISTRIES_ARTIFACTORY1_TOKEN=Bearer token123"]
+        expected_lines = [b"CARGO_REGISTRIES_ARTIFACTORY1_TOKEN=REDACTED"]
+        self.assertEqual(expected_lines, list(_sanitize_log_lines(lines)))
+
+    def test_authorization_header_is_redacted(self):
+        lines = [b"Authorization: Bearer abc.def.ghi"]
+        expected_lines = [b"Authorization: REDACTED"]
+        self.assertEqual(expected_lines, list(_sanitize_log_lines(lines)))
+
+    def test_standalone_bearer_token_is_redacted(self):
+        lines = [b"Some tool printed: Bearer abcdef123456"]
+        expected_lines = [b"Some tool printed: Bearer REDACTED"]
+        self.assertEqual(expected_lines, list(_sanitize_log_lines(lines)))
+
+    def test_mitm_certificate_flag_value_is_redacted(self):
+        lines = [
+            b"RUN: build-craft --fetch-service-mitm-certificate 'BEGINCERTDATA'"
+        ]
+        expected_lines = [
+            b"RUN: build-craft --fetch-service-mitm-certificate <redacted>"
+        ]
+        self.assertEqual(expected_lines, list(_sanitize_log_lines(lines)))
 
 
 class TestBuildManager(TestCase):