launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #32886
[Merge] ~ruinedyourlife/launchpad-buildd:redact-secrets into launchpad-buildd:master
RuinedYourLife has proposed merging ~ruinedyourlife/launchpad-buildd:redact-secrets into launchpad-buildd:master.
Commit message:
Redact secrets in logs
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~ruinedyourlife/launchpad-buildd/+git/launchpad-buildd/+merge/491155
The new build types have added new form of secrets that can appear in the logs.
We wish to redact these, the same way the URLs are.
--
Your team Launchpad code reviewers is requested to review the proposed merge of ~ruinedyourlife/launchpad-buildd:redact-secrets into launchpad-buildd:master.
diff --git a/lpbuildd/builder.py b/lpbuildd/builder.py
index 42e49bb..90fb821 100644
--- a/lpbuildd/builder.py
+++ b/lpbuildd/builder.py
@@ -36,14 +36,16 @@ devnull = open("/dev/null")
# coverity[COV_PY_SIGMA.hardcoded_secret_pattern_low:SUPPRESS] doc example
-def _sanitizeURLs(bytes_seq):
- """A generator that deletes URL passwords from a bytes sequence.
+def _sanitize_log_lines(bytes_seq):
+ """A generator that sanitizes sensitive data from log line bytes.
- This generator removes user/password data from URLs if embedded
- in the latter as follows: scheme://user:passwd@netloc/path.
+ Removes user/password data embedded in URLs (scheme://user:passwd@host),
+ strips builder proxy auth parameters, and redacts common secret-bearing
+ patterns such as Authorization headers and secret-like environment
+ variable assignments.
- :param bytes_seq: A sequence of byte strings (that may contain URLs).
- :return: A (sanitized) line stripped of authentication credentials.
+ :param bytes_seq: A sequence of byte strings representing log lines.
+ :return: Sanitized lines with credentials and secrets removed or masked.
"""
# This regular expression will be used to remove authentication
# credentials from URLs.
@@ -51,9 +53,35 @@ def _sanitizeURLs(bytes_seq):
# Builder proxy passwords are UUIDs.
proxy_auth_re = re.compile(rb",proxyauth=[^:]+:[A-Za-z0-9-]+")
+ # Additional redaction patterns for common secret-bearing content.
+ # Redact environment assignments for keys that likely contain secrets.
+ # Matches KEY=VALUE where KEY ends with sensitive strings.
+ env_secret_re = re.compile(
+ rb"(\b(?:[A-Z][A-Z0-9_]*?(?:PASSWORD|TOKEN|SECRET|API(?:_|)?KEY|READ_AUTH)\b)\s*=\s*)(?:\"[^\"]*\"|'[^']*'|\S+)"
+ )
+ # Handle env assignments of the form KEY=Bearer <token> (value with a space).
+ env_bearer_pair_re = re.compile(
+ rb"(\b(?:[A-Z][A-Z0-9_]*?(?:PASSWORD|TOKEN|SECRET|API(?:_|)?KEY|READ_AUTH)\b)\s*=\s*)Bearer\s+\S+"
+ )
+ # Redact Authorization headers.
+ authorization_header_re = re.compile(rb"\bAuthorization:\s*\S.*")
+ # Redact Bearer tokens that might appear outside of Authorization headers.
+ bearer_re = re.compile(rb"(\bBearer\s+)([A-Za-z0-9._-]+)")
+ # Redact value passed to the fetch-service MITM certificate flag.
+ mitm_cert_flag_re = re.compile(
+ rb"(--fetch-service-mitm-certificate\s+)(?:\"[^\"]*\"|'[^']*'|\S+)"
+ )
+
for line in bytes_seq:
sanitized_line = password_re.sub(rb"://\2", line)
sanitized_line = proxy_auth_re.sub(b"", sanitized_line)
+ sanitized_line = env_bearer_pair_re.sub(rb"\1REDACTED", sanitized_line)
+ sanitized_line = env_secret_re.sub(rb"\1REDACTED", sanitized_line)
+ sanitized_line = authorization_header_re.sub(
+ b"Authorization: REDACTED", sanitized_line
+ )
+ sanitized_line = bearer_re.sub(rb"\1REDACTED", sanitized_line)
+ sanitized_line = mitm_cert_flag_re.sub(rb"\1<redacted>", sanitized_line)
yield sanitized_line
@@ -607,7 +635,7 @@ class Builder:
When the 'buildlog' is present it returns up to 2 KiB bytes of the
end of the file.
- The returned content will be 'sanitized', see `_sanitizeURLs` for
+ The returned content will be 'sanitized', see `_sanitize_log_lines` for
further information.
"""
if self._log is None:
@@ -642,7 +670,7 @@ class Builder:
# Please note: we are throwing away the first line (of the
# excerpt to be scrubbed) because it may be cut off thus
# thwarting the detection of embedded passwords.
- clean_content_iter = _sanitizeURLs(log_lines[1:])
+ clean_content_iter = _sanitize_log_lines(log_lines[1:])
ret = b"\n".join(clean_content_iter)
return ret
@@ -759,7 +787,7 @@ class Builder:
sanitized_file = open(log_path, "wb")
# Scrub the buildlog file line by line
- clean_content_iter = _sanitizeURLs(unsanitized_file)
+ clean_content_iter = _sanitize_log_lines(unsanitized_file)
for line in clean_content_iter:
sanitized_file.write(line)
finally:
diff --git a/lpbuildd/tests/test_builder.py b/lpbuildd/tests/test_builder.py
index b43e7e1..cd5b8e3 100644
--- a/lpbuildd/tests/test_builder.py
+++ b/lpbuildd/tests/test_builder.py
@@ -17,12 +17,12 @@ from twisted.internet import defer
from twisted.logger import FileLogObserver, formatEvent, globalLogPublisher
from unittest import mock
-from lpbuildd.builder import Builder, BuildManager, _sanitizeURLs
+from lpbuildd.builder import Builder, BuildManager, _sanitize_log_lines
from lpbuildd.tests.fakebuilder import FakeConfig
class TestSanitizeURLs(TestCase):
- """Unit-test URL sanitization.
+ """Unit-test log sanitization (URLs, env secrets, headers).
`lpbuildd.tests.test_buildd.LaunchpadBuilddTests` also covers some of
this, but at a higher level.
@@ -30,18 +30,18 @@ class TestSanitizeURLs(TestCase):
def test_non_urls(self):
lines = [b"not a URL", b"still not a URL"]
- self.assertEqual(lines, list(_sanitizeURLs(lines)))
+ self.assertEqual(lines, list(_sanitize_log_lines(lines)))
def test_url_without_credentials(self):
lines = [b"Get:1 http://ftpmaster.internal focal InRelease"]
- self.assertEqual(lines, list(_sanitizeURLs(lines)))
+ self.assertEqual(lines, list(_sanitize_log_lines(lines)))
def test_url_with_credentials(self):
lines = [
b"Get:1 http://buildd:secret@ftpmaster.internal focal InRelease",
]
expected_lines = [b"Get:1 http://ftpmaster.internal focal InRelease"]
- self.assertEqual(expected_lines, list(_sanitizeURLs(lines)))
+ self.assertEqual(expected_lines, list(_sanitize_log_lines(lines)))
def test_multiple_urls(self):
lines = [
@@ -52,7 +52,7 @@ class TestSanitizeURLs(TestCase):
b"http_proxy=http://squid.internal:3128/ "
b"GOPROXY=http://example.com/goproxy",
]
- self.assertEqual(expected_lines, list(_sanitizeURLs(lines)))
+ self.assertEqual(expected_lines, list(_sanitize_log_lines(lines)))
def test_proxyauth(self):
lines = [
@@ -63,7 +63,36 @@ class TestSanitizeURLs(TestCase):
b"socat STDIO PROXY:builder-proxy.launchpad.dev:github.com:443,"
b"proxyport=3128",
]
- self.assertEqual(expected_lines, list(_sanitizeURLs(lines)))
+ self.assertEqual(expected_lines, list(_sanitize_log_lines(lines)))
+
+ def test_cargo_read_auth_env_is_redacted(self):
+ lines = [b"CARGO_ARTIFACTORY1_READ_AUTH=user:token123"]
+ expected_lines = [b"CARGO_ARTIFACTORY1_READ_AUTH=REDACTED"]
+ self.assertEqual(expected_lines, list(_sanitize_log_lines(lines)))
+
+ def test_cargo_token_env_is_redacted(self):
+ lines = [b"CARGO_REGISTRIES_ARTIFACTORY1_TOKEN=Bearer token123"]
+ expected_lines = [b"CARGO_REGISTRIES_ARTIFACTORY1_TOKEN=REDACTED"]
+ self.assertEqual(expected_lines, list(_sanitize_log_lines(lines)))
+
+ def test_authorization_header_is_redacted(self):
+ lines = [b"Authorization: Bearer abc.def.ghi"]
+ expected_lines = [b"Authorization: REDACTED"]
+ self.assertEqual(expected_lines, list(_sanitize_log_lines(lines)))
+
+ def test_standalone_bearer_token_is_redacted(self):
+ lines = [b"Some tool printed: Bearer abcdef123456"]
+ expected_lines = [b"Some tool printed: Bearer REDACTED"]
+ self.assertEqual(expected_lines, list(_sanitize_log_lines(lines)))
+
+ def test_mitm_certificate_flag_value_is_redacted(self):
+ lines = [
+ b"RUN: build-craft --fetch-service-mitm-certificate 'BEGINCERTDATA'"
+ ]
+ expected_lines = [
+ b"RUN: build-craft --fetch-service-mitm-certificate <redacted>"
+ ]
+ self.assertEqual(expected_lines, list(_sanitize_log_lines(lines)))
class TestBuildManager(TestCase):