launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #24664
[Merge] ~cjwatson/launchpad-buildd:py3-build-logs-bytes into launchpad-buildd:master
Colin Watson has proposed merging ~cjwatson/launchpad-buildd:py3-build-logs-bytes into launchpad-buildd:master.
Commit message:
Treat build logs as binary files
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~cjwatson/launchpad-buildd/+git/launchpad-buildd/+merge/383344
Build logs can contain arbitrary binary data. Although we generally parse and display them as text, when fetching the log tail we may start in the middle of a UTF-8 sequence, and in general we need to parse them defensively. Treat the files themselves as binary and decode them for parsing purposes.
--
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/launchpad-buildd:py3-build-logs-bytes into launchpad-buildd:master.
diff --git a/debian/changelog b/debian/changelog
index c8029fc..ee93dfa 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -8,6 +8,7 @@ launchpad-buildd (190) UNRELEASED; urgency=medium
* Open temporary files in text mode in more cases.
* Ensure that regex patterns with \-escapes are raw strings.
* Adjust X-LXD-mode header construction for Python 3.
+ * Treat build logs as binary files.
-- Colin Watson <cjwatson@xxxxxxxxxx> Tue, 28 Apr 2020 10:19:27 +0100
diff --git a/lpbuildd/binarypackage.py b/lpbuildd/binarypackage.py
index 664f459..2111bbe 100644
--- a/lpbuildd/binarypackage.py
+++ b/lpbuildd/binarypackage.py
@@ -359,12 +359,12 @@ class BinaryPackageBuildManager(DebianBuildManager):
# Check the last 4KiB for the Fail-Stage. If it failed
# during install-deps, search for the missing dependency
# string.
- with open(os.path.join(self._cachepath, "buildlog")) as log:
+ with open(os.path.join(self._cachepath, "buildlog"), "rb") as log:
try:
log.seek(-4096, os.SEEK_END)
except IOError:
pass
- tail = log.read(4096)
+ tail = log.read(4096).decode("UTF-8", "replace")
if re.search(r"^Fail-Stage: install-deps$", tail, re.M):
for rx in BuildLogRegexes.MAYBEDEPFAIL:
log_patterns.append([rx, re.M | re.S])
@@ -389,9 +389,11 @@ class BinaryPackageBuildManager(DebianBuildManager):
success = SBuildExitCodes.FAILED
elif rx in BuildLogRegexes.DEPFAIL:
# A depwait match forces depwait.
- missing_dep = mo.expand(BuildLogRegexes.DEPFAIL[rx])
+ missing_dep = mo.expand(
+ BuildLogRegexes.DEPFAIL[rx].encode("UTF-8"))
missing_dep = self.stripDependencies(
- PkgRelation.parse_relations(missing_dep))
+ PkgRelation.parse_relations(
+ missing_dep.decode("UTF-8", "replace")))
else:
# Otherwise it was a givenback pattern, so leave it
# in givenback.
diff --git a/lpbuildd/builder.py b/lpbuildd/builder.py
index c5ad197..fc3a0ce 100644
--- a/lpbuildd/builder.py
+++ b/lpbuildd/builder.py
@@ -36,24 +36,24 @@ from lpbuildd.util import shell_escape
devnull = open("/dev/null", "r")
-def _sanitizeURLs(text_seq):
- """A generator that deletes URL passwords from a string sequence.
+def _sanitizeURLs(bytes_seq):
+ """A generator that deletes URL passwords from a bytes sequence.
This generator removes user/password data from URLs if embedded
in the latter as follows: scheme://user:passwd@netloc/path.
- :param text_seq: A sequence of strings (that may contain URLs).
+ :param bytes_seq: A sequence of byte strings (that may contain URLs).
:return: A (sanitized) line stripped of authentication credentials.
"""
# This regular expression will be used to remove authentication
# credentials from URLs.
- password_re = re.compile(r'://([^:]+:[^@]+@)(\S+)')
+ password_re = re.compile(br'://([^:]+:[^@]+@)(\S+)')
# Snap proxy passwords are UUIDs.
- snap_proxy_auth_re = re.compile(r',proxyauth=[^:]+:[A-Za-z0-9-]+')
+ snap_proxy_auth_re = re.compile(br',proxyauth=[^:]+:[A-Za-z0-9-]+')
- for line in text_seq:
- sanitized_line = password_re.sub(r'://\2', line)
- sanitized_line = snap_proxy_auth_re.sub('', sanitized_line)
+ for line in bytes_seq:
+ sanitized_line = password_re.sub(br'://\2', line)
+ sanitized_line = snap_proxy_auth_re.sub(b'', sanitized_line)
yield sanitized_line
@@ -512,8 +512,12 @@ class Builder(object):
def log(self, data):
"""Write the provided data to the log."""
if self._log is not None:
- self._log.write(data)
+ data_bytes = (
+ data if isinstance(data, bytes) else data.encode("UTF-8"))
+ self._log.write(data_bytes)
self._log.flush()
+ if not isinstance(data, str):
+ data = data.decode("UTF-8", "replace")
if data.endswith("\n"):
data = data[:-1]
log.msg("Build log: " + data)
@@ -522,27 +526,27 @@ class Builder(object):
"""Return the tail of the log.
If the buildlog is not yet opened for writing (self._log is None),
- return a empty string.
+ return an empty bytes object.
It safely tries to open the 'buildlog', if it doesn't exist, due to
job cleanup or buildlog sanitization race-conditions, it also returns
- an empty string.
+ an empty bytes object.
- When the 'buildlog' is present it return up to 2 KiB character of
- the end of the file.
+ When the 'buildlog' is present it returns up to 2 KiB bytes of the
+ end of the file.
The returned content will be 'sanitized', see `_sanitizeURLs` for
further information.
"""
if self._log is None:
- return ""
+ return b""
rlog = None
try:
try:
- rlog = open(self.cachePath("buildlog"), "r")
+ rlog = open(self.cachePath("buildlog"), "rb")
except IOError:
- ret = ""
+ ret = b""
else:
# We rely on good OS practices that keep the file handler
# usable once it's opened. So, if open() is ok, a subsequent
@@ -567,7 +571,7 @@ class Builder(object):
# excerpt to be scrubbed) because it may be cut off thus
# thwarting the detection of embedded passwords.
clean_content_iter = _sanitizeURLs(log_lines[1:])
- ret = '\n'.join(clean_content_iter)
+ ret = b'\n'.join(clean_content_iter)
return ret
@@ -584,7 +588,7 @@ class Builder(object):
"""Empty the log and start again."""
if self._log is not None:
self._log.close()
- self._log = open(self.cachePath("buildlog"), "w")
+ self._log = open(self.cachePath("buildlog"), "wb")
def builderFail(self):
"""Cease building because the builder has a problem."""
@@ -667,14 +671,14 @@ class Builder(object):
os.rename(log_path, unsanitized_path)
# Open the unsanitized buildlog file for reading.
- unsanitized_file = open(unsanitized_path)
+ unsanitized_file = open(unsanitized_path, 'rb')
# Open the file that will hold the resulting, sanitized buildlog
# content for writing.
sanitized_file = None
try:
- sanitized_file = open(log_path, 'w')
+ sanitized_file = open(log_path, 'wb')
# Scrub the buildlog file line by line
clean_content_iter = _sanitizeURLs(unsanitized_file)
diff --git a/lpbuildd/debian.py b/lpbuildd/debian.py
index 7065c7d..7f8dd92 100644
--- a/lpbuildd/debian.py
+++ b/lpbuildd/debian.py
@@ -237,29 +237,27 @@ class DebianBuildManager(BuildManager):
"""
chunk_size = 256 * 1024
regexes = [
- re.compile(pattern, flags)
+ re.compile(pattern.encode("UTF-8"), flags)
for pattern, flags in patterns_and_flags]
stop_regexes = [
- re.compile(pattern, flags)
+ re.compile(pattern.encode("UTF-8"), flags)
for pattern, flags in stop_patterns_and_flags]
- buildlog = open(os.path.join(self._cachepath, "buildlog"))
- try:
- window = ""
+ buildlog_path = os.path.join(self._cachepath, "buildlog")
+ with open(buildlog_path, "rb") as buildlog:
+ window = b""
chunk = buildlog.read(chunk_size)
while chunk:
window += chunk
for regex in regexes:
match = regex.search(window)
if match is not None:
- return regex.pattern, match
+ return regex.pattern.decode("UTF-8"), match
for regex in stop_regexes:
if regex.search(window) is not None:
return None, None
if len(window) > chunk_size:
window = window[chunk_size:]
chunk = buildlog.read(chunk_size)
- finally:
- buildlog.close()
return None, None
def iterate_SOURCES(self, success):
diff --git a/lpbuildd/sourcepackagerecipe.py b/lpbuildd/sourcepackagerecipe.py
index 4781847..b2f46cb 100644
--- a/lpbuildd/sourcepackagerecipe.py
+++ b/lpbuildd/sourcepackagerecipe.py
@@ -109,9 +109,10 @@ class SourcePackageRecipeBuildManager(DebianBuildManager):
r'.*: Depends: ([^ ]*( \([^)]*\))?)')
_, mo = self.searchLogContents([[rx, re.M]])
if mo:
- self._builder.depFail(mo.group(1))
+ missing_dep = mo.group(1).decode("UTF-8", "replace")
+ self._builder.depFail(missing_dep)
print("Returning build status: DEPFAIL")
- print("Dependencies: " + mo.group(1))
+ print("Dependencies: " + missing_dep)
else:
print("Returning build status: Build failed")
self._builder.buildFail()
diff --git a/lpbuildd/tests/test_buildd_slave.py b/lpbuildd/tests/test_buildd_slave.py
index 53c259e..6c70529 100644
--- a/lpbuildd/tests/test_buildd_slave.py
+++ b/lpbuildd/tests/test_buildd_slave.py
@@ -112,12 +112,12 @@ class LaunchpadBuilddSlaveTests(BuilddTestCase):
# behaviour because the BuildManager's 'is_archive_private'
# property is initialized to False).
self.slave.manager.is_archive_private = False
- unsanitized = self.slave.getLogTail().splitlines()
+ unsanitized = self.slave.getLogTail().decode('UTF-8').splitlines()
# Make the builder believe we are building in a private archive to
# obtain the scrubbed log tail output.
self.slave.manager.is_archive_private = True
- clean = self.slave.getLogTail().splitlines()
+ clean = self.slave.getLogTail().decode('UTF-8').splitlines()
# Get the differences ..
differences = '\n'.join(difflib.unified_diff(unsanitized, clean))