apport-hackers team mailing list archive
-
apport-hackers team
-
Mailing list archive
-
Message #00177
[Merge] lp:~louis-bouchard/apport/apport-unpack-extract into lp:apport
Louis Bouchard has proposed merging lp:~louis-bouchard/apport/apport-unpack-extract into lp:apport.
Requested reviews:
Apport upstream developers (apport-hackers)
Related bugs:
Bug #1307413 in apport (Ubuntu): "apport-unpack requires too much main memory to run"
https://bugs.launchpad.net/ubuntu/+source/apport/+bug/1307413
For more details, see:
https://code.launchpad.net/~louis-bouchard/apport/apport-unpack-extract/+merge/247591
The modifications to problem_report.py are mandatory in order for the changes in apport-unpack to work.
Changes to apport-unpack are only meaningful for Ubuntu Precise, so they could be brought in as a debian patch, hence dropped from that MP, while keeping the enhancement to problem_report.
Explanation:
============
Kernel VmCore can be very big ( multi Gigabytes) files. ProblemReport.load() does a systematic load of those in RAM which is very expensive in resource. This adds a ProblemReport.extract() method that will write directly to a file the binary element without loading it into memory first. ProblemExtact() method will write the element given as an argument to disk in the directory passed as an argument or /tmp by default.
apport-unpack is modified to extract the content of the report in two steps :
1) Load the report with binary=False and write all the non-binary element to their respective file, keeping a list of the binary element identified.
2) Use the extract() method on each element identified as binary to write directly to disk
Here is a comparison of the time taken before and after the modification :
root@crashdump:~# time apport-unpack linux-image-3.2.0-38-generic.0.crash tmp
Test on Precise:
Current version :
real 4m54.216s
user 1m16.777s
sys 0m47.919s
With extract method :
============
real 0m12.574s
user 0m5.560s
sys 0m7.004s
--
Your team Apport upstream developers is requested to review the proposed merge of lp:~louis-bouchard/apport/apport-unpack-extract into lp:apport.
=== modified file 'bin/apport-unpack'
--- bin/apport-unpack 2012-05-04 06:54:56 +0000
+++ bin/apport-unpack 2015-01-26 14:41:51 +0000
@@ -49,18 +49,28 @@
except OSError as e:
fatal(str(e))
+binaries = ()
pr = problem_report.ProblemReport()
if report == '-':
- pr.load(sys.stdin)
+ pr.load(sys.stdin, binary=False)
else:
try:
with open(report, 'rb') as f:
- pr.load(f)
+ pr.load(f, binary=False)
except IOError as e:
fatal(str(e))
for k in pr:
+ if pr[k] == u'':
+ binaries += (k,)
+ continue
with open(os.path.join(dir, k), 'wb') as f:
if type(pr[k]) == t_str:
f.write(pr[k].encode('UTF-8'))
else:
f.write(pr[k])
+try:
+ with open(report, 'rb') as f:
+ for element in binaries:
+ pr.extract(f, element, dir)
+except IOError as e:
+ fatal(str(e))
=== modified file 'problem_report.py'
--- problem_report.py 2013-09-19 14:26:33 +0000
+++ problem_report.py 2015-01-26 14:41:51 +0000
@@ -188,6 +188,76 @@
self.old_keys = set(self.data.keys())
+ def extract(self, file, item=None, directory='/tmp'):
+ '''Extract only one element from the problem_report
+ directly without loading the report beforehand
+ This is required for Kernel Crash Dumps that can be
+ very big and saturate the RAM
+ '''
+ self._assert_bin_mode(file)
+ self.data.clear()
+ key = None
+ value = None
+ b64_block = False
+ bd = None
+ # Make sure the report is at the beginning
+ file.seek(0)
+ for line in file:
+ # continuation line
+ if line.startswith(b' '):
+ if not b64_block:
+ continue
+ assert (key is not None and value is not None)
+ if b64_block:
+ l = base64.b64decode(line)
+ if bd:
+ out.write(bd.decompress(l))
+ else:
+ # lazy initialization of bd
+ # skip gzip header, if present
+ if l.startswith(b'\037\213\010'):
+ bd = zlib.decompressobj(-zlib.MAX_WBITS)
+ out.write(bd.decompress(self._strip_gzip_header(l)))
+ else:
+ # legacy zlib-only format used default block
+ # size
+ bd = zlib.decompressobj()
+ out.write(bd.decompress(l))
+ else:
+ if len(value) > 0:
+ out.write('{}'.format(b'\n'))
+ if line.endswith(b'\n'):
+ out.write('{}'.format(line[1:-1]))
+ else:
+ out.write('{}'.format(line[1:]))
+ else:
+ if b64_block:
+ if bd:
+ value += bd.flush()
+ b64_block = False
+ bd = None
+ if key:
+ assert value is not None
+ self.data[key] = self._try_unicode(value)
+ (key, value) = line.split(b':', 1)
+ if not _python2:
+ key = key.decode('ASCII')
+ if key != item:
+ continue
+ value = value.strip()
+ if value == b'base64':
+ value = b''
+ b64_block = True
+ try:
+ out=open(os.path.join(directory, item), 'wb')
+ except IOError as e:
+ fatal(str(e))
+
+ if key is not None:
+ self.data[key] = self._try_unicode(value)
+
+ self.old_keys = set(self.data.keys())
+
def has_removed_fields(self):
'''Check if the report has any keys which were not loaded.
=== modified file 'test/test_problem_report.py'
--- test/test_problem_report.py 2012-10-11 05:56:35 +0000
+++ test/test_problem_report.py 2015-01-26 14:41:51 +0000
@@ -1,5 +1,5 @@
# vim: set encoding=UTF-8 fileencoding=UTF-8 :
-import unittest, tempfile, os, email, gzip, time, sys
+import unittest, tempfile, os, shutil, email, gzip, time, sys
from io import BytesIO
import problem_report
@@ -12,6 +12,14 @@
class T(unittest.TestCase):
+ @classmethod
+ def setUp(self):
+ self.workdir = tempfile.mkdtemp()
+
+ @classmethod
+ def tearDown(self):
+ shutil.rmtree(self.workdir)
+
def test_basic_operations(self):
'''basic creation and operation.'''
@@ -258,6 +266,52 @@
pr.load(BytesIO(b'ProblemType: Crash'))
self.assertEqual(list(pr.keys()), ['ProblemType'])
+
+ def test_extract(self):
+ '''extract() with various binary elements.'''
+
+ # create a test report with binary elements
+ large_val = b'A' * 5000000
+
+ pr = problem_report.ProblemReport()
+ pr['Txt'] = 'some text'
+ pr['MoreTxt'] = 'some more text'
+ pr['Foo'] = problem_report.CompressedValue(b'FooFoo!')
+ pr['Uncompressed'] = bin_data
+ pr['Bin'] = problem_report.CompressedValue()
+ pr['Bin'].set_value(bin_data)
+ pr['Large'] = problem_report.CompressedValue(large_val)
+ pr['Multiline'] = problem_report.CompressedValue(b'\1\1\1\n\2\2\n\3\3\3')
+
+ report = BytesIO()
+ pr.write(report)
+ report.seek(0)
+
+ #Extracts nothing if non binary
+ pr.extract(report, 'Txt', self.workdir )
+ self.assertEqual(os.path.exists(os.path.join(self.workdir, 'Txt')), False)
+ #Check inexistant element
+ pr.extract(report, 'Bar', self.workdir )
+ self.assertEqual(os.path.exists(os.path.join(self.workdir, 'Bar')), False)
+ #Check valid elements
+ pr.extract(report, 'Foo', self.workdir )
+ element = open(os.path.join(self.workdir, 'Foo'))
+ self.assertEqual(element.read(), b'FooFoo!' )
+ pr.extract(report, 'Uncompressed', self.workdir )
+ element = open(os.path.join(self.workdir, 'Uncompressed'))
+ self.assertEqual(element.read(), bin_data )
+ pr.extract(report, 'Bin', self.workdir )
+ element = open(os.path.join(self.workdir, 'Bin'))
+ self.assertEqual(element.read(), bin_data )
+ pr.extract(report, 'Large', self.workdir )
+ element = open(os.path.join(self.workdir, 'Large'))
+ self.assertEqual(element.read(), large_val )
+ pr.extract(report, 'Multiline', self.workdir )
+ element = open(os.path.join(self.workdir, 'Multiline'))
+ self.assertEqual(element.read().splitlines(),
+ [b'\1\1\1', b'\2\2', b'\3\3\3'])
+
+
def test_write_file(self):
'''writing a report with binary file data.'''
Follow ups