← Back to team overview

apport-hackers team mailing list archive

[Merge] lp:~brian-murray/apport/contents-mapping into lp:apport

 

Brian Murray has proposed merging lp:~brian-murray/apport/contents-mapping into lp:apport.

Requested reviews:
  Apport upstream developers (apport-hackers)
Related bugs:
  Bug #1370230 in Apport: "apport-retrace has become slower"
  https://bugs.launchpad.net/apport/+bug/1370230

For more details, see:
https://code.launchpad.net/~brian-murray/apport/contents-mapping/+merge/356447

This modifies apport-retrace so that it stops searching each Contents.gz file for every file that appears in ProcMaps until it finds a match. Instead, a global mapping of files to packages is created and saved to disk so that subsequent file lookups and retraces are quicker.

It's worth noting that not every file in Contents.gz is added to the mapping, because not every file which exists in ProcMaps is used as a search criterion for Contents.gz. You can see the filtering in the needed_runtime_packages() function - https://bazaar.launchpad.net/~apport-hackers/apport/trunk/view/head:/apport/sandboxutils.py#L81

I've tested it with sample crashes from every supported release of Ubuntu without issue.
-- 
Your team Apport upstream developers is requested to review the proposed merge of lp:~brian-murray/apport/contents-mapping into lp:apport.
=== modified file 'backends/packaging-apt-dpkg.py'
--- backends/packaging-apt-dpkg.py	2018-10-10 18:10:01 +0000
+++ backends/packaging-apt-dpkg.py	2018-10-10 23:40:47 +0000
@@ -50,8 +50,10 @@
         self._contents_dir = None
         self._mirror = None
         self._virtual_mapping_obj = None
+        self._contents_mapping_obj = None
         self._launchpad_base = 'https://api.launchpad.net/devel'
         self._ppa_archive_url = self._launchpad_base + '/~%(user)s/+archive/%(distro)s/%(ppaname)s'
+        self._contents_update = False
 
     def __del__(self):
         try:
@@ -79,6 +81,25 @@
             with open(mapping_file, 'wb') as fp:
                 pickle.dump(self._virtual_mapping_obj, fp)
 
+    def _contents_mapping(self, configdir, arch):
+        if self._contents_mapping_obj is not None:
+            return self._contents_mapping_obj
+
+        mapping_file = os.path.join(configdir, 'contents_mapping-%s.pickle' % arch)
+        if os.path.exists(mapping_file):
+            with open(mapping_file, 'rb') as fp:
+                self._contents_mapping_obj = pickle.load(fp)
+        else:
+            self._contents_mapping_obj = {}
+
+        return self._contents_mapping_obj
+
+    def _save_contents_mapping(self, configdir, arch):
+        mapping_file = os.path.join(configdir, 'contents_mapping-%s.pickle' % arch)
+        if self._contents_mapping_obj is not None:
+            with open(mapping_file, 'wb') as fp:
+                pickle.dump(self._contents_mapping_obj, fp)
+
     def _cache(self):
         '''Return apt.Cache() (initialized lazily).'''
 
@@ -1224,12 +1245,14 @@
             release = self.get_distro_codename()
         else:
             release = self._distro_release_to_codename(release)
-        # search -proposed last since file may be provided by a new package
-        for pocket in ['-updates', '-security', '', '-proposed']:
-            map = os.path.join(dir, '%s%s-Contents-%s.gz' % (release, pocket, arch))
-
-            # check if map exists and is younger than a day; if not, we need to
-            # refresh it
+        # this is ordered by likelihood of installation with the most common
+        # last
+        for pocket in ['-proposed', '', '-security', '-updates']:
+            map = os.path.join(dir, '%s%s-Contents-%s.gz' % \
+                                    (release, pocket, arch))
+            # check if map exists and is younger than a day; if not, we need
+            # to refresh it
+            update = False
             try:
                 st = os.stat(map)
                 age = int(time.time() - st.st_mtime)
@@ -1263,6 +1286,7 @@
                 else:
                     update = True
                 if update:
+                    self._contents_update = True
                     try:
                         src = urlopen(url)
                     except IOError:
@@ -1282,23 +1306,18 @@
                     src.close()
                     assert os.path.exists(map)
 
-            if file.startswith('/'):
-                file = file[1:]
-
-            # zgrep is magnitudes faster than a 'gzip.open/split() loop'
-            package = None
-            try:
-                zgrep = subprocess.Popen(['zgrep', '-m1', '^%s[[:space:]]' % file, map],
-                                         stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-                out = zgrep.communicate()[0].decode('UTF-8')
-            except OSError as e:
-                if e.errno != errno.ENOMEM:
-                    raise
-                file_b = file.encode()
+            contents_mapping = self._contents_mapping(dir, arch)
+            # if the mapping is empty build it
+            if not contents_mapping:
+                self._contents_update = True
+            # if any of the Contents files were updated we need to update the
+            # map because the ordering in which is created is important
+            if self._contents_update:
                 import gzip
                 with gzip.open('%s' % map, 'rb') as contents:
-                    out = ''
+                    line_num = 0
                     for line in contents:
+<<<<<<< TREE
                         if line.startswith(file_b):
                             out = line
                             break
@@ -1309,6 +1328,45 @@
                 package = out.split()[-1].split(',')[0].split('/')[-1]
             if package:
                 return package
+=======
+                        line_num += 1
+                        # the first 32 lines are descriptive only for these
+                        # releases
+                        if pocket == '' and release in ['trusty', 'xenial'] \
+                                and line_num < 33:
+                            continue
+                        path = line.split()[0]
+                        if path.split(b'/')[0] == b'usr' and \
+                                path.split(b'/')[1] in (b'lib', b'bin', b'sbin'):
+                            package = line.split()[-1].split(b',')[0].split(b'/')[-1]
+                        elif path.split(b'/')[0] in (b'lib', b'bin', b'sbin'):
+                            package = line.split()[-1].split(b',')[0].split(b'/')[-1]
+                        else:
+                            continue
+                        if path in contents_mapping:
+                            if package == contents_mapping[path]:
+                                continue
+                            else:
+                                # if the package was updated use the update
+                                # b/c everyone should have packages from
+                                # -updates and -security installed
+                                contents_mapping[path] = package
+                        else:
+                            contents_mapping[path] = package
+        # the file only needs to be saved after an update
+        if self._contents_update:
+            self._save_contents_mapping(dir, arch)
+            # the update of the mapping only needs to be done once
+            self._contents_update = False
+        if file.startswith('/'):
+            file = file[1:]
+        file = file.encode()
+        try:
+            pkg = contents_mapping[file].decode()
+            return pkg
+        except KeyError:
+            pass
+>>>>>>> MERGE-SOURCE
         return None
 
     @classmethod


Follow ups