← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~jelmer/launchpad/617393-cleanup-tmp into lp:launchpad/devel

 

Jelmer Vernooij has proposed merging lp:~jelmer/launchpad/617393-cleanup-tmp into lp:launchpad/devel.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers): code
Related bugs:
  #617393 queue and archive processing(?) leave directories in /tmp
  https://bugs.launchpad.net/bugs/617393


This changes the handling of unpacking of source packages to extract copyright and changelog files.

Rather than keeping the unpacked sources around and cleaning them up afterwards, this makes us clean up the unpacked source directory immediately. 

This does have the consequence that we keep the changelog file into memory before streaming it to the librarian rather than streaming it from disk. That seems like a reasonable tradeoff though, as there were already checks in place to make sure that the changelog file is never bigger than 10Mb; we also only keep a single changelog in memory.
-- 
https://code.launchpad.net/~jelmer/launchpad/617393-cleanup-tmp/+merge/33194
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~jelmer/launchpad/617393-cleanup-tmp into lp:launchpad/devel.
=== modified file 'lib/lp/archiveuploader/dscfile.py'
--- lib/lp/archiveuploader/dscfile.py	2010-08-16 15:47:33 +0000
+++ lib/lp/archiveuploader/dscfile.py	2010-08-20 10:21:46 +0000
@@ -13,11 +13,12 @@
     'SignableTagFile',
     'DSCFile',
     'DSCUploadedFile',
-    'findChangelog',
-    'findCopyright',
+    'find_changelog',
+    'find_copyright',
     ]
 
 import apt_pkg
+from cStringIO import StringIO
 import errno
 import glob
 import os
@@ -50,6 +51,70 @@
 from lp.soyuz.interfaces.sourcepackageformat import SourcePackageFormat
 
 
+class DpkgSourceError(Exception):
+
+    _fmt = "Unable to unpack source package (%(result)s): %(output)s"
+
+    def __init__(self, output, result):
+        Exception.__init__(
+            self, self._fmt % {"output": output, "result": result})
+        self.output = output
+        self.result = result
+
+
+def unpack_source(dsc_filepath):
+    """Unpack a source package into a temporary directory
+
+    :param dsc_filenpath: Path to the dsc file
+    :return: Path to the temporary directory with the unpacked sources
+    """
+    # Get a temporary dir together.
+    unpacked_dir = tempfile.mkdtemp()
+    try:
+        # chdir into it
+        cwd = os.getcwd()
+        os.chdir(unpacked_dir)
+        try:
+            args = ["dpkg-source", "-sn", "-x", dsc_filepath]
+            dpkg_source = subprocess.Popen(args, stdout=subprocess.PIPE,
+                                           stderr=subprocess.PIPE)
+            output, unused = dpkg_source.communicate()
+            result = dpkg_source.wait()
+        finally:
+            # When all is said and done, chdir out again so that we can
+            # clean up the tree with shutil.rmtree without leaving the
+            # process in a directory we're trying to remove.
+            os.chdir(cwd)
+
+        if result != 0:
+            dpkg_output = prefix_multi_line_string(output, "  ")
+            raise DpkgSourceError(result=result, output=dpkg_output)
+    except:
+        shutil.rmtree(unpacked_dir)
+        raise
+
+    return unpacked_dir
+
+
+def cleanup_unpacked_dir(unpacked_dir):
+    """Remove the directory with an unpacked source package.
+
+    :param unpacked_dir: Path to the directory.
+    """
+    try:
+        shutil.rmtree(unpacked_dir)
+    except OSError, error:
+        if errno.errorcode[error.errno] != 'EACCES':
+            raise UploadError(
+                "couldn't remove tmp dir %s: code %s" % (
+                unpacked_dir, error.errno))
+        else:
+            result = os.system("chmod -R u+rwx " + unpacked_dir)
+            if result != 0:
+                raise UploadError("chmod failed with %s" % result)
+            shutil.rmtree(unpacked_dir)
+
+
 class SignableTagFile:
     """Base class for signed file verification."""
 
@@ -137,7 +202,7 @@
             "rfc2047": rfc2047,
             "name": name,
             "email": email,
-            "person": person
+            "person": person,
             }
 
 
@@ -154,9 +219,9 @@
 
     # Note that files is actually only set inside verify().
     files = None
-    # Copyright and changelog_path are only set inside unpackAndCheckSource().
+    # Copyright and changelog are only set inside unpackAndCheckSource().
     copyright = None
-    changelog_path = None
+    changelog = None
 
     def __init__(self, filepath, digest, size, component_and_section,
                  priority, package, version, changes, policy, logger):
@@ -205,12 +270,9 @@
         else:
             self.processSignature()
 
-        self.unpacked_dir = None
-
     #
     # Useful properties.
     #
-
     @property
     def source(self):
         """Return the DSC source name."""
@@ -244,12 +306,11 @@
     #
     # DSC file checks.
     #
-
     def verify(self):
         """Verify the uploaded .dsc file.
 
-        This method is an error generator, i.e, it returns an iterator over all
-        exceptions that are generated while processing DSC file checks.
+        This method is an error generator, i.e, it returns an iterator over
+        all exceptions that are generated while processing DSC file checks.
         """
 
         for error in SourceUploadFile.verify(self):
@@ -485,82 +546,53 @@
         self.logger.debug(
             "Verifying uploaded source package by unpacking it.")
 
-        # Get a temporary dir together.
-        self.unpacked_dir = tempfile.mkdtemp()
-
-        # chdir into it
-        cwd = os.getcwd()
-        os.chdir(self.unpacked_dir)
-        dsc_in_tmpdir = os.path.join(self.unpacked_dir, self.filename)
-
-        package_files = self.files + [self]
         try:
-            for source_file in package_files:
-                os.symlink(
-                    source_file.filepath,
-                    os.path.join(self.unpacked_dir, source_file.filename))
-            args = ["dpkg-source", "-sn", "-x", dsc_in_tmpdir]
-            dpkg_source = subprocess.Popen(args, stdout=subprocess.PIPE,
-                                           stderr=subprocess.PIPE)
-            output, unused = dpkg_source.communicate()
-            result = dpkg_source.wait()
-        finally:
-            # When all is said and done, chdir out again so that we can
-            # clean up the tree with shutil.rmtree without leaving the
-            # process in a directory we're trying to remove.
-            os.chdir(cwd)
-
-        if result != 0:
-            dpkg_output = prefix_multi_line_string(output, "  ")
+            unpacked_dir = unpack_source(self.filepath)
+        except DpkgSourceError, e:
             yield UploadError(
                 "dpkg-source failed for %s [return: %s]\n"
                 "[dpkg-source output: %s]"
-                % (self.filename, result, dpkg_output))
-
-        # Copy debian/copyright file content. It will be stored in the
-        # SourcePackageRelease records.
-
-        # Check if 'dpkg-source' created only one directory.
-        temp_directories = [
-            dirname for dirname in os.listdir(self.unpacked_dir)
-            if os.path.isdir(dirname)]
-        if len(temp_directories) > 1:
-            yield UploadError(
-                'Unpacked source contains more than one directory: %r'
-                % temp_directories)
-
-        # XXX cprov 20070713: We should access only the expected directory
-        # name (<sourcename>-<no_epoch(no_revision(version))>).
-
-        # Locate both the copyright and changelog files for later processing.
-        for error in findCopyright(self, self.unpacked_dir, self.logger):
-            yield error
-
-        for error in findChangelog(self, self.unpacked_dir, self.logger):
-            yield error
-
-        self.logger.debug("Cleaning up source tree.")
+                % (self.filename, e.result, e.output))
+            return
+
+        try:
+            # Copy debian/copyright file content. It will be stored in the
+            # SourcePackageRelease records.
+
+            # Check if 'dpkg-source' created only one directory.
+            temp_directories = [
+                dirname for dirname in os.listdir(unpacked_dir)
+                if os.path.isdir(dirname)]
+            if len(temp_directories) > 1:
+                yield UploadError(
+                    'Unpacked source contains more than one directory: %r'
+                    % temp_directories)
+
+            # XXX cprov 20070713: We should access only the expected directory
+            # name (<sourcename>-<no_epoch(no_revision(version))>).
+
+            # Locate both the copyright and changelog files for later
+            # processing.
+            try:
+                self.copyright = find_copyright(unpacked_dir, self.logger)
+            except UploadError, error:
+                yield error
+                return
+            except UploadWarning, warning:
+                yield warning
+
+            try:
+                self.changelog = find_changelog(unpacked_dir, self.logger)
+            except UploadError, error:
+                yield error
+                return
+            except UploadWarning, warning:
+                yield warning
+        finally:
+            self.logger.debug("Cleaning up source tree.")
+            cleanup_unpacked_dir(unpacked_dir)
         self.logger.debug("Done")
 
-    def cleanUp(self):
-        if self.unpacked_dir is None:
-            return
-        try:
-            shutil.rmtree(self.unpacked_dir)
-        except OSError, error:
-            # XXX: dsilvers 2006-03-15: We currently lack a test for this.
-            if errno.errorcode[error.errno] != 'EACCES':
-                raise UploadError(
-                    "%s: couldn't remove tmp dir %s: code %s" % (
-                    self.filename, self.unpacked_dir, error.errno))
-            else:
-                result = os.system("chmod -R u+rwx " + self.unpacked_dir)
-                if result != 0:
-                    raise UploadError("chmod failed with %s" % result)
-                shutil.rmtree(self.unpacked_dir)
-        self.unpacked_dir = None
-
-
     def findBuild(self):
         """Find and return the SourcePackageRecipeBuild, if one is specified.
 
@@ -618,8 +650,8 @@
 
         changelog_lfa = self.librarian.create(
             "changelog",
-            os.stat(self.changelog_path).st_size,
-            open(self.changelog_path, "r"),
+            len(self.changelog),
+            StringIO(self.changelog),
             "text/x-debian-source-changelog",
             restricted=self.policy.archive.private)
 
@@ -679,6 +711,7 @@
           validation inside DSCFile.verify(); there is no
           store_in_database() method.
     """
+
     def __init__(self, filepath, digest, size, policy, logger):
         component_and_section = priority = "--no-value--"
         NascentUploadFile.__init__(
@@ -698,7 +731,7 @@
 
     :param source_file: The directory where the source was extracted
     :param source_dir: The directory where the source was extracted.
-    :return fullpath: The full path of the file, else return None if the 
+    :return fullpath: The full path of the file, else return None if the
                       file is not found.
     """
     # Instead of trying to predict the unpacked source directory name,
@@ -721,50 +754,49 @@
             return fullpath
     return None
 
-def findCopyright(dsc_file, source_dir, logger):
+
+def find_copyright(source_dir, logger):
     """Find and store any debian/copyright.
 
-    :param dsc_file: A DSCFile object where the copyright will be stored.
     :param source_dir: The directory where the source was extracted.
     :param logger: A logger object for debug output.
+    :return: Contents of copyright file
     """
-    try:
-        copyright_file = findFile(source_dir, 'debian/copyright')
-    except UploadError, error:
-        yield error
-        return
+    copyright_file = findFile(source_dir, 'debian/copyright')
     if copyright_file is None:
-        yield UploadWarning("No copyright file found.")
-        return
+        raise UploadWarning("No copyright file found.")
 
     logger.debug("Copying copyright contents.")
+<<<<<<< TREE
     dsc_file.copyright = open(copyright_file).read().strip()
 
 
 def findChangelog(dsc_file, source_dir, logger):
+=======
+    return open(copyright_file).read().strip()
+
+
+def find_changelog(source_dir, logger):
+>>>>>>> MERGE-SOURCE
     """Find and move any debian/changelog.
 
     This function finds the changelog file within the source package. The
     changelog file is later uploaded to the librarian by
     DSCFile.storeInDatabase().
 
-    :param dsc_file: A DSCFile object where the copyright will be stored.
     :param source_dir: The directory where the source was extracted.
     :param logger: A logger object for debug output.
+    :return: Changelog contents
     """
-    try:
-        changelog_file = findFile(source_dir, 'debian/changelog')
-    except UploadError, error:
-        yield error
-        return
+    changelog_file = findFile(source_dir, 'debian/changelog')
     if changelog_file is None:
         # Policy requires debian/changelog to always exist.
-        yield UploadError("No changelog file found.")
-        return
+        raise UploadError("No changelog file found.")
 
     # Move the changelog file out of the package direcotry
     logger.debug("Found changelog")
-    dsc_file.changelog_path = changelog_file
+    return open(changelog_file, 'r').read()
+
 
 def check_format_1_0_files(filename, file_type_counts, component_counts,
                            bzip2_count):

=== modified file 'lib/lp/archiveuploader/nascentupload.py'
--- lib/lp/archiveuploader/nascentupload.py	2010-08-04 22:07:41 +0000
+++ lib/lp/archiveuploader/nascentupload.py	2010-08-20 10:21:46 +0000
@@ -880,12 +880,6 @@
                 'Exception while accepting:\n %s' % e, exc_info=True)
             self.do_reject(notify)
             return False
-        else:
-            self.cleanUp()
-
-    def cleanUp(self):
-        if self.changes.dsc is not None:
-            self.changes.dsc.cleanUp()
 
     def do_reject(self, notify=True):
         """Reject the current upload given the reason provided."""
@@ -916,7 +910,6 @@
         self.queue_root.notify(summary_text=self.rejection_message,
             changes_file_object=changes_file_object, logger=self.logger)
         changes_file_object.close()
-        self.cleanUp()
 
     def _createQueueEntry(self):
         """Return a PackageUpload object."""

=== modified file 'lib/lp/archiveuploader/tests/test_dscfile.py'
--- lib/lp/archiveuploader/tests/test_dscfile.py	2010-07-24 03:46:56 +0000
+++ lib/lp/archiveuploader/tests/test_dscfile.py	2010-08-20 10:21:46 +0000
@@ -10,13 +10,25 @@
 from canonical.launchpad.scripts.logger import QuietFakeLogger
 from canonical.testing.layers import LaunchpadZopelessLayer
 from lp.archiveuploader.dscfile import (
-    DSCFile, findChangelog, findCopyright, format_to_file_checker_map)
+    cleanup_unpacked_dir,
+    DSCFile,
+    find_changelog,
+    find_copyright,
+    format_to_file_checker_map,
+    unpack_source,
+    )
 from lp.archiveuploader.nascentuploadfile import UploadError
-from lp.archiveuploader.tests import datadir, mock_logger_quiet
+from lp.archiveuploader.tests import (
+    datadir,
+    mock_logger_quiet,
+    )
 from lp.archiveuploader.uploadpolicy import BuildDaemonUploadPolicy
 from lp.registry.interfaces.sourcepackage import SourcePackageFileType
 from lp.soyuz.interfaces.sourcepackageformat import SourcePackageFormat
-from lp.testing import TestCase, TestCaseWithFactory
+from lp.testing import (
+    TestCase,
+    TestCaseWithFactory,
+    )
 
 ORIG_TARBALL = SourcePackageFileType.ORIG_TARBALL
 DEBIAN_TARBALL = SourcePackageFileType.DEBIAN_TARBALL
@@ -26,9 +38,6 @@
 
 class TestDscFile(TestCase):
 
-    class MockDSCFile:
-        copyright = None
-
     def setUp(self):
         super(TestDscFile, self).setUp()
         self.tmpdir = self.makeTemporaryDirectory()
@@ -36,7 +45,6 @@
         os.makedirs(self.dir_path)
         self.copyright_path = os.path.join(self.dir_path, "copyright")
         self.changelog_path = os.path.join(self.dir_path, "changelog")
-        self.dsc_file = self.MockDSCFile()
 
     def testBadDebianCopyright(self):
         """Test that a symlink as debian/copyright will fail.
@@ -45,14 +53,13 @@
         dangling symlink in an attempt to try and access files on the system
         processing the source packages."""
         os.symlink("/etc/passwd", self.copyright_path)
-        errors = list(findCopyright(
-            self.dsc_file, self.tmpdir, mock_logger_quiet))
-
-        self.assertEqual(len(errors), 1)
-        self.assertIsInstance(errors[0], UploadError)
-        self.assertEqual(
-            errors[0].args[0],
-            "Symbolic link for debian/copyright not allowed")
+        try:
+            find_copyright(self.tmpdir, mock_logger_quiet)
+            self.fail()
+        except UploadError, error:
+            self.assertEqual(
+                error.args[0],
+                "Symbolic link for debian/copyright not allowed")
 
     def testGoodDebianCopyright(self):
         """Test that a proper copyright file will be accepted"""
@@ -61,11 +68,8 @@
         file.write(copyright)
         file.close()
 
-        errors = list(findCopyright(
-            self.dsc_file, self.tmpdir, mock_logger_quiet))
-
-        self.assertEqual(len(errors), 0)
-        self.assertEqual(self.dsc_file.copyright, copyright)
+        self.assertEquals(
+            copyright, find_copyright(self.tmpdir, mock_logger_quiet))
 
     def testBadDebianChangelog(self):
         """Test that a symlink as debian/changelog will fail.
@@ -74,14 +78,13 @@
         dangling symlink in an attempt to try and access files on the system
         processing the source packages."""
         os.symlink("/etc/passwd", self.changelog_path)
-        errors = list(findChangelog(
-            self.dsc_file, self.tmpdir, mock_logger_quiet))
-
-        self.assertEqual(len(errors), 1)
-        self.assertIsInstance(errors[0], UploadError)
-        self.assertEqual(
-            errors[0].args[0],
-            "Symbolic link for debian/changelog not allowed")
+        try:
+            find_changelog(self.tmpdir, mock_logger_quiet)
+            self.fail()
+        except UploadError, error:
+            self.assertEqual(
+                error.args[0],
+                "Symbolic link for debian/changelog not allowed")
 
     def testGoodDebianChangelog(self):
         """Test that a proper changelog file will be accepted"""
@@ -90,12 +93,8 @@
         file.write(changelog)
         file.close()
 
-        errors = list(findChangelog(
-            self.dsc_file, self.tmpdir, mock_logger_quiet))
-
-        self.assertEqual(len(errors), 0)
-        self.assertEqual(self.dsc_file.changelog_path,
-                         self.changelog_path)
+        self.assertEquals(
+            changelog, find_changelog(self.tmpdir, mock_logger_quiet))
 
     def testOversizedFile(self):
         """Test that a file larger than 10MiB will fail.
@@ -114,13 +113,13 @@
         file.write(empty_file)
         file.close()
 
-        errors = list(findChangelog(
-            self.dsc_file, self.tmpdir, mock_logger_quiet))
-
-        self.assertIsInstance(errors[0], UploadError)
-        self.assertEqual(
-            errors[0].args[0],
-            "debian/changelog file too large, 10MiB max")
+        try:
+            find_changelog(self.tmpdir, mock_logger_quiet)
+            self.fail()
+        except UploadError, error:
+            self.assertEqual(
+                error.args[0],
+                "debian/changelog file too large, 10MiB max")
 
 
 class TestDscFileLibrarian(TestCaseWithFactory):
@@ -130,6 +129,7 @@
 
     def getDscFile(self, name):
         dsc_path = datadir(os.path.join('suite', name, name + '.dsc'))
+
         class Changes:
             architectures = ['source']
         logger = QuietFakeLogger()
@@ -146,10 +146,7 @@
         os.chmod(tempdir, 0555)
         try:
             dsc_file = self.getDscFile('bar_1.0-1')
-            try:
-                list(dsc_file.verify())
-            finally:
-                dsc_file.cleanUp()
+            list(dsc_file.verify())
         finally:
             os.chmod(tempdir, 0755)
 
@@ -281,3 +278,42 @@
         # A 3.0 (native) source with component tarballs is invalid.
         self.assertErrorsForFiles(
             [self.wrong_files_error], {NATIVE_TARBALL: 1}, {'foo': 1})
+
+
+class UnpackedDirTests(TestCase):
+    """Tests for unpack_source and cleanup_unpacked_dir."""
+
+    def test_unpack_source(self):
+        # unpack_source unpacks in a temporary directory and returns the
+        # path.
+        unpacked_dir = unpack_source(
+            datadir(os.path.join('suite', 'bar_1.0-1', 'bar_1.0-1.dsc')))
+        try:
+            self.assertEquals(["bar-1.0"], os.listdir(unpacked_dir))
+            self.assertContentEqual(
+                ["THIS_IS_BAR", "debian"],
+                os.listdir(os.path.join(unpacked_dir, "bar-1.0")))
+        finally:
+            cleanup_unpacked_dir(unpacked_dir)
+
+    def test_cleanup(self):
+        # cleanup_dir removes the temporary directory and all files under it.
+        temp_dir = self.makeTemporaryDirectory()
+        unpacked_dir = os.path.join(temp_dir, "unpacked")
+        os.mkdir(unpacked_dir)
+        os.mkdir(os.path.join(unpacked_dir, "bar_1.0"))
+        cleanup_unpacked_dir(unpacked_dir)
+        self.assertFalse(os.path.exists(unpacked_dir))
+
+    def test_cleanup_invalid_mode(self):
+        # cleanup_dir can remove a directory even if the mode does
+        # not allow it.
+        temp_dir = self.makeTemporaryDirectory()
+        unpacked_dir = os.path.join(temp_dir, "unpacked")
+        os.mkdir(unpacked_dir)
+        bar_path = os.path.join(unpacked_dir, "bar_1.0")
+        os.mkdir(bar_path)
+        os.chmod(unpacked_dir, 0600)
+        os.chmod(bar_path, 0600)
+        cleanup_unpacked_dir(unpacked_dir)
+        self.assertFalse(os.path.exists(unpacked_dir))


Follow ups