← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] ~cjwatson/launchpad:gina-py2-rmtree into launchpad:master

 

Colin Watson has proposed merging ~cjwatson/launchpad:gina-py2-rmtree into launchpad:master.

Commit message:
Fix removal of trees with non-UTF-8 file names

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/+git/launchpad/+merge/398971

The earlier merge proposal https://code.launchpad.net/~cjwatson/launchpad/+git/launchpad/+merge/398367 incidentally caused gina to pass `unicode` objects to `shutil.rmtree` on Python 2 when removing unpacked source trees.  Doing so triggers https://bugs.python.org/issue24672 if a source package contains non-UTF-8 file names.  Ensure that we pass a `str` object to `shutil.rmtree` to avoid this bug.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/launchpad:gina-py2-rmtree into launchpad:master.
diff --git a/lib/lp/services/tarfile_helpers.py b/lib/lp/services/tarfile_helpers.py
index 30f34d6..b10cf13 100644
--- a/lib/lp/services/tarfile_helpers.py
+++ b/lib/lp/services/tarfile_helpers.py
@@ -32,8 +32,8 @@ class LaunchpadWriteTarFile:
     This class makes it convenient to generate tar files in various ways.
     """
 
-    def __init__(self, stream):
-        self.tarfile = tarfile.open('', 'w:gz', stream)
+    def __init__(self, stream, encoding='UTF-8'):
+        self.tarfile = tarfile.open('', 'w:gz', stream, encoding=encoding)
         self.closed = False
 
     @classmethod
diff --git a/lib/lp/soyuz/scripts/gina/packages.py b/lib/lp/soyuz/scripts/gina/packages.py
index 861b81d..d659399 100644
--- a/lib/lp/soyuz/scripts/gina/packages.py
+++ b/lib/lp/soyuz/scripts/gina/packages.py
@@ -113,7 +113,9 @@ def unpack_dsc(package, version, component, distro_name, archive_root):
         extract_dpkg_source(dsc_path, ".", vendor=distro_name)
     except DpkgSourceError as e:
         if os.path.isdir(source_dir):
-            shutil.rmtree(source_dir)
+            # Coerce to str to avoid https://bugs.python.org/issue24672 on
+            # Python 2.
+            shutil.rmtree(six.ensure_str(source_dir))
         raise ExecutionError("Error %d unpacking source" % e.result)
 
     return source_dir, dsc_path
@@ -150,7 +152,9 @@ def read_dsc(package, version, component, distro_name, archive_root):
                 "No copyright file found for %s in %s" % (package, source_dir))
             copyright = b''
     finally:
-        shutil.rmtree(source_dir)
+        # Coerce to str to avoid https://bugs.python.org/issue24672 on
+        # Python 2.
+        shutil.rmtree(six.ensure_str(source_dir))
 
     return dsc, changelog, copyright
 
diff --git a/lib/lp/soyuz/scripts/tests/test_gina.py b/lib/lp/soyuz/scripts/tests/test_gina.py
index 0fbd765..f563c10 100644
--- a/lib/lp/soyuz/scripts/tests/test_gina.py
+++ b/lib/lp/soyuz/scripts/tests/test_gina.py
@@ -1,3 +1,7 @@
+# -*- coding: utf-8 -*-
+# NOTE: The first line above must stay first; do not move the copyright
+# notice to the top.  See http://www.python.org/dev/peps/pep-0263/.
+#
 # Copyright 2009-2020 Canonical Ltd.  This software is licensed under the
 # GNU Affero General Public License version 3 (see the file LICENSE).
 
@@ -241,8 +245,14 @@ class TestSourcePackageData(TestCaseWithFactory):
         # but not with DEB_VENDOR=ubuntu.
         with open(os.path.join(
             pool_dir, "foo_1.0.orig.tar.gz"), "wb+") as buffer:
-            orig_tar = LaunchpadWriteTarFile(buffer)
+            orig_tar = LaunchpadWriteTarFile(buffer, encoding="ISO-8859-1")
             orig_tar.add_directory("foo-1.0")
+            # Add a Unicode file name (which becomes non-UTF-8 due to
+            # encoding="ISO-8859-1" above) to ensure that shutil.rmtree is
+            # called in such a way as to cope with non-UTF-8 file names on
+            # Python 2.  See
+            # https://bugs.launchpad.net/launchpad/+bug/1917449.
+            orig_tar.add_file(u"íslenska.alias", b"Non-UTF-8 file name")
             orig_tar.close()
             buffer.seek(0)
             orig_tar_contents = buffer.read()
@@ -296,8 +306,14 @@ class TestSourcePackageData(TestCaseWithFactory):
 
         with open(os.path.join(
             pool_dir, "foo_1.0.orig.tar.gz"), "wb+") as buffer:
-            orig_tar = LaunchpadWriteTarFile(buffer)
+            orig_tar = LaunchpadWriteTarFile(buffer, encoding="ISO-8859-1")
             orig_tar.add_directory("foo-1.0")
+            # Add a Unicode file name (which becomes non-UTF-8 due to
+            # encoding="ISO-8859-1" above) to ensure that shutil.rmtree is
+            # called in such a way as to cope with non-UTF-8 file names on
+            # Python 2.  See
+            # https://bugs.launchpad.net/launchpad/+bug/1917449.
+            orig_tar.add_file(u"íslenska.alias", b"Non-UTF-8 file name")
             orig_tar.close()
             buffer.seek(0)
             orig_tar_contents = buffer.read()