launchpad-reviewers team mailing list archive

Thread
Date

[Merge] ~cjwatson/launchpad:py3-prf into launchpad:master

To: mp+388909@xxxxxxxxxxxxxxxxxx
From: Colin Watson <cjwatson@xxxxxxxxxxxxx>
Date: Fri, 07 Aug 2020 16:54:23 -0000
Reply-to: mp+388909@xxxxxxxxxxxxxxxxxx
Sender: bounces@xxxxxxxxxxxxx

Colin Watson has proposed merging ~cjwatson/launchpad:py3-prf into launchpad:master.

Commit message:
Port product release finder to Python 3

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/+git/launchpad/+merge/388909

This involves vendoring the bits of cscvs.dircompare.path that the product release finder uses.  They work fine on Python 3 with only minimal porting, but cscvs in general doesn't, and it's a strange thing for something outside codeimport to depend on.

Most of this could probably be replaced using pathlib, but one thing at a time.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/launchpad:py3-prf into launchpad:master.

diff --git a/lib/lp/registry/scripts/productreleasefinder/finder.py b/lib/lp/registry/scripts/productreleasefinder/finder.py
index 3fd49f0..a8fa035 100644
--- a/lib/lp/registry/scripts/productreleasefinder/finder.py
+++ b/lib/lp/registry/scripts/productreleasefinder/finder.py
@@ -14,7 +14,6 @@ import os
 import re
 import tempfile
 
-from cscvs.dircompare import path
 import pytz
 import requests
 from six.moves.urllib.parse import urlsplit
@@ -31,6 +30,7 @@ from lp.registry.model.productrelease import (
     ProductReleaseFile,
     )
 from lp.registry.model.productseries import ProductSeries
+from lp.registry.scripts.productreleasefinder import path
 from lp.registry.scripts.productreleasefinder.filter import FilterPattern
 from lp.registry.scripts.productreleasefinder.hose import Hose
 from lp.services.config import config
@@ -118,7 +118,7 @@ class ProductReleaseFinder:
     def getFilters(self):
         """Build the list of products and filters.
 
-        Returns a list of (product_name, filters) for each product in
+        Returns an iterator of (product_name, filters) for each product in
         the database, where the filter keys are series names.
         """
         found_globs = IStore(Product).find(
diff --git a/lib/lp/registry/scripts/productreleasefinder/path.py b/lib/lp/registry/scripts/productreleasefinder/path.py
new file mode 100644
index 0000000..c86a75e
--- /dev/null
+++ b/lib/lp/registry/scripts/productreleasefinder/path.py
@@ -0,0 +1,480 @@
+# Copyright 2004-2020 Canonical Ltd.  This software is licensed under the
+# GNU Affero General Public License version 3 (see the file LICENSE).
+
+"""Path handling.
+
+This module supplies useful functions for dealing with paths and
+extracting useful information out of them.  It was extracted from cscvs, and
+cut down to only what Launchpad's product release finder needs.
+"""
+
+from __future__ import absolute_import, print_function, unicode_literals
+
+import os
+import re
+import stat
+
+
+# Regular expressions make things easy
+patched_ext = re.compile(r'([._+-](orig|old|new|patch|patched)|~)$',
+                         re.IGNORECASE)
+version_ext = re.compile(r'[_-]v?([0-9][0-9a-z:.+]*'
+                         r'(?:[-_](?:pre|rc|alpha|beta|test)'
+                         r'(?:[0-9:.+][0-9a-z:.+]*|(?![a-z])))?)',
+                         re.IGNORECASE)
+
+
+class FileFormat:
+    """Known file formats.
+
+    Constants:
+      TAR       Tar file
+      PATCH     Patch file
+      ZIP       Zip file
+    """
+
+    TAR = "TAR"
+    PATCH = "PATCH"
+    ZIP = "ZIP"
+
+
+class Compression:
+    """Known compressions.
+
+    Constants:
+      GZIP      gzip
+      BZIP2     bzip2
+    """
+
+    GZIP = "gzip"
+    BZIP2 = "bzip2"
+    COMPRESS = "compress"
+
+
+class Extensions:
+    """Extensions that map to file formats and compressions.
+
+    Constants:
+      FORMAT    Extensions that suggest a particular file format.
+      COMPRESS  Extensions that suggest a particular compression.
+      BOTH      Extensions that suggest both.
+    """
+
+    FORMAT = {
+        ".tar":    (FileFormat.TAR,),
+        ".patch":  (FileFormat.PATCH,),
+        ".dpatch": (FileFormat.PATCH,),
+        ".diff":   (FileFormat.PATCH,),
+        ".zip":    (FileFormat.ZIP,),
+        ".jar":    (FileFormat.ZIP,),
+        }
+
+    COMPRESS = {
+        ".gz":  (Compression.GZIP,),
+        ".bz2": (Compression.BZIP2,),
+        ".Z":   (Compression.COMPRESS,),
+        }
+
+    BOTH = {
+        ".tgz":  (FileFormat.TAR, Compression.GZIP),
+        ".tbz":  (FileFormat.TAR, Compression.BZIP2),
+        ".tbz2": (FileFormat.TAR, Compression.BZIP2),
+        }
+
+
+class Filenames:
+    """File names that may be special.
+
+    Constants:
+      IGNORE    Filenames that suggest we ignore the path.
+    """
+
+    IGNORE = [".arch-inventory", ".cvsignore", ".bzrignore"]
+
+
+class Directories:
+    """Directory names that suggest formats for their contents.
+
+    Constants:
+      FORMAT    Directory names that suggest a particular file format.
+      IGNORE    Directory names that suggest we ignore the path.
+    """
+
+    FORMAT = {
+        "tarballs": FileFormat.TAR,
+        "tarfiles": FileFormat.TAR,
+        "patches":  FileFormat.PATCH,
+        }
+
+    IGNORE = ["{arch}", ".arch-ids", "CVS", "RCS", ".svn", "_darcs", ".bzr"]
+
+
+class PathBase(object):
+    """Cache path information.
+
+    This class provides functionality for representing a path on a
+    filesystem.  It can be combined with any sub-class of the built-in str
+    type.
+
+    Properties are available which hold cached information about the file,
+    to reduce stat calls and improve performance.  They are properties
+    rather than functions to indicate their cached status.
+
+    You almost certainly want to use Path rather than PathBase unless you
+    are sub-classing from multiple str-derived base-classes.  PathBase
+    _must_ be mixed with a class that derives from str.
+    """
+
+    def __new__(cls, path=".", *args, **kwds):
+        if path.__class__ == cls:
+            # Identical class means we behave like a singleton
+            return path
+        elif isinstance(path, PathBase):
+            # Otherwise instance of PathBase or sub-class means we avoid
+            # repeated calls of canon()
+            return str.__new__(cls, path)
+        else:
+            return str.__new__(cls, canon(path))
+
+    def __repr__(self):
+        """Return a debugging representation of the manifest."""
+        text = "<%s %r>" % (type(self).__name__, str(self))
+        return text
+
+    def __eq__(self, other):
+        """Compare to another Path or string."""
+        return as_file(str(self)) == as_file(str(other))
+
+    @property
+    def basename(self):
+        """Cache and return filename portion of path."""
+        try:
+            return self._basename
+        except AttributeError:
+            basename = os.path.basename(as_file(self))
+            if self.isdir:
+                self._basename = as_dir(basename)
+            else:
+                self._basename = basename
+            return self._basename
+
+    @property
+    def dirname(self):
+        """Cache and return directory portion of path."""
+        try:
+            return self._dirname
+        except AttributeError:
+            path = as_file(self)
+            if path != "":
+                self._dirname = CanonPath(os.path.dirname(path))
+            else:
+                self._dirname = None
+            return self._dirname
+
+    def stat(self):
+        """Cache and return stat results for path.
+
+        If the path does not exist, None is returned.
+        """
+        try:
+            return self._stat
+        except AttributeError:
+            try:
+                self._stat = os.lstat(self)
+            except OSError:
+                self._stat = None
+            return self._stat
+
+    def _del_stat(self):
+        """Clear the cached stat results."""
+        try:
+            del self._stat
+        except AttributeError:
+            pass
+    stat = property(stat, fdel=_del_stat)
+
+    @property
+    def exists(self):
+        """Return whether path exists.
+
+        Symbolic links are not followed.
+        """
+        return self.stat is not None
+
+    @property
+    def isdir(self):
+        """Return whether path exists and is a directory."""
+        try:
+            return stat.S_ISDIR(self.stat.st_mode)
+        except AttributeError:
+            return False
+
+    @property
+    def isfile(self):
+        """Return whether path exists and is an ordinary file."""
+        try:
+            return stat.S_ISREG(self.stat.st_mode)
+        except AttributeError:
+            return False
+
+    @property
+    def islink(self):
+        """Return whether path exists and is a symbolic link."""
+        try:
+            return stat.S_ISLNK(self.stat.st_mode)
+        except AttributeError:
+            return False
+
+    @property
+    def size(self):
+        """Return size of file."""
+        try:
+            if self.isfile:
+                return self.stat.st_size
+            else:
+                return None
+        except AttributeError:
+            return None
+
+    @property
+    def mode(self):
+        """Return permissions of path."""
+        try:
+            return stat.S_IMODE(self.stat.st_mode)
+        except AttributeError:
+            return None
+
+    @property
+    def mtime(self):
+        """Return modification time of path."""
+        try:
+            return self.stat.st_mtime
+        except AttributeError:
+            return None
+
+    def join(self, *args):
+        """Join path elements."""
+        return Path(os.path.join(self, *args))
+
+    def splitpath(self):
+        """Return dirname and basename together."""
+        return (self.dirname, self.basename)
+
+    def parents(self):
+        """Iterate over the parents of the path.
+
+        Generator that yields (dirname, basename) for each directory
+        above the path.
+        """
+        (dirname, basename) = self.splitpath()
+        while dirname is not None:
+            yield (dirname, basename)
+
+            basename = os.path.join(dirname.basename, basename)
+            dirname = dirname.dirname
+
+
+class Path(PathBase, str):
+    """Path with cached information.
+
+    This object represents a path on the filesystem.  The class
+    is derived from the built-in str type to allow it to be used
+    naturally.
+
+    Additional properties are available which hold cached information
+    about the file, to reduce stat calls and improve performance.  They
+    are properties rather than functions to indicate their cached status.
+    """
+
+
+class CanonPath(Path):
+    """Canonical path with cached information.
+
+    This object is identical to Path except that it performs no
+    canonicalisation of the path given to it, assuming (and requiring) that
+    you have already done so.
+    """
+
+    def __new__(cls, path=".", *args, **kwds):
+        if path.__class__ == cls:
+            # Identical class means we behave like a singleton
+            return path
+        else:
+            return str.__new__(cls, path)
+
+
+def ignored_part(path):
+    """Return whether the path contains an ignored part."""
+    path_parts = parts(path)
+    for part in path_parts:
+        # Rely on anything calling us filtering out ,, and ++ directories
+        # because we use those ourselves quite a lot.  walk() for example
+        # never descends into them if caught by the block further below.
+        if part in Directories.IGNORE:
+            return True
+    else:
+        # path may end in /, we don't want to test the bit before that
+        # against Filenames.IGNORE but _do_ want to test it against
+        # ,, and ++ so this is why the following code is different.
+        if os.path.basename(path) in Filenames.IGNORE:
+            return True
+        elif path_parts[-1].startswith(",,"):
+            return True
+        elif path_parts[-1].startswith("++"):
+            return True
+        else:
+            return False
+
+
+def format_part(path):
+    """Return whether the path contains a part that suggests a format."""
+    for part in reversed(parts(path)):
+        if part in Directories.FORMAT:
+            return Directories.FORMAT[part]
+    else:
+        return None
+
+
+def match_ext(name, extensions):
+    """Match a filename against the list of extensions.
+
+    If a match is found it returns a tuple of the name with the matching
+    extension stripped, the matching extension and information about the
+    extension appended.
+
+    If no match is found it returns None.
+    """
+    for ext in sorted(extensions, key=len, reverse=True):
+        if name.endswith(ext):
+            return (name[:-len(ext)], ext) + extensions[ext]
+    else:
+        return None
+
+
+def split_path(path):
+    """Split path into pieces and extract information from it.
+
+    Returns a tuple of (dirname, name, ext, format, compress).
+    """
+    dirname = os.path.dirname(path)
+    name = os.path.basename(path)
+    path_ext = ""
+    path_format = None
+    path_compress = None
+
+    # Check for version-control leaking
+    if ignored_part(dirname):
+        return (dirname, name, path_ext, path_format, path_compress)
+
+    # Check combined extensions
+    info = match_ext(name, Extensions.BOTH)
+    if info is not None:
+        (name, ext, path_format, path_compress) = info
+        path_ext = ext + path_ext
+
+    # Check compression extensions
+    if path_compress is None:
+        info = match_ext(name, Extensions.COMPRESS)
+        if info is not None:
+            (name, ext, path_compress) = info
+            path_ext = ext + path_ext
+
+    # Check format extensions
+    if path_format is None:
+        info = match_ext(name, Extensions.FORMAT)
+        if info is not None:
+            (name, ext, path_format) = info
+            path_ext = ext + path_ext
+
+    # Check format directory names
+    if path_format is None:
+        path_format = format_part(dirname)
+
+    return (dirname, name, path_ext, path_format, path_compress)
+
+
+def name(path):
+    """Return the name prefix extracted from the path."""
+    return split_path(path)[1]
+
+
+def split_version(name):
+    """Extract the version from the filename.
+
+    Returns a tuple of (name, version), where version is None if not found.
+    """
+    match = version_ext.search(name)
+    if match is not None:
+        split = match.start()
+        return (name[:split], name[split + 1:])
+    else:
+        return (name, None)
+
+
+def as_dir(path):
+    """Return the path with a trailing slash."""
+    if path.endswith("/"):
+        return path
+    elif len(path):
+        return path + "/"
+    else:
+        return ""
+
+
+def as_file(path):
+    """Return the path without a trailing slash."""
+    while path[-1:] == "/":
+        path = path[:-1]
+    return path
+
+
+def relative(path):
+    """Return the path without a leading slash."""
+    while path[:1] == "/":
+        path = path[1:]
+    return path
+
+
+def parts(path):
+    """Return the parts of the path."""
+    return relative(as_file(path)).split("/")
+
+
+def under(root, path):
+    """Return whether a path is underneath a given root."""
+    if as_dir(root) == as_dir(path):
+        return True
+    elif path.startswith(as_dir(root)):
+        return True
+    else:
+        return False
+
+
+def subdir(root, path):
+    """Return path relative to root."""
+    if not under(root, path):
+        raise ValueError("path must start with root")
+
+    return relative(path[len(root):])
+
+
+def canon(path):
+    """Canonicalise the path.
+
+    The return path is normalised, absolute and has any symlinks within
+    it expanded.
+    """
+    (path, base) = os.path.split(os.path.abspath(path))
+    while path != "/":
+        if os.path.islink(path):
+            path = os.path.normpath(os.path.join(os.path.dirname(path),
+                                                 os.readlink(path)))
+        else:
+            base = os.path.join(os.path.basename(path), base)
+            path = os.path.dirname(path)
+
+    path = os.path.join(path, base)
+    if os.path.isdir(path) and not os.path.islink(path):
+        return as_dir(path)
+    else:
+        return path
diff --git a/lib/lp/registry/scripts/productreleasefinder/walker.py b/lib/lp/registry/scripts/productreleasefinder/walker.py
index 4865a46..6dff7de 100644
--- a/lib/lp/registry/scripts/productreleasefinder/walker.py
+++ b/lib/lp/registry/scripts/productreleasefinder/walker.py
@@ -15,10 +15,6 @@ __all__ = [
 import ftplib
 import socket
 
-from cscvs.dircompare.path import (
-    as_dir,
-    subdir,
-    )
 from lazr.uri import (
     InvalidURIError,
     URI,
@@ -32,6 +28,10 @@ from six.moves.urllib.parse import (
     )
 
 from lp.registry.scripts.productreleasefinder import log
+from lp.registry.scripts.productreleasefinder.path import (
+    as_dir,
+    subdir,
+    )
 from lp.services.beautifulsoup import BeautifulSoup
 from lp.services.config import config
 from lp.services.timeout import (
diff --git a/lib/lp/registry/tests/test_prf_finder.py b/lib/lp/registry/tests/test_prf_finder.py
index f37cb1a..bbabb95 100644
--- a/lib/lp/registry/tests/test_prf_finder.py
+++ b/lib/lp/registry/tests/test_prf_finder.py
@@ -140,7 +140,7 @@ class GetFiltersTestCase(TestCaseWithFactory):
         ztm.commit()
         logging.basicConfig(level=logging.CRITICAL)
         prf = ProductReleaseFinder(ztm, logging.getLogger())
-        product_filters = prf.getFilters()
+        product_filters = list(prf.getFilters())
         self.assertEqual(1, len(product_filters))
         found_product, filters = product_filters[0]
         self.assertEqual('bunny', found_product)
diff --git a/lib/lp/registry/tests/test_prf_hose.py b/lib/lp/registry/tests/test_prf_hose.py
index e037359..600d11e 100644
--- a/lib/lp/registry/tests/test_prf_hose.py
+++ b/lib/lp/registry/tests/test_prf_hose.py
@@ -165,7 +165,7 @@ class Hose_LimitWalk(unittest.TestCase):
                             'foo/1.5/source/foo-1.5.tar.gz',
                             'foo/2.0/source/foo-2.0.tar.gz']:
             fp = open(os.path.join(self.release_root, releasefile), 'wb')
-            fp.write('data')
+            fp.write(b'data')
             fp.close()
 
         # Run the hose over the test data
diff --git a/lib/lp/registry/tests/test_prf_walker.py b/lib/lp/registry/tests/test_prf_walker.py
index 547d906..9a1f32d 100644
--- a/lib/lp/registry/tests/test_prf_walker.py
+++ b/lib/lp/registry/tests/test_prf_walker.py
@@ -4,9 +4,9 @@
 """Tests for lp.registry.scripts.productreleasefinder.walker."""
 
 import logging
-import StringIO
 
 import responses
+import six
 
 from lp.registry.scripts.productreleasefinder.walker import (
     combine_url,
@@ -164,7 +164,7 @@ class WalkerBase_walk(TestCase):
             def close(self):
                 pass
 
-        log_output = StringIO.StringIO()
+        log_output = six.StringIO()
         logger = logging.getLogger()
         self.addCleanup(logger.setLevel, logger.level)
         logger.setLevel(logging.DEBUG)
@@ -189,7 +189,7 @@ class WalkerBase_walk(TestCase):
             def close(self):
                 pass
 
-        log_output = StringIO.StringIO()
+        log_output = six.StringIO()
         logger = logging.getLogger()
         self.addCleanup(logger.setLevel, logger.level)
         logger.setLevel(logging.DEBUG)
diff --git a/lib/lp/testing/factory.py b/lib/lp/testing/factory.py
index 66fa9b3..227bb13 100644
--- a/lib/lp/testing/factory.py
+++ b/lib/lp/testing/factory.py
@@ -974,7 +974,7 @@ class BareLaunchpadObjectFactory(ObjectFactory):
         signature_content = None
         if signed:
             signature_filename = '%s.asc' % filename
-            signature_content = '123'
+            signature_content = b'123'
         if release is None:
             release = self.makeProductRelease(product=product,
                                               productseries=productseries,