launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #28215
[Merge] ~cjwatson/launchpad:diskpool-pathlib into launchpad:master
Colin Watson has proposed merging ~cjwatson/launchpad:diskpool-pathlib into launchpad:master.
Commit message:
Refactor lp.archivepublisher.diskpool using pathlib
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/+git/launchpad/+merge/416755
The Artifactory bindings are based on `pathlib`, and so it makes it a little easier for my forthcoming Artifactory publication work to have a more `pathlib`-style implementation of `DiskPool` and `DiskPoolEntry`.
I don't think it makes sense to go all-in on `pathlib` while we're stuck on Python 3.5, because Python 3.6 improved much of the standard library to accept path-like objects as paths rather than just (byte) strings, and without that we have to add some awkward stringifications in several places. However, at this scale it doesn't work out too badly.
I also added a few type annotations, mainly to serve as documentation for the types I was changing. These are the first type annotations in Launchpad, and I haven't even tried to run `mypy` to check them yet. Similarly to the above, I don't think it makes sense to go all-in on type annotations until we're on at least Python 3.6 and can use variable annotations, but I saw no reason not to at least add these.
--
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/launchpad:diskpool-pathlib into launchpad:master.
diff --git a/lib/lp/archivepublisher/deathrow.py b/lib/lp/archivepublisher/deathrow.py
index 87722a3..6e56ca3 100644
--- a/lib/lp/archivepublisher/deathrow.py
+++ b/lib/lp/archivepublisher/deathrow.py
@@ -7,7 +7,6 @@ Processes removals of packages that are scheduled for deletion.
import datetime
import logging
-import os
import pytz
from storm.expr import Exists
@@ -103,9 +102,9 @@ class DeathRow:
self.logger.debug("(Not really!) removing %s %s/%s" %
(cn, sn, fn))
fullpath = self.diskpool.pathFor(cn, sn, fn)
- if not os.path.exists(fullpath):
+ if not fullpath.exists():
raise NotInPool
- return os.lstat(fullpath).st_size
+ return fullpath.lstat().st_size
self._removeFile = _mockRemoveFile
source_files, binary_files = self._collectCondemned()
@@ -243,7 +242,7 @@ class DeathRow:
pub_record.source_package_name,
pub_record.component_name,
)
- file_path = self.diskpool.pathFor(*pub_file_details)
+ file_path = str(self.diskpool.pathFor(*pub_file_details))
# Check if the LibraryFileAlias in question was already
# verified. If the verification was already made and the
diff --git a/lib/lp/archivepublisher/diskpool.py b/lib/lp/archivepublisher/diskpool.py
index 528da2a..68cb38b 100644
--- a/lib/lp/archivepublisher/diskpool.py
+++ b/lib/lp/archivepublisher/diskpool.py
@@ -4,7 +4,12 @@
__all__ = ['DiskPoolEntry', 'DiskPool', 'poolify', 'unpoolify']
import os
+from pathlib import Path
import tempfile
+from typing import (
+ Optional,
+ Tuple,
+ )
from lp.archivepublisher import HARDCODED_COMPONENT_ORDER
from lp.services.librarian.utils import (
@@ -19,20 +24,20 @@ from lp.soyuz.interfaces.publishing import (
)
-def poolify(source, component):
+def poolify(source, component) -> Path:
"""Poolify a given source and component name."""
if source.startswith("lib"):
- return os.path.join(component, source[:4], source)
+ return Path(component) / source[:4] / source
else:
- return os.path.join(component, source[:1], source)
+ return Path(component) / source[:1] / source
-def unpoolify(self, path):
+def unpoolify(self, path: Path) -> Tuple[str, str, Optional[str]]:
"""Take a path and unpoolify it.
Return a tuple of component, source, filename
"""
- p = path.split("/")
+ p = path.parts
if len(p) < 3 or len(p) > 4:
raise ValueError("Path %s is not in a valid pool form" % path)
if len(p) == 4:
@@ -40,22 +45,16 @@ def unpoolify(self, path):
return p[0], p[2], None
-def relative_symlink(src_path, dst_path):
- """os.symlink replacement that creates relative symbolic links."""
- path_sep = os.path.sep
- src_path = os.path.normpath(src_path)
- dst_path = os.path.normpath(dst_path)
- src_path_elems = src_path.split(path_sep)
- dst_path_elems = dst_path.split(path_sep)
- if os.path.isabs(src_path):
- if not os.path.isabs(dst_path):
- dst_path = os.path.abspath(dst_path)
- common_prefix = os.path.commonprefix([src_path_elems, dst_path_elems])
- backward_elems = ['..'] * (
- len(dst_path_elems) - len(common_prefix) - 1)
- forward_elems = src_path_elems[len(common_prefix):]
- src_path = path_sep.join(backward_elems + forward_elems)
- os.symlink(src_path, dst_path)
+def relative_symlink(src_path: Path, dst_path: Path):
+ """Path.symlink_to replacement that creates relative symbolic links."""
+ src_path = Path(os.path.normpath(str(src_path)))
+ dst_path = Path(os.path.normpath(str(dst_path)))
+ common_prefix = Path(os.path.commonpath([str(src_path), str(dst_path)]))
+ backward_elems = [os.path.pardir] * (
+ len(dst_path.parts) - len(common_prefix.parts) - 1)
+ forward_elems = src_path.parts[len(common_prefix.parts):]
+ src_path = Path(*backward_elems, *forward_elems)
+ dst_path.symlink_to(src_path)
class FileAddActionEnum:
@@ -86,7 +85,7 @@ class _diskpool_atomicfile:
the filename is present in the pool, it is definitely complete.
"""
- def __init__(self, targetfilename, mode, rootpath="/tmp"):
+ def __init__(self, targetfilename: Path, mode, rootpath="/tmp"):
# atomicfile implements the file object interface, but it is only
# really used (or useful) for writing binary files, which is why we
# keep the mode constructor argument but assert it's sane below.
@@ -94,21 +93,21 @@ class _diskpool_atomicfile:
mode = "wb"
assert mode == "wb"
- assert not os.path.exists(targetfilename)
+ assert not targetfilename.exists()
self.targetfilename = targetfilename
- fd, name = tempfile.mkstemp(prefix="temp-download.", dir=rootpath)
+ fd, name = tempfile.mkstemp(prefix="temp-download.", dir=str(rootpath))
self.fd = os.fdopen(fd, mode)
- self.tempname = name
+ self.tempname = Path(name)
self.write = self.fd.write
def close(self):
"""Make the atomic move into place having closed the temp file."""
self.fd.close()
- os.chmod(self.tempname, 0o644)
+ self.tempname.chmod(0o644)
# Note that this will fail if the target and the temp dirs are on
# different filesystems.
- os.rename(self.tempname, self.targetfilename)
+ self.tempname.rename(self.targetfilename)
class DiskPoolEntry:
@@ -125,7 +124,8 @@ class DiskPoolEntry:
Remaining files in the 'temppath' indicated installation failures and
require manual removal after further investigation.
"""
- def __init__(self, rootpath, temppath, source, filename, logger):
+ def __init__(self, rootpath: Path, temppath: Path, source, filename,
+ logger):
self.rootpath = rootpath
self.temppath = temppath
self.source = source
@@ -137,9 +137,9 @@ class DiskPoolEntry:
for component in HARDCODED_COMPONENT_ORDER:
path = self.pathFor(component)
- if os.path.islink(path):
+ if path.is_symlink():
self.symlink_components.add(component)
- elif os.path.isfile(path):
+ elif path.is_file():
assert not self.file_component
self.file_component = component
if self.symlink_components:
@@ -148,11 +148,9 @@ class DiskPoolEntry:
def debug(self, *args, **kwargs):
self.logger.debug(*args, **kwargs)
- def pathFor(self, component):
+ def pathFor(self, component) -> Path:
"""Return the path for this file in the given component."""
- return os.path.join(self.rootpath,
- poolify(self.source, component),
- self.filename)
+ return self.rootpath / poolify(self.source, component) / self.filename
def preferredComponent(self, add=None, remove=None):
"""Return the appropriate component for the real file.
@@ -179,15 +177,14 @@ class DiskPoolEntry:
def file_hash(self):
"""Return the SHA1 sum of this file."""
targetpath = self.pathFor(self.file_component)
- return sha1_from_path(targetpath)
+ return sha1_from_path(str(targetpath))
def addFile(self, component, sha1, contents):
"""See DiskPool.addFile."""
assert component in HARDCODED_COMPONENT_ORDER
targetpath = self.pathFor(component)
- if not os.path.exists(os.path.dirname(targetpath)):
- os.makedirs(os.path.dirname(targetpath))
+ targetpath.parent.mkdir(parents=True, exist_ok=True)
if self.file_component:
# There's something on disk. Check hash.
@@ -212,7 +209,7 @@ class DiskPoolEntry:
return FileAddActionEnum.SYMLINK_ADDED
# If we get to here, we want to write the file.
- assert not os.path.exists(targetpath)
+ assert not targetpath.exists()
self.debug("Making new file in %s for %s/%s" %
(component, self.source, self.filename))
@@ -248,7 +245,7 @@ class DiskPoolEntry:
# ensure we are removing a symbolic link and
# it is published in one or more components
link_path = self.pathFor(component)
- assert os.path.islink(link_path)
+ assert link_path.is_symlink()
return self._reallyRemove(component)
if component != self.file_component:
@@ -278,7 +275,7 @@ class DiskPoolEntry:
structures.
"""
fullpath = self.pathFor(component)
- assert os.path.exists(fullpath)
+ assert fullpath.exists()
if component == self.file_component:
# Deleting the master file is only allowed if there
@@ -288,8 +285,8 @@ class DiskPoolEntry:
elif component in self.symlink_components:
self.symlink_components.remove(component)
- size = os.lstat(fullpath).st_size
- os.remove(fullpath)
+ size = fullpath.lstat().st_size
+ fullpath.unlink()
return size
def _shufflesymlinks(self, targetcomponent):
@@ -309,7 +306,7 @@ class DiskPoolEntry:
# Okay, so first up, we unlink the targetcomponent symlink.
targetpath = self.pathFor(targetcomponent)
- os.remove(targetpath)
+ targetpath.unlink()
# Now we rename the source file into the target component.
sourcepath = self.pathFor(self.file_component)
@@ -323,9 +320,9 @@ class DiskPoolEntry:
# ensure targetpath doesn't exists and the sourcepath exists
# before rename them.
- assert not os.path.exists(targetpath)
- assert os.path.exists(sourcepath)
- os.rename(sourcepath, targetpath)
+ assert not targetpath.exists()
+ assert sourcepath.exists()
+ sourcepath.rename(targetpath)
# XXX cprov 2006-06-12: it may cause problems to the database, since
# ZTM isn't handled properly in scripts/publish-distro.py. Things are
@@ -340,7 +337,7 @@ class DiskPoolEntry:
for comp in self.symlink_components:
newpath = self.pathFor(comp)
try:
- os.remove(newpath)
+ newpath.unlink()
except OSError:
# Do nothing because it's almost certainly a not found.
pass
@@ -376,14 +373,8 @@ class DiskPool:
results = FileAddActionEnum
def __init__(self, rootpath, temppath, logger):
- self.rootpath = rootpath
- if not rootpath.endswith("/"):
- self.rootpath += "/"
-
- self.temppath = temppath
- if not temppath.endswith("/"):
- self.temppath += "/"
-
+ self.rootpath = Path(rootpath)
+ self.temppath = Path(temppath)
self.entries = {}
self.logger = logger
@@ -392,7 +383,7 @@ class DiskPool:
return DiskPoolEntry(
self.rootpath, self.temppath, sourcename, file, self.logger)
- def pathFor(self, comp, source, file=None):
+ def pathFor(self, comp, source, file=None) -> Path:
"""Return the path for the given pool folder or file.
If file is none, the path to the folder containing all packages
@@ -401,10 +392,9 @@ class DiskPool:
If file is specified, the path to the specific package file will
be returned.
"""
- path = os.path.join(
- self.rootpath, poolify(source, comp))
+ path = self.rootpath / poolify(source, comp)
if file:
- return os.path.join(path, file)
+ path = path / file
return path
def addFile(self, component, sourcename, filename, sha1, contents):
diff --git a/lib/lp/archivepublisher/model/ftparchive.py b/lib/lp/archivepublisher/model/ftparchive.py
index f5bfe02..1a5d8af 100644
--- a/lib/lp/archivepublisher/model/ftparchive.py
+++ b/lib/lp/archivepublisher/model/ftparchive.py
@@ -693,8 +693,8 @@ class FTPArchiveHandler:
def updateFileList(sourcepackagename, filename, component,
architecturetag=None):
- ondiskname = self._diskpool.pathFor(
- component, sourcepackagename, filename)
+ ondiskname = str(
+ self._diskpool.pathFor(component, sourcepackagename, filename))
if architecturetag is None:
architecturetag = "source"
filelist[component][architecturetag].append(ondiskname)
diff --git a/lib/lp/archivepublisher/tests/deathrow.txt b/lib/lp/archivepublisher/tests/deathrow.txt
index 4d77654..c46474d 100644
--- a/lib/lp/archivepublisher/tests/deathrow.txt
+++ b/lib/lp/archivepublisher/tests/deathrow.txt
@@ -211,18 +211,17 @@ Publish files on disk and build a list of all created file paths
... unique_file_paths.add(file_path)
... pub.publish(quiet_disk_pool, BufferLogger())
- >>> all_test_file_paths = sorted(unique_file_paths)
+ >>> all_test_file_paths = sorted(unique_file_paths, key=str)
Create a helper function to check if the publication files exist in
the temporary repository.
- >>> import os
>>> def check_pool_files():
... for file_path in all_test_file_paths:
- ... if os.path.exists(file_path):
- ... print('%s: OK' % os.path.basename(file_path))
+ ... if file_path.exists():
+ ... print('%s: OK' % file_path.name)
... else:
- ... print('%s: REMOVED' % os.path.basename(file_path))
+ ... print('%s: REMOVED' % file_path.name)
>>> check_pool_files()
deleted-bin_666_i386.deb: OK
diff --git a/lib/lp/archivepublisher/tests/test_deathrow.py b/lib/lp/archivepublisher/tests/test_deathrow.py
index c493805..02819c6 100644
--- a/lib/lp/archivepublisher/tests/test_deathrow.py
+++ b/lib/lp/archivepublisher/tests/test_deathrow.py
@@ -3,7 +3,7 @@
"""Tests for deathrow class."""
-import os
+from pathlib import Path
import shutil
import tempfile
@@ -55,29 +55,29 @@ class TestDeathRow(TestCase):
pub.source_package_name,
pub_file.libraryfile.filename)
- def assertIsFile(self, path):
+ def assertIsFile(self, path: Path):
"""Assert the path exists and is a regular file."""
self.assertTrue(
- os.path.exists(path),
- "File %s does not exist" % os.path.basename(path))
+ path.exists(),
+ "File %s does not exist" % path.name)
self.assertFalse(
- os.path.islink(path),
- "File %s is a symbolic link" % os.path.basename(path))
+ path.is_symlink(),
+ "File %s is a symbolic link" % path.name)
- def assertIsLink(self, path):
+ def assertIsLink(self, path: Path):
"""Assert the path exists and is a symbolic link."""
self.assertTrue(
- os.path.exists(path),
- "File %s does not exist" % os.path.basename(path))
+ path.exists(),
+ "File %s does not exist" % path.name)
self.assertTrue(
- os.path.islink(path),
- "File %s is a not symbolic link" % os.path.basename(path))
+ path.is_symlink(),
+ "File %s is a not symbolic link" % path.name)
- def assertDoesNotExist(self, path):
+ def assertDoesNotExist(self, path: Path):
"""Assert the path does not exit."""
self.assertFalse(
- os.path.exists(path),
- "File %s exists" % os.path.basename(path))
+ path.exists(),
+ "File %s exists" % path.name)
def test_MissingSymLinkInPool(self):
# When a publication is promoted from 'universe' to 'main' and
@@ -118,7 +118,7 @@ class TestDeathRow(TestCase):
self.assertIsLink(universe_dsc_path)
# Remove the symbolic link to emulate MissingSymlinkInPool scenario.
- os.remove(universe_dsc_path)
+ universe_dsc_path.unlink()
# Remove the testing publications.
for pub in test_publications:
diff --git a/lib/lp/archivepublisher/tests/test_ftparchive.py b/lib/lp/archivepublisher/tests/test_ftparchive.py
index 5a1219b..efe3e63 100755
--- a/lib/lp/archivepublisher/tests/test_ftparchive.py
+++ b/lib/lp/archivepublisher/tests/test_ftparchive.py
@@ -7,6 +7,7 @@ import difflib
from functools import partial
import gzip
import os
+from pathlib import Path
import re
import shutil
from textwrap import dedent
@@ -149,16 +150,11 @@ class TestFTPArchive(TestCaseWithFactory):
samplename=None):
"""Create a repository file."""
fullpath = self._dp.pathFor(component, sourcename, leafname)
- dirname = os.path.dirname(fullpath)
- if not os.path.exists(dirname):
- os.makedirs(dirname)
+ fullpath.parent.mkdir(parents=True, exist_ok=True)
if samplename is None:
samplename = leafname
- leaf = os.path.join(self._sampledir, samplename)
- with open(leaf, "rb") as leaf_file:
- leafcontent = leaf_file.read()
- with open(fullpath, "wb") as f:
- f.write(leafcontent)
+ leaf = Path(self._sampledir) / samplename
+ fullpath.write_bytes(leaf.read_bytes())
def _setUpFTPArchiveHandler(self):
return FTPArchiveHandler(
@@ -792,8 +788,8 @@ class TestFTPArchive(TestCaseWithFactory):
# Remove most of this repository's files so that cleanCaches has
# something to do.
for i in range(49):
- os.unlink(
- self._dp.pathFor("main", "bin%d" % i, "bin%d_1_i386.deb" % i))
+ self._dp.pathFor(
+ "main", "bin%d" % i, "bin%d_1_i386.deb" % i).unlink()
cache_path = os.path.join(self._config.cacheroot, "packages-i386.db")
old_cache_size = os.stat(cache_path).st_size
diff --git a/lib/lp/archivepublisher/tests/test_pool.py b/lib/lp/archivepublisher/tests/test_pool.py
index 1387697..a164f8a 100644
--- a/lib/lp/archivepublisher/tests/test_pool.py
+++ b/lib/lp/archivepublisher/tests/test_pool.py
@@ -4,7 +4,7 @@
"""Tests for pool.py."""
import hashlib
-import os
+from pathlib import Path
import shutil
from tempfile import mkdtemp
import unittest
@@ -52,11 +52,11 @@ class PoolTestingFile:
def checkExists(self, component):
path = self.pool.pathFor(component, self.sourcename, self.filename)
- return os.path.exists(path)
+ return path.exists()
def checkIsLink(self, component):
path = self.pool.pathFor(component, self.sourcename, self.filename)
- return os.path.islink(path)
+ return path.is_symlink()
def checkIsFile(self, component):
return self.checkExists(component) and not self.checkIsLink(component)
@@ -67,9 +67,9 @@ class TestPoolification(unittest.TestCase):
def testPoolificationOkay(self):
"""poolify should poolify properly"""
cases = (
- ("foo", "main", "main/f/foo"),
- ("foo", "universe", "universe/f/foo"),
- ("libfoo", "main", "main/libf/libfoo"),
+ ("foo", "main", Path("main/f/foo")),
+ ("foo", "universe", Path("universe/f/foo")),
+ ("libfoo", "main", Path("main/libf/libfoo")),
)
for case in cases:
self.assertEqual(case[2], poolify(case[0], case[1]))
diff --git a/lib/lp/registry/model/distributionmirror.py b/lib/lp/registry/model/distributionmirror.py
index 2454187..37b912b 100644
--- a/lib/lp/registry/model/distributionmirror.py
+++ b/lib/lp/registry/model/distributionmirror.py
@@ -876,7 +876,7 @@ class MirrorDistroArchSeries(SQLBase, _MirrorSeriesMixIn):
"""
bpr = publishing_record.binarypackagerelease
base_url = self.distribution_mirror.base_url
- path = poolify(bpr.sourcepackagename, self.component.name)
+ path = poolify(bpr.sourcepackagename, self.component.name).as_posix()
file = BinaryPackageFile.selectOneBy(
binarypackagerelease=bpr, filetype=BinaryPackageFileType.DEB)
full_path = 'pool/%s/%s' % (path, file.libraryfile.filename)
diff --git a/lib/lp/soyuz/model/publishing.py b/lib/lp/soyuz/model/publishing.py
index 421e649..dbcdd80 100644
--- a/lib/lp/soyuz/model/publishing.py
+++ b/lib/lp/soyuz/model/publishing.py
@@ -16,7 +16,7 @@ from operator import (
attrgetter,
itemgetter,
)
-import os
+from pathlib import Path
import sys
import pytz
@@ -119,7 +119,7 @@ def makePoolPath(source_name, component_name):
# XXX cprov 2006-08-18: move it away, perhaps archivepublisher/pool.py
"""Return the pool path for a given source name and component name."""
from lp.archivepublisher.diskpool import poolify
- return os.path.join('pool', poolify(source_name, component_name))
+ return str(Path('pool') / poolify(source_name, component_name))
def get_component(archive, distroseries, component):
diff --git a/lib/lp/soyuz/scripts/gina/handlers.py b/lib/lp/soyuz/scripts/gina/handlers.py
index 4a17e60..380637b 100644
--- a/lib/lp/soyuz/scripts/gina/handlers.py
+++ b/lib/lp/soyuz/scripts/gina/handlers.py
@@ -20,6 +20,7 @@ __all__ = [
import io
import os
+from pathlib import Path
import re
from storm.exceptions import NotOneError
@@ -507,8 +508,8 @@ class SourcePackageHandler:
# Since the dsc doesn't know, we add in the directory, package
# component and section
- dsc_contents['directory'] = os.path.join("pool",
- poolify(sp_name, sp_component)).encode("ASCII")
+ dsc_contents['directory'] = bytes(
+ Path('pool') / poolify(sp_name, sp_component))
dsc_contents['package'] = sp_name.encode("ASCII")
dsc_contents['component'] = sp_component.encode("ASCII")
dsc_contents['section'] = sp_section.encode("ASCII")
diff --git a/lib/lp/soyuz/scripts/gina/packages.py b/lib/lp/soyuz/scripts/gina/packages.py
index 15b4a45..9bda8ae 100644
--- a/lib/lp/soyuz/scripts/gina/packages.py
+++ b/lib/lp/soyuz/scripts/gina/packages.py
@@ -82,7 +82,7 @@ def get_dsc_path(name, version, component, archive_root):
# We do a first attempt using the obvious directory name, composed
# with the component. However, this may fail if a binary is being
# published in another component.
- pool_dir = poolify(name, component)
+ pool_dir = str(poolify(name, component))
fullpath = os.path.join(pool_root, pool_dir, filename)
if os.path.exists(fullpath):
return filename, fullpath, component
@@ -91,7 +91,7 @@ def get_dsc_path(name, version, component, archive_root):
for alt_component_entry in os.scandir(pool_root):
if not alt_component_entry.is_dir():
continue
- pool_dir = poolify(name, alt_component_entry.name)
+ pool_dir = str(poolify(name, alt_component_entry.name))
fullpath = os.path.join(pool_root, pool_dir, filename)
if os.path.exists(fullpath):
return filename, fullpath, alt_component_entry.name
diff --git a/lib/lp/soyuz/tests/test_binarypackagebuildbehaviour.py b/lib/lp/soyuz/tests/test_binarypackagebuildbehaviour.py
index d687a9d..867d495 100644
--- a/lib/lp/soyuz/tests/test_binarypackagebuildbehaviour.py
+++ b/lib/lp/soyuz/tests/test_binarypackagebuildbehaviour.py
@@ -279,7 +279,7 @@ class TestBinaryBuildPackageBehaviour(StatsMixin, TestCaseWithFactory):
% (archive.owner.name, archive.name,
poolify(
build.source_package_release.sourcepackagename.name,
- 'main'),
+ 'main').as_posix(),
sprf.libraryfile.filename))
lf = self.factory.makeLibraryFileAlias(db_only=True)
build.distro_arch_series.addOrUpdateChroot(lf)