launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #06577
[Merge] lp:~jtv/maas/filestorage-gc into lp:maas
Jeroen T. Vermeulen has proposed merging lp:~jtv/maas/filestorage-gc into lp:maas with lp:~jtv/maas/overwrite-file as a prerequisite.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~jtv/maas/filestorage-gc/+merge/95696
In implementing the overwriting of stored files, I created a problem: what happens to uploaded files that have been overwritten from the client's point of view, and can no longer be accessed?
This branch does not solve that problem. But it does implement a method you can call to take care of the problem: FileStorage.objects.collect_garbage(). I'm not sure who's going to call this method yet, but that's a separate ticket.
--
https://code.launchpad.net/~jtv/maas/filestorage-gc/+merge/95696
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~jtv/maas/filestorage-gc into lp:maas.
=== modified file 'src/maasserver/models.py'
--- src/maasserver/models.py 2012-03-02 23:29:20 +0000
+++ src/maasserver/models.py 2012-03-02 23:29:20 +0000
@@ -23,14 +23,18 @@
import copy
import datetime
+import os
import re
from socket import gethostname
+import time
from uuid import uuid1
+from django.conf import settings
from django.contrib import admin
from django.contrib.auth.backends import ModelBackend
from django.contrib.auth.models import User
from django.core.files.base import ContentFile
+from django.core.files.storage import FileSystemStorage
from django.db import models
from django.db.models.signals import post_save
from django.shortcuts import get_object_or_404
@@ -600,7 +604,6 @@
old file will continue without iterruption.
>>>>>>> MERGE-SOURCE
"""
- # TODO: Garbage-collect obsolete stored files.
def get_existing_storage(self, filename):
"""Return an existing `FileStorage` of this name, or None."""
@@ -639,6 +642,48 @@
storage.data.save(filename, content)
return storage
+ def list_stored_files(self):
+ """Find the files stored in the filesystem."""
+ dirs, files = FileStorage.storage.listdir(FileStorage.upload_dir)
+ return [
+ os.path.join(FileStorage.upload_dir, filename)
+ for filename in files]
+
+ def list_referenced_files(self):
+ """Find the names of files that are referenced from `FileStorage`.
+
+ :return: All file paths within MEDIA ROOT (relative to MEDIA_ROOT)
+ that have `FileStorage` entries referencing them.
+ :rtype: frozenset
+ """
+ return frozenset(
+ file_storage.data.name
+ for file_storage in self.all())
+
+ def is_old(self, storage_filename):
+ """Is the named file in the filesystem storage old enough to be dead?
+
+ :param storage_filename: The name under which the file is stored in
+ the filesystem, relative to MEDIA_ROOT. This need not be the
+ same name as its filename as stored in the `FileStorage` object.
+ It includes the name of the upload directory.
+ """
+ # The time, in seconds, that an unreferenced file is allowed to
+ # persist in order to satisfy ongoing requests.
+ grace_time = 24 * 60 * 60
+
+ file_path = os.path.join(settings.MEDIA_ROOT, storage_filename)
+ mtime = os.stat(file_path).st_mtime
+ expiry = mtime + grace_time
+ return expiry <= time.time()
+
+ def collect_garbage(self):
+ """Clean up stored files that are no longer accessible."""
+ referenced_files = self.list_referenced_files()
+ for path in self.list_stored_files():
+ if path not in referenced_files and self.is_old(path):
+ FileStorage.storage.delete(path)
+
class FileStorage(models.Model):
"""A simple file storage keyed on file name.
@@ -647,10 +692,15 @@
:ivar data: The file's actual data.
"""
+ storage = FileSystemStorage()
+
+ upload_dir = "storage"
+
# Unix filenames can be longer than this (e.g. 255 bytes), but leave
# some extra room for the full path, as well as a versioning suffix.
filename = models.CharField(max_length=100, unique=True, editable=False)
- data = models.FileField(upload_to="storage", max_length=255)
+ data = models.FileField(
+ upload_to=upload_dir, storage=storage, max_length=255)
objects = FileStorageManager()
=== modified file 'src/maasserver/tests/test_models.py'
--- src/maasserver/tests/test_models.py 2012-03-02 23:29:20 +0000
+++ src/maasserver/tests/test_models.py 2012-03-02 23:29:20 +0000
@@ -442,13 +442,18 @@
# The development settings say to write storage files in
# /var/tmp/maas.
os.mkdir(self.FILEPATH)
+ os.mkdir(os.path.join(self.FILEPATH, FileStorage.upload_dir))
self.addCleanup(shutil.rmtree, self.FILEPATH)
def get_storage_path(self, filename):
"""Get the full path to the storage file of the given name."""
# The storage field is hard-coded to write files to a
# subdirectory of FILEPATH called "storage".
- return os.path.join(self.FILEPATH, "storage", filename)
+ return os.path.join(self.FILEPATH, FileStorage.upload_dir, filename)
+
+ def get_media_path(self, filename):
+ """Get the path to a given stored file, relative to MEDIA_ROOT."""
+ return os.path.join(FileStorage.upload_dir, filename)
def make_data(self, including_text='data'):
"""Return arbitrary data.
@@ -464,6 +469,16 @@
text = "%s %s" % (including_text, factory.getRandomString())
return text.encode('ascii')
+ def age_file(self, path):
+ """Make the file at `path` look like it hasn't been touched recently.
+
+ Decrements the file's mtime by a bit over a day.
+ """
+ stat_result = os.stat(path)
+ atime = stat_result.st_atime
+ mtime = stat_result.st_mtime
+ os.utime(path, (atime, mtime - (24 * 60 * 60 + 1)))
+
def test_get_existing_storage_returns_None_if_none_found(self):
nonexistent_file = factory.getRandomString()
self.assertIsNone(
@@ -523,6 +538,87 @@
self.assertEqual(
new_data, FileStorage.objects.get(filename=filename).data.read())
+ def test_list_stored_files_lists_files(self):
+ filename = factory.getRandomString()
+ path = self.get_storage_path(filename)
+ with open(path, 'w') as f:
+ f.write(self.make_data())
+ self.assertIn(
+ self.get_media_path(filename),
+ FileStorage.objects.list_stored_files())
+
+ def test_list_stored_files_includes_referenced_files(self):
+ storage = factory.make_file_storage()
+ self.assertIn(
+ storage.data.name, FileStorage.objects.list_stored_files())
+
+ def test_list_referenced_files_lists_FileStorage_files(self):
+ storage = factory.make_file_storage()
+ self.assertIn(
+ storage.data.name, FileStorage.objects.list_referenced_files())
+
+ def test_list_referenced_files_excludes_unreferenced_files(self):
+ filename = factory.getRandomString()
+ path = self.get_storage_path(filename)
+ with open(path, 'w') as f:
+ f.write(self.make_data())
+ self.assertNotIn(
+ self.get_media_path(filename),
+ FileStorage.objects.list_referenced_files())
+
+ def test_list_referenced_files_uses_file_name_not_FileStorage_name(self):
+ filename = factory.getRandomString()
+ # The filename we're going to use is already taken. The file
+ # we'll be looking at will have to have a different name.
+ factory.make_file_storage(filename=filename)
+ storage = factory.make_file_storage(filename=filename)
+ # It's the name of the file, not the FileStorage.filename, that
+ # is in list_referenced_files.
+ self.assertIn(
+ storage.data.name, FileStorage.objects.list_referenced_files())
+
+ def test_is_old_returns_False_for_recent_file(self):
+ filename = factory.getRandomString()
+ path = self.get_storage_path(filename)
+ with open(path, 'w') as f:
+ f.write(self.make_data())
+ self.assertFalse(
+ FileStorage.objects.is_old(self.get_media_path(filename)))
+
+ def test_is_old_returns_True_for_old_file(self):
+ filename = factory.getRandomString()
+ path = self.get_storage_path(filename)
+ with open(path, 'w') as f:
+ f.write(self.make_data())
+ self.age_file(path)
+ self.assertTrue(
+ FileStorage.objects.is_old(self.get_media_path(filename)))
+
+ def test_collect_garbage_deletes_garbage(self):
+ filename = factory.getRandomString()
+ path = self.get_storage_path(filename)
+ with open(path, 'w') as f:
+ f.write(self.make_data())
+ self.age_file(path)
+ FileStorage.objects.collect_garbage()
+ self.assertFalse(
+ FileStorage.storage.exists(self.get_media_path(filename)))
+
+ def test_collect_garbage_leaves_recent_files_alone(self):
+ filename = factory.getRandomString()
+ path = self.get_storage_path(filename)
+ with open(path, 'w') as f:
+ f.write(self.make_data())
+ FileStorage.objects.collect_garbage()
+ self.assertTrue(
+ FileStorage.storage.exists(self.get_media_path(filename)))
+
+ def test_collect_garbage_leaves_referenced_files_alone(self):
+ storage = factory.make_file_storage()
+ self.age_file(storage.data.path)
+ FileStorage.objects.collect_garbage()
+ self.assertTrue(FileStorage.storage.exists(storage.data.name))
+
class ConfigTest(TestCase):
"""Testing of the :class:`Config` model."""
Follow ups