← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~jtv/maas/1.2-bug-1069734 into lp:maas/1.2

 

Jeroen T. Vermeulen has proposed merging lp:~jtv/maas/1.2-bug-1069734 into lp:maas/1.2.

Commit message:
Move file storage from the filesystem into the database, so that it's no longer local to a single app server.

Functionally the change is hidden inside FileStorage, but the need for garbage collection goes out the window as well.  We should remove the maas-gc cron script altogether, but for that, the packaging branch needs updating as well.  Keeping the file in disabled form for now decouples the two changes.

Requested reviews:
  Raphaël Badin (rvb)
Related bugs:
  Bug #1069734 in MAAS: "Filestorage is unique to each appserver instance"
  https://bugs.launchpad.net/maas/+bug/1069734

For more details, see:
https://code.launchpad.net/~jtv/maas/1.2-bug-1069734/+merge/131334

This is the change for the 1.2 branch.  We'll want to apply the change to trunk as well.

Jeroen
-- 
https://code.launchpad.net/~jtv/maas/1.2-bug-1069734/+merge/131334
Your team MAAS Maintainers is subscribed to branch lp:maas/1.2.
=== modified file 'etc/cron.d/maas-gc'
--- etc/cron.d/maas-gc	2012-08-03 16:33:05 +0000
+++ etc/cron.d/maas-gc	2012-10-25 08:10:27 +0000
@@ -1,8 +1,4 @@
 # Perform daily background cleanups in MAAS.
 #
-# The "maas gc" command is for garbage-collection, such as deleting uploaded
-# files from Juju's file storage API, and in the future commissioning logs,
-# that have been superseded by newer ones.  (This isn't done immediately
-# when the files are overwritten because (1) the transaction that overwrites
-# them may fail, and (2) a file may still be in use when it's overwritten.)
-0 0 * * * root  /usr/sbin/maas gc &> /dev/null
+# This currently does nothing: maas-gc is no longer needed.
+#0 0 * * * root  /usr/sbin/maas gc &> /dev/null

=== modified file 'setup.py'
--- setup.py	2012-10-01 22:56:46 +0000
+++ setup.py	2012-10-25 08:10:27 +0000
@@ -65,8 +65,6 @@
              'etc/maas/commissioning-user-data',
              'contrib/maas-http.conf',
              'contrib/maas_local_settings.py']),
-        ('/etc/cron.d',
-            ['etc/cron.d/maas-gc']),
         ('/usr/share/maas',
             ['contrib/wsgi.py',
              'etc/celeryconfig.py',

=== modified file 'src/maasserver/api.py'
--- src/maasserver/api.py	2012-10-11 13:55:44 +0000
+++ src/maasserver/api.py	2012-10-25 08:10:27 +0000
@@ -87,7 +87,6 @@
 from functools import partial
 import httplib
 from inspect import getdoc
-import simplejson as json
 import sys
 from textwrap import dedent
 
@@ -175,6 +174,7 @@
 from piston.utils import rc
 from provisioningserver.enum import POWER_TYPE
 from provisioningserver.kernel_opts import KernelParameters
+import simplejson as json
 
 
 class OperationsResource(Resource):
@@ -926,7 +926,7 @@
         db_file = FileStorage.objects.get(filename=filename)
     except FileStorage.DoesNotExist:
         raise MAASAPINotFound("File not found")
-    return HttpResponse(db_file.data.read(), status=httplib.OK)
+    return HttpResponse(db_file.data, status=httplib.OK)
 
 
 class AnonFilesHandler(AnonymousOperationsHandler):

=== removed file 'src/maasserver/management/commands/gc.py'
--- src/maasserver/management/commands/gc.py	2012-04-16 10:00:51 +0000
+++ src/maasserver/management/commands/gc.py	1970-01-01 00:00:00 +0000
@@ -1,24 +0,0 @@
-# Copyright 2012 Canonical Ltd.  This software is licensed under the
-# GNU Affero General Public License version 3 (see the file LICENSE).
-
-"""Custom django command: garabge-collect."""
-
-from __future__ import (
-    absolute_import,
-    print_function,
-    unicode_literals,
-    )
-
-__metaclass__ = type
-__all__ = [
-    'Command',
-    ]
-
-
-from django.core.management.base import BaseCommand
-from maasserver.models import FileStorage
-
-
-class Command(BaseCommand):
-    def handle(self, *args, **options):
-        FileStorage.objects.collect_garbage()

=== modified file 'src/maasserver/models/filestorage.py'
--- src/maasserver/models/filestorage.py	2012-08-24 10:28:29 +0000
+++ src/maasserver/models/filestorage.py	2012-10-25 08:10:27 +0000
@@ -15,22 +15,17 @@
     ]
 
 
-from errno import ENOENT
-import os
-import time
-
-from django.conf import settings
-from django.core.files.base import ContentFile
-from django.core.files.storage import FileSystemStorage
 from django.db.models import (
     CharField,
-    FileField,
     Manager,
     Model,
     )
 from maasserver import DefaultMeta
 from maasserver.models.cleansave import CleanSave
-from maasserver.utils.orm import get_one
+from metadataserver.fields import (
+    Bin,
+    BinaryField,
+    )
 
 
 class FileStorageManager(Manager):
@@ -47,89 +42,23 @@
     original file will not be affected.  Also, any ongoing reads from the
     old file will continue without iterruption.
     """
-    # The time, in seconds, that an unreferenced file is allowed to
-    # persist in order to satisfy ongoing requests.
-    grace_time = 12 * 60 * 60
-
-    def get_existing_storage(self, filename):
-        """Return an existing `FileStorage` of this name, or None."""
-        return get_one(self.filter(filename=filename))
 
     def save_file(self, filename, file_object):
-        """Save the file to the filesystem and persist to the database.
-
-        The file will end up in MEDIA_ROOT/storage/
+        """Save the file to the database.
 
         If a file of that name already existed, it will be replaced by the
         new contents.
         """
         # This probably ought to read in chunks but large files are
-        # not expected.  Also note that uploading a file with the same
-        # name as an existing one will cause that file to be written
-        # with a new generated name, and the old one remains where it
-        # is.  See https://code.djangoproject.com/ticket/6157 - the
-        # Django devs consider deleting things dangerous ... ha.
-        # HOWEVER - this operation would need to be atomic anyway so
-        # it's safest left how it is for now (reads can overlap with
-        # writes from Juju).
-        content = ContentFile(file_object.read())
-
-        storage = self.get_existing_storage(filename)
-        if storage is None:
-            storage = FileStorage(filename=filename)
-        storage.data.save(filename, content)
+        # not expected.
+        content = Bin(file_object.read())
+        storage, created = self.get_or_create(
+            filename=filename, defaults={'data': content})
+        if not created:
+            storage.data = content
+            storage.save()
         return storage
 
-    def list_stored_files(self):
-        """Find the files stored in the filesystem."""
-        dirs, files = FileStorage.storage.listdir(FileStorage.upload_dir)
-        return [
-            os.path.join(FileStorage.upload_dir, filename)
-            for filename in files]
-
-    def list_referenced_files(self):
-        """Find the names of files that are referenced from `FileStorage`.
-
-        :return: All file paths within MEDIA ROOT (relative to MEDIA_ROOT)
-            that have `FileStorage` entries referencing them.
-        :rtype: frozenset
-        """
-        return frozenset(
-            file_storage.data.name
-            for file_storage in self.all())
-
-    def is_old(self, storage_filename):
-        """Is the named file in the filesystem storage old enough to be dead?
-
-        :param storage_filename: The name under which the file is stored in
-            the filesystem, relative to MEDIA_ROOT.  This need not be the
-            same name as its filename as stored in the `FileStorage` object.
-            It includes the name of the upload directory.
-        """
-        file_path = os.path.join(settings.MEDIA_ROOT, storage_filename)
-        mtime = os.stat(file_path).st_mtime
-        expiry = mtime + self.grace_time
-        return expiry <= time.time()
-
-    def collect_garbage(self):
-        """Clean up stored files that are no longer accessible."""
-        # Avoid circular imports.
-        from maasserver.models import logger
-
-        try:
-            stored_files = self.list_stored_files()
-        except OSError as e:
-            if e.errno != ENOENT:
-                raise
-            logger.info(
-                "Upload directory does not exist yet.  "
-                "Skipping garbage collection.")
-            return
-        referenced_files = self.list_referenced_files()
-        for path in stored_files:
-            if path not in referenced_files and self.is_old(path):
-                FileStorage.storage.delete(path)
-
 
 class FileStorage(CleanSave, Model):
     """A simple file storage keyed on file name.
@@ -141,14 +70,11 @@
     class Meta(DefaultMeta):
         """Needed for South to recognize this model."""
 
-    storage = FileSystemStorage()
-
     upload_dir = "storage"
 
-    # Unix filenames can be longer than this (e.g. 255 bytes), but leave
-    # some extra room for the full path, as well as a versioning suffix.
-    filename = CharField(max_length=200, unique=True, editable=False)
-    data = FileField(upload_to=upload_dir, storage=storage, max_length=255)
+    filename = CharField(max_length=255, unique=True, editable=False)
+
+    data = BinaryField(null=False)
 
     objects = FileStorageManager()
 

=== modified file 'src/maasserver/tests/test_commands.py'
--- src/maasserver/tests/test_commands.py	2012-08-24 10:28:29 +0000
+++ src/maasserver/tests/test_commands.py	2012-10-25 08:10:27 +0000
@@ -14,13 +14,10 @@
 
 from codecs import getwriter
 from io import BytesIO
-import os
 
-from django.conf import settings
 from django.contrib.auth.models import User
 from django.core.cache import cache
 from django.core.management import call_command
-from maasserver.models import FileStorage
 from maasserver.testing.factory import factory
 from maasserver.utils.orm import get_one
 from maastesting.djangotestcase import DjangoTestCase
@@ -33,14 +30,6 @@
     in a command's code, it should be extracted and unit-tested separately.
     """
 
-    def test_gc(self):
-        upload_dir = os.path.join(settings.MEDIA_ROOT, FileStorage.upload_dir)
-        os.makedirs(upload_dir)
-        self.addCleanup(os.removedirs, upload_dir)
-        call_command('gc')
-        # The test is that we get here without errors.
-        pass
-
     def test_generate_api_doc(self):
         out = BytesIO()
         stdout = getwriter("UTF-8")(out)

=== modified file 'src/maasserver/tests/test_filestorage.py'
--- src/maasserver/tests/test_filestorage.py	2012-06-25 09:03:14 +0000
+++ src/maasserver/tests/test_filestorage.py	2012-10-25 08:10:27 +0000
@@ -14,45 +14,15 @@
 
 import codecs
 from io import BytesIO
-import os
-import shutil
 
-from django.conf import settings
 from maasserver.models import FileStorage
 from maasserver.testing.factory import factory
 from maasserver.testing.testcase import TestCase
-from maastesting.utils import age_file
-from testtools.matchers import (
-    GreaterThan,
-    LessThan,
-    )
 
 
 class FileStorageTest(TestCase):
     """Testing of the :class:`FileStorage` model."""
 
-    def make_upload_dir(self):
-        """Create the upload directory, and arrange for eventual deletion.
-
-        The directory must not already exist.  If it does, this method will
-        fail rather than arrange for deletion of a directory that may
-        contain meaningful data.
-
-        :return: Absolute path to the `FileStorage` upload directory.  This
-            is the directory where the actual files are stored.
-        """
-        media_root = settings.MEDIA_ROOT
-        self.assertFalse(os.path.exists(media_root), "See media/README")
-        self.addCleanup(shutil.rmtree, media_root, ignore_errors=True)
-        os.mkdir(media_root)
-        upload_dir = os.path.join(media_root, FileStorage.upload_dir)
-        os.mkdir(upload_dir)
-        return upload_dir
-
-    def get_media_path(self, filename):
-        """Get the path to a given stored file, relative to MEDIA_ROOT."""
-        return os.path.join(FileStorage.upload_dir, filename)
-
     def make_data(self, including_text='data'):
         """Return arbitrary data.
 
@@ -67,40 +37,24 @@
         text = "%s %s" % (including_text, factory.getRandomString())
         return text.encode('ascii')
 
-    def test_get_existing_storage_returns_None_if_none_found(self):
-        nonexistent_file = factory.getRandomString()
-        self.assertIsNone(
-            FileStorage.objects.get_existing_storage(nonexistent_file))
-
-    def test_get_existing_storage_finds_FileStorage(self):
-        self.make_upload_dir()
-        storage = factory.make_file_storage()
-        self.assertEqual(
-            storage,
-            FileStorage.objects.get_existing_storage(storage.filename))
-
     def test_save_file_creates_storage(self):
-        self.make_upload_dir()
         filename = factory.getRandomString()
         data = self.make_data()
         storage = FileStorage.objects.save_file(filename, BytesIO(data))
         self.assertEqual(
             (filename, data),
-            (storage.filename, storage.data.read()))
+            (storage.filename, storage.data))
 
     def test_storage_can_be_retrieved(self):
-        self.make_upload_dir()
         filename = factory.getRandomString()
         data = self.make_data()
         factory.make_file_storage(filename=filename, data=data)
         storage = FileStorage.objects.get(filename=filename)
         self.assertEqual(
             (filename, data),
-            (storage.filename, storage.data.read()))
+            (storage.filename, storage.data))
 
     def test_stores_binary_data(self):
-        self.make_upload_dir()
-
         # This horrible binary data could never, ever, under any
         # encoding known to man be interpreted as text(1).  Switch the
         # bytes of the byte-order mark around and by design you get an
@@ -115,120 +69,18 @@
         # And yet, because FileStorage supports binary data, it comes
         # out intact.
         storage = factory.make_file_storage(filename="x", data=binary_data)
-        self.assertEqual(binary_data, storage.data.read())
+        self.assertEqual(binary_data, storage.data)
 
     def test_overwrites_file(self):
         # If a file of the same name has already been stored, the
         # reference to the old data gets overwritten with one to the new
-        # data.  They are actually different files on the filesystem.
-        self.make_upload_dir()
+        # data.
         filename = factory.make_name('filename')
         old_storage = factory.make_file_storage(
             filename=filename, data=self.make_data('old data'))
         new_data = self.make_data('new-data')
         new_storage = factory.make_file_storage(
             filename=filename, data=new_data)
-        self.assertNotEqual(old_storage.data.name, new_storage.data.name)
+        self.assertEqual(old_storage.filename, new_storage.filename)
         self.assertEqual(
-            new_data, FileStorage.objects.get(filename=filename).data.read())
-
-    def test_list_stored_files_lists_files(self):
-        filename = factory.getRandomString()
-        factory.make_file(
-            location=self.make_upload_dir(), name=filename,
-            contents=self.make_data())
-        self.assertIn(
-            self.get_media_path(filename),
-            FileStorage.objects.list_stored_files())
-
-    def test_list_stored_files_includes_referenced_files(self):
-        self.make_upload_dir()
-        storage = factory.make_file_storage()
-        self.assertIn(
-            storage.data.name, FileStorage.objects.list_stored_files())
-
-    def test_list_referenced_files_lists_FileStorage_files(self):
-        self.make_upload_dir()
-        storage = factory.make_file_storage()
-        self.assertIn(
-            storage.data.name, FileStorage.objects.list_referenced_files())
-
-    def test_list_referenced_files_excludes_unreferenced_files(self):
-        filename = factory.getRandomString()
-        factory.make_file(
-            location=self.make_upload_dir(), name=filename,
-            contents=self.make_data())
-        self.assertNotIn(
-            self.get_media_path(filename),
-            FileStorage.objects.list_referenced_files())
-
-    def test_list_referenced_files_uses_file_name_not_FileStorage_name(self):
-        self.make_upload_dir()
-        filename = factory.getRandomString()
-        # The filename we're going to use is already taken.  The file
-        # we'll be looking at will have to have a different name.
-        factory.make_file_storage(filename=filename)
-        storage = factory.make_file_storage(filename=filename)
-        # It's the name of the file, not the FileStorage.filename, that
-        # is in list_referenced_files.
-        self.assertIn(
-            storage.data.name, FileStorage.objects.list_referenced_files())
-
-    def test_is_old_returns_False_for_recent_file(self):
-        filename = factory.getRandomString()
-        path = factory.make_file(
-            location=self.make_upload_dir(), name=filename,
-            contents=self.make_data())
-        age_file(path, FileStorage.objects.grace_time - 60)
-        self.assertFalse(
-            FileStorage.objects.is_old(self.get_media_path(filename)))
-
-    def test_is_old_returns_True_for_old_file(self):
-        filename = factory.getRandomString()
-        path = factory.make_file(
-            location=self.make_upload_dir(), name=filename,
-            contents=self.make_data())
-        age_file(path, FileStorage.objects.grace_time + 1)
-        self.assertTrue(
-            FileStorage.objects.is_old(self.get_media_path(filename)))
-
-    def test_collect_garbage_deletes_garbage(self):
-        filename = factory.getRandomString()
-        path = factory.make_file(
-            location=self.make_upload_dir(), name=filename,
-            contents=self.make_data())
-        age_file(path, FileStorage.objects.grace_time + 1)
-        FileStorage.objects.collect_garbage()
-        self.assertFalse(
-            FileStorage.storage.exists(self.get_media_path(filename)))
-
-    def test_grace_time_is_generous_but_not_unlimited(self):
-        # Grace time for garbage collection is long enough that it won't
-        # expire while the request that wrote it is still being handled.
-        # But it won't keep a file around for ages.  For instance, it'll
-        # be more than 20 seconds, but less than a day.
-        self.assertThat(FileStorage.objects.grace_time, GreaterThan(20))
-        self.assertThat(FileStorage.objects.grace_time, LessThan(24 * 60 * 60))
-
-    def test_collect_garbage_leaves_recent_files_alone(self):
-        filename = factory.getRandomString()
-        factory.make_file(
-            location=self.make_upload_dir(), name=filename,
-            contents=self.make_data())
-        FileStorage.objects.collect_garbage()
-        self.assertTrue(
-            FileStorage.storage.exists(self.get_media_path(filename)))
-
-    def test_collect_garbage_leaves_referenced_files_alone(self):
-        self.make_upload_dir()
-        storage = factory.make_file_storage()
-        age_file(storage.data.path, FileStorage.objects.grace_time + 1)
-        FileStorage.objects.collect_garbage()
-        self.assertTrue(FileStorage.storage.exists(storage.data.name))
-
-    def test_collect_garbage_tolerates_missing_upload_dir(self):
-        # When MAAS is freshly installed, the upload directory is still
-        # missing.  But...
-        FileStorage.objects.collect_garbage()
-        # ...we get through garbage collection without breakage.
-        pass
+            new_data, FileStorage.objects.get(filename=filename).data)


Follow ups