launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #17697
[Merge] lp:~wgrant/launchpad/bug-1410592 into lp:launchpad
William Grant has proposed merging lp:~wgrant/launchpad/bug-1410592 into lp:launchpad.
Commit message:
Fix librarian-gc to treat .migrated filenames like the originals, so migrated files can be GCed and not doom us.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
Related bugs:
Bug #1410592 in Launchpad itself: "librarian-gc should treat .migrated files just like the originals"
https://bugs.launchpad.net/launchpad/+bug/1410592
For more details, see:
https://code.launchpad.net/~wgrant/launchpad/bug-1410592/+merge/246389
librarian-gc was recently adjusted to ignore any files ending in ".migrated", so as to avoid whining about files that have been migrated to Swift. But it should instead treat them just like the original filename -- they still want to be expired if they still exist locally.
--
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~wgrant/launchpad/bug-1410592 into lp:launchpad.
=== modified file 'lib/lp/services/librarianserver/librariangc.py'
--- lib/lp/services/librarianserver/librariangc.py 2014-12-16 10:54:15 +0000
+++ lib/lp/services/librarianserver/librariangc.py 2015-01-14 05:41:13 +0000
@@ -616,7 +616,7 @@
removed_count = 0
content_id = next_wanted_content_id = -1
- hex_content_id_re = re.compile('^[0-9a-f]{8}$')
+ hex_content_id_re = re.compile('^([0-9a-f]{8})(\.migrated)?$')
ONE_DAY = 24 * 60 * 60
for dirpath, dirnames, filenames in os.walk(
@@ -627,8 +627,7 @@
dirnames.remove('incoming')
if 'lost+found' in dirnames:
dirnames.remove('lost+found')
- filenames = set(fn for fn in filenames
- if not fn.endswith('.migrated'))
+ filenames = set(filenames)
filenames.discard('librarian.pid')
filenames.discard('librarian.log')
@@ -660,12 +659,13 @@
for filename in filenames:
path = os.path.join(dirpath, filename)
hex_content_id = ''.join(path.split(os.sep)[-4:])
- if hex_content_id_re.search(hex_content_id) is None:
+ match = hex_content_id_re.search(hex_content_id)
+ if match is None:
log.warning(
"Ignoring invalid path %s" % path)
continue
- content_id = int(hex_content_id, 16)
+ content_id = int(match.groups()[0], 16)
while (next_wanted_content_id is not None
and content_id > next_wanted_content_id):
=== modified file 'lib/lp/services/librarianserver/tests/test_gc.py'
--- lib/lp/services/librarianserver/tests/test_gc.py 2014-12-16 10:51:41 +0000
+++ lib/lp/services/librarianserver/tests/test_gc.py 2015-01-14 05:41:13 +0000
@@ -19,6 +19,7 @@
import sys
import tempfile
+from contextlib import contextmanager
from sqlobject import SQLObjectNotFound
from swiftclient import client as swiftclient
import transaction
@@ -437,6 +438,25 @@
len(results), 0, 'Too many results %r' % (results,)
)
+ @contextmanager
+ def librariangc_thinking_it_is_tomorrow(self):
+ org_time = librariangc.time
+ org_utcnow = librariangc._utcnow
+
+ def tomorrow_time():
+ return org_time() + 24 * 60 * 60 + 1
+
+ def tomorrow_utcnow():
+ return datetime.utcnow() + timedelta(days=1, seconds=1)
+
+ try:
+ librariangc.time = tomorrow_time
+ librariangc._utcnow = tomorrow_utcnow
+ yield
+ finally:
+ librariangc.time = org_time
+ librariangc._utcnow = org_utcnow
+
def test_deleteUnwantedFiles(self):
self.ztm.begin()
cur = cursor()
@@ -473,22 +493,8 @@
# To test removal does occur when we want it to, we need to trick
# the garbage collector into thinking it is tomorrow.
- org_time = librariangc.time
- org_utcnow = librariangc._utcnow
-
- def tomorrow_time():
- return org_time() + 24 * 60 * 60 + 1
-
- def tomorrow_utcnow():
- return datetime.utcnow() + timedelta(days=1, seconds=1)
-
- try:
- librariangc.time = tomorrow_time
- librariangc._utcnow = tomorrow_utcnow
+ with self.librariangc_thinking_it_is_tomorrow():
librariangc.delete_unwanted_files(self.con)
- finally:
- librariangc.time = org_time
- librariangc._utcnow = org_utcnow
self.failIf(self.file_exists(content_id))
@@ -578,18 +584,10 @@
# To test this function raises an excption when it should,
# the garbage collector into thinking it is tomorrow.
- org_time = librariangc.time
-
- def tomorrow_time():
- return org_time() + 24 * 60 * 60 + 1
-
- try:
- librariangc.time = tomorrow_time
+ with self.librariangc_thinking_it_is_tomorrow():
self.assertRaises(
Exception, librariangc.confirm_no_clock_skew, (self.con,)
)
- finally:
- librariangc.time = org_time
class TestDiskLibrarianGarbageCollection(
@@ -603,6 +601,39 @@
path = librariangc.get_file_path(content_id)
os.unlink(path)
+ def test_delete_unwanted_files_handles_migrated(self):
+ # Files that have been uploaded to Swift have ".migrated"
+ # appended to their names. These are treated just like the
+ # original file, ignoring the extension.
+ switch_dbuser('testadmin')
+ content = 'foo'
+ lfa = LibraryFileAlias.get(self.client.addFile(
+ 'foo.txt', len(content), StringIO(content), 'text/plain'))
+ id_aborted = lfa.contentID
+ # Roll back the database changes, leaving the file on disk.
+ transaction.abort()
+
+ lfa = LibraryFileAlias.get(self.client.addFile(
+ 'bar.txt', len(content), StringIO(content), 'text/plain'))
+ transaction.commit()
+ id_committed = lfa.contentID
+
+ switch_dbuser(config.librarian_gc.dbuser)
+
+ # Now, we will move the directory containing the trash somewhere else
+ # and make a symlink to it.
+ path_aborted = librariangc.get_file_path(id_aborted)
+ os.rename(path_aborted, path_aborted + '.migrated')
+
+ path_committed = librariangc.get_file_path(id_committed)
+ os.rename(path_committed, path_committed + '.migrated')
+
+ with self.librariangc_thinking_it_is_tomorrow():
+ librariangc.delete_unwanted_files(self.con)
+
+ self.assertFalse(os.path.exists(path_aborted + '.migrated'))
+ self.assertTrue(os.path.exists(path_committed + '.migrated'))
+
def test_deleteUnwantedFilesIgnoresNoise(self):
# Directories with invalid names in the storage area are
# ignored. They are reported as warnings though.
@@ -616,8 +647,10 @@
# Long non-hexadecimal number
noisedir3_path = os.path.join(config.librarian_server.root, '11.bak')
- # A file we will migrate to Swift using the --rename feed-swift option.
- migrated_path = librariangc.get_file_path(8769786) + '.migrated'
+ # A file with the ".migrated" suffix has migrated to Swift but
+ # may still be removed from disk as unwanted by the GC. Other
+ # suffixes are unknown and stay around.
+ migrated_path = librariangc.get_file_path(8769786) + '.noise'
try:
os.mkdir(noisedir1_path)
@@ -636,16 +669,8 @@
# Pretend it is tomorrow to ensure the files don't count as
# recently created, and run the delete_unwanted_files process.
- org_time = librariangc.time
-
- def tomorrow_time():
- return org_time() + 24 * 60 * 60 + 1
-
- try:
- librariangc.time = tomorrow_time
+ with self.librariangc_thinking_it_is_tomorrow():
librariangc.delete_unwanted_files(self.con)
- finally:
- librariangc.time = org_time
# None of the rubbish we created has been touched.
self.assert_(os.path.isdir(noisedir1_path))
Follow ups