launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #32906
[Merge] ~ilkeremrekoc/launchpad:librarian-gc-file-dryrun into launchpad:master
İlker Emre Koç has proposed merging ~ilkeremrekoc/launchpad:librarian-gc-file-dryrun into launchpad:master.
Commit message:
Add dry-run to librarian-gc file deletions
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~ilkeremrekoc/launchpad/+git/launchpad/+merge/491422
To test a change to the librarian-gc in a safe manner, we need a
dry-run to ensure nothing is deleted when we run the file deletion
script.
--
Your team Launchpad code reviewers is requested to review the proposed merge of ~ilkeremrekoc/launchpad:librarian-gc-file-dryrun into launchpad:master.
diff --git a/cronscripts/librarian-gc.py b/cronscripts/librarian-gc.py
index 7f8d39e..b5aa082 100755
--- a/cronscripts/librarian-gc.py
+++ b/cronscripts/librarian-gc.py
@@ -24,6 +24,14 @@ class LibrarianGC(LaunchpadCronScript):
def add_my_options(self):
self.parser.add_option(
"",
+ "--dry-run-files",
+ action="store_true",
+ default=False,
+ dest="dry_run_files",
+ help="Run over the file deletions without deleting any files",
+ )
+ self.parser.add_option(
+ "",
"--skip-duplicates",
action="store_true",
default=False,
@@ -103,7 +111,7 @@ class LibrarianGC(LaunchpadCronScript):
# Second sweep.
librariangc.delete_unreferenced_content(conn)
if not self.options.skip_files:
- librariangc.delete_unwanted_files(conn)
+ librariangc.delete_unwanted_files(conn, self.options.dry_run_files)
if __name__ == "__main__":
diff --git a/lib/lp/services/librarianserver/librariangc.py b/lib/lp/services/librarianserver/librariangc.py
index 5c04c55..1b82f11 100644
--- a/lib/lp/services/librarianserver/librariangc.py
+++ b/lib/lp/services/librarianserver/librariangc.py
@@ -669,22 +669,22 @@ def delete_unreferenced_content(con):
loop_tuner.run()
-def delete_unwanted_files(con):
+def delete_unwanted_files(con, dry_run=False):
con.rollback()
orig_autocommit = con.autocommit
try:
# Disable autocommit so that we can use named cursors.
con.autocommit = False
- delete_unwanted_disk_files(con)
+ delete_unwanted_disk_files(con, dry_run)
swift_enabled = getFeatureFlag("librarian.swift.enabled") or False
if swift_enabled:
- delete_unwanted_swift_files(con)
+ delete_unwanted_swift_files(con, dry_run)
finally:
con.rollback()
con.autocommit = orig_autocommit
-def delete_unwanted_disk_files(con):
+def delete_unwanted_disk_files(con, dry_run=False):
"""Delete files found on disk that have no corresponding record in the
database.
@@ -693,7 +693,10 @@ def delete_unwanted_disk_files(con):
the database records committed.
"""
- log.info("Deleting unwanted files from disk.")
+ if dry_run:
+ log.info("Dry run - not deleting any files from disk.")
+ else:
+ log.info("Deleting unwanted files from disk.")
swift_enabled = getFeatureFlag("librarian.swift.enabled") or False
@@ -819,8 +822,11 @@ def delete_unwanted_disk_files(con):
)
else:
# File uploaded a while ago but no longer wanted.
- os.unlink(path)
- log.debug3("Deleted %s" % path)
+ if dry_run:
+ log.info("Would delete %s" % path)
+ else:
+ os.unlink(path)
+ log.debug3("Deleted %s" % path)
removed_count += 1
# Report any remaining LibraryFileContent that the database says
@@ -837,10 +843,16 @@ def delete_unwanted_disk_files(con):
next_wanted_content_id = next(content_id_iter, None)
cur.close()
- log.info(
- "Deleted %d files from disk that were no longer referenced "
- "in the db." % removed_count
- )
+ if dry_run:
+ log.info(
+ "Would have deleted %d files from disk that were no longer "
+ "referenced in the db." % removed_count
+ )
+ else:
+ log.info(
+ "Deleted %d files from disk that were no longer referenced "
+ "in the db." % removed_count
+ )
def swift_files(max_lfc_id):
@@ -896,11 +908,14 @@ def swift_files(max_lfc_id):
seen_names.add((obj["name"], pool_index))
-def delete_unwanted_swift_files(con):
+def delete_unwanted_swift_files(con, dry_run=False):
"""Delete files found in Swift that have no corresponding db record."""
assert getFeatureFlag("librarian.swift.enabled")
- log.info("Deleting unwanted files from Swift.")
+ if dry_run:
+ log.info("Dry run - not deleting any files from Swift.")
+ else:
+ log.info("Deleting unwanted files from Swift.")
# Get the largest LibraryFileContent id in the database. This lets
# us know when to stop looking in Swift for more files.
@@ -992,13 +1007,21 @@ def delete_unwanted_swift_files(con):
else:
with swift.connection(connection_pool) as swift_connection:
try:
- swift_connection.delete_object(container, name)
- log.debug3(
- "Deleted (%s, %s) from Swift (%s)",
- container,
- name,
- connection_pool.os_auth_url,
- )
+ if dry_run:
+ log.info(
+ "Would delete (%s, %s) from Swift (%s)",
+ container,
+ name,
+ connection_pool.os_auth_url,
+ )
+ else:
+ swift_connection.delete_object(container, name)
+ log.debug3(
+ "Deleted (%s, %s) from Swift (%s)",
+ container,
+ name,
+ connection_pool.os_auth_url,
+ )
except swiftclient.ClientException as e:
if e.http_status != 404:
log.exception(
@@ -1032,10 +1055,18 @@ def delete_unwanted_swift_files(con):
next_wanted_content_id = next(content_id_iter, None)
cur.close()
- log.info(
- "Deleted {} files from Swift that were no longer referenced "
- "in the db.".format(removed_count)
- )
+
+ if dry_run:
+ log.info(
+ "Dry run - would have deleted {} files from Swift.".format(
+ removed_count
+ )
+ )
+ else:
+ log.info(
+ "Deleted {} files from Swift that were no longer referenced "
+ "in the db.".format(removed_count)
+ )
def get_file_path(content_id):
diff --git a/lib/lp/services/librarianserver/tests/test_gc.py b/lib/lp/services/librarianserver/tests/test_gc.py
index 23a7eac..efec95f 100644
--- a/lib/lp/services/librarianserver/tests/test_gc.py
+++ b/lib/lp/services/librarianserver/tests/test_gc.py
@@ -517,6 +517,59 @@ class TestLibrarianGarbageCollectionBase:
for content_id in (row[0] for row in cur.fetchall()):
self.assertTrue(self.file_exists(content_id))
+ def test_delete_unwanted_files_dry_run(self):
+ # Test the --dry-run-files option to the librarian-gc script.
+
+ self.ztm.begin()
+ cur = cursor()
+
+ # We may find files in the LibraryFileContent repository
+ # that do not have an corresponding LibraryFileContent row.
+
+ # Find a content_id we can easily delete and do so. This row is
+ # removed from the database, leaving an orphaned file on the
+ # filesystem that should be removed.
+ cur.execute(
+ """
+ SELECT LibraryFileContent.id
+ FROM LibraryFileContent
+ LEFT OUTER JOIN LibraryFileAlias
+ ON LibraryFileContent.id = content
+ WHERE LibraryFileAlias.id IS NULL
+ LIMIT 1
+ """
+ )
+ content_id = cur.fetchone()[0]
+ cur.execute(
+ """
+ DELETE FROM LibraryFileContent WHERE id=%s
+ """,
+ (content_id,),
+ )
+ self.ztm.commit()
+
+ self.assertTrue(self.file_exists(content_id))
+
+ # To test removal does occur when we want it to, we need to trick
+ # the garbage collector into thinking it is tomorrow.
+ with self.librariangc_thinking_it_is_tomorrow():
+ # Add the dry_run=True argument to ensure no files are deleted.
+ librariangc.delete_unwanted_files(self.con, dry_run=True)
+
+ # The file should still be there.
+ self.assertTrue(self.file_exists(content_id))
+
+ # Make sure nothing else has been removed from disk
+ self.ztm.begin()
+ cur = cursor()
+ cur.execute(
+ """
+ SELECT id FROM LibraryFileContent
+ """
+ )
+ for content_id in (row[0] for row in cur.fetchall()):
+ self.assertTrue(self.file_exists(content_id))
+
def test_delete_unwanted_files_bug437084(self):
# There was a bug where delete_unwanted_files() would die
# if the last file found on disk was unwanted.