launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #22413
[Merge] lp:~cjwatson/launchpad/archive-file-history into lp:launchpad
Colin Watson has proposed merging lp:~cjwatson/launchpad/archive-file-history into lp:launchpad.
Commit message:
Turn ArchiveFile into a history table, adding date_created and date_superseded columns. Adjust the publisher to match.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
Related bugs:
Bug #1765933 in Launchpad itself: "Allow building livefses against a view of the archive at a fixed point in time"
https://bugs.launchpad.net/launchpad/+bug/1765933
For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/archive-file-history/+merge/343751
The main complexity here is in the changed publisher logic, especially for reprieving (that is, the situation where file contents that were scheduled for deletion become live again, particularly common for empty files). We previously did this by simply clearing scheduled_deletion_date on the old ArchiveFile row, but that strategy no longer works when we're trying to maintain history: instead, we need to create new rows in such cases. As a result of the logic changes here, we no longer need the only_condemned=True option in ArchiveFileSet.getByArchive.
I think the publisher tests are now somewhat clearer, since they now explicitly test creation dates, making the chain of events more obvious.
ArchiveFile.superseded_at is arguably redundant with ArchiveFile.scheduled_deletion_date, but I think keeping both is a good idea for clarity, and in case we ever change the stay of execution in future.
We'll need to backfill the new columns. I'll probably make a separate branch with a garbo job for this: my plan is to set date_created to some arbitrary value (probably just the epoch, so that it's clear that it's arbitrary), and to set date_superseded to scheduled_deletion_date minus the stay of execution for rows that have a scheduled_deletion_date.
--
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/archive-file-history into lp:launchpad.
=== modified file 'cronscripts/librarian-gc.py'
--- cronscripts/librarian-gc.py 2013-09-10 10:48:31 +0000
+++ cronscripts/librarian-gc.py 2018-04-21 11:23:22 +0000
@@ -66,11 +66,12 @@
# XXX wgrant 2011-09-18 bug=853066: Using Storm's raw connection
# here is wrong. We should either create our own or use
# Store.execute or cursor() and the transaction module.
- conn = IStore(LibraryFileAlias)._connection._raw_connection
+ store = IStore(LibraryFileAlias)
+ conn = store._connection._raw_connection
# Refuse to run if we have significant clock skew between the
# librarian and the database.
- librariangc.confirm_no_clock_skew(conn)
+ librariangc.confirm_no_clock_skew(store)
# Note that each of these next steps will issue commit commands
# as appropriate to make this script transaction friendly
=== modified file 'lib/lp/archivepublisher/publishing.py'
--- lib/lp/archivepublisher/publishing.py 2018-03-27 23:26:12 +0000
+++ lib/lp/archivepublisher/publishing.py 2018-04-21 11:23:22 +0000
@@ -1063,8 +1063,7 @@
assert path.startswith("dists/")
return path[len("dists/"):]
- # Gather information on entries in the current Release file, and
- # make sure nothing there is condemned.
+ # Gather information on entries in the current Release file.
current_files = {}
for current_entry in (
release_data["SHA256"] + extra_data.get("SHA256", [])):
@@ -1073,33 +1072,54 @@
real_path = os.path.join(suite_dir, real_name)
current_files[path] = (
int(current_entry["size"]), current_entry["sha256"], real_path)
+
+ # Gather information on entries currently in the database. Ensure
+ # that we know about all the relevant by-hash directory trees before
+ # doing any removals so that we can prune them properly later, and
+ # work out which condemned files should be reprieved due to the
+ # paths in question having their previous content again.
+ reprieved_files = defaultdict(list)
uncondemned_files = set()
for db_file in archive_file_set.getByArchive(
- self.archive, container=container, only_condemned=True,
- eager_load=True):
- stripped_path = strip_dists(db_file.path)
- if stripped_path in current_files:
- current_sha256 = current_files[stripped_path][1]
- if db_file.library_file.content.sha256 == current_sha256:
- uncondemned_files.add(db_file)
- if uncondemned_files:
- for container, path, sha256 in archive_file_set.unscheduleDeletion(
- uncondemned_files):
+ self.archive, container=container, eager_load=True):
+ by_hashes.registerChild(os.path.dirname(strip_dists(db_file.path)))
+ file_key = (db_file.path, db_file.library_file.content.sha256)
+ if db_file.scheduled_deletion_date is None:
+ uncondemned_files.add(file_key)
+ else:
+ stripped_path = strip_dists(db_file.path)
+ if stripped_path in current_files:
+ current_sha256 = current_files[stripped_path][1]
+ if db_file.library_file.content.sha256 == current_sha256:
+ reprieved_files[file_key].append(db_file)
+
+ # We may already have uncondemned entries with the same path and
+ # content as condemned entries that we were about to reprieve; if
+ # so, there's no need to reprieve them.
+ for file_key in uncondemned_files:
+ reprieved_files.pop(file_key, None)
+
+ # Make sure nothing in the current Release file is condemned.
+ if reprieved_files:
+ reprieved_files_flat = set(
+ chain.from_iterable(reprieved_files.values()))
+ archive_file_set.unscheduleDeletion(reprieved_files_flat)
+ for db_file in reprieved_files_flat:
self.log.debug(
"by-hash: Unscheduled %s for %s in %s for deletion" % (
- sha256, path, container))
+ db_file.library_file.content.sha256, db_file.path,
+ db_file.container))
# Remove any condemned files from the database whose stay of
# execution has elapsed. We ensure that we know about all the
# relevant by-hash directory trees before doing any removals so that
# we can prune them properly later.
- for db_file in archive_file_set.getByArchive(
- self.archive, container=container):
- by_hashes.registerChild(os.path.dirname(strip_dists(db_file.path)))
for container, path, sha256 in archive_file_set.reap(
self.archive, container=container):
- self.log.debug(
- "by-hash: Deleted %s for %s in %s" % (sha256, path, container))
+ if (path, sha256) not in uncondemned_files:
+ self.log.debug(
+ "by-hash: Deleted %s for %s in %s" %
+ (sha256, path, container))
# Ensure that all files recorded in the database are in by-hash.
db_files = archive_file_set.getByArchive(
@@ -1120,12 +1140,13 @@
if db_file.library_file.content.sha256 != current_sha256:
condemned_files.add(db_file)
if condemned_files:
- for container, path, sha256 in archive_file_set.scheduleDeletion(
- condemned_files,
- timedelta(days=BY_HASH_STAY_OF_EXECUTION)):
+ archive_file_set.scheduleDeletion(
+ condemned_files, timedelta(days=BY_HASH_STAY_OF_EXECUTION))
+ for db_file in condemned_files:
self.log.debug(
"by-hash: Scheduled %s for %s in %s for deletion" % (
- sha256, path, container))
+ db_file.library_file.content.sha256, db_file.path,
+ db_file.container))
# Ensure that all the current index files are in by-hash and have
# corresponding database entries.
=== modified file 'lib/lp/archivepublisher/tests/test_publisher.py'
--- lib/lp/archivepublisher/tests/test_publisher.py 2018-04-05 11:32:50 +0000
+++ lib/lp/archivepublisher/tests/test_publisher.py 2018-04-21 11:23:22 +0000
@@ -21,8 +21,6 @@
from functools import partial
import gzip
import hashlib
-from itertools import product
-from operator import attrgetter
import os
import shutil
import stat
@@ -51,7 +49,6 @@
LessThan,
Matcher,
MatchesDict,
- MatchesListwise,
MatchesSetwise,
MatchesStructure,
Not,
@@ -2581,12 +2578,12 @@
publisher.D_writeReleaseFiles(False)
@classmethod
- def _makeScheduledDeletionDateMatcher(cls, condemned_at):
- if condemned_at is None:
+ def _makeScheduledDeletionDateMatcher(cls, superseded_at):
+ if superseded_at is None:
return Is(None)
else:
return Equals(
- condemned_at + timedelta(days=BY_HASH_STAY_OF_EXECUTION))
+ superseded_at + timedelta(days=BY_HASH_STAY_OF_EXECUTION))
def assertHasSuiteFiles(self, patterns, *properties):
def is_interesting(path):
@@ -2600,11 +2597,13 @@
self.ubuntutest.main_archive)
if is_interesting(archive_file.path)]
matchers = []
- for path, condemned_at in properties:
+ for path, created_at, superseded_at in properties:
matchers.append(MatchesStructure(
path=Equals('dists/breezy-autotest/%s' % path),
+ date_created=Equals(created_at),
+ date_superseded=Equals(superseded_at),
scheduled_deletion_date=self._makeScheduledDeletionDateMatcher(
- condemned_at)))
+ superseded_at)))
self.assertThat(files, MatchesSetwise(*matchers))
def test_disabled(self):
@@ -2754,7 +2753,8 @@
flush_database_caches()
self.assertHasSuiteFiles(
('Contents-*', 'Release'),
- ('Contents-i386', None), ('Release', None))
+ ('Contents-i386', self.times[0], None),
+ ('Release', self.times[0], None))
releases = [get_release_contents()]
self.assertThat(
suite_path('by-hash'),
@@ -2768,8 +2768,10 @@
flush_database_caches()
self.assertHasSuiteFiles(
('Contents-*', 'Release'),
- ('Contents-i386', None), ('Contents-hppa', None),
- ('Release', self.times[1]), ('Release', None))
+ ('Contents-i386', self.times[0], None),
+ ('Contents-hppa', self.times[1], None),
+ ('Release', self.times[0], self.times[1]),
+ ('Release', self.times[1], None))
releases.append(get_release_contents())
self.assertThat(
suite_path('by-hash'),
@@ -2782,9 +2784,11 @@
flush_database_caches()
self.assertHasSuiteFiles(
('Contents-*', 'Release'),
- ('Contents-i386', self.times[2]), ('Contents-hppa', None),
- ('Release', self.times[1]), ('Release', self.times[2]),
- ('Release', None))
+ ('Contents-i386', self.times[0], self.times[2]),
+ ('Contents-hppa', self.times[1], None),
+ ('Release', self.times[0], self.times[1]),
+ ('Release', self.times[1], self.times[2]),
+ ('Release', self.times[2], None))
releases.append(get_release_contents())
self.assertThat(
suite_path('by-hash'),
@@ -2796,9 +2800,12 @@
flush_database_caches()
self.assertHasSuiteFiles(
('Contents-*', 'Release'),
- ('Contents-i386', self.times[2]), ('Contents-hppa', None),
- ('Release', self.times[1]), ('Release', self.times[2]),
- ('Release', self.times[3]), ('Release', None))
+ ('Contents-i386', self.times[0], self.times[2]),
+ ('Contents-hppa', self.times[1], None),
+ ('Release', self.times[0], self.times[1]),
+ ('Release', self.times[1], self.times[2]),
+ ('Release', self.times[2], self.times[3]),
+ ('Release', self.times[3], None))
releases.append(get_release_contents())
self.assertThat(
suite_path('by-hash'),
@@ -2817,9 +2824,10 @@
flush_database_caches()
self.assertHasSuiteFiles(
('Contents-*', 'Release'),
- ('Contents-hppa', self.times[4]),
- ('Release', self.times[3]), ('Release', self.times[4]),
- ('Release', None))
+ ('Contents-hppa', self.times[1], self.times[4]),
+ ('Release', self.times[2], self.times[3]),
+ ('Release', self.times[3], self.times[4]),
+ ('Release', self.times[4], None))
releases.append(get_release_contents())
self.assertThat(
suite_path('by-hash'),
@@ -2836,7 +2844,8 @@
flush_database_caches()
self.assertHasSuiteFiles(
('Contents-*', 'Release'),
- ('Release', self.times[5]), ('Release', None))
+ ('Release', self.times[4], self.times[5]),
+ ('Release', self.times[5], None))
releases.append(get_release_contents())
self.assertThat(suite_path('by-hash'), ByHashHasContents(releases[4:]))
@@ -2863,9 +2872,13 @@
for name in ('Release', 'Sources'):
with open(suite_path('main', 'source', name), 'rb') as f:
main_contents.add(f.read())
+ self.assertHasSuiteFiles(
+ ('main/source/Sources',),
+ ('main/source/Sources', self.times[0], None))
# Add a source package so that Sources is non-empty.
pub_source = self.getPubSource(filecontent='Source: foo\n')
+ self.advanceTime(delta=timedelta(hours=1))
self.runSteps(publisher, step_a=True, step_c=True, step_d=True)
transaction.commit()
with open(suite_path('main', 'source', 'Sources'), 'rb') as f:
@@ -2874,28 +2887,42 @@
self.assertThat(
suite_path('main', 'source', 'by-hash'),
ByHashHasContents(main_contents))
-
- # Make the empty Sources file ready to prune.
- self.advanceTime(
- delta=timedelta(days=BY_HASH_STAY_OF_EXECUTION, hours=1))
+ self.assertHasSuiteFiles(
+ ('main/source/Sources',),
+ ('main/source/Sources', self.times[0], self.times[1]),
+ ('main/source/Sources', self.times[1], None))
# Delete the source package so that Sources is empty again. The
- # empty file is reprieved and the non-empty one is condemned.
+ # empty file is reprieved (by creating a new ArchiveFile referring
+ # to it) and the non-empty one is condemned.
pub_source.requestDeletion(self.ubuntutest.owner)
- self.runSteps(publisher, step_a=True, step_c=True, step_d=True)
- transaction.commit()
- self.assertThat(
- suite_path('main', 'source', 'by-hash'),
- ByHashHasContents(main_contents))
- archive_files = getUtility(IArchiveFileSet).getByArchive(
- self.ubuntutest.main_archive,
- path='dists/breezy-autotest/main/source/Sources')
- self.assertThat(
- sorted(archive_files, key=attrgetter('id')),
- MatchesListwise([
- MatchesStructure(scheduled_deletion_date=Is(None)),
- MatchesStructure(scheduled_deletion_date=Not(Is(None))),
- ]))
+ self.advanceTime(delta=timedelta(hours=1))
+ self.runSteps(publisher, step_a=True, step_c=True, step_d=True)
+ transaction.commit()
+ self.assertThat(
+ suite_path('main', 'source', 'by-hash'),
+ ByHashHasContents(main_contents))
+ self.assertHasSuiteFiles(
+ ('main/source/Sources',),
+ ('main/source/Sources', self.times[0], self.times[1]),
+ ('main/source/Sources', self.times[1], self.times[2]),
+ ('main/source/Sources', self.times[2], None))
+
+ # Make the first empty Sources file ready to prune. This doesn't
+ # change the set of files on disk, because there's still a newer
+ # reference to the empty file.
+ self.advanceTime(
+ absolute=self.times[1] + timedelta(
+ days=BY_HASH_STAY_OF_EXECUTION, minutes=30))
+ self.runSteps(publisher, step_a=True, step_c=True, step_d=True)
+ transaction.commit()
+ self.assertThat(
+ suite_path('main', 'source', 'by-hash'),
+ ByHashHasContents(main_contents))
+ self.assertHasSuiteFiles(
+ ('main/source/Sources',),
+ ('main/source/Sources', self.times[1], self.times[2]),
+ ('main/source/Sources', self.times[2], None))
def setUpPruneableSuite(self):
self.setUpMockTime()
@@ -2924,14 +2951,18 @@
# We have two condemned sets of index files and one uncondemned set.
# main/source/Release contains a small enough amount of information
# that it doesn't change.
- expected_suite_files = (
- list(product(
- ('main/source/Sources.gz', 'main/source/Sources.bz2',
- 'Release'),
- (self.times[1], self.times[2], None))) +
- [('main/source/Release', None)])
self.assertHasSuiteFiles(
- ('main/source/*', 'Release'), *expected_suite_files)
+ ('main/source/*', 'Release'),
+ ('main/source/Sources.gz', self.times[0], self.times[1]),
+ ('main/source/Sources.gz', self.times[1], self.times[2]),
+ ('main/source/Sources.gz', self.times[2], None),
+ ('main/source/Sources.bz2', self.times[0], self.times[1]),
+ ('main/source/Sources.bz2', self.times[1], self.times[2]),
+ ('main/source/Sources.bz2', self.times[2], None),
+ ('main/source/Release', self.times[0], None),
+ ('Release', self.times[0], self.times[1]),
+ ('Release', self.times[1], self.times[2]),
+ ('Release', self.times[2], None))
self.assertThat(suite_path('by-hash'), ByHashHasContents(top_contents))
self.assertThat(
suite_path('main', 'source', 'by-hash'),
@@ -2964,14 +2995,15 @@
self.assertEqual(set(), publisher.dirty_pockets)
# The condemned index files are removed, and no new Release file is
# generated.
- expected_suite_files = (
- list(product(
- ('main/source/Sources.gz', 'main/source/Sources.bz2'),
- (self.times[2], None))) +
- [('main/source/Release', None),
- ('Release', self.times[2]), ('Release', None)])
self.assertHasSuiteFiles(
- ('main/source/*', 'Release'), *expected_suite_files)
+ ('main/source/*', 'Release'),
+ ('main/source/Sources.gz', self.times[1], self.times[2]),
+ ('main/source/Sources.gz', self.times[2], None),
+ ('main/source/Sources.bz2', self.times[1], self.times[2]),
+ ('main/source/Sources.bz2', self.times[2], None),
+ ('main/source/Release', self.times[0], None),
+ ('Release', self.times[1], self.times[2]),
+ ('Release', self.times[2], None))
self.assertThat(suite_path('by-hash'), ByHashHasContents(top_contents))
self.assertThat(
suite_path('main', 'source', 'by-hash'),
@@ -2998,14 +3030,15 @@
self.assertEqual(release_mtime, os.stat(release_path).st_mtime)
# The condemned index files are removed, and no new Release file is
# generated.
- expected_suite_files = (
- list(product(
- ('main/source/Sources.gz', 'main/source/Sources.bz2'),
- (self.times[2], None))) +
- [('main/source/Release', None),
- ('Release', self.times[2]), ('Release', None)])
self.assertHasSuiteFiles(
- ('main/source/*', 'Release'), *expected_suite_files)
+ ('main/source/*', 'Release'),
+ ('main/source/Sources.gz', self.times[1], self.times[2]),
+ ('main/source/Sources.gz', self.times[2], None),
+ ('main/source/Sources.bz2', self.times[1], self.times[2]),
+ ('main/source/Sources.bz2', self.times[2], None),
+ ('main/source/Release', self.times[0], None),
+ ('Release', self.times[1], self.times[2]),
+ ('Release', self.times[2], None))
self.assertThat(suite_path('by-hash'), ByHashHasContents(top_contents))
self.assertThat(
suite_path('main', 'source', 'by-hash'),
=== modified file 'lib/lp/blueprints/doc/sprint-agenda.txt'
--- lib/lp/blueprints/doc/sprint-agenda.txt 2011-12-30 06:14:56 +0000
+++ lib/lp/blueprints/doc/sprint-agenda.txt 2018-04-21 11:23:22 +0000
@@ -42,8 +42,9 @@
It is possible to revise your choice, declining the spec. This should update
the date_decided.
+ >>> from storm.store import Store
>>> from lp.services.database.sqlbase import get_transaction_timestamp
- >>> transaction_timestamp = get_transaction_timestamp()
+ >>> transaction_timestamp = get_transaction_timestamp(Store.of(sl))
# Nobody is allowed to write directly to SprintSpecification.date_decided,
# so we need to remove its security proxy here.
=== modified file 'lib/lp/code/model/tests/test_codeimportjob.py'
--- lib/lp/code/model/tests/test_codeimportjob.py 2018-03-15 20:44:04 +0000
+++ lib/lp/code/model/tests/test_codeimportjob.py 2018-04-21 11:23:22 +0000
@@ -55,6 +55,8 @@
from lp.code.tests.helpers import GitHostingFixture
from lp.services.config import config
from lp.services.database.constants import UTC_NOW
+from lp.services.database.interfaces import IStore
+from lp.services.database.sqlbase import get_transaction_timestamp
from lp.services.librarian.interfaces import ILibraryFileAliasSet
from lp.services.librarian.interfaces.client import ILibrarianClient
from lp.services.macaroons.interfaces import IMacaroonIssuer
@@ -561,10 +563,10 @@
# This causes problems for the "UTC_NOW - interval / 2"
# expression below.
interval = code_import.effective_update_interval
- from lp.services.database.sqlbase import get_transaction_timestamp
+ store = IStore(CodeImportResult)
recent_result = CodeImportResult(
code_import=code_import, machine=machine, status=FAILURE,
- date_job_started=get_transaction_timestamp() - interval / 2)
+ date_job_started=get_transaction_timestamp(store) - interval / 2)
# When we create the job, its date_due should be set to the date_due
# of the job that was deleted when the CodeImport review status
# changed from REVIEWED. That is the date_job_started of the most
=== modified file 'lib/lp/registry/doc/announcement.txt'
--- lib/lp/registry/doc/announcement.txt 2015-01-29 12:33:01 +0000
+++ lib/lp/registry/doc/announcement.txt 2018-04-21 11:23:22 +0000
@@ -212,8 +212,10 @@
Announcements can be retracted at any time. Retracting an announcement
updates the date_last_modified and sets the announcement.active flag to False
+ >>> from storm.store import Store
>>> from lp.services.database.sqlbase import get_transaction_timestamp
- >>> transaction_timestamp = get_transaction_timestamp()
+ >>> transaction_timestamp = get_transaction_timestamp(
+ ... Store.of(apache_asia))
>>> print apache_asia.date_last_modified
None
=== modified file 'lib/lp/services/database/sqlbase.py'
--- lib/lp/services/database/sqlbase.py 2015-07-08 16:05:11 +0000
+++ lib/lp/services/database/sqlbase.py 2018-04-21 11:23:22 +0000
@@ -263,11 +263,9 @@
_get_sqlobject_store().invalidate()
-def get_transaction_timestamp():
- """Get the timestamp for the current transaction on the MAIN DEFAULT
- store. DEPRECATED - if needed it should become a method on the store.
- """
- timestamp = _get_sqlobject_store().execute(
+def get_transaction_timestamp(store):
+ """Get the timestamp for the current transaction on `store`."""
+ timestamp = store.execute(
"SELECT CURRENT_TIMESTAMP AT TIME ZONE 'UTC'").get_one()[0]
return timestamp.replace(tzinfo=pytz.timezone('UTC'))
=== modified file 'lib/lp/services/database/tests/test_bulk.py'
--- lib/lp/services/database/tests/test_bulk.py 2018-01-02 10:54:31 +0000
+++ lib/lp/services/database/tests/test_bulk.py 2018-04-21 11:23:22 +0000
@@ -343,4 +343,6 @@
[(bug, person, person,
SQL("CURRENT_TIMESTAMP AT TIME ZONE 'UTC'"),
BugNotificationLevel.LIFECYCLE)], get_objects=True)
- self.assertEqual(get_transaction_timestamp(), sub.date_created)
+ self.assertEqual(
+ get_transaction_timestamp(IStore(BugSubscription)),
+ sub.date_created)
=== modified file 'lib/lp/services/librarianserver/librariangc.py'
--- lib/lp/services/librarianserver/librariangc.py 2018-01-03 17:17:12 +0000
+++ lib/lp/services/librarianserver/librariangc.py 2018-04-21 11:23:22 +0000
@@ -17,6 +17,7 @@
from time import time
import iso8601
+import pytz
from swiftclient import client as swiftclient
from zope.interface import implementer
@@ -26,6 +27,7 @@
listReferences,
quoteIdentifier,
)
+from lp.services.database.sqlbase import get_transaction_timestamp
from lp.services.features import getFeatureFlag
from lp.services.librarianserver import swift
from lp.services.librarianserver.storage import (
@@ -67,7 +69,7 @@
def _utcnow():
# Wrapper that is replaced in the test suite.
- return datetime.utcnow()
+ return datetime.now(pytz.UTC)
def open_stream(content_id):
@@ -105,16 +107,14 @@
return hasher.hexdigest(), length
-def confirm_no_clock_skew(con):
+def confirm_no_clock_skew(store):
"""Raise an exception if there is significant clock skew between the
database and this machine.
It is theoretically possible to lose data if there is more than several
hours of skew.
"""
- cur = con.cursor()
- cur.execute("SELECT CURRENT_TIMESTAMP AT TIME ZONE 'UTC'")
- db_now = cur.fetchone()[0]
+ db_now = get_transaction_timestamp(store)
local_now = _utcnow()
five_minutes = timedelta(minutes=5)
@@ -813,8 +813,7 @@
and next_wanted_content_id == content_id)
if not file_wanted:
- mod_time = iso8601.parse_date(
- obj['last_modified']).replace(tzinfo=None)
+ mod_time = iso8601.parse_date(obj['last_modified'])
if mod_time > _utcnow() - timedelta(days=1):
log.debug3(
"File %d not removed - created too recently", content_id)
=== modified file 'lib/lp/services/librarianserver/tests/test_gc.py'
--- lib/lp/services/librarianserver/tests/test_gc.py 2018-02-13 17:46:48 +0000
+++ lib/lp/services/librarianserver/tests/test_gc.py 2018-04-21 11:23:22 +0000
@@ -23,6 +23,7 @@
import sys
import tempfile
+import pytz
from sqlobject import SQLObjectNotFound
from storm.store import Store
from swiftclient import client as swiftclient
@@ -88,8 +89,8 @@
# Make sure that every file the database knows about exists on disk.
# We manually remove them for tests that need to cope with missing
# library items.
- store = IMasterStore(LibraryFileContent)
- for content in store.find(LibraryFileContent):
+ self.store = IMasterStore(LibraryFileContent)
+ for content in self.store.find(LibraryFileContent):
path = librariangc.get_file_path(content.id)
if not os.path.exists(path):
if not os.path.exists(os.path.dirname(path)):
@@ -451,7 +452,7 @@
return org_time() + 24 * 60 * 60 + 1
def tomorrow_utcnow():
- return datetime.utcnow() + timedelta(days=1, seconds=1)
+ return datetime.now(pytz.UTC) + timedelta(days=1, seconds=1)
try:
librariangc.time = tomorrow_time
@@ -584,13 +585,13 @@
def test_confirm_no_clock_skew(self):
# There should not be any clock skew when running the test suite.
- librariangc.confirm_no_clock_skew(self.con)
+ librariangc.confirm_no_clock_skew(self.store)
# To test this function raises an excption when it should,
# fool the garbage collector into thinking it is tomorrow.
with self.librariangc_thinking_it_is_tomorrow():
self.assertRaises(
- Exception, librariangc.confirm_no_clock_skew, (self.con,)
+ Exception, librariangc.confirm_no_clock_skew, (self.store,)
)
=== modified file 'lib/lp/services/xref/tests/test_model.py'
--- lib/lp/services/xref/tests/test_model.py 2015-11-26 17:11:09 +0000
+++ lib/lp/services/xref/tests/test_model.py 2018-04-21 11:23:22 +0000
@@ -16,7 +16,10 @@
from zope.component import getUtility
from lp.services.database.interfaces import IStore
-from lp.services.database.sqlbase import flush_database_caches
+from lp.services.database.sqlbase import (
+ flush_database_caches,
+ get_transaction_timestamp,
+ )
from lp.services.xref.interfaces import IXRefSet
from lp.services.xref.model import XRef
from lp.testing import (
@@ -35,9 +38,7 @@
# date_created defaults to now, but can be overridden.
old = datetime.datetime.strptime('2005-01-01', '%Y-%m-%d').replace(
tzinfo=pytz.UTC)
- now = IStore(XRef).execute(
- "SELECT CURRENT_TIMESTAMP AT TIME ZONE 'UTC'"
- ).get_one()[0].replace(tzinfo=pytz.UTC)
+ now = get_transaction_timestamp(IStore(XRef))
getUtility(IXRefSet).create({
('a', '1'): {('b', 'foo'): {}},
('a', '2'): {('b', 'bar'): {'date_created': old}}})
@@ -68,9 +69,7 @@
def test_findFrom(self):
creator = self.factory.makePerson()
- now = IStore(XRef).execute(
- "SELECT CURRENT_TIMESTAMP AT TIME ZONE 'UTC'"
- ).get_one()[0].replace(tzinfo=pytz.UTC)
+ now = get_transaction_timestamp(IStore(XRef))
getUtility(IXRefSet).create({
('a', 'bar'): {
('b', 'foo'): {'creator': creator, 'metadata': {'test': 1}}},
=== modified file 'lib/lp/soyuz/doc/publishing.txt'
--- lib/lp/soyuz/doc/publishing.txt 2017-06-02 21:46:50 +0000
+++ lib/lp/soyuz/doc/publishing.txt 2018-04-21 11:23:22 +0000
@@ -347,8 +347,9 @@
Inspecting the modified record shows it's ready for domination:
+ >>> from storm.store import Store
>>> from lp.services.database.sqlbase import get_transaction_timestamp
- >>> transaction_timestamp = get_transaction_timestamp()
+ >>> transaction_timestamp = get_transaction_timestamp(Store.of(spph))
>>> modified_spph = spph
>>> modified_spph.status
=== modified file 'lib/lp/soyuz/interfaces/archivefile.py'
--- lib/lp/soyuz/interfaces/archivefile.py 2016-04-04 10:06:33 +0000
+++ lib/lp/soyuz/interfaces/archivefile.py 2018-04-21 11:23:22 +0000
@@ -1,4 +1,4 @@
-# Copyright 2016 Canonical Ltd. This software is licensed under the
+# Copyright 2016-2018 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""Interface for a file in an archive."""
@@ -49,6 +49,16 @@
title=_("The index file in the librarian."),
schema=ILibraryFileAlias, required=True, readonly=True)
+ date_created = Datetime(
+ title=_("The date when this file was created."),
+ # XXX cjwatson 2018-04-17: Should be required=True, but we need to
+ # backfill existing rows first.
+ required=False, readonly=False)
+
+ date_superseded = Datetime(
+ title=_("The date when this file was scheduled for future deletion."),
+ required=False, readonly=False)
+
scheduled_deletion_date = Datetime(
title=_("The date when this file should stop being published."),
required=False, readonly=False)
@@ -79,15 +89,12 @@
:param content_type: The MIME type of the file.
"""
- def getByArchive(archive, container=None, path=None, only_condemned=False,
- eager_load=False):
+ def getByArchive(archive, container=None, path=None, eager_load=False):
"""Get files in an archive.
:param archive: Return files in this `IArchive`.
:param container: Return only files with this container.
:param path: Return only files with this path.
- :param only_condemned: If True, return only files with a
- scheduled_deletion_date set.
:param eager_load: If True, preload related `LibraryFileAlias` and
`LibraryFileContent` rows.
:return: An iterable of matched files.
@@ -99,8 +106,6 @@
:param archive_files: The `IArchiveFile`s to schedule for deletion.
:param stay_of_execution: A `timedelta`; schedule files for deletion
this amount of time in the future.
- :return: An iterable of (container, path, sha256) for files that
- were scheduled for deletion.
"""
def unscheduleDeletion(archive_files):
@@ -110,8 +115,6 @@
identical to a version that was previously condemned.
:param archive_files: The `IArchiveFile`s to unschedule for deletion.
- :return: An iterable of (container, path, sha256) for files that
- were unscheduled for deletion.
"""
def getContainersToReap(archive, container_prefix=None):
=== modified file 'lib/lp/soyuz/model/archivefile.py'
--- lib/lp/soyuz/model/archivefile.py 2018-01-26 10:11:33 +0000
+++ lib/lp/soyuz/model/archivefile.py 2018-04-21 11:23:22 +0000
@@ -14,7 +14,6 @@
import os.path
import pytz
-from storm.databases.postgres import Returning
from storm.locals import (
And,
DateTime,
@@ -26,7 +25,10 @@
from zope.component import getUtility
from zope.interface import implementer
-from lp.services.database.bulk import load_related
+from lp.services.database.bulk import (
+ create,
+ load_related,
+ )
from lp.services.database.constants import UTC_NOW
from lp.services.database.decoratedresultset import DecoratedResultSet
from lp.services.database.interfaces import (
@@ -34,7 +36,6 @@
IStore,
)
from lp.services.database.sqlbase import convert_storm_clause_to_string
-from lp.services.database.stormexpr import BulkUpdate
from lp.services.librarian.interfaces import ILibraryFileAliasSet
from lp.services.librarian.model import (
LibraryFileAlias,
@@ -46,6 +47,15 @@
)
+def _now():
+ """Get the current transaction timestamp.
+
+ Tests can override this with a Storm expression or a `datetime` to
+ simulate time changes.
+ """
+ return UTC_NOW
+
+
@implementer(IArchiveFile)
class ArchiveFile(Storm):
"""See `IArchiveFile`."""
@@ -64,6 +74,14 @@
library_file_id = Int(name='library_file', allow_none=False)
library_file = Reference(library_file_id, 'LibraryFileAlias.id')
+ date_created = DateTime(
+ # XXX cjwatson 2018-04-17: Should be allow_none=False, but we need
+ # to backfill existing rows first.
+ name='date_created', tzinfo=pytz.UTC, allow_none=True)
+
+ date_superseded = DateTime(
+ name='date_superseded', tzinfo=pytz.UTC, allow_none=True)
+
scheduled_deletion_date = DateTime(
name='scheduled_deletion_date', tzinfo=pytz.UTC, allow_none=True)
@@ -74,18 +92,11 @@
self.container = container
self.path = path
self.library_file = library_file
+ self.date_created = _now()
+ self.date_superseded = None
self.scheduled_deletion_date = None
-def _now():
- """Get the current transaction timestamp.
-
- Tests can override this with a Storm expression or a `datetime` to
- simulate time changes.
- """
- return UTC_NOW
-
-
@implementer(IArchiveFileSet)
class ArchiveFileSet:
"""See `IArchiveFileSet`."""
@@ -106,8 +117,7 @@
return cls.new(archive, container, path, library_file)
@staticmethod
- def getByArchive(archive, container=None, path=None, only_condemned=False,
- eager_load=False):
+ def getByArchive(archive, container=None, path=None, eager_load=False):
"""See `IArchiveFileSet`."""
clauses = [ArchiveFile.archive == archive]
# XXX cjwatson 2016-03-15: We'll need some more sophisticated way to
@@ -116,8 +126,6 @@
clauses.append(ArchiveFile.container == container)
if path is not None:
clauses.append(ArchiveFile.path == path)
- if only_condemned:
- clauses.append(ArchiveFile.scheduled_deletion_date != None)
archive_files = IStore(ArchiveFile).find(ArchiveFile, *clauses)
def eager_load(rows):
@@ -132,41 +140,23 @@
@staticmethod
def scheduleDeletion(archive_files, stay_of_execution):
"""See `IArchiveFileSet`."""
- clauses = [
- ArchiveFile.id.is_in(
- set(archive_file.id for archive_file in archive_files)),
- ArchiveFile.library_file == LibraryFileAlias.id,
- LibraryFileAlias.content == LibraryFileContent.id,
- ]
- new_date = _now() + stay_of_execution
- return_columns = [
- ArchiveFile.container, ArchiveFile.path, LibraryFileContent.sha256]
- return list(IMasterStore(ArchiveFile).execute(Returning(
- BulkUpdate(
- {ArchiveFile.scheduled_deletion_date: new_date},
- table=ArchiveFile,
- values=[LibraryFileAlias, LibraryFileContent],
- where=And(*clauses)),
- columns=return_columns)))
+ rows = IMasterStore(ArchiveFile).find(
+ ArchiveFile, ArchiveFile.id.is_in(
+ set(archive_file.id for archive_file in archive_files)))
+ rows.set(
+ date_superseded=_now(),
+ scheduled_deletion_date=_now() + stay_of_execution)
@staticmethod
def unscheduleDeletion(archive_files):
"""See `IArchiveFileSet`."""
- clauses = [
- ArchiveFile.id.is_in(
- set(archive_file.id for archive_file in archive_files)),
- ArchiveFile.library_file == LibraryFileAlias.id,
- LibraryFileAlias.content == LibraryFileContent.id,
- ]
- return_columns = [
- ArchiveFile.container, ArchiveFile.path, LibraryFileContent.sha256]
- return list(IMasterStore(ArchiveFile).execute(Returning(
- BulkUpdate(
- {ArchiveFile.scheduled_deletion_date: None},
- table=ArchiveFile,
- values=[LibraryFileAlias, LibraryFileContent],
- where=And(*clauses)),
- columns=return_columns)))
+ create(
+ (ArchiveFile.archive, ArchiveFile.container, ArchiveFile.path,
+ ArchiveFile.library_file, ArchiveFile.date_created,
+ ArchiveFile.date_superseded, ArchiveFile.scheduled_deletion_date),
+ [(archive_file.archive, archive_file.container, archive_file.path,
+ archive_file.library_file, _now(), None, None)
+ for archive_file in archive_files])
@staticmethod
def getContainersToReap(archive, container_prefix=None):
=== modified file 'lib/lp/soyuz/tests/test_archivefile.py'
--- lib/lp/soyuz/tests/test_archivefile.py 2016-04-04 10:06:33 +0000
+++ lib/lp/soyuz/tests/test_archivefile.py 2018-04-21 11:23:22 +0000
@@ -1,4 +1,4 @@
-# Copyright 2016 Canonical Ltd. This software is licensed under the
+# Copyright 2016-2018 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""ArchiveFile tests."""
@@ -7,25 +7,39 @@
__metaclass__ = type
-from datetime import (
- datetime,
- timedelta,
- )
+from datetime import timedelta
import os
-import pytz
-from testtools.matchers import LessThan
+from storm.store import Store
+from testtools.matchers import (
+ AfterPreprocessing,
+ Equals,
+ Is,
+ MatchesSetwise,
+ MatchesStructure,
+ )
import transaction
from zope.component import getUtility
from zope.security.proxy import removeSecurityProxy
-from lp.services.database.sqlbase import flush_database_caches
+from lp.services.database.sqlbase import (
+ flush_database_caches,
+ get_transaction_timestamp,
+ )
from lp.services.osutils import open_for_writing
from lp.soyuz.interfaces.archivefile import IArchiveFileSet
from lp.testing import TestCaseWithFactory
from lp.testing.layers import LaunchpadZopelessLayer
+def read_library_file(library_file):
+ library_file.open()
+ try:
+ return library_file.read()
+ finally:
+ library_file.close()
+
+
class TestArchiveFile(TestCaseWithFactory):
layer = LaunchpadZopelessLayer
@@ -35,11 +49,15 @@
library_file = self.factory.makeLibraryFileAlias()
archive_file = getUtility(IArchiveFileSet).new(
archive, "foo", "dists/foo", library_file)
- self.assertEqual(archive, archive_file.archive)
- self.assertEqual("foo", archive_file.container)
- self.assertEqual("dists/foo", archive_file.path)
- self.assertEqual(library_file, archive_file.library_file)
- self.assertIsNone(archive_file.scheduled_deletion_date)
+ self.assertThat(archive_file, MatchesStructure(
+ archive=Equals(archive),
+ container=Equals("foo"),
+ path=Equals("dists/foo"),
+ library_file=Equals(library_file),
+ date_created=Equals(
+ get_transaction_timestamp(Store.of(archive_file))),
+ date_superseded=Is(None),
+ scheduled_deletion_date=Is(None)))
def test_newFromFile(self):
root = self.makeTemporaryDirectory()
@@ -49,24 +67,24 @@
with open(os.path.join(root, "dists/foo"), "rb") as f:
archive_file = getUtility(IArchiveFileSet).newFromFile(
archive, "foo", "dists/foo", f, 4, "text/plain")
+ now = get_transaction_timestamp(Store.of(archive_file))
transaction.commit()
- self.assertEqual(archive, archive_file.archive)
- self.assertEqual("foo", archive_file.container)
- self.assertEqual("dists/foo", archive_file.path)
- archive_file.library_file.open()
- try:
- self.assertEqual("abc\n", archive_file.library_file.read())
- finally:
- archive_file.library_file.close()
- self.assertIsNone(archive_file.scheduled_deletion_date)
+ self.assertThat(archive_file, MatchesStructure(
+ archive=Equals(archive),
+ container=Equals("foo"),
+ path=Equals("dists/foo"),
+ library_file=AfterPreprocessing(
+ read_library_file, Equals("abc\n")),
+ date_created=Equals(now),
+ date_superseded=Is(None),
+ scheduled_deletion_date=Is(None)))
def test_getByArchive(self):
archives = [self.factory.makeArchive(), self.factory.makeArchive()]
archive_files = []
- now = datetime.now(pytz.UTC)
for archive in archives:
archive_files.append(self.factory.makeArchiveFile(
- archive=archive, scheduled_deletion_date=now))
+ archive=archive))
archive_files.append(self.factory.makeArchiveFile(
archive=archive, container="foo"))
archive_file_set = getUtility(IArchiveFileSet)
@@ -84,9 +102,6 @@
self.assertContentEqual(
[], archive_file_set.getByArchive(archives[0], path="other"))
self.assertContentEqual(
- [archive_files[0]],
- archive_file_set.getByArchive(archives[0], only_condemned=True))
- self.assertContentEqual(
archive_files[2:], archive_file_set.getByArchive(archives[1]))
self.assertContentEqual(
[archive_files[3]],
@@ -99,46 +114,50 @@
archives[1], path=archive_files[3].path))
self.assertContentEqual(
[], archive_file_set.getByArchive(archives[1], path="other"))
- self.assertContentEqual(
- [archive_files[2]],
- archive_file_set.getByArchive(archives[1], only_condemned=True))
def test_scheduleDeletion(self):
archive_files = [self.factory.makeArchiveFile() for _ in range(3)]
- expected_rows = [
- (archive_file.container, archive_file.path,
- archive_file.library_file.content.sha256)
- for archive_file in archive_files[:2]]
- rows = getUtility(IArchiveFileSet).scheduleDeletion(
+ getUtility(IArchiveFileSet).scheduleDeletion(
archive_files[:2], timedelta(days=1))
- self.assertContentEqual(expected_rows, rows)
flush_database_caches()
- tomorrow = datetime.now(pytz.UTC) + timedelta(days=1)
- # Allow a bit of timing slack for slow tests.
- self.assertThat(
- tomorrow - archive_files[0].scheduled_deletion_date,
- LessThan(timedelta(minutes=5)))
- self.assertThat(
- tomorrow - archive_files[1].scheduled_deletion_date,
- LessThan(timedelta(minutes=5)))
+ tomorrow = (
+ get_transaction_timestamp(Store.of(archive_files[0])) +
+ timedelta(days=1))
+ self.assertEqual(tomorrow, archive_files[0].scheduled_deletion_date)
+ self.assertEqual(tomorrow, archive_files[1].scheduled_deletion_date)
self.assertIsNone(archive_files[2].scheduled_deletion_date)
def test_unscheduleDeletion(self):
archive_files = [self.factory.makeArchiveFile() for _ in range(3)]
- now = datetime.now(pytz.UTC)
+ now = get_transaction_timestamp(Store.of(archive_files[0]))
for archive_file in archive_files:
removeSecurityProxy(archive_file).scheduled_deletion_date = now
- expected_rows = [
- (archive_file.container, archive_file.path,
- archive_file.library_file.content.sha256)
- for archive_file in archive_files[:2]]
- rows = getUtility(IArchiveFileSet).unscheduleDeletion(
- archive_files[:2])
- self.assertContentEqual(expected_rows, rows)
+ archive_file_set = getUtility(IArchiveFileSet)
+ archive_file_set.unscheduleDeletion(archive_files[:2])
flush_database_caches()
- self.assertIsNone(archive_files[0].scheduled_deletion_date)
- self.assertIsNone(archive_files[1].scheduled_deletion_date)
- self.assertIsNotNone(archive_files[2].scheduled_deletion_date)
+ self.assertThat(
+ archive_file_set.getByArchive(
+ archive_files[0].archive,
+ container=archive_files[0].container,
+ path=archive_files[0].path),
+ MatchesSetwise(
+ MatchesStructure(scheduled_deletion_date=Equals(now)),
+ MatchesStructure(scheduled_deletion_date=Is(None))))
+ self.assertThat(
+ archive_file_set.getByArchive(
+ archive_files[1].archive,
+ container=archive_files[1].container,
+ path=archive_files[1].path),
+ MatchesSetwise(
+ MatchesStructure(scheduled_deletion_date=Equals(now)),
+ MatchesStructure(scheduled_deletion_date=Is(None))))
+ self.assertThat(
+ archive_file_set.getByArchive(
+ archive_files[2].archive,
+ container=archive_files[2].container,
+ path=archive_files[2].path),
+ MatchesSetwise(
+ MatchesStructure(scheduled_deletion_date=Equals(now))))
def test_getContainersToReap(self):
archive = self.factory.makeArchive()
@@ -150,7 +169,7 @@
other_archive = self.factory.makeArchive()
archive_files.append(self.factory.makeArchiveFile(
archive=other_archive, container="baz"))
- now = datetime.now(pytz.UTC)
+ now = get_transaction_timestamp(Store.of(archive_files[0]))
removeSecurityProxy(archive_files[0]).scheduled_deletion_date = (
now - timedelta(days=1))
removeSecurityProxy(archive_files[1]).scheduled_deletion_date = (
@@ -183,7 +202,7 @@
other_archive = self.factory.makeArchive()
archive_files.append(
self.factory.makeArchiveFile(archive=other_archive))
- now = datetime.now(pytz.UTC)
+ now = get_transaction_timestamp(Store.of(archive_files[0]))
removeSecurityProxy(archive_files[0]).scheduled_deletion_date = (
now - timedelta(days=1))
removeSecurityProxy(archive_files[1]).scheduled_deletion_date = (
References