← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] lp:~cjwatson/launchpad/publishing-timestamps into lp:launchpad

 

Colin Watson has proposed merging lp:~cjwatson/launchpad/publishing-timestamps into lp:launchpad.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  Bug #1033581 in Launchpad itself: "Publisher should set modification times on Releases et al"
  https://bugs.launchpad.net/launchpad/+bug/1033581

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/publishing-timestamps/+merge/120148

== Summary ==

Slewed timestamps on archive index files for a given suite make it difficult for IS to set sensible Expires headers.  See LaMont's detailed rationale in bug 1033581.  This is currently being worked around, rather dodgily, on syncproxy.

== Proposed fix ==

We already collect most of the filenames in question in order to generate Release files, so it's a relatively simple matter to make sure all those files plus Release and Release.gpg have matching timestamps for any given suite.

== Implementation details ==

I cleaned up a pile of cruft, mostly unnecessary circular-import avoidance, to offset the lines of code added by this change.

== Tests ==

bin/test -vvct lp.archivepublisher

I lacked the fortitude to write tests for Publisher._writeSuite, which has an XXX comment from 2006 about the lack of tests.  I have at least managed to write tests for the new _syncTimestamps method, though.

== Demo and Q/A ==

Upload something to dogfood and do a full publisher run.  Check that all the index files for that suite have matching timestamps.
-- 
https://code.launchpad.net/~cjwatson/launchpad/publishing-timestamps/+merge/120148
Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/publishing-timestamps into lp:launchpad.
=== modified file 'lib/lp/archivepublisher/deathrow.py'
--- lib/lp/archivepublisher/deathrow.py	2012-06-29 08:40:05 +0000
+++ lib/lp/archivepublisher/deathrow.py	2012-08-17 11:40:27 +0000
@@ -24,6 +24,10 @@
     MissingSymlinkInPool,
     NotInPool,
     )
+from lp.soyuz.model.publishing import (
+    BinaryPackagePublishingHistory,
+    SourcePackagePublishingHistory,
+    )
 
 
 def getDeathRow(archive, log, pool_root_override):
@@ -104,10 +108,6 @@
 
         Both sources and binaries are lists.
         """
-        # Avoid circular imports.
-        from lp.soyuz.model.publishing import (
-            BinaryPackagePublishingHistory, SourcePackagePublishingHistory)
-
         sources = SourcePackagePublishingHistory.select("""
             SourcePackagePublishingHistory.archive = %s AND
             SourcePackagePublishingHistory.scheduleddeletiondate < %s AND
@@ -208,12 +208,6 @@
         this will result in the files being removed if they're not otherwise
         in use.
         """
-        # Avoid circular imports.
-        from lp.soyuz.model.publishing import (
-            BinaryPackagePublishingHistory,
-            SourcePackagePublishingHistory,
-            )
-
         bytes = 0
         condemned_files = set()
         condemned_records = set()

=== modified file 'lib/lp/archivepublisher/domination.py'
--- lib/lp/archivepublisher/domination.py	2012-01-03 05:05:39 +0000
+++ lib/lp/archivepublisher/domination.py	2012-08-17 11:40:27 +0000
@@ -86,8 +86,13 @@
     PackagePublishingStatus,
     )
 from lp.soyuz.interfaces.publishing import inactive_publishing_status
+from lp.soyuz.model.binarypackagebuild import BinaryPackageBuild
 from lp.soyuz.model.binarypackagename import BinaryPackageName
 from lp.soyuz.model.binarypackagerelease import BinaryPackageRelease
+from lp.soyuz.model.publishing import (
+    BinaryPackagePublishingHistory,
+    SourcePackagePublishingHistory,
+    )
 from lp.soyuz.model.sourcepackagerelease import SourcePackageRelease
 
 # Days before a package will be removed from disk.
@@ -100,9 +105,6 @@
 
 def join_spph_spn():
     """Join condition: SourcePackagePublishingHistory/SourcePackageName."""
-    # Avoid circular imports.
-    from lp.soyuz.model.publishing import SourcePackagePublishingHistory
-
     SPPH = SourcePackagePublishingHistory
     SPN = SourcePackageName
 
@@ -112,9 +114,6 @@
 def join_spph_spr():
     """Join condition: SourcePackageRelease/SourcePackagePublishingHistory.
     """
-    # Avoid circular imports.
-    from lp.soyuz.model.publishing import SourcePackagePublishingHistory
-
     SPPH = SourcePackagePublishingHistory
     SPR = SourcePackageRelease
 
@@ -325,9 +324,6 @@
         has active arch-specific publications.
     :return: A list of live versions.
     """
-    # Avoid circular imports
-    from lp.soyuz.model.binarypackagebuild import BinaryPackageBuild
-
     sorted_pubs = list(sorted_pubs)
     latest = sorted_pubs.pop(0)
     is_arch_specific = attrgetter('architecture_specific')
@@ -499,12 +495,6 @@
         'scheduled deletion date' of now plus the defined 'stay of execution'
         time provided in the configuration parameter.
         """
-        # Avoid circular imports.
-        from lp.soyuz.model.publishing import (
-            BinaryPackagePublishingHistory,
-            SourcePackagePublishingHistory,
-            )
-
         self.logger.debug("Beginning superseded processing...")
 
         for pub_record in binary_records:
@@ -572,9 +562,6 @@
         publication is always kept published.  It will ignore publications
         that have no other publications competing for the same binary package.
         """
-        # Avoid circular imports.
-        from lp.soyuz.model.publishing import BinaryPackagePublishingHistory
-
         BPPH = BinaryPackagePublishingHistory
         BPR = BinaryPackageRelease
 
@@ -666,9 +653,6 @@
 
     def _composeActiveSourcePubsCondition(self, distroseries, pocket):
         """Compose ORM condition for restricting relevant source pubs."""
-        # Avoid circular imports.
-        from lp.soyuz.model.publishing import SourcePackagePublishingHistory
-
         SPPH = SourcePackagePublishingHistory
 
         return And(
@@ -689,9 +673,6 @@
         publications that have no other publications competing for the same
         binary package.  There'd be nothing to do for those cases.
         """
-        # Avoid circular imports.
-        from lp.soyuz.model.publishing import SourcePackagePublishingHistory
-
         SPPH = SourcePackagePublishingHistory
         SPR = SourcePackageRelease
 
@@ -749,9 +730,6 @@
         Returns an iterable of tuples: (name of source package, number of
         publications in Published state).
         """
-        # Avoid circular imports.
-        from lp.soyuz.model.publishing import SourcePackagePublishingHistory
-
         looking_for = (
             SourcePackageName.name,
             Count(SourcePackagePublishingHistory.id),
@@ -765,9 +743,6 @@
 
     def findPublishedSPPHs(self, distroseries, pocket, package_name):
         """Find currently published source publications for given package."""
-        # Avoid circular imports.
-        from lp.soyuz.model.publishing import SourcePackagePublishingHistory
-
         SPPH = SourcePackagePublishingHistory
         SPR = SourcePackageRelease
 
@@ -806,12 +781,6 @@
 
     def judge(self, distroseries, pocket):
         """Judge superseded sources and binaries."""
-        # Avoid circular imports.
-        from lp.soyuz.model.publishing import (
-             BinaryPackagePublishingHistory,
-             SourcePackagePublishingHistory,
-             )
-
         sources = SourcePackagePublishingHistory.select("""
             sourcepackagepublishinghistory.distroseries = %s AND
             sourcepackagepublishinghistory.archive = %s AND

=== modified file 'lib/lp/archivepublisher/model/ftparchive.py'
--- lib/lp/archivepublisher/model/ftparchive.py	2012-06-22 15:07:50 +0000
+++ lib/lp/archivepublisher/model/ftparchive.py	2012-08-17 11:40:27 +0000
@@ -29,11 +29,16 @@
     )
 from lp.soyuz.enums import PackagePublishingStatus
 from lp.soyuz.model.binarypackagebuild import BinaryPackageBuild
+from lp.soyuz.model.binarypackagename import BinaryPackageName
 from lp.soyuz.model.binarypackagerelease import BinaryPackageRelease
 from lp.soyuz.model.component import Component
 from lp.soyuz.model.distroarchseries import DistroArchSeries
 from lp.soyuz.model.files import BinaryPackageFile
-from lp.soyuz.model.publishing import BinaryPackagePublishingHistory
+from lp.soyuz.model.publishing import (
+    BinaryPackagePublishingHistory,
+    SourcePackageFilePublishing,
+    SourcePackagePublishingHistory,
+    )
 from lp.soyuz.model.section import Section
 from lp.soyuz.model.sourcepackagerelease import SourcePackageRelease
 
@@ -243,9 +248,6 @@
         :return: a `DecoratedResultSet` with the source override information
             tuples
         """
-        # Avoid cicular imports.
-        from lp.soyuz.model.publishing import SourcePackagePublishingHistory
-
         store = getUtility(IStoreSelector).get(MAIN_STORE, DEFAULT_FLAVOR)
         origins = (
             SourcePackagePublishingHistory,
@@ -291,9 +293,6 @@
         :return: a `DecoratedResultSet` with the binary override information
             tuples
         """
-        # Avoid cicular imports.
-        from lp.soyuz.model.binarypackagename import BinaryPackageName
-
         store = getUtility(IStoreSelector).get(MAIN_STORE, DEFAULT_FLAVOR)
         origins = (
             BinaryPackagePublishingHistory,
@@ -524,10 +523,6 @@
         :return: a `DecoratedResultSet` with the source files information
             tuples.
         """
-
-        # Avoid circular imports.
-        from lp.soyuz.model.publishing import SourcePackageFilePublishing
-
         store = getUtility(IStoreSelector).get(MAIN_STORE, DEFAULT_FLAVOR)
         result_set = store.using(SourcePackageFilePublishing).find(
             (SourcePackageFilePublishing.sourcepackagename,

=== modified file 'lib/lp/archivepublisher/publishing.py'
--- lib/lp/archivepublisher/publishing.py	2012-06-29 08:40:05 +0000
+++ lib/lp/archivepublisher/publishing.py	2012-08-17 11:40:27 +0000
@@ -49,6 +49,10 @@
     BinaryPackageFormat,
     PackagePublishingStatus,
     )
+from lp.soyuz.model.publishing import (
+    BinaryPackagePublishingHistory,
+    SourcePackagePublishingHistory,
+    )
 
 # Use this as the lock file name for all scripts that may manipulate
 # archives in the filesystem.  In a Launchpad(Cron)Script, set
@@ -159,8 +163,7 @@
         return fixed_field_lengths
 
     def _get_size_field_length(self, key):
-        lengths = [len(str(item['size'])) for item in self[key]]
-        return max(lengths)
+        return max(len(str(item['size'])) for item in self[key])
 
 
 class Publisher(object):
@@ -281,9 +284,6 @@
         OBSOLETE), scheduledeletiondate NULL and dateremoved NULL as
         dirty, to ensure that they are processed in death row.
         """
-        from lp.soyuz.model.publishing import (
-            SourcePackagePublishingHistory, BinaryPackagePublishingHistory)
-
         self.log.debug("* Step A2: Mark pockets with deletions as dirty")
 
         # Query part that is common to both queries below.
@@ -520,6 +520,14 @@
         with open(os.path.join(location, "Release"), "w") as release_file:
             release_data.dump(release_file, "utf-8")
 
+    def _syncTimestamps(self, suite, all_files):
+        """Make sure the timestamps on all files in a suite match."""
+        location = os.path.join(self._config.distsroot, suite)
+        paths = [os.path.join(location, path) for path in all_files]
+        latest_timestamp = max(os.stat(path).st_mtime for path in paths)
+        for path in paths:
+            os.utime(path, (latest_timestamp, latest_timestamp))
+
     def _writeSuite(self, distroseries, pocket):
         """Write out the Release files for the provided suite."""
         # XXX: kiko 2006-08-24: Untested method.
@@ -564,8 +572,7 @@
         release_file["Codename"] = distroseries.name
         release_file["Date"] = datetime.utcnow().strftime(
             "%a, %d %b %Y %k:%M:%S UTC")
-        release_file["Architectures"] = " ".join(
-            sorted(list(all_architectures)))
+        release_file["Architectures"] = " ".join(sorted(all_architectures))
         release_file["Components"] = " ".join(
             reorder_components(all_components))
         release_file["Description"] = drsummary
@@ -574,7 +581,7 @@
             release_file["NotAutomatic"] = "yes"
             release_file["ButAutomaticUpgrades"] = "yes"
 
-        for filename in sorted(list(all_files), key=os.path.dirname):
+        for filename in sorted(all_files, key=os.path.dirname):
             entry = self._readIndexFileContents(suite, filename)
             if entry is None:
                 continue
@@ -592,6 +599,7 @@
                 "size": len(entry)})
 
         self._writeReleaseFile(suite, release_file)
+        all_files.add("Release")
 
         # Skip signature if the archive signing key is undefined.
         if self.archive.signing_key is None:
@@ -601,6 +609,11 @@
         # Sign the repository.
         archive_signer = IArchiveSigningKey(self.archive)
         archive_signer.signRepository(suite)
+        all_files.add("Release.gpg")
+
+        # Make sure all the timestamps match, to make it easier to insert
+        # caching headers on mirrors.
+        self._syncTimestamps(suite, all_files)
 
     def _writeSuiteArchOrSource(self, distroseries, pocket, component,
                                 file_stub, arch_name, arch_path,
@@ -627,12 +640,9 @@
         release_file["Label"] = self._getLabel()
         release_file["Architecture"] = arch_name
 
-        f = open(os.path.join(self._config.distsroot, suite,
-                              component, arch_path, "Release"), "w")
-        try:
+        with open(os.path.join(self._config.distsroot, suite,
+                               component, arch_path, "Release"), "w") as f:
             release_file.dump(f, "utf-8")
-        finally:
-            f.close()
 
     def _writeSuiteSource(self, distroseries, pocket, component,
                           all_series_files):
@@ -718,11 +728,8 @@
             self.log.debug("Failed to find " + full_name)
             return None
 
-        in_file = open(full_name, 'r')
-        try:
+        with open(full_name, 'r') as in_file:
             return in_file.read()
-        finally:
-            in_file.close()
 
     def deleteArchive(self):
         """Delete the archive.

=== modified file 'lib/lp/archivepublisher/scripts/generate_ppa_htaccess.py'
--- lib/lp/archivepublisher/scripts/generate_ppa_htaccess.py	2012-05-31 11:55:15 +0000
+++ lib/lp/archivepublisher/scripts/generate_ppa_htaccess.py	2012-08-17 11:40:27 +0000
@@ -37,6 +37,7 @@
     ArchiveStatus,
     ArchiveSubscriberStatus,
     )
+from lp.soyuz.model.archive import Archive
 from lp.soyuz.model.archiveauthtoken import ArchiveAuthToken
 from lp.soyuz.model.archivesubscriber import ArchiveSubscriber
 
@@ -271,8 +272,6 @@
 
     def getNewPrivatePPAs(self, since=None):
         """Return the recently created private PPAs."""
-        # Avoid circular import.
-        from lp.soyuz.model.archive import Archive
         store = IStore(Archive)
         extra_expr = []
         if since:

=== modified file 'lib/lp/archivepublisher/tests/archive-signing.txt'
--- lib/lp/archivepublisher/tests/archive-signing.txt	2011-12-29 05:29:36 +0000
+++ lib/lp/archivepublisher/tests/archive-signing.txt	2012-08-17 11:40:27 +0000
@@ -382,7 +382,6 @@
     >>> from lp.archivepublisher.config import getPubConfig
     >>> archive_root = getPubConfig(cprov.archive).archiveroot
 
-    >>> import os
     >>> suite_path = os.path.join(archive_root, 'dists', test_suite)
     >>> os.makedirs(suite_path)
     >>> release_path = os.path.join(suite_path, 'Release')

=== modified file 'lib/lp/archivepublisher/tests/test_debversion.py'
--- lib/lp/archivepublisher/tests/test_debversion.py	2011-03-29 23:13:16 +0000
+++ lib/lp/archivepublisher/tests/test_debversion.py	2012-08-17 11:40:27 +0000
@@ -133,5 +133,4 @@
         """Version should treat an omitted revision as being equal to zero.
         """
         self.assertEquals(Version("1.0"), Version("1.0-0"))
-        from lp.archivepublisher.debversion import Version
         self.failUnless(Version("1.0") == Version("1.0-0"))

=== modified file 'lib/lp/archivepublisher/tests/test_generate_contents_files.py'
--- lib/lp/archivepublisher/tests/test_generate_contents_files.py	2012-06-20 01:19:03 +0000
+++ lib/lp/archivepublisher/tests/test_generate_contents_files.py	2012-08-17 11:40:27 +0000
@@ -19,6 +19,7 @@
 from lp.registry.interfaces.pocket import PackagePublishingPocket
 from lp.services.log.logger import DevNullLogger
 from lp.services.scripts.base import LaunchpadScriptFailure
+from lp.services.scripts.tests import run_script
 from lp.services.utils import file_exists
 from lp.testing import TestCaseWithFactory
 from lp.testing.faketransaction import FakeTransaction
@@ -319,7 +320,6 @@
 
     def test_run_script(self):
         # The script will run stand-alone.
-        from lp.services.scripts.tests import run_script
         self.layer.force_dirty_database()
         retval, out, err = run_script(
             'cronscripts/generate-contents-files.py', ['-d', 'ubuntu', '-q'])

=== modified file 'lib/lp/archivepublisher/tests/test_generate_extra_overrides.py'
--- lib/lp/archivepublisher/tests/test_generate_extra_overrides.py	2012-06-22 15:07:50 +0000
+++ lib/lp/archivepublisher/tests/test_generate_extra_overrides.py	2012-08-17 11:40:27 +0000
@@ -32,6 +32,7 @@
     write_file,
     )
 from lp.services.scripts.base import LaunchpadScriptFailure
+from lp.services.scripts.tests import run_script
 from lp.services.utils import file_exists
 from lp.soyuz.enums import PackagePublishingStatus
 from lp.testing import TestCaseWithFactory
@@ -651,7 +652,6 @@
 
     def test_run_script(self):
         # The script will run stand-alone.
-        from lp.services.scripts.tests import run_script
         distro = self.makeDistro()
         self.factory.makeDistroSeries(distro)
         transaction.commit()

=== modified file 'lib/lp/archivepublisher/tests/test_publisher.py'
--- lib/lp/archivepublisher/tests/test_publisher.py	2012-01-03 01:02:12 +0000
+++ lib/lp/archivepublisher/tests/test_publisher.py	2012-08-17 11:40:27 +0000
@@ -15,6 +15,7 @@
 import stat
 import tempfile
 from textwrap import dedent
+import time
 
 from debian.deb822 import Release
 import transaction
@@ -1505,3 +1506,24 @@
         self.makePublisher(series)._writeReleaseFile(suite, release_data)
 
         self.assertTrue(file_exists(release_path))
+
+    def test_syncTimestamps_makes_timestamps_match_latest(self):
+        root = unicode(self.makeTemporaryDirectory())
+        series = self.makePublishableSeries(root)
+        location = self.getReleaseFileDir(root, series, series.name)
+        os.makedirs(location)
+        now = time.time()
+        path_times = (("a", now), ("b", now - 1), ("c", now - 2))
+        for path, timestamp in path_times:
+            with open(os.path.join(location, path), "w"):
+                pass
+            os.utime(os.path.join(location, path), (timestamp, timestamp))
+
+        paths = [path for path, _ in path_times]
+        self.makePublisher(series)._syncTimestamps(series.name, set(paths))
+
+        timestamps = set(
+            os.stat(os.path.join(location, path)).st_mtime for path in paths)
+        self.assertEqual(1, len(timestamps))
+        # The filesystem may round off subsecond parts of timestamps.
+        self.assertEqual(int(now), int(list(timestamps)[0]))

=== modified file 'lib/lp/archivepublisher/utils.py'
--- lib/lp/archivepublisher/utils.py	2012-04-16 23:02:44 +0000
+++ lib/lp/archivepublisher/utils.py	2012-08-17 11:40:27 +0000
@@ -111,8 +111,7 @@
         'temp_root'.
         """
         self.root = root
-        assert os.path.exists(temp_root), (
-            'Temporary root does not exist.')
+        assert os.path.exists(temp_root), 'Temporary root does not exist.'
 
         self.index_files = (
             PlainTempFile(temp_root, filename),


Follow ups