← Back to team overview

launchpad-reviewers team mailing list archive

[Merge] ~cjwatson/lp-archive:expiry into lp-archive:main

 

Colin Watson has proposed merging ~cjwatson/lp-archive:expiry into lp-archive:main.

Commit message:
Set cache expiry for non-content-addressed dists files

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
https://code.launchpad.net/~cjwatson/lp-archive/+git/lp-archive/+merge/438602

Most files in apt archives are immutable and never change their contents without also changing their name; these don't need complicated caching policy.  However, the index files and others under `dists/` may change their contents without also changing their name.  Send a relatively short expiry time for these using the `Cache-Control` header.

This also applies to negative responses, since for example `apt` can sometimes end up making lots of requests for things like translation files that will never succeed, and we want to avoid hitting the XML-RPC backend for these every time.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/lp-archive:expiry into lp-archive:main.
diff --git a/lp_archive/archive.py b/lp_archive/archive.py
index ef0a1a5..3e2e50f 100644
--- a/lp_archive/archive.py
+++ b/lp_archive/archive.py
@@ -4,6 +4,7 @@
 """The main archive view."""
 
 from datetime import datetime
+from pathlib import PurePath
 from xmlrpc.client import Fault, ServerProxy
 
 from flask import Flask, current_app, g, request
@@ -74,6 +75,19 @@ def check_auth(archive: str) -> None:
         current_app.logger.info("%s: Authorized.", log_prefix)
 
 
+def get_extra_headers(path: str, live_at: datetime | None) -> dict[str, str]:
+    headers = {}
+    # Non-content-addressed files under dists/ are mutable and may change on
+    # subsequent publisher runs, so if we aren't addressing a snapshot then
+    # we need to tell caches to give them a relatively short expiry time.
+    # (This applies to both positive and negative responses.)
+    if live_at is None:
+        path_parts = PurePath(path).parts
+        if path_parts[0] == "dists" and path_parts[2:][-3:-2] != ("by-hash",):
+            headers["Cache-Control"] = "max-age=1800, proxy-revalidate"
+    return headers
+
+
 def translate(
     archive: str, path: str, live_at: datetime | None = None
 ) -> tuple[str, int, dict[str, str]]:
@@ -82,12 +96,16 @@ def translate(
         url = get_archive_proxy().translatePath(archive, path, live_at)
     except Fault as f:
         if f.faultCode == 320:  # NotFound
-            return "Not found", 404, {"Content-Type": "text/plain"}
+            headers = {"Content-Type": "text/plain"}
+            headers.update(get_extra_headers(path, live_at))
+            return "Not found", 404, headers
         else:
             current_app.logger.info("%s %s: %s", archive, path, f.faultString)
             return "Internal server error", 500, {"Content-Type": "text/plain"}
     assert isinstance(url, str)
-    return "", 307, {"Location": url}
+    headers = {"Location": url}
+    headers.update(get_extra_headers(path, live_at))
+    return "", 307, headers
 
 
 def add_headers(response: Response) -> Response:
diff --git a/tests/test_archive.py b/tests/test_archive.py
index d6b926c..11481f7 100644
--- a/tests/test_archive.py
+++ b/tests/test_archive.py
@@ -15,7 +15,13 @@ from lp_archive.cache import cache
 
 class ArchiveXMLRPCServer(SimpleXMLRPCServer):
 
-    path_map = {"dists/focal/InRelease": "http://librarian.example.org/1"}
+    path_map = {
+        "dists/focal/InRelease": "http://librarian.example.org/1";,
+        f"dists/focal/by-hash/SHA256/{'0' * 64}": (
+            "http://librarian.example.org/1";
+        ),
+        "pool/main/h/hello/hello_1.0-1.deb": "http://librarian.example.org/2";,
+    }
 
     def __init__(self, *args: Any, **kwargs: Any) -> None:
         super().__init__(*args, **kwargs)
@@ -271,3 +277,61 @@ def test_translate_oops(client, archive_proxy, caplog):
         ("flask.app", logging.INFO, "<anonymous>@ubuntu: Authorized."),
         ("flask.app", logging.INFO, "ubuntu oops: Oops"),
     ]
+
+
+def test_translate_cache_control_dists_found(client, archive_proxy):
+    # Positive responses to non-content-addressed requests under dists/ get
+    # a suitable Cache-Control header.
+    response = client.get(
+        "/ubuntu/dists/focal/InRelease",
+        headers=[("Host", "snapshot.ubuntu.test")],
+    )
+    assert response.status_code == 307
+    assert (
+        response.headers["Cache-Control"] == "max-age=1800, proxy-revalidate"
+    )
+
+
+def test_translate_cache_control_dists_not_found(client, archive_proxy):
+    # Negative responses to non-content-addressed requests under dists/ get
+    # a suitable Cache-Control header.
+    response = client.get(
+        "/ubuntu/dists/focal/main/i18n/Translation-nonexistent",
+        headers=[("Host", "snapshot.ubuntu.test")],
+    )
+    assert response.status_code == 404
+    assert (
+        response.headers["Cache-Control"] == "max-age=1800, proxy-revalidate"
+    )
+
+
+def test_translate_cache_control_dists_at_timestamp(client, archive_proxy):
+    # Non-content-addressed requests to a snapshot do not get a
+    # Cache-Control header, even if they're under dists/.
+    response = client.get(
+        "/ubuntu/20220101T120000Z/dists/focal/InRelease",
+        headers=[("Host", "snapshot.ubuntu.test")],
+    )
+    assert response.status_code == 307
+    assert "Cache-Control" not in response.headers
+
+
+def test_translate_cache_control_dists_by_hash(client, archive_proxy):
+    # Content-addressed requests under dists/ do not get a Cache-Control
+    # header.
+    response = client.get(
+        f"/ubuntu/dists/focal/by-hash/SHA256/{'0' * 64}",
+        headers=[("Host", "snapshot.ubuntu.test")],
+    )
+    assert response.status_code == 307
+    assert "Cache-Control" not in response.headers
+
+
+def test_translate_cache_control_not_dists(client, archive_proxy):
+    # Requests not under dists/ do not get a Cache-Control header.
+    response = client.get(
+        "/ubuntu/pool/main/h/hello/hello_1.0-1.deb",
+        headers=[("Host", "snapshot.ubuntu.test")],
+    )
+    assert response.status_code == 307
+    assert "Cache-Control" not in response.headers