launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #29733
[Merge] ~cjwatson/lp-archive:expiry into lp-archive:main
Colin Watson has proposed merging ~cjwatson/lp-archive:expiry into lp-archive:main.
Commit message:
Set cache expiry for non-content-addressed dists files
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~cjwatson/lp-archive/+git/lp-archive/+merge/438602
Most files in apt archives are immutable and never change their contents without also changing their name; these don't need complicated caching policy. However, the index files and others under `dists/` may change their contents without also changing their name. Send a relatively short expiry time for these using the `Cache-Control` header.
This also applies to negative responses, since for example `apt` can sometimes end up making lots of requests for things like translation files that will never succeed, and we want to avoid hitting the XML-RPC backend for these every time.
--
Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/lp-archive:expiry into lp-archive:main.
diff --git a/lp_archive/archive.py b/lp_archive/archive.py
index ef0a1a5..3e2e50f 100644
--- a/lp_archive/archive.py
+++ b/lp_archive/archive.py
@@ -4,6 +4,7 @@
"""The main archive view."""
from datetime import datetime
+from pathlib import PurePath
from xmlrpc.client import Fault, ServerProxy
from flask import Flask, current_app, g, request
@@ -74,6 +75,19 @@ def check_auth(archive: str) -> None:
current_app.logger.info("%s: Authorized.", log_prefix)
+def get_extra_headers(path: str, live_at: datetime | None) -> dict[str, str]:
+ headers = {}
+ # Non-content-addressed files under dists/ are mutable and may change on
+ # subsequent publisher runs, so if we aren't addressing a snapshot then
+ # we need to tell caches to give them a relatively short expiry time.
+ # (This applies to both positive and negative responses.)
+ if live_at is None:
+ path_parts = PurePath(path).parts
+ if path_parts[0] == "dists" and path_parts[2:][-3:-2] != ("by-hash",):
+ headers["Cache-Control"] = "max-age=1800, proxy-revalidate"
+ return headers
+
+
def translate(
archive: str, path: str, live_at: datetime | None = None
) -> tuple[str, int, dict[str, str]]:
@@ -82,12 +96,16 @@ def translate(
url = get_archive_proxy().translatePath(archive, path, live_at)
except Fault as f:
if f.faultCode == 320: # NotFound
- return "Not found", 404, {"Content-Type": "text/plain"}
+ headers = {"Content-Type": "text/plain"}
+ headers.update(get_extra_headers(path, live_at))
+ return "Not found", 404, headers
else:
current_app.logger.info("%s %s: %s", archive, path, f.faultString)
return "Internal server error", 500, {"Content-Type": "text/plain"}
assert isinstance(url, str)
- return "", 307, {"Location": url}
+ headers = {"Location": url}
+ headers.update(get_extra_headers(path, live_at))
+ return "", 307, headers
def add_headers(response: Response) -> Response:
diff --git a/tests/test_archive.py b/tests/test_archive.py
index d6b926c..11481f7 100644
--- a/tests/test_archive.py
+++ b/tests/test_archive.py
@@ -15,7 +15,13 @@ from lp_archive.cache import cache
class ArchiveXMLRPCServer(SimpleXMLRPCServer):
- path_map = {"dists/focal/InRelease": "http://librarian.example.org/1"}
+ path_map = {
+ "dists/focal/InRelease": "http://librarian.example.org/1",
+ f"dists/focal/by-hash/SHA256/{'0' * 64}": (
+ "http://librarian.example.org/1"
+ ),
+ "pool/main/h/hello/hello_1.0-1.deb": "http://librarian.example.org/2",
+ }
def __init__(self, *args: Any, **kwargs: Any) -> None:
super().__init__(*args, **kwargs)
@@ -271,3 +277,61 @@ def test_translate_oops(client, archive_proxy, caplog):
("flask.app", logging.INFO, "<anonymous>@ubuntu: Authorized."),
("flask.app", logging.INFO, "ubuntu oops: Oops"),
]
+
+
+def test_translate_cache_control_dists_found(client, archive_proxy):
+ # Positive responses to non-content-addressed requests under dists/ get
+ # a suitable Cache-Control header.
+ response = client.get(
+ "/ubuntu/dists/focal/InRelease",
+ headers=[("Host", "snapshot.ubuntu.test")],
+ )
+ assert response.status_code == 307
+ assert (
+ response.headers["Cache-Control"] == "max-age=1800, proxy-revalidate"
+ )
+
+
+def test_translate_cache_control_dists_not_found(client, archive_proxy):
+ # Negative responses to non-content-addressed requests under dists/ get
+ # a suitable Cache-Control header.
+ response = client.get(
+ "/ubuntu/dists/focal/main/i18n/Translation-nonexistent",
+ headers=[("Host", "snapshot.ubuntu.test")],
+ )
+ assert response.status_code == 404
+ assert (
+ response.headers["Cache-Control"] == "max-age=1800, proxy-revalidate"
+ )
+
+
+def test_translate_cache_control_dists_at_timestamp(client, archive_proxy):
+ # Non-content-addressed requests to a snapshot do not get a
+ # Cache-Control header, even if they're under dists/.
+ response = client.get(
+ "/ubuntu/20220101T120000Z/dists/focal/InRelease",
+ headers=[("Host", "snapshot.ubuntu.test")],
+ )
+ assert response.status_code == 307
+ assert "Cache-Control" not in response.headers
+
+
+def test_translate_cache_control_dists_by_hash(client, archive_proxy):
+ # Content-addressed requests under dists/ do not get a Cache-Control
+ # header.
+ response = client.get(
+ f"/ubuntu/dists/focal/by-hash/SHA256/{'0' * 64}",
+ headers=[("Host", "snapshot.ubuntu.test")],
+ )
+ assert response.status_code == 307
+ assert "Cache-Control" not in response.headers
+
+
+def test_translate_cache_control_not_dists(client, archive_proxy):
+ # Requests not under dists/ do not get a Cache-Control header.
+ response = client.get(
+ "/ubuntu/pool/main/h/hello/hello_1.0-1.deb",
+ headers=[("Host", "snapshot.ubuntu.test")],
+ )
+ assert response.status_code == 307
+ assert "Cache-Control" not in response.headers