launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #31196
[Merge] ~lgp171188/lp-archive:esm-snapshots into lp-archive:main
Guruprasad has proposed merging ~lgp171188/lp-archive:esm-snapshots into lp-archive:main.
Commit message:
Implement support for ESM archive snapshots
Co-authored-by: Quentin Debhi <quentin.debhi@xxxxxxxxxxxxx>
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
For more details, see:
https://code.launchpad.net/~lgp171188/lp-archive/+git/lp-archive/+merge/468170
--
Your team Launchpad code reviewers is requested to review the proposed merge of ~lgp171188/lp-archive:esm-snapshots into lp-archive:main.
diff --git a/lp_archive/archive.py b/lp_archive/archive.py
index f948c8f..79b89d4 100644
--- a/lp_archive/archive.py
+++ b/lp_archive/archive.py
@@ -3,11 +3,13 @@
"""The main archive view."""
-from datetime import datetime
+from datetime import datetime, timezone
from pathlib import PurePath
from xmlrpc.client import Fault, ServerProxy
+import requests
from flask import Flask, current_app, g, request
+from requests.auth import HTTPBasicAuth
from werkzeug.datastructures import WWWAuthenticate
from werkzeug.exceptions import Unauthorized
from werkzeug.wrappers import Response
@@ -15,6 +17,12 @@ from werkzeug.wrappers import Response
from lp_archive.cache import cache
+def raise_unauthorized_require_valid_http_basic_auth() -> None:
+ basic = WWWAuthenticate()
+ basic.set_basic()
+ raise Unauthorized(www_authenticate=basic)
+
+
def get_archive_proxy() -> ServerProxy:
archive_proxy = getattr(g, "archive_proxy", None)
if archive_proxy is None:
@@ -68,13 +76,31 @@ def check_auth(archive: str) -> None:
else:
# Interpret any other fault as NotFound (320).
current_app.logger.info("%s: %s", log_prefix, e.faultString)
- basic = WWWAuthenticate()
- basic.set_basic()
- raise Unauthorized(www_authenticate=basic)
+ raise_unauthorized_require_valid_http_basic_auth()
else:
current_app.logger.info("%s: Authorized.", log_prefix)
+def translate_path_get_http_response(
+ archive: str, path: str, live_at: datetime | None = None
+) -> tuple[str, int, dict[str, str]]:
+ """Try to translate the given archive path and return an HTTP response."""
+ try:
+ url = get_archive_proxy().translatePath(archive, path, live_at)
+ except Fault as f:
+ if f.faultCode == 320: # NotFound
+ headers = {"Content-Type": "text/plain"}
+ headers.update(get_extra_headers(path, live_at))
+ return "Not found", 404, headers
+ else:
+ current_app.logger.info("%s %s: %s", archive, path, f.faultString)
+ return "Internal server error", 500, {"Content-Type": "text/plain"}
+ assert isinstance(url, str)
+ headers = {"Location": url}
+ headers.update(get_extra_headers(path, live_at))
+ return "", 307, headers
+
+
def get_extra_headers(path: str, live_at: datetime | None) -> dict[str, str]:
# It's safe to default to long caching even for files in private
# archives, since we always set "Vary: Authorization" (see add_headers
@@ -95,20 +121,7 @@ def translate(
archive: str, path: str, live_at: datetime | None = None
) -> tuple[str, int, dict[str, str]]:
check_auth(archive)
- try:
- url = get_archive_proxy().translatePath(archive, path, live_at)
- except Fault as f:
- if f.faultCode == 320: # NotFound
- headers = {"Content-Type": "text/plain"}
- headers.update(get_extra_headers(path, live_at))
- return "Not found", 404, headers
- else:
- current_app.logger.info("%s %s: %s", archive, path, f.faultString)
- return "Internal server error", 500, {"Content-Type": "text/plain"}
- assert isinstance(url, str)
- headers = {"Location": url}
- headers.update(get_extra_headers(path, live_at))
- return "", 307, headers
+ return translate_path_get_http_response(archive, path, live_at)
def add_headers(response: Response) -> Response:
@@ -116,6 +129,101 @@ def add_headers(response: Response) -> Response:
return response
+@cache.memoize(timeout=60)
+def are_esm_credentials_valid(
+ resource: str, username: str, password: str
+) -> bool:
+ """Return whether the given credentials allow access to the resource."""
+ current_app.logger.info("Authenticating %s@%s.", username, resource)
+ esm_auth_endpoint = current_app.config["ESM"]["esm_auth_endpoint"]
+ headers = {
+ "Resource-Name": resource,
+ # XXX lgp171188 2024-06-18 - Do we pass the full request path here
+ # or just the ESM-specific parsed path from the URL pattern?
+ "Original-URI": request.path,
+ }
+ response = requests.get(
+ esm_auth_endpoint,
+ headers=headers,
+ auth=HTTPBasicAuth(username, password),
+ )
+
+ if response.status_code != 200:
+ return False
+ return True
+
+
+def check_esm_auth(resource: str, path: str) -> None:
+ """Check the provided ESM credentials for password-protected paths."""
+ auth_required_path_prefix = current_app.config["ESM"][
+ "auth_required_path_prefix"
+ ]
+ if path.startswith(auth_required_path_prefix):
+ if request.authorization is None:
+ log_prefix = f"anonymous@{resource}"
+ current_app.logger.info("%s: No credentials provided.", log_prefix)
+ raise_unauthorized_require_valid_http_basic_auth()
+ username = request.authorization.username
+ password = request.authorization.password
+
+ log_prefix = f"{username}@{resource}"
+ if not are_esm_credentials_valid(resource, username, password):
+ current_app.logger.info(
+ "%s: Invalid credentials provided.", log_prefix
+ )
+ raise_unauthorized_require_valid_http_basic_auth()
+ current_app.logger.info("%s: Authorized.", log_prefix)
+
+
+def check_esm_snapshot_available(
+ live_at: datetime | None, earliest_snapshot_timestamp: datetime | None
+) -> tuple[str, int, dict[str, str]] | None:
+ """Check whether a snapshot is available at the given timestamp."""
+ if live_at is not None and earliest_snapshot_timestamp is not None:
+ if live_at < earliest_snapshot_timestamp:
+ current_app.logger.info(
+ f"Requested timestamp {live_at.strftime('%Y%m%dT%H%M%S')}Z "
+ "is earlier than that of the first available snapshot."
+ )
+ headers = {
+ "Content-Type": "text/plain",
+ "Cache-Control": "max-age=31536000",
+ }
+ return "Not found", 404, headers
+
+
+def translate_esm_suite_name(
+ path: str, allowed_pockets: list[str] | None
+) -> str:
+ """Translate the ESM suite name to the one on the Launchpad PPA.
+
+ This is needed because Launchpad PPAs only have the release pocket whereas
+ ESM archives can have other pockets too.
+ """
+ if allowed_pockets and path.startswith("dists/"):
+ path_segments = path.split("/")
+ suite_name = path_segments[1]
+ for allowed_pocket in allowed_pockets:
+ if suite_name.endswith(allowed_pocket):
+ path_segments[1] = suite_name.replace(f"-{allowed_pocket}", "")
+ return "/".join(path_segments)
+ return path
+
+
+def translate_esm(
+ archive: str,
+ path: str,
+ resource: str,
+ live_at: datetime | None = None,
+ earliest_snapshot_timestamp: datetime | None = None,
+ allowed_pockets: list[str] | None = None,
+) -> tuple[str, int, dict[str, str]]:
+ check_esm_snapshot_available(live_at, earliest_snapshot_timestamp)
+ check_esm_auth(resource, path)
+ path = translate_esm_suite_name(path, allowed_pockets)
+ return translate_path_get_http_response(archive, path, live_at)
+
+
def init_app(app: Flask) -> None:
for layout in app.config.get("LAYOUTS", []):
app.add_url_rule(
@@ -128,4 +236,30 @@ def init_app(app: Flask) -> None:
host=layout["host"],
view_func=translate,
)
+ for layout in app.config.get("ESM_LAYOUTS", []):
+ earliest_snapshot_timestamp = layout.get("earliest_snapshot_timestamp")
+ if earliest_snapshot_timestamp:
+ earliest_snapshot_timestamp = datetime.strptime(
+ layout["earliest_snapshot_timestamp"], "%Y-%m-%dT%H:%M:%SZ"
+ ).replace(tzinfo=timezone.utc)
+ app.add_url_rule(
+ f"{layout['base_path']}/<timestamp:live_at>/<path:path>",
+ host=layout["host"],
+ view_func=translate_esm,
+ defaults={
+ "archive": layout["archive"],
+ "resource": layout["purpose"],
+ "earliest_snapshot_timestamp": earliest_snapshot_timestamp,
+ "allowed_pockets": layout.get("allowed_pockets"),
+ },
+ )
+ app.add_url_rule(
+ f"{layout['base_path']}/<path:path>",
+ host=layout["host"],
+ view_func=translate_esm,
+ defaults={
+ "archive": layout["archive"],
+ "resource": layout["purpose"],
+ },
+ )
app.after_request(add_headers)
diff --git a/setup.cfg b/setup.cfg
index 9490c1d..eb6890c 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -33,6 +33,7 @@ docs =
test =
coverage
pytest
+ responses
[options.package_data]
lp_archive =
diff --git a/tests/conftest.py b/tests/conftest.py
index f781440..0d17c0e 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -16,6 +16,39 @@ def app():
{"host": "snapshot.ubuntu.test", "purpose": "primary"},
{"host": "snapshot.ppa.test", "purpose": "ppa"},
],
+ "ESM": {
+ "auth_required_path_prefix": "pool/",
+ "esm_auth_endpoint": "http://esm-auth-server.test/auth-check",
+ },
+ "ESM_LAYOUTS": [
+ {
+ "host": "snapshot.esm-infra-security.test",
+ "purpose": "esm-infra",
+ "archive": "~user/ubuntu/esm-infra-security",
+ "base_path": "/infra/ubuntu",
+ # The 'earliest_snapshot_timestamp' key and the
+ # 'allowed_pockets' key are optional and
+ # not specified here.
+ },
+ {
+ "host": "snapshot.esm-apps-security.test",
+ "purpose": "esm-apps",
+ "archive": "~user/ubuntu/esm-apps-security",
+ "base_path": "/apps/ubuntu",
+ "earliest_snapshot_timestamp": "2024-06-30T00:00:00Z",
+ # The 'allowed_pockets' key is optional
+ # and not specified here.
+ },
+ {
+ "host": "snapshot.esm-infra-updates.test",
+ "purpose": "esm-infra",
+ "archive": "~user/ubuntu/esm-infra-updates",
+ "base_path": "/infra/ubuntu",
+ "allowed_pockets": ["infra-updates"],
+ # The 'earliest_snapshot_timestamp' key is optional and
+ # not specified here.
+ },
+ ],
}
)
yield app
diff --git a/tests/test_archive.py b/tests/test_archive.py
index a40cc6b..a0c68ca 100644
--- a/tests/test_archive.py
+++ b/tests/test_archive.py
@@ -9,6 +9,7 @@ from xmlrpc.client import Fault
from xmlrpc.server import SimpleXMLRPCServer
import pytest
+import responses
from lp_archive.cache import cache
@@ -336,3 +337,305 @@ def test_translate_cache_control_not_dists(client, archive_proxy):
)
assert response.status_code == 307
assert response.headers["Cache-Control"] == "max-age=31536000"
+
+
+def test_esm_snapshot_timestamp_earlier_than_first_available_snapshot(
+ client, archive_proxy, caplog
+):
+ caplog.set_level(logging.INFO, logger="flask.app")
+ response = client.get(
+ "/apps/ubuntu/20240601T000000Z/dists/jammy-apps-security/InRelease",
+ headers=[("Host", "snapshot.esm-apps-security.test")],
+ )
+ assert response.status_code == 404
+ assert caplog.record_tuples == [
+ (
+ "flask.app",
+ logging.INFO,
+ "Requested timestamp 20240601T000000Z is earlier than that of "
+ "the first available snapshot.",
+ )
+ ]
+
+
+@responses.activate
+def test_esm_snapshot_earliest_snapshot_timestamp_not_configured(
+ client, archive_proxy
+):
+ response = client.get(
+ "/apps/ubuntu/20240601T000000Z/dists/focal/InRelease",
+ headers=[("Host", "snapshot.esm-apps-security.test")],
+ )
+ assert response.status_code == 307
+ assert response.location == "http://librarian.example.org/1"
+ assert archive_proxy.call_log == [
+ (
+ "translatePath",
+ "~user/ubuntu/esm-apps-security",
+ "dists/focal/InRelease",
+ datetime(2024, 6, 1, 0, 0, 0, tzinfo=timezone.utc),
+ ),
+ ]
+ responses.add(
+ responses.GET,
+ "http://esm-auth-server.test/auth-check",
+ status=200,
+ )
+ archive_proxy.call_log = []
+ response = client.get(
+ "/apps/ubuntu/20240601T000000Z/pool/main/h/hello/hello_1.0-1.deb",
+ auth=("user", "password"),
+ headers=[("Host", "snapshot.esm-apps-security.test")],
+ )
+ assert response.status_code == 307
+ assert response.location == "http://librarian.example.org/2"
+ assert archive_proxy.call_log == [
+ (
+ "translatePath",
+ "~user/ubuntu/esm-apps-security",
+ "pool/main/h/hello/hello_1.0-1.deb",
+ datetime(2024, 6, 1, 0, 0, 0, tzinfo=timezone.utc),
+ ),
+ ]
+
+
+def test_esm_snapshot_auth_required_path_credentials_missing(
+ client, archive_proxy, caplog
+):
+ caplog.set_level(logging.INFO, logger="flask.app")
+ response = client.get(
+ "/infra/ubuntu/20240701T000000Z/pool/main/h/hello/hello_1.0-1.deb",
+ headers=[("Host", "snapshot.esm-infra-security.test")],
+ )
+ assert response.status_code == 401
+ assert caplog.record_tuples == [
+ (
+ "flask.app",
+ logging.INFO,
+ "anonymous@esm-infra: No credentials provided.",
+ )
+ ]
+
+
+@responses.activate
+def test_esm_snapshot_auth_required_path_invalid_credentials_provided(
+ client, archive_proxy, caplog
+):
+ caplog.set_level(logging.INFO, logger="flask.app")
+ responses.add(
+ responses.GET,
+ "http://esm-auth-server.test/auth-check",
+ status=401,
+ )
+ response = client.get(
+ "infra/ubuntu/20240701T000000Z/pool/main/h/hello/hello_1.0-1.deb",
+ auth=("user", "password"),
+ headers=[("Host", "snapshot.esm-infra-security.test")],
+ )
+ assert response.status_code == 401
+ assert caplog.record_tuples == [
+ (
+ "flask.app",
+ logging.INFO,
+ "Authenticating user@esm-infra.",
+ ),
+ (
+ "flask.app",
+ logging.INFO,
+ "user@esm-infra: Invalid credentials provided.",
+ ),
+ ]
+
+
+@responses.activate
+def test_esm_snapshot_auth_required_path_valid_credentials_provided(
+ client, archive_proxy, caplog
+):
+ caplog.set_level(logging.INFO, logger="flask.app")
+ responses.add(
+ responses.GET,
+ "http://esm-auth-server.test/auth-check",
+ status=200,
+ )
+ response = client.get(
+ "infra/ubuntu/20240701T000000Z/pool/main/h/hello/hello_1.0-1.deb",
+ auth=("user", "password"),
+ headers=[("Host", "snapshot.esm-infra-security.test")],
+ )
+ assert response.status_code == 307
+ assert caplog.record_tuples == [
+ (
+ "flask.app",
+ logging.INFO,
+ "Authenticating user@esm-infra.",
+ ),
+ (
+ "flask.app",
+ logging.INFO,
+ "user@esm-infra: Authorized.",
+ ),
+ ]
+ assert archive_proxy.call_log == [
+ (
+ "translatePath",
+ "~user/ubuntu/esm-infra-security",
+ "pool/main/h/hello/hello_1.0-1.deb",
+ datetime(2024, 7, 1, 0, 0, 0, tzinfo=timezone.utc),
+ ),
+ ]
+
+
+@responses.activate
+def test_esm_auth_authentication_results_cached(client, archive_proxy, caplog):
+ caplog.set_level(logging.INFO, logger="flask.app")
+ responses.add(
+ responses.GET,
+ "http://esm-auth-server.test/auth-check",
+ status=200,
+ )
+ client.get(
+ "infra/ubuntu/20240701T000000Z/pool/main/h/hello/hello_1.0-1.deb",
+ auth=("user", "password"),
+ headers=[("Host", "snapshot.esm-infra-security.test")],
+ )
+ assert caplog.record_tuples == [
+ (
+ "flask.app",
+ logging.INFO,
+ "Authenticating user@esm-infra.",
+ ),
+ (
+ "flask.app",
+ logging.INFO,
+ "user@esm-infra: Authorized.",
+ ),
+ ]
+ client.get(
+ "infra/ubuntu/20240701T000000Z/pool/main/h/hello/hello_1.0-1.deb",
+ auth=("user", "password"),
+ headers=[("Host", "snapshot.esm-infra-security.test")],
+ )
+ assert caplog.record_tuples == [
+ (
+ "flask.app",
+ logging.INFO,
+ "Authenticating user@esm-infra.",
+ ),
+ (
+ "flask.app",
+ logging.INFO,
+ "user@esm-infra: Authorized.",
+ ),
+ (
+ "flask.app",
+ logging.INFO,
+ "user@esm-infra: Authorized.",
+ ),
+ ]
+
+
+def test_translate_esm_not_found(client, archive_proxy):
+ response = client.get(
+ "/infra/ubuntu/20240601T000000Z/nonexistent",
+ headers=[("Host", "snapshot.esm-infra-security.test")],
+ )
+ assert response.status_code == 404
+ assert response.headers["Content-Type"] == "text/plain"
+ assert response.headers["Vary"] == "Authorization"
+ assert response.data == b"Not found"
+ assert archive_proxy.call_log == [
+ (
+ "translatePath",
+ "~user/ubuntu/esm-infra-security",
+ "nonexistent",
+ datetime(2024, 6, 1, 0, 0, 0, tzinfo=timezone.utc),
+ ),
+ ]
+
+
+@responses.activate
+def test_translate_esm(client, archive_proxy, caplog):
+ caplog.set_level(logging.INFO, logger="flask.app")
+ response = client.get(
+ "/infra/ubuntu/20240601T000000Z/dists/focal/InRelease",
+ headers=[("Host", "snapshot.esm-infra-security.test")],
+ )
+ assert response.status_code == 307
+ assert response.headers["Location"] == "http://librarian.example.org/1"
+ assert response.headers["Vary"] == "Authorization"
+ assert archive_proxy.call_log == [
+ (
+ "translatePath",
+ "~user/ubuntu/esm-infra-security",
+ "dists/focal/InRelease",
+ datetime(2024, 6, 1, 0, 0, 0, tzinfo=timezone.utc),
+ ),
+ ]
+ caplog.set_level(logging.INFO, logger="flask.app")
+ responses.add(
+ responses.GET,
+ "http://esm-auth-server.test/auth-check",
+ status=200,
+ )
+ archive_proxy.call_log = []
+ response = client.get(
+ "/infra/ubuntu/20240601T000000Z/pool/main/h/hello/hello_1.0-1.deb",
+ auth=("user", "password"),
+ headers=[("Host", "snapshot.esm-infra-security.test")],
+ )
+ assert response.status_code == 307
+ assert response.headers["Location"] == "http://librarian.example.org/2"
+ assert response.headers["Vary"] == "Authorization"
+ assert archive_proxy.call_log == [
+ (
+ "translatePath",
+ "~user/ubuntu/esm-infra-security",
+ "pool/main/h/hello/hello_1.0-1.deb",
+ datetime(2024, 6, 1, 0, 0, 0, tzinfo=timezone.utc),
+ ),
+ ]
+
+
+def test_translate_esm_translatePath_oops(client, archive_proxy, caplog):
+ caplog.set_level(logging.INFO, logger="flask.app")
+ response = client.get(
+ "/infra/ubuntu/20240701T000000Z/oops",
+ headers=[("Host", "snapshot.esm-infra-security.test")],
+ )
+ assert response.status_code == 500
+ assert response.headers["Content-Type"] == "text/plain"
+ assert response.headers["Vary"] == "Authorization"
+ assert response.data == b"Internal server error"
+ assert archive_proxy.call_log == [
+ (
+ "translatePath",
+ "~user/ubuntu/esm-infra-security",
+ "oops",
+ datetime(2024, 7, 1, 0, 0, 0, tzinfo=timezone.utc),
+ ),
+ ]
+ assert caplog.record_tuples == [
+ (
+ "flask.app",
+ logging.INFO,
+ "~user/ubuntu/esm-infra-security oops: Oops",
+ ),
+ ]
+
+
+def test_translate_esm_non_release_pocket_path(client, archive_proxy, caplog):
+ caplog.set_level(logging.INFO, logger="flask.app")
+ response = client.get(
+ "/infra/ubuntu/20240601T000000Z/dists/focal-infra-updates/InRelease",
+ headers=[("Host", "snapshot.esm-infra-updates.test")],
+ )
+ assert response.status_code == 307
+ assert response.location == "http://librarian.example.org/1"
+ assert archive_proxy.call_log == [
+ (
+ "translatePath",
+ "~user/ubuntu/esm-infra-updates",
+ "dists/focal/InRelease",
+ datetime(2024, 6, 1, 0, 0, 0, tzinfo=timezone.utc),
+ ),
+ ]