launchpad-reviewers team mailing list archive

[Merge] ~lgp171188/lp-archive:esm-snapshots into lp-archive:main


Guruprasad has proposed merging ~lgp171188/lp-archive:esm-snapshots into lp-archive:main.

Commit message:
Implement support for ESM archive snapshots

Co-authored-by: Quentin Debhi <quentin.debhi@xxxxxxxxxxxxx>

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)

For more details, see:
Your team Launchpad code reviewers is requested to review the proposed merge of ~lgp171188/lp-archive:esm-snapshots into lp-archive:main.
diff --git a/lp_archive/archive.py b/lp_archive/archive.py
index f948c8f..79b89d4 100644
--- a/lp_archive/archive.py
+++ b/lp_archive/archive.py
@@ -3,11 +3,13 @@
 """The main archive view."""
-from datetime import datetime
+from datetime import datetime, timezone
 from pathlib import PurePath
 from xmlrpc.client import Fault, ServerProxy
+import requests
 from flask import Flask, current_app, g, request
+from requests.auth import HTTPBasicAuth
 from werkzeug.datastructures import WWWAuthenticate
 from werkzeug.exceptions import Unauthorized
 from werkzeug.wrappers import Response
@@ -15,6 +17,12 @@ from werkzeug.wrappers import Response
 from lp_archive.cache import cache
+def raise_unauthorized_require_valid_http_basic_auth() -> None:
+    basic = WWWAuthenticate()
+    basic.set_basic()
+    raise Unauthorized(www_authenticate=basic)
 def get_archive_proxy() -> ServerProxy:
     archive_proxy = getattr(g, "archive_proxy", None)
     if archive_proxy is None:
@@ -68,13 +76,31 @@ def check_auth(archive: str) -> None:
             # Interpret any other fault as NotFound (320).
             current_app.logger.info("%s: %s", log_prefix, e.faultString)
-        basic = WWWAuthenticate()
-        basic.set_basic()
-        raise Unauthorized(www_authenticate=basic)
+        raise_unauthorized_require_valid_http_basic_auth()
         current_app.logger.info("%s: Authorized.", log_prefix)
+def translate_path_get_http_response(
+    archive: str, path: str, live_at: datetime | None = None
+) -> tuple[str, int, dict[str, str]]:
+    """Try to translate the given archive path and return an HTTP response."""
+    try:
+        url = get_archive_proxy().translatePath(archive, path, live_at)
+    except Fault as f:
+        if f.faultCode == 320:  # NotFound
+            headers = {"Content-Type": "text/plain"}
+            headers.update(get_extra_headers(path, live_at))
+            return "Not found", 404, headers
+        else:
+            current_app.logger.info("%s %s: %s", archive, path, f.faultString)
+            return "Internal server error", 500, {"Content-Type": "text/plain"}
+    assert isinstance(url, str)
+    headers = {"Location": url}
+    headers.update(get_extra_headers(path, live_at))
+    return "", 307, headers
 def get_extra_headers(path: str, live_at: datetime | None) -> dict[str, str]:
     # It's safe to default to long caching even for files in private
     # archives, since we always set "Vary: Authorization" (see add_headers
@@ -95,20 +121,7 @@ def translate(
     archive: str, path: str, live_at: datetime | None = None
 ) -> tuple[str, int, dict[str, str]]:
-    try:
-        url = get_archive_proxy().translatePath(archive, path, live_at)
-    except Fault as f:
-        if f.faultCode == 320:  # NotFound
-            headers = {"Content-Type": "text/plain"}
-            headers.update(get_extra_headers(path, live_at))
-            return "Not found", 404, headers
-        else:
-            current_app.logger.info("%s %s: %s", archive, path, f.faultString)
-            return "Internal server error", 500, {"Content-Type": "text/plain"}
-    assert isinstance(url, str)
-    headers = {"Location": url}
-    headers.update(get_extra_headers(path, live_at))
-    return "", 307, headers
+    return translate_path_get_http_response(archive, path, live_at)
 def add_headers(response: Response) -> Response:
@@ -116,6 +129,101 @@ def add_headers(response: Response) -> Response:
     return response
+def are_esm_credentials_valid(
+    resource: str, username: str, password: str
+) -> bool:
+    """Return whether the given credentials allow access to the resource."""
+    current_app.logger.info("Authenticating %s@%s.", username, resource)
+    esm_auth_endpoint = current_app.config["ESM"]["esm_auth_endpoint"]
+    headers = {
+        "Resource-Name": resource,
+        # XXX lgp171188 2024-06-18 - Do we pass the full request path here
+        # or just the ESM-specific parsed path from the URL pattern?
+        "Original-URI": request.path,
+    }
+    response = requests.get(
+        esm_auth_endpoint,
+        headers=headers,
+        auth=HTTPBasicAuth(username, password),
+    )
+    if response.status_code != 200:
+        return False
+    return True
+def check_esm_auth(resource: str, path: str) -> None:
+    """Check the provided ESM credentials for password-protected paths."""
+    auth_required_path_prefix = current_app.config["ESM"][
+        "auth_required_path_prefix"
+    ]
+    if path.startswith(auth_required_path_prefix):
+        if request.authorization is None:
+            log_prefix = f"anonymous@{resource}"
+            current_app.logger.info("%s: No credentials provided.", log_prefix)
+            raise_unauthorized_require_valid_http_basic_auth()
+        username = request.authorization.username
+        password = request.authorization.password
+        log_prefix = f"{username}@{resource}"
+        if not are_esm_credentials_valid(resource, username, password):
+            current_app.logger.info(
+                "%s: Invalid credentials provided.", log_prefix
+            )
+            raise_unauthorized_require_valid_http_basic_auth()
+        current_app.logger.info("%s: Authorized.", log_prefix)
+def check_esm_snapshot_available(
+    live_at: datetime | None, earliest_snapshot_timestamp: datetime | None
+) -> tuple[str, int, dict[str, str]] | None:
+    """Check whether a snapshot is available at the given timestamp."""
+    if live_at is not None and earliest_snapshot_timestamp is not None:
+        if live_at < earliest_snapshot_timestamp:
+            current_app.logger.info(
+                f"Requested timestamp {live_at.strftime('%Y%m%dT%H%M%S')}Z "
+                "is earlier than that of the first available snapshot."
+            )
+            headers = {
+                "Content-Type": "text/plain",
+                "Cache-Control": "max-age=31536000",
+            }
+            return "Not found", 404, headers
+def translate_esm_suite_name(
+    path: str, allowed_pockets: list[str] | None
+) -> str:
+    """Translate the ESM suite name to the one on the Launchpad PPA.
+    This is needed because Launchpad PPAs only have the release pocket whereas
+    ESM archives can have other pockets too.
+    """
+    if allowed_pockets and path.startswith("dists/"):
+        path_segments = path.split("/")
+        suite_name = path_segments[1]
+        for allowed_pocket in allowed_pockets:
+            if suite_name.endswith(allowed_pocket):
+                path_segments[1] = suite_name.replace(f"-{allowed_pocket}", "")
+                return "/".join(path_segments)
+    return path
+def translate_esm(
+    archive: str,
+    path: str,
+    resource: str,
+    live_at: datetime | None = None,
+    earliest_snapshot_timestamp: datetime | None = None,
+    allowed_pockets: list[str] | None = None,
+) -> tuple[str, int, dict[str, str]]:
+    check_esm_snapshot_available(live_at, earliest_snapshot_timestamp)
+    check_esm_auth(resource, path)
+    path = translate_esm_suite_name(path, allowed_pockets)
+    return translate_path_get_http_response(archive, path, live_at)
 def init_app(app: Flask) -> None:
     for layout in app.config.get("LAYOUTS", []):
@@ -128,4 +236,30 @@ def init_app(app: Flask) -> None:
+    for layout in app.config.get("ESM_LAYOUTS", []):
+        earliest_snapshot_timestamp = layout.get("earliest_snapshot_timestamp")
+        if earliest_snapshot_timestamp:
+            earliest_snapshot_timestamp = datetime.strptime(
+                layout["earliest_snapshot_timestamp"], "%Y-%m-%dT%H:%M:%SZ"
+            ).replace(tzinfo=timezone.utc)
+        app.add_url_rule(
+            f"{layout['base_path']}/<timestamp:live_at>/<path:path>",
+            host=layout["host"],
+            view_func=translate_esm,
+            defaults={
+                "archive": layout["archive"],
+                "resource": layout["purpose"],
+                "earliest_snapshot_timestamp": earliest_snapshot_timestamp,
+                "allowed_pockets": layout.get("allowed_pockets"),
+            },
+        )
+        app.add_url_rule(
+            f"{layout['base_path']}/<path:path>",
+            host=layout["host"],
+            view_func=translate_esm,
+            defaults={
+                "archive": layout["archive"],
+                "resource": layout["purpose"],
+            },
+        )
diff --git a/setup.cfg b/setup.cfg
index 9490c1d..eb6890c 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -33,6 +33,7 @@ docs =
 test =
+    responses
 lp_archive =
diff --git a/tests/conftest.py b/tests/conftest.py
index f781440..0d17c0e 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -16,6 +16,39 @@ def app():
                 {"host": "snapshot.ubuntu.test", "purpose": "primary"},
                 {"host": "snapshot.ppa.test", "purpose": "ppa"},
+            "ESM": {
+                "auth_required_path_prefix": "pool/",
+                "esm_auth_endpoint": "http://esm-auth-server.test/auth-check",
+            },
+            "ESM_LAYOUTS": [
+                {
+                    "host": "snapshot.esm-infra-security.test",
+                    "purpose": "esm-infra",
+                    "archive": "~user/ubuntu/esm-infra-security",
+                    "base_path": "/infra/ubuntu",
+                    # The 'earliest_snapshot_timestamp' key and the
+                    # 'allowed_release_pockets' key are optional and
+                    # not specified here.
+                },
+                {
+                    "host": "snapshot.esm-apps-security.test",
+                    "purpose": "esm-apps",
+                    "archive": "~user/ubuntu/esm-apps-security",
+                    "base_path": "/apps/ubuntu",
+                    "earliest_snapshot_timestamp": "2024-06-30T00:00:00Z",
+                    # The 'allowed_release_pockets' key is optional
+                    # and not specified here.
+                },
+                {
+                    "host": "snapshot.esm-infra-updates.test",
+                    "purpose": "esm-infra",
+                    "archive": "~user/ubuntu/esm-infra-updates",
+                    "base_path": "/infra/ubuntu",
+                    "allowed_pockets": ["infra-updates"],
+                    # The 'earliest_snapshot_timestamp' key is optional and
+                    # not specified here.
+                },
+            ],
     yield app
diff --git a/tests/test_archive.py b/tests/test_archive.py
index a40cc6b..a0c68ca 100644
--- a/tests/test_archive.py
+++ b/tests/test_archive.py
@@ -9,6 +9,7 @@ from xmlrpc.client import Fault
 from xmlrpc.server import SimpleXMLRPCServer
 import pytest
+import responses
 from lp_archive.cache import cache
@@ -336,3 +337,305 @@ def test_translate_cache_control_not_dists(client, archive_proxy):
     assert response.status_code == 307
     assert response.headers["Cache-Control"] == "max-age=31536000"
+def test_esm_snapshot_timestamp_earlier_than_first_available_snapshot(
+    client, archive_proxy, caplog
+    caplog.set_level(logging.INFO, logger="flask.app")
+    response = client.get(
+        "/apps/ubuntu/20240601T000000Z/dists/jammy-apps-security/InRelease",
+        headers=[("Host", "snapshot.esm-apps-security.test")],
+    )
+    assert response.status_code == 404
+    assert caplog.record_tuples == [
+        (
+            "flask.app",
+            logging.INFO,
+            "Requested timestamp 20240601T000000Z is earlier than that of "
+            "the first available snapshot.",
+        )
+    ]
+def test_esm_snapshot_earliest_snapshot_timestamp_not_configured(
+    client, archive_proxy
+    response = client.get(
+        "/apps/ubuntu/20240601T000000Z/dists/focal/InRelease",
+        headers=[("Host", "snapshot.esm-apps-security.test")],
+    )
+    assert response.status_code == 307
+    assert response.location == "http://librarian.example.org/1"
+    assert archive_proxy.call_log == [
+        (
+            "translatePath",
+            "~user/ubuntu/esm-apps-security",
+            "dists/focal/InRelease",
+            datetime(2024, 6, 1, 0, 0, 0, tzinfo=timezone.utc),
+        ),
+    ]
+    responses.add(
+        responses.GET,
+        "http://esm-auth-server.test/auth-check",
+        status=200,
+    )
+    archive_proxy.call_log = []
+    response = client.get(
+        "/apps/ubuntu/20240601T000000Z/pool/main/h/hello/hello_1.0-1.deb",
+        auth=("user", "password"),
+        headers=[("Host", "snapshot.esm-apps-security.test")],
+    )
+    assert response.status_code == 307
+    assert response.location == "http://librarian.example.org/2"
+    assert archive_proxy.call_log == [
+        (
+            "translatePath",
+            "~user/ubuntu/esm-apps-security",
+            "pool/main/h/hello/hello_1.0-1.deb",
+            datetime(2024, 6, 1, 0, 0, 0, tzinfo=timezone.utc),
+        ),
+    ]
+def test_esm_snapshot_auth_required_path_credentials_missing(
+    client, archive_proxy, caplog
+    caplog.set_level(logging.INFO, logger="flask.app")
+    response = client.get(
+        "/infra/ubuntu/20240701T000000Z/pool/main/h/hello/hello_1.0-1.deb",
+        headers=[("Host", "snapshot.esm-infra-security.test")],
+    )
+    assert response.status_code == 401
+    assert caplog.record_tuples == [
+        (
+            "flask.app",
+            logging.INFO,
+            "anonymous@esm-infra: No credentials provided.",
+        )
+    ]
+def test_esm_snapshot_auth_required_path_invalid_credentials_provided(
+    client, archive_proxy, caplog
+    caplog.set_level(logging.INFO, logger="flask.app")
+    responses.add(
+        responses.GET,
+        "http://esm-auth-server.test/auth-check",
+        status=401,
+    )
+    response = client.get(
+        "infra/ubuntu/20240701T000000Z/pool/main/h/hello/hello_1.0-1.deb",
+        auth=("user", "password"),
+        headers=[("Host", "snapshot.esm-infra-security.test")],
+    )
+    assert response.status_code == 401
+    assert caplog.record_tuples == [
+        (
+            "flask.app",
+            logging.INFO,
+            "Authenticating user@esm-infra.",
+        ),
+        (
+            "flask.app",
+            logging.INFO,
+            "user@esm-infra: Invalid credentials provided.",
+        ),
+    ]
+def test_esm_snapshot_auth_required_path_valid_credentials_provided(
+    client, archive_proxy, caplog
+    caplog.set_level(logging.INFO, logger="flask.app")
+    responses.add(
+        responses.GET,
+        "http://esm-auth-server.test/auth-check",
+        status=200,
+    )
+    response = client.get(
+        "infra/ubuntu/20240701T000000Z/pool/main/h/hello/hello_1.0-1.deb",
+        auth=("user", "password"),
+        headers=[("Host", "snapshot.esm-infra-security.test")],
+    )
+    assert response.status_code == 307
+    assert caplog.record_tuples == [
+        (
+            "flask.app",
+            logging.INFO,
+            "Authenticating user@esm-infra.",
+        ),
+        (
+            "flask.app",
+            logging.INFO,
+            "user@esm-infra: Authorized.",
+        ),
+    ]
+    assert archive_proxy.call_log == [
+        (
+            "translatePath",
+            "~user/ubuntu/esm-infra-security",
+            "pool/main/h/hello/hello_1.0-1.deb",
+            datetime(2024, 7, 1, 0, 0, 0, tzinfo=timezone.utc),
+        ),
+    ]
+def test_esm_auth_authentication_results_cached(client, archive_proxy, caplog):
+    caplog.set_level(logging.INFO, logger="flask.app")
+    responses.add(
+        responses.GET,
+        "http://esm-auth-server.test/auth-check",
+        status=200,
+    )
+    client.get(
+        "infra/ubuntu/20240701T000000Z/pool/main/h/hello/hello_1.0-1.deb",
+        auth=("user", "password"),
+        headers=[("Host", "snapshot.esm-infra-security.test")],
+    )
+    assert caplog.record_tuples == [
+        (
+            "flask.app",
+            logging.INFO,
+            "Authenticating user@esm-infra.",
+        ),
+        (
+            "flask.app",
+            logging.INFO,
+            "user@esm-infra: Authorized.",
+        ),
+    ]
+    client.get(
+        "infra/ubuntu/20240701T000000Z/pool/main/h/hello/hello_1.0-1.deb",
+        auth=("user", "password"),
+        headers=[("Host", "snapshot.esm-infra-security.test")],
+    )
+    assert caplog.record_tuples == [
+        (
+            "flask.app",
+            logging.INFO,
+            "Authenticating user@esm-infra.",
+        ),
+        (
+            "flask.app",
+            logging.INFO,
+            "user@esm-infra: Authorized.",
+        ),
+        (
+            "flask.app",
+            logging.INFO,
+            "user@esm-infra: Authorized.",
+        ),
+    ]
+def test_translate_esm_not_found(client, archive_proxy):
+    response = client.get(
+        "/infra/ubuntu/20240601T000000Z/nonexistent",
+        headers=[("Host", "snapshot.esm-infra-security.test")],
+    )
+    assert response.status_code == 404
+    assert response.headers["Content-Type"] == "text/plain"
+    assert response.headers["Vary"] == "Authorization"
+    assert response.data == b"Not found"
+    assert archive_proxy.call_log == [
+        (
+            "translatePath",
+            "~user/ubuntu/esm-infra-security",
+            "nonexistent",
+            datetime(2024, 6, 1, 0, 0, 0, tzinfo=timezone.utc),
+        ),
+    ]
+def test_translate_esm(client, archive_proxy, caplog):
+    caplog.set_level(logging.INFO, logger="flask.app")
+    response = client.get(
+        "/infra/ubuntu/20240601T000000Z/dists/focal/InRelease",
+        headers=[("Host", "snapshot.esm-infra-security.test")],
+    )
+    assert response.status_code == 307
+    assert response.headers["Location"] == "http://librarian.example.org/1"
+    assert response.headers["Vary"] == "Authorization"
+    assert archive_proxy.call_log == [
+        (
+            "translatePath",
+            "~user/ubuntu/esm-infra-security",
+            "dists/focal/InRelease",
+            datetime(2024, 6, 1, 0, 0, 0, tzinfo=timezone.utc),
+        ),
+    ]
+    caplog.set_level(logging.INFO, logger="flask.app")
+    responses.add(
+        responses.GET,
+        "http://esm-auth-server.test/auth-check",
+        status=200,
+    )
+    archive_proxy.call_log = []
+    response = client.get(
+        "/infra/ubuntu/20240601T000000Z/pool/main/h/hello/hello_1.0-1.deb",
+        auth=("user", "password"),
+        headers=[("Host", "snapshot.esm-infra-security.test")],
+    )
+    assert response.status_code == 307
+    assert response.headers["Location"] == "http://librarian.example.org/2"
+    assert response.headers["Vary"] == "Authorization"
+    assert archive_proxy.call_log == [
+        (
+            "translatePath",
+            "~user/ubuntu/esm-infra-security",
+            "pool/main/h/hello/hello_1.0-1.deb",
+            datetime(2024, 6, 1, 0, 0, 0, tzinfo=timezone.utc),
+        ),
+    ]
+def test_translate_esm_translatePath_oops(client, archive_proxy, caplog):
+    caplog.set_level(logging.INFO, logger="flask.app")
+    response = client.get(
+        "/infra/ubuntu/20240701T000000Z/oops",
+        headers=[("Host", "snapshot.esm-infra-security.test")],
+    )
+    assert response.status_code == 500
+    assert response.headers["Content-Type"] == "text/plain"
+    assert response.headers["Vary"] == "Authorization"
+    assert response.data == b"Internal server error"
+    assert archive_proxy.call_log == [
+        (
+            "translatePath",
+            "~user/ubuntu/esm-infra-security",
+            "oops",
+            datetime(2024, 7, 1, 0, 0, 0, tzinfo=timezone.utc),
+        ),
+    ]
+    assert caplog.record_tuples == [
+        (
+            "flask.app",
+            logging.INFO,
+            "~user/ubuntu/esm-infra-security oops: Oops",
+        ),
+    ]
+def test_translate_esm_non_release_pocket_path(client, archive_proxy, caplog):
+    caplog.set_level(logging.INFO, logger="flask.app")
+    response = client.get(
+        "/infra/ubuntu/20240601T000000Z/dists/focal-infra-updates/InRelease",
+        headers=[("Host", "snapshot.esm-infra-updates.test")],
+    )
+    assert response.status_code == 307
+    assert response.location == "http://librarian.example.org/1"
+    assert archive_proxy.call_log == [
+        (
+            "translatePath",
+            "~user/ubuntu/esm-infra-updates",
+            "dists/focal/InRelease",
+            datetime(2024, 6, 1, 0, 0, 0, tzinfo=timezone.utc),
+        ),
+    ]