diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index 370b22e7..0bbffe5c 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -155,6 +155,10 @@ jobs: WASM_BINARY_PATH: ${{ env.WASM_ARTIFACT_PATH }} INTEGRATION_ORIGIN_PORT: ${{ env.ORIGIN_PORT }} VICEROY_CONFIG_PATH: ${{ env.ARTIFACTS_DIR }}/configs/viceroy-edgezero.toml + # Opt into the EdgeZero entry-point probe in test_ec_lifecycle_fastly. + # Only set here, so the legacy integration-tests job runs the same + # scenarios through legacy_main without the EdgeZero diagnostic probe. + EXPECT_EDGEZERO_ENTRY_POINT: "true" RUST_LOG: info browser-tests: diff --git a/.gitignore b/.gitignore index dabeec55..343c86ad 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,9 @@ /spin /spin.sig +# Spin runtime state (local KV/SQLite created by `spin up`) +.spin/ + # EdgeZero local KV store (created by edgezero-adapter-axum framework) .edgezero/ diff --git a/Cargo.toml b/Cargo.toml index 3d139333..84f5e3d0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,14 @@ members = [ "crates/trusted-server-openrtb", ] +# The OpenRTB codegen crate is a standalone build tool run on demand via +# `crates/trusted-server-openrtb/generate.sh`; keep it out of the workspace so +# `cargo run --manifest-path .../trusted-server-openrtb-codegen/Cargo.toml` does +# not fail with "current package believes it's in a workspace when it's not". +exclude = [ + "crates/trusted-server-openrtb-codegen", +] + # Viceroy (cargo test-fastly runner) calls `cargo run --bin trusted-server-adapter-fastly` # against the default-run packages. It must be the sole default member so Cargo can # locate the binary. Use aliases to test each adapter with the correct target: diff --git a/crates/trusted-server-adapter-fastly/src/error.rs b/crates/trusted-server-adapter-fastly/src/error.rs index 560a2e18..f3182ddc 100644 --- a/crates/trusted-server-adapter-fastly/src/error.rs +++ b/crates/trusted-server-adapter-fastly/src/error.rs @@ -17,3 +17,26 @@ pub fn to_error_response(report: &Report) -> Response { Response::from_status(root_error.status_code()) .with_body_text_plain(&format!("{}\n", root_error.user_message())) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn config_store_unavailable_renders_503() { + // Locks the end-to-end mapping: a config-store read failure reaches the + // client as 503 via `status_code()` — not bypassed by the adapter. + let report = Report::new(TrustedServerError::ConfigStoreUnavailable { + store_name: "app_config".to_string(), + message: "unavailable or not seeded".to_string(), + }); + + let response = to_error_response(&report); + + assert_eq!( + response.get_status(), + fastly::http::StatusCode::SERVICE_UNAVAILABLE, + "config-store read failure should render as 503 to the client" + ); + } +} diff --git a/crates/trusted-server-adapter-fastly/src/main.rs b/crates/trusted-server-adapter-fastly/src/main.rs index 489c4982..bf05f5eb 100644 --- a/crates/trusted-server-adapter-fastly/src/main.rs +++ b/crates/trusted-server-adapter-fastly/src/main.rs @@ -400,9 +400,7 @@ fn edgezero_main(mut req: FastlyRequest, config_store: ConfigStoreHandle) { } Err(e) => { log::warn!("EdgeZero JA4 endpoint: failed to load settings: {e:?}"); - FastlyResponse::from_status(fastly::http::StatusCode::INTERNAL_SERVER_ERROR) - .with_body_text_plain("Internal Server Error") - .send_to_client(); + to_error_response(&e).send_to_client(); } } return; diff --git a/crates/trusted-server-adapter-spin/runtime-config.toml b/crates/trusted-server-adapter-spin/runtime-config.toml new file mode 100644 index 00000000..9dae5719 --- /dev/null +++ b/crates/trusted-server-adapter-spin/runtime-config.toml @@ -0,0 +1,15 @@ +# Spin runtime configuration for the Trusted Server adapter. +# +# Declares the `app_config` key-value store that holds the Trusted Server +# app-config blob loaded at startup and seeded by `ts config push --adapter +# spin`. Spin auto-provides only the `default` label; any other label (here +# `app_config`) must be declared here or `spin up` fails with +# `unknown key_value_stores label app_config`. +# +# `type = "spin"` uses Spin's built-in SQLite key-value backend (a local +# `.spin/sqlite_key_value.db` file), matching what `ts config push --adapter +# spin --local` writes to. Point it at redis/azure/etc. for a shared backend. +# +# Load it explicitly: `spin up --runtime-config-file runtime-config.toml`. +[key_value_store.app_config] +type = "spin" diff --git a/crates/trusted-server-adapter-spin/spin.toml b/crates/trusted-server-adapter-spin/spin.toml index 9bc3634d..74a6d442 100644 --- a/crates/trusted-server-adapter-spin/spin.toml +++ b/crates/trusted-server-adapter-spin/spin.toml @@ -38,7 +38,11 @@ source = "../../target/wasm32-wasip1/release/trusted_server_adapter_spin.wasm" # origins are still served over plaintext http. Follow-up: scope this to the # configured origins once they can be enumerated from settings. allowed_outbound_hosts = ["https://*:*", "http://*:*"] -key_value_stores = ["default"] +# `app_config` holds the Trusted Server app-config blob loaded at startup and +# seeded by `ts config push --adapter spin`. Spin auto-provides only `default`; +# `app_config` must be granted here and backed by a `[key_value_store.app_config]` +# stanza in runtime-config.toml (passed via `spin up --runtime-config-file`). +key_value_stores = ["default", "app_config"] [component.trusted-server.variables] v_current_x2dkid = "{{ v_current_x2dkid }}" diff --git a/crates/trusted-server-adapter-spin/src/app.rs b/crates/trusted-server-adapter-spin/src/app.rs index 0dddd7a1..9b85afff 100644 --- a/crates/trusted-server-adapter-spin/src/app.rs +++ b/crates/trusted-server-adapter-spin/src/app.rs @@ -26,6 +26,10 @@ use trusted_server_core::request_signing::{ handle_verify_signature, }; use trusted_server_core::settings::Settings; +#[cfg(all(feature = "spin", target_arch = "wasm32"))] +use trusted_server_core::settings_data::{ + default_config_key, default_config_store_name, get_settings_from_config_store, +}; use crate::middleware::{AuthMiddleware, FinalizeResponseMiddleware, NormalizeMiddleware}; use crate::platform::build_runtime_services; @@ -48,8 +52,34 @@ pub struct AppState { /// Returns an error when settings, the auction orchestrator, or the integration /// registry fail to initialise. fn build_state() -> Result, Report> { - let settings = Settings::from_toml(include_str!("../../../trusted-server.example.toml"))?; - build_state_with_settings(settings) + build_state_with_settings(load_startup_settings()?) +} + +/// Loads startup [`Settings`] on the Spin runtime from the app-config blob in +/// the Spin key-value store seeded by `ts config push --adapter spin` (store id +/// and blob key both resolve to `app_config`). +/// +/// # Errors +/// +/// Returns [`TrustedServerError::ConfigStoreUnavailable`] (HTTP 503) when the +/// store is unseeded or unreadable, and [`TrustedServerError::Configuration`] +/// (HTTP 500) when the blob fails envelope/settings verification. +#[cfg(all(feature = "spin", target_arch = "wasm32"))] +fn load_startup_settings() -> Result> { + let store_name = default_config_store_name(); + let config_key = default_config_key(); + get_settings_from_config_store( + &crate::platform::SpinKvConfigStore, + &store_name, + &config_key, + ) +} + +/// Loads startup [`Settings`] from the embedded example config on non-Spin +/// (native test) builds, where Spin host key-value functions are unavailable. +#[cfg(not(all(feature = "spin", target_arch = "wasm32")))] +fn load_startup_settings() -> Result> { + Settings::from_toml(include_str!("../../../trusted-server.example.toml")) } /// Build the application state from explicit settings. @@ -348,27 +378,34 @@ fn legacy_admin_alias_denied() -> Response { // Startup error fallback // --------------------------------------------------------------------------- -/// Returns a [`RouterService`] that responds to every route with a generic -/// 503 Service Unavailable. The startup error is logged but not echoed in the -/// response body so that deployment state is not leaked to anonymous callers. +/// Returns a [`RouterService`] that answers every route with the startup +/// error's mapped HTTP status: an unseeded/unreadable config store yields 503, +/// while a blob that reads but fails envelope/settings verification yields 500. +/// The body is the generic user-facing message so deployment state is not +/// leaked to anonymous callers. fn startup_error_router(e: &Report) -> RouterService { - log::error!("startup failed, serving error fallback: {:?}", e); - - let handler = |_ctx: RequestContext| { - let body = edgezero_core::body::Body::from("Service Unavailable\n"); - let mut resp = Response::new(body); - *resp.status_mut() = StatusCode::SERVICE_UNAVAILABLE; - resp.headers_mut().insert( - header::CONTENT_TYPE, - HeaderValue::from_static("text/plain; charset=utf-8"), - ); - async move { Ok::(resp) } + log::error!("startup failed, serving error fallback: {e:?}"); + + let status = e.current_context().status_code(); + let message = Arc::new(format!("{}\n", e.current_context().user_message())); + + let make = move |msg: Arc| { + move |_ctx: RequestContext| { + let body = edgezero_core::body::Body::from((*msg).clone()); + let mut resp = Response::new(body); + *resp.status_mut() = status; + resp.headers_mut().insert( + header::CONTENT_TYPE, + HeaderValue::from_static("text/plain; charset=utf-8"), + ); + async move { Ok::(resp) } + } }; // Cover the full publisher fallback method set (GET, POST, HEAD, OPTIONS, // PUT, PATCH, DELETE) so degraded behaviour stays consistent with the - // healthy router: every method on `/` and `/{*rest}` returns the generic - // 503 instead of a router-level 405 for HEAD/OPTIONS/PATCH. + // healthy router: every method on `/` and `/{*rest}` returns the mapped + // error status instead of a router-level 405 for HEAD/OPTIONS/PATCH. let mut builder = RouterService::builder().middleware(FinalizeResponseMiddleware::new( Arc::new(Settings::default()), )); @@ -378,8 +415,8 @@ fn startup_error_router(e: &Report) -> RouterService { Ok::(health_response()) }); for method in publisher_fallback_methods() { - builder = builder.route("/", method.clone(), handler); - builder = builder.route("/{*rest}", method, handler); + builder = builder.route("/", method.clone(), make(Arc::clone(&message))); + builder = builder.route("/{*rest}", method, make(Arc::clone(&message))); } builder.build() } @@ -953,8 +990,9 @@ mod tests { // (including HEAD/OPTIONS/PATCH) on both "/" and nested paths with the // generic 503, never a router-level 405, so startup-failure behaviour // stays consistent with the healthy router. - let report = Report::new(TrustedServerError::BadRequest { - message: "startup failure".to_string(), + let report = Report::new(TrustedServerError::ConfigStoreUnavailable { + store_name: "app_config".to_string(), + message: "unseeded".to_string(), }); let router = startup_error_router(&report); @@ -979,12 +1017,40 @@ mod tests { } } + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] + async fn startup_error_router_maps_verify_failure_to_500() { + // A blob that reads but fails envelope/settings verification is a + // Configuration error (500), not a config-store read failure (503). The + // startup router must surface that distinction, not flatten to 503. + let report = Report::new(TrustedServerError::Configuration { + message: "blob failed integrity verification".to_string(), + }); + let router = startup_error_router(&report); + + let req = edgezero_core::http::request_builder() + .method("GET") + .uri("/") + .body(edgezero_core::body::Body::empty()) + .expect("should build request"); + let status = router + .oneshot(req) + .await + .expect("should route startup-error request") + .status() + .as_u16(); + assert_eq!( + status, 500, + "a verify-failure startup error must map to 500, not a flattened 503" + ); + } + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn startup_error_router_answers_health_with_200() { // The liveness probe must keep returning 200 even while application state // construction is failing, matching the Fastly/Axum health behaviour. - let report = Report::new(TrustedServerError::BadRequest { - message: "startup failure".to_string(), + let report = Report::new(TrustedServerError::ConfigStoreUnavailable { + store_name: "app_config".to_string(), + message: "unseeded".to_string(), }); let router = startup_error_router(&report); diff --git a/crates/trusted-server-adapter-spin/src/lib.rs b/crates/trusted-server-adapter-spin/src/lib.rs index f47877ff..38e27cf5 100644 --- a/crates/trusted-server-adapter-spin/src/lib.rs +++ b/crates/trusted-server-adapter-spin/src/lib.rs @@ -13,5 +13,58 @@ use spin_sdk::http_service; #[http_service] // FORCED: edgezero_adapter_spin::run_app returns anyhow::Result — EdgeZero SDK constraint, not a project choice. async fn handle(req: Request) -> anyhow::Result { + // Install a real `log` backend before dispatch. `edgezero_adapter_spin` + // only calls a no-op `init_logger`, so without this every `log::error!` + // (including the startup-error diagnostics) is silently dropped on Spin. + logging::init(); edgezero_adapter_spin::run_app::(req).await } + +/// Minimal stderr [`log`] backend for the Spin component. +/// +/// Spin captures component stderr to `.spin/logs/_stderr.txt` +/// (local) and to the Fermyon Cloud log stream, so routing `log` records to +/// stderr makes startup and request diagnostics visible. +#[cfg(all(feature = "spin", target_arch = "wasm32"))] +mod logging { + use std::io::Write as _; + use std::sync::Once; + + struct StderrLogger; + + impl log::Log for StderrLogger { + fn enabled(&self, _metadata: &log::Metadata<'_>) -> bool { + true + } + + fn log(&self, record: &log::Record<'_>) { + // Write to the stderr handle directly rather than via `eprintln!` + // so the component's `print_stderr` lint stays enforced elsewhere. + let _ = writeln!( + std::io::stderr(), + "[{}] {}: {}", + record.level(), + record.target(), + record.args() + ); + } + + fn flush(&self) {} + } + + static LOGGER: StderrLogger = StderrLogger; + static INIT: Once = Once::new(); + + /// Installs [`StderrLogger`] as the global `log` backend exactly once. + /// + /// Runs before `run_app`, so this wins the global slot and the adapter's + /// own no-op `init_logger` becomes the harmless second `set_logger` (whose + /// `Err` it already ignores). + pub(crate) fn init() { + INIT.call_once(|| { + if log::set_logger(&LOGGER).is_ok() { + log::set_max_level(log::LevelFilter::Info); + } + }); + } +} diff --git a/crates/trusted-server-adapter-spin/src/platform.rs b/crates/trusted-server-adapter-spin/src/platform.rs index 1e13ca30..2506d955 100644 --- a/crates/trusted-server-adapter-spin/src/platform.rs +++ b/crates/trusted-server-adapter-spin/src/platform.rs @@ -61,6 +61,57 @@ impl PlatformConfigStore for NoopConfigStore { } } +/// Spin key-value-backed [`PlatformConfigStore`] used to load the app-config +/// blob at startup. +/// +/// `ts config push --adapter spin` writes the app-config blob into a Spin +/// key-value store (label = the config store id, key = the blob key), so +/// startup opens that store by name and reads the blob. This is deliberately +/// distinct from [`ConfigStoreHandleAdapter`], which reads per-request +/// request-signing config from Spin component *variables*: a multi-kilobyte +/// blob does not fit Spin's flat variable namespace. +#[cfg(all(feature = "spin", target_arch = "wasm32"))] +pub(crate) struct SpinKvConfigStore; + +#[cfg(all(feature = "spin", target_arch = "wasm32"))] +impl PlatformConfigStore for SpinKvConfigStore { + fn get(&self, store_name: &StoreName, key: &str) -> Result> { + let label = store_name.as_ref(); + let store = + futures::executor::block_on(spin_sdk::key_value::Store::open(label)).map_err(|e| { + Report::new(PlatformError::ConfigStore).attach(format!( + "failed to open Spin key-value store `{label}`: {e}" + )) + })?; + let bytes = futures::executor::block_on(store.get(key)) + .map_err(|e| { + Report::new(PlatformError::ConfigStore).attach(format!( + "Spin key-value lookup for `{key}` in `{label}` failed: {e}" + )) + })? + .ok_or_else(|| { + Report::new(PlatformError::ConfigStore).attach(format!( + "key `{key}` not found in Spin key-value store `{label}`" + )) + })?; + String::from_utf8(bytes).map_err(|e| { + Report::new(PlatformError::ConfigStore).attach(format!( + "Spin key-value value for `{key}` is not valid UTF-8: {e}" + )) + }) + } + + fn put(&self, _: &StoreId, _: &str, _: &str) -> Result<(), Report> { + Err(Report::new(PlatformError::ConfigStore) + .attach("config store writes are not supported on Spin")) + } + + fn delete(&self, _: &StoreId, _: &str) -> Result<(), Report> { + Err(Report::new(PlatformError::ConfigStore) + .attach("config store writes are not supported on Spin")) + } +} + #[cfg(not(all(feature = "spin", target_arch = "wasm32")))] struct NoopSecretStore; diff --git a/crates/trusted-server-adapter-spin/tests/routes.rs b/crates/trusted-server-adapter-spin/tests/routes.rs index 943717e8..0ea4e394 100644 --- a/crates/trusted-server-adapter-spin/tests/routes.rs +++ b/crates/trusted-server-adapter-spin/tests/routes.rs @@ -73,7 +73,7 @@ fn edgezero_manifest_loads_and_resolves_spin_stores() { .as_ref() .expect("should declare a KV store") .default_id(), - "trusted_server_kv", + "ec_identity_store", "Spin KV declaration must expose its default logical store id" ); assert!( diff --git a/crates/trusted-server-core/src/error.rs b/crates/trusted-server-core/src/error.rs index 2804afec..ab406e3c 100644 --- a/crates/trusted-server-core/src/error.rs +++ b/crates/trusted-server-core/src/error.rs @@ -26,6 +26,11 @@ pub enum TrustedServerError { #[display("Configuration error: {message}")] Configuration { message: String }, + /// Config store could not be read (unseeded, transient backend, or a listed + /// key missing) — Settings cannot be loaded. Retryable / fix by seeding. + #[display("Config store unavailable: {store_name} - {message}")] + ConfigStoreUnavailable { store_name: String, message: String }, + /// Auction orchestration error. #[display("Auction error: {message}")] Auction { message: String }, @@ -123,6 +128,7 @@ impl IntoHttpResponse for TrustedServerError { Self::InvalidHeaderValue { .. } => StatusCode::BAD_REQUEST, Self::InvalidUtf8 { .. } => StatusCode::INTERNAL_SERVER_ERROR, Self::KvStore { .. } => StatusCode::SERVICE_UNAVAILABLE, + Self::ConfigStoreUnavailable { .. } => StatusCode::SERVICE_UNAVAILABLE, Self::Prebid { .. } => StatusCode::BAD_GATEWAY, Self::Integration { .. } => StatusCode::BAD_GATEWAY, Self::Proxy { .. } => StatusCode::BAD_GATEWAY, @@ -142,6 +148,12 @@ impl IntoHttpResponse for TrustedServerError { // Consent strings may contain user data; return category only. Self::GdprConsent { .. } => "GDPR consent error".to_string(), Self::InvalidHeaderValue { .. } => "Invalid header value".to_string(), + // Retryable 503s: signal transient unavailability so clients and + // monitoring can distinguish retry-me from terminal, without leaking + // any internal detail (full details are logged in to_error_response). + Self::ConfigStoreUnavailable { .. } | Self::KvStore { .. } => { + "Service temporarily unavailable".to_string() + } // Server/integration errors (5xx/502/503) — generic message only. // Full details are already logged via log::error! in to_error_response. _ => "An internal error occurred".to_string(), @@ -211,6 +223,13 @@ mod tests { }, StatusCode::SERVICE_UNAVAILABLE, ), + ( + TrustedServerError::ConfigStoreUnavailable { + store_name: String::from("app_config"), + message: String::from("store unavailable"), + }, + StatusCode::SERVICE_UNAVAILABLE, + ), ( TrustedServerError::Prebid { message: String::from("adapter error"), @@ -286,6 +305,9 @@ mod tests { TrustedServerError::RequestTooLarge { .. } => StatusCode::PAYLOAD_TOO_LARGE, TrustedServerError::InvalidHeaderValue { .. } => StatusCode::BAD_REQUEST, TrustedServerError::KvStore { .. } => StatusCode::SERVICE_UNAVAILABLE, + TrustedServerError::ConfigStoreUnavailable { .. } => { + StatusCode::SERVICE_UNAVAILABLE + } TrustedServerError::Prebid { .. } => StatusCode::BAD_GATEWAY, TrustedServerError::Integration { .. } => StatusCode::BAD_GATEWAY, TrustedServerError::Proxy { .. } => StatusCode::BAD_GATEWAY, @@ -317,10 +339,6 @@ mod tests { TrustedServerError::Configuration { message: "secret db path".into(), }, - TrustedServerError::KvStore { - store_name: "users".into(), - message: "timeout".into(), - }, TrustedServerError::Proxy { message: "upstream 10.0.0.1 refused".into(), }, @@ -400,6 +418,52 @@ mod tests { assert_eq!(error.user_message(), "Invalid header value"); } + #[test] + fn config_store_unavailable_maps_to_503() { + let error = TrustedServerError::ConfigStoreUnavailable { + store_name: String::from("app_config"), + message: String::from("unavailable or not seeded"), + }; + assert_eq!( + error.status_code(), + StatusCode::SERVICE_UNAVAILABLE, + "config-store read failure should map to 503" + ); + // Detail stays server-side; the public body signals retryable + // unavailability without leaking internal config-store detail. + assert_eq!( + error.user_message(), + "Service temporarily unavailable", + "503 client body must be retry-flavored and leak no internal detail" + ); + } + + #[test] + fn retryable_503_variants_return_service_unavailable_body() { + let cases = [ + TrustedServerError::ConfigStoreUnavailable { + store_name: "app_config".into(), + message: "unseeded".into(), + }, + TrustedServerError::KvStore { + store_name: "users".into(), + message: "timeout".into(), + }, + ]; + for error in &cases { + assert_eq!( + error.status_code(), + StatusCode::SERVICE_UNAVAILABLE, + "should map to 503 for {error:?}" + ); + assert_eq!( + error.user_message(), + "Service temporarily unavailable", + "503 body should be retry-flavored and leak no detail for {error:?}" + ); + } + } + #[test] fn status_code_maps_each_error_variant_to_expected_http_response() { // Compile-time guard: adding a TrustedServerError variant without @@ -413,6 +477,7 @@ mod tests { | TrustedServerError::InvalidUtf8 { .. } | TrustedServerError::InvalidHeaderValue { .. } | TrustedServerError::KvStore { .. } + | TrustedServerError::ConfigStoreUnavailable { .. } | TrustedServerError::Prebid { .. } | TrustedServerError::Integration { .. } | TrustedServerError::Proxy { .. } @@ -499,6 +564,13 @@ mod tests { }, StatusCode::SERVICE_UNAVAILABLE, ), + ( + TrustedServerError::ConfigStoreUnavailable { + store_name: "app_config".to_string(), + message: "config store unavailable".to_string(), + }, + StatusCode::SERVICE_UNAVAILABLE, + ), ( TrustedServerError::Auction { message: "auction failed".to_string(), diff --git a/crates/trusted-server-core/src/settings_data.rs b/crates/trusted-server-core/src/settings_data.rs index 0261c7e4..b69cf1ec 100644 --- a/crates/trusted-server-core/src/settings_data.rs +++ b/crates/trusted-server-core/src/settings_data.rs @@ -31,7 +31,14 @@ struct FastlyChunkRef { /// Returns the default `EdgeZero` app-config store name. #[must_use] pub fn default_config_store_name() -> StoreName { - StoreName::from(EnvConfig::from_env().store_name("config", DEFAULT_CONFIG_STORE_ID)) + config_store_name_from(&EnvConfig::from_env()) +} + +/// Resolves the app-config store name from an [`EnvConfig`], falling back to +/// the logical id when the override is unset, blank, or contains control +/// characters (the `EnvConfig::store_name` fallback semantics). +fn config_store_name_from(env_config: &EnvConfig) -> StoreName { + StoreName::from(env_config.store_name("config", DEFAULT_CONFIG_STORE_ID)) } /// Returns the default config-store key containing the app-config blob. @@ -44,9 +51,10 @@ pub fn default_config_key() -> String { /// /// # Errors /// -/// Returns [`TrustedServerError::Configuration`] when the config blob is -/// missing, cannot be read, fails envelope verification, or fails Trusted -/// Server settings validation. +/// Returns [`TrustedServerError::ConfigStoreUnavailable`] (HTTP 503) when the +/// config blob (or a referenced chunk) cannot be read, and +/// [`TrustedServerError::Configuration`] (HTTP 500) when the read succeeds but +/// envelope/chunk verification or settings validation fails. pub fn get_settings_from_config_store( config_store: &dyn PlatformConfigStore, store_name: &StoreName, @@ -62,14 +70,21 @@ fn read_config_entry( store_name: &StoreName, key: &str, ) -> Result> { - let message = format!( - "failed to read Trusted Server app config key `{key}` from config store `{store_name}`" - ); config_store .get(store_name, key) - .change_context(TrustedServerError::Configuration { message }) + .change_context(TrustedServerError::ConfigStoreUnavailable { + store_name: store_name.to_string(), + message: format!( + "read failed for `{key}` (unseeded, missing, or transient) — run `ts config push` to (re)seed" + ), + }) } +// Mirrors `edgezero-adapter-fastly`'s crate-private `chunked_config` resolver +// (same wire format). Kept local because the upstream one collapses missing +// chunks (retryable, 503 here) and corrupt chunks (terminal, 500 here) into +// one opaque error — see the design doc's follow-up section for the plan to +// delete this once upstream exports a resolver that keeps that distinction. fn resolve_fastly_chunk_pointer( config_store: &dyn PlatformConfigStore, store_name: &StoreName, @@ -177,6 +192,7 @@ fn configuration_error(message: String) -> Result()); + + assert_eq!( + config_store_name_from(&env_config).to_string(), + DEFAULT_CONFIG_STORE_ID, + "unset override should fall back to the logical id" ); } } diff --git a/crates/trusted-server-integration-tests/tests/common/ec.rs b/crates/trusted-server-integration-tests/tests/common/ec.rs index 7cc91781..623f96f4 100644 --- a/crates/trusted-server-integration-tests/tests/common/ec.rs +++ b/crates/trusted-server-integration-tests/tests/common/ec.rs @@ -271,6 +271,37 @@ fn mappings_to_json(mappings: &[BatchMapping]) -> Vec { // Assertion helpers // --------------------------------------------------------------------------- +/// Sends a non-fatal diagnostic probe for the `EdgeZero` entry point. +/// +/// `main()` silently falls back to the legacy entry point when the config store +/// cannot be opened or read, and the EC lifecycle scenarios pass on either path. +/// This probe used to assert a router-level `405` for unsupported methods, but +/// Viceroy/Fastly method handling can fall through to the publisher fallback. +/// Keep the request as a non-fatal diagnostic so the `EdgeZero` CI job still runs +/// the EC lifecycle scenarios instead of failing on a routing canary that is not +/// stable across runtime versions. +pub fn assert_edgezero_entry_point(base_url: &str) -> TestResult<()> { + let client = Client::builder() + .redirect(reqwest::redirect::Policy::none()) + .build() + .expect("should build EdgeZero canary client"); + let response = client + .request( + reqwest::Method::OPTIONS, + format!("{base_url}/_ts/api/v1/batch-sync"), + ) + .send() + .change_context(TestError::HttpRequest) + .attach("OPTIONS /_ts/api/v1/batch-sync (EdgeZero entry-point probe)")?; + if response.status().as_u16() != 405 { + log::warn!( + "EdgeZero entry-point probe returned status {}; continuing with EC lifecycle scenarios", + response.status() + ); + } + Ok(()) +} + pub fn assert_status(resp: &Response, expected: u16) -> TestResult<()> { let actual = resp.status().as_u16(); if actual != expected { diff --git a/crates/trusted-server-integration-tests/tests/integration.rs b/crates/trusted-server-integration-tests/tests/integration.rs index 2e0be2ff..e66059e4 100644 --- a/crates/trusted-server-integration-tests/tests/integration.rs +++ b/crates/trusted-server-integration-tests/tests/integration.rs @@ -201,6 +201,17 @@ fn test_ec_lifecycle_fastly() { process.base_url ); + // EdgeZero entry-point probe. This same test runs in two CI jobs: the + // legacy `integration-tests` job (generated legacy config) and the + // `integration-tests-edgezero` job (generated EdgeZero rollout config). Only + // run the diagnostic probe when the job opts into the EdgeZero path via + // EXPECT_EDGEZERO_ENTRY_POINT; the lifecycle scenarios below are the + // authoritative compatibility check. + if std::env::var("EXPECT_EDGEZERO_ENTRY_POINT").as_deref() == Ok("true") { + common::ec::assert_edgezero_entry_point(&process.base_url) + .expect("EdgeZero entry-point probe request failed"); + } + for scenario in EcScenario::all() { log::info!(" Running EC scenario: {scenario:?}"); let result = scenario.run(&process.base_url); diff --git a/docs/guide/architecture.md b/docs/guide/architecture.md index 54fbdbee..b1847477 100644 --- a/docs/guide/architecture.md +++ b/docs/guide/architecture.md @@ -66,7 +66,7 @@ Native Axum dev/test adapter (native binary): Fermyon Spin adapter (`wasm32-wasip1` component): - Production-capable deployment target for the Spin runtime -- Platform services (config store, secret store, KV) backed by Spin component variables and the EdgeZero KV handle +- Startup app-config blob loaded from a Spin key-value store (`app_config`, seeded by `ts config push --adapter spin`); per-request request-signing config and secrets read from Spin component variables; KV via the EdgeZero KV handle - Outbound HTTP via `spin_sdk::http::send` — no configurable per-request timeout (see rustdoc) - Single auction provider only; multi-provider fan-out requires the Fastly adapter diff --git a/docs/superpowers/specs/2026-06-27-edgezero-http-config-503-design.md b/docs/superpowers/specs/2026-06-27-edgezero-http-config-503-design.md new file mode 100644 index 00000000..a6313ea3 --- /dev/null +++ b/docs/superpowers/specs/2026-06-27-edgezero-http-config-503-design.md @@ -0,0 +1,91 @@ +# Design: HTTP-Layer Config-Store Load Hardening (503) + +- **Date:** 2026-06-27 +- **Author:** Prakash (HTTP-layer / runtime). +- **Status:** implemented on `feature/edgezero-269-http` (targets `main`). +- **Supersedes:** the earlier per-key flatten/hash variant of this design. The + CLI now stores Trusted Server config as a single **blob** (optionally chunked + for Fastly value-size limits — `config_payload::settings_from_config_blob`, + `settings_data::FastlyChunkPointer`), so the load path and this spec are + re-derived against that model. + +--- + +## 1. Problem + +The runtime rebuilds `Settings` at boot by reading the `app_config` config +store. Before this change every read failure — including an **unseeded** store — +mapped to `TrustedServerError::Configuration` → **500**, indistinguishable from a +genuine code bug. `trusted-server.toml` is deleted, so an unseeded store is an +expected operational state (fresh install, or cutover before `ts config push`), +not a bug. + +## 2. Load sequence (blob model) + +`crates/trusted-server-core/src/settings_data.rs`: + +``` +get_settings_from_services + → get_settings_from_config_store(store, name, key) + → read_config_entry(key) // READ — the blob value + → resolve_fastly_chunk_pointer(value) // if a chunk pointer: + → read_config_entry(chunk.key) × N // READ — each chunk + → verify chunk len + sha, envelope len + sha // VERIFY + → settings_from_config_blob(envelope_json) // VERIFY — parse + validate +``` + +`read_config_entry` is the **single read seam** (used for both the top-level +blob key and every chunk key). `key` resolves via +`EnvConfig::store_key("config", "app_config")`. + +## 3. Behavior matrix (the contract) + +The boundary is **"couldn't read the config"** vs **"read it but it's +invalid"** — classified by **call site**, because `PlatformConfigStore::get` +collapses key-absent and transport failure into one `Err` (see §5). + +| Situation | Where | Status | +| ---------------------------------------------------------------------------------------------------- | ------------------------------------- | -------------------------------------------------------------------------- | +| Blob key or a referenced chunk cannot be read (store unseeded, transient backend, chunk key missing) | `read_config_entry` | **503** `ConfigStoreUnavailable`, actionable hint `run \`ts config push\`` | +| Chunk len/sha mismatch, envelope len/sha mismatch, unsupported pointer version | `resolve_fastly_chunk_pointer` verify | **500** `Configuration` | +| Blob read OK but not a valid envelope / settings invalid | `settings_from_config_blob` | **500** `Configuration` | +| Seeded + valid | — | `Settings` loads | + +503 is correct for the read column: unseeded → seed it; transient → retry. + +## 4. Mechanism (one new variant) + +`TrustedServerError::ConfigStoreUnavailable { store_name, message }` → +`StatusCode::SERVICE_UNAVAILABLE` (precedent: the existing `KvStore` 503 arm). +Only `read_config_entry`'s `change_context` target changes from `Configuration` +to the new variant; all verify/parse paths are untouched. No `PlatformError` +or `PlatformConfigStore` change. + +**Security:** the actionable hint rides the error chain (`Display`) to the +**server log** only. The public 503 body is a generic, retry-flavored +`user_message()` arm shared by the retryable 503 variants +(`ConfigStoreUnavailable | KvStore`): `"Service temporarily unavailable"`. +It carries no internal detail, but — unlike the 500-flavored catch-all +(`"An internal error occurred"`) — lets clients and monitoring distinguish +_retryable_ from _terminal_ without leaking tooling/paths. + +## 5. Out of scope / follow-up + +- `PlatformConfigStore::get → Result>` (absence as a value, not an + error) would let the runtime distinguish **unseeded** (`Ok(None)`) from + **transient** (`Err`) precisely instead of classifying by call site. It is the + store-convergence direction (edgezero's own `ConfigStore::get` shape) and + touches every impl + caller across request-signing and DataDome — tracked as a + separate, cross-cutting change, not this PR. +- Non-Fastly adapter (cloudflare/spin/axum) parity rides the EdgeZero adapter + stack. +- `settings_data::resolve_fastly_chunk_pointer` duplicates + `edgezero-adapter-fastly`'s `chunked_config` resolver (same wire format: + `edgezero_kind = "fastly_config_chunks"`, version 1, per-chunk len + sha, + envelope len + sha). The upstream resolver is `pub(crate)` and, when reached + transparently through `edgezero_adapter_fastly::config_store::FastlyConfigStore::get`, + collapses **missing chunk** (retryable, this spec's 503) and **corrupt chunk** + (terminal, 500) into one opaque error — so delegating today would lose the + 503/500 contract above. Swap to the upstream resolver and delete the local + copy once edgezero exports it (or its error taxonomy distinguishes + missing from corrupt) — needs an upstream change + repin. diff --git a/edgezero.toml b/edgezero.toml index 2120ca5c..ec12d9c7 100644 --- a/edgezero.toml +++ b/edgezero.toml @@ -21,16 +21,23 @@ version = "0.1.0" # any one platform's names. `default` is the primary logical id. [stores.kv] -ids = ["trusted_server_kv"] -default = "trusted_server_kv" - +ids = ["ec_identity_store"] +default = "ec_identity_store" + +# The config store holds the pushed app-config blob. Its id must match the +# runtime bootstrap store the HTTP layer opens before Settings are loaded +# (`DEFAULT_CONFIG_STORE_ID`/`CONFIG_BLOB_KEY` = `app_config` in +# `trusted-server-core::settings_data`). `ts config push` defaults its target +# to `[stores.config].default`, and Fastly opens config stores by this id +# directly (no logical->physical binding), so a mismatch would push the blob to +# a store the runtime never reads -> every request 503s. Keep this `app_config`. [stores.config] -ids = ["trusted_server_config"] -default = "trusted_server_config" +ids = ["app_config"] +default = "app_config" [stores.secrets] -ids = ["trusted_server_secrets"] -default = "trusted_server_secrets" +ids = ["secrets"] +default = "secrets" # -- Fastly Compute (production) -------------------------------------------- @@ -94,5 +101,5 @@ features = ["spin"] [adapters.spin.commands] build = "cargo build --package trusted-server-adapter-spin --target wasm32-wasip1 --features spin --release" -serve = "spin up --from crates/trusted-server-adapter-spin" +serve = "spin up --from crates/trusted-server-adapter-spin --runtime-config-file crates/trusted-server-adapter-spin/runtime-config.toml" deploy = "spin deploy --from crates/trusted-server-adapter-spin"