From 6615ff82e388879c13ad9dc98bf43f682ec0af4d Mon Sep 17 00:00:00 2001 From: Artem Goncharov Date: Thu, 2 Jul 2026 10:50:43 +0200 Subject: [PATCH] fix: Stop swagger-ui redirect loop, split main() NormalizePathLayer wrapped SwaggerUi along with the API router, so trimming the trailing slash raced SwaggerUi's own "/swagger-ui" -> "/swagger-ui/" redirect into an infinite loop (utoipa#1467). SwaggerUi is now mounted via fallback_service outside the normalized service. - extract main() into init_tracing, init_audit, subscribe_event_hooks, build_router, start_raft, spawn_opa_subprocess, and per-interface listener functions - wrap the audit KEK in SecretBox so it zeroizes on drop - return Result from spawn_internal_listener instead of logging and continuing on a non-SPIFFE internal-interface misconfiguration - replace axum::serve(...).unwrap() on the public listener with the same log-and-cancel pattern used by the other listeners - fix extract_provider_name's &Box clippy lint, allow get_domain's currently-unused-but-tested dead code - add regression tests for the swagger-ui fix, trailing-slash normalization, and audit KEK generate/reuse/reject-bad-length Signed-off-by: Artem Goncharov --- Cargo.lock | 1 + crates/keystone/Cargo.toml | 1 + crates/keystone/src/api/common.rs | 3 + crates/keystone/src/audit.rs | 8 +- crates/keystone/src/bin/keystone.rs | 548 +++++++++++++----- .../src/server/listener/spiffe_tls.rs | 3 +- .../src/server/listener/spiffe_tls_uds.rs | 3 +- 7 files changed, 415 insertions(+), 152 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8cfee7103..1b0d65563 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3839,6 +3839,7 @@ dependencies = [ "spiffe", "spiffe-rustls", "spiffe-rustls-tokio", + "tempfile", "thiserror 2.0.18", "tokio", "tokio-rustls", diff --git a/crates/keystone/Cargo.toml b/crates/keystone/Cargo.toml index 6fc6e0b87..a3ac4c378 100644 --- a/crates/keystone/Cargo.toml +++ b/crates/keystone/Cargo.toml @@ -100,6 +100,7 @@ openstack-keystone-core-types = { workspace = true, features = ["mock"] } openstack-keystone-distributed-storage = { workspace = true, features = ["mock"] } rstest.workspace = true secrecy = { workspace = true, features = ["serde"] } +tempfile.workspace = true tokio = { workspace = true, features = ["process", "macros"] } tracing-test = { workspace = true, features = ["no-env-filter"] } url.workspace = true diff --git a/crates/keystone/src/api/common.rs b/crates/keystone/src/api/common.rs index 6f332f8a0..4b042e61d 100644 --- a/crates/keystone/src/api/common.rs +++ b/crates/keystone/src/api/common.rs @@ -32,6 +32,9 @@ use crate::keystone::ServiceState; /// /// # Returns /// * `Result` - The domain object +// Not yet wired into an endpoint (no id-or-name domain lookup route exists +// yet); kept for the domain-scoped auth work it was written for. +#[allow(dead_code)] pub async fn get_domain, N: AsRef>( state: &ServiceState, id: Option, diff --git a/crates/keystone/src/audit.rs b/crates/keystone/src/audit.rs index 3f1773e17..45859d196 100644 --- a/crates/keystone/src/audit.rs +++ b/crates/keystone/src/audit.rs @@ -106,7 +106,7 @@ pub fn sanitize_authentication_error(e: &AuthenticationError) -> &'static str { AuthenticationError::UserLocked(_) => "UserLocked", AuthenticationError::UserPasswordExpired(_) => "UserPasswordExpired", AuthenticationError::Provider { source, .. } => { - extract_provider_name(source).unwrap_or("ProviderError") + extract_provider_name(source.as_ref()).unwrap_or("ProviderError") } AuthenticationError::Validation(_) => "ValidationError", AuthenticationError::StructBuilder { .. } => "StructBuilderError", @@ -137,7 +137,7 @@ pub fn sanitize_authentication_error(e: &AuthenticationError) -> &'static str { /// Guarantees PII in third-party provider errors (emails, tokens) never /// reaches audit records. pub fn extract_provider_name( - source: &Box, + source: &(dyn std::error::Error + Send + Sync + 'static), ) -> Option<&'static str> { if source.is::() { Some("Identity") @@ -288,7 +288,7 @@ mod tests { fn extract_provider_name_identity() { let e: Box = Box::new(IdentityProviderError::UserNotFound("x".into())); - assert_eq!(extract_provider_name(&e), Some("Identity")); + assert_eq!(extract_provider_name(e.as_ref()), Some("Identity")); } #[test] @@ -297,7 +297,7 @@ mod tests { #[error("unknown")] struct Unknown; let e: Box = Box::new(Unknown); - assert_eq!(extract_provider_name(&e), None); + assert_eq!(extract_provider_name(e.as_ref()), None); } #[test] diff --git a/crates/keystone/src/bin/keystone.rs b/crates/keystone/src/bin/keystone.rs index fdbbbd60d..b40989459 100644 --- a/crates/keystone/src/bin/keystone.rs +++ b/crates/keystone/src/bin/keystone.rs @@ -30,7 +30,7 @@ use axum::{ use clap::{Parser, ValueEnum}; use color_eyre::eyre::{Report, Result, WrapErr}; use sea_orm::{ConnectOptions, Database}; -use secrecy::ExposeSecret; +use secrecy::{ExposeSecret, SecretBox}; use tokio::net::TcpListener; use tokio::{signal, spawn, time}; use tokio_util::sync::CancellationToken; @@ -58,7 +58,7 @@ use uuid::Uuid; use openstack_keystone::application_credential::ApplicationCredentialHook; use openstack_keystone::assignment::AssignmentHook; use openstack_keystone::catalog::CatalogHook; -use openstack_keystone::config::{ConfigManager, Interface, ListenerConfig}; +use openstack_keystone::config::{Config, ConfigManager, Interface, ListenerConfig}; use openstack_keystone::federation::FederationHook; use openstack_keystone::identity::IdentityHook; use openstack_keystone::idmapping::IdMappingHook; @@ -90,6 +90,9 @@ use openstack_keystone_distributed_storage::{StorageApi, app::Storage}; // Default body limit 256kB const DEFAULT_BODY_LIMIT: usize = 1024 * 256; +/// Version tag stamped on audit HMAC keys (ADR 0023 / ADR 0016-v2 §3.1). +const AUDIT_HMAC_KEY_VERSION: u64 = 1; + /// `OpenStack` Keystone. /// /// Keystone is an `OpenStack` service that provides API client authentication, @@ -141,30 +144,6 @@ impl MakeRequestId for OpenStackRequestId { async fn main() -> Result<(), Report> { let args = Args::parse(); - let external_deps_log_level = match args.verbose { - 0 => LevelFilter::WARN, - 1 => LevelFilter::INFO, - _ => LevelFilter::DEBUG, - }; - let stderr_log_filter = Targets::new() - .with_default(match args.verbose { - 0 => LevelFilter::WARN, - 1 => LevelFilter::INFO, - 2 => LevelFilter::DEBUG, - _ => LevelFilter::TRACE, - }) - .with_target("h2", external_deps_log_level) - .with_target("rustls", external_deps_log_level) - .with_target("tower", external_deps_log_level) - .with_target("openraft", external_deps_log_level) - .with_target("lsm_tree", external_deps_log_level); - - // Build the stderr log layer. - let stderr_layer = tracing_subscriber::fmt::layer() - .with_writer(io::stderr) - .with_filter(stderr_log_filter) - .boxed(); - // When only dumping of the openapi spec is necessary we should not even start // parsing the config file. This means we cannot initialize the logging yet. let mut openapi = api::ApiDoc::openapi(); @@ -187,48 +166,10 @@ async fn main() -> Result<(), Report> { let cfg_mgr = ConfigManager::watched(args.config).await?; let cfg = cfg_mgr.config.read().await.clone(); - let mut log_layers = Vec::new(); - - if cfg.default.use_stderr { - log_layers.push(stderr_layer); - } - - let non_blocking; - let _guard; - - if let Some(log_dir) = &cfg.default.log_dir { - // create a file appender that rotates hourly - let file_appender = tracing_appender::rolling::never(log_dir, "keystone.log"); - // make the file appender non-blocking - // the guard exists outside the scope to make sure buffered logs get flushed to - // output - (non_blocking, _guard) = tracing_appender::non_blocking(file_appender); - - let log_file_filter = Targets::new() - .with_default(if cfg.default.debug { - LevelFilter::DEBUG - } else { - LevelFilter::INFO - }) - .with_target("h2", external_deps_log_level) - .with_target("rustls", external_deps_log_level) - .with_target("tower", external_deps_log_level) - .with_target("openraft", external_deps_log_level) - .with_target("lsm_tree", external_deps_log_level); - log_layers.push( - tracing_subscriber::fmt::layer() - // No colors in the log file - .with_ansi(false) - .with_writer(non_blocking) - .with_filter(log_file_filter) - .boxed(), - ); - } - // build the tracing registry - tracing_subscriber::registry() - .with(ErrorLayer::default()) - .with(log_layers) - .init(); + // Guard must stay alive for the process lifetime to flush buffered + // file-appender logs; binding to `_guard` (rather than dropping the + // `Option`) keeps that lifetime tied to `main`'s scope. + let _guard = init_tracing(args.verbose, &cfg); color_eyre::install()?; info!("Starting Keystone..."); @@ -274,6 +215,147 @@ async fn main() -> Result<(), Report> { .map(Arc::clone) .map(|s| s as Arc); + let audit_dispatcher = init_audit(&cfg).await?; + + let shared_state = Arc::new( + KeystoneServiceState::new( + cfg_mgr, + conn, + provider, + Arc::new(policy), + audit_dispatcher, + storage_for_service, + ) + .await?, + ); + + spawn(cleanup(cloned_token, shared_state.clone())); + + // API Key (SCIM ingress) janitor: proactive inactivity disablement and + // tombstone purge (ADR 0021 §6.F). Runs on every node; gated to actually + // do work only on the current Raft leader. + api_key_janitor::spawn(shared_state.clone()); + + // Reset the dummy-password-hash cache whenever the configuration is + // hot-reloaded. The cache is keyed by (algorithm, rounds); if the operator + // changes `password_hashing_algorithm` or `password_hash_rounds` at runtime, + // stale entries would otherwise keep being served and reintroduce the very + // timing side-channel the dummy hash exists to close. + spawn(reset_dummy_hash_on_reload( + token.clone(), + shared_state.clone(), + )); + + subscribe_event_hooks(&shared_state).await; + + let app = build_router(&shared_state, &token, main_router, openapi).await?; + + // Shutdown watcher + let global_shutdown_token = token.clone(); + let signal_state = shared_state.clone(); + tokio::spawn(async move { + // Your existing handler that takes Arc + // Instead of calling handle.graceful_shutdown, just cancel the token + shutdown_signal(signal_state).await; + global_shutdown_token.cancel(); + }); + + let mut handles: tokio::task::JoinSet<()> = tokio::task::JoinSet::new(); + + start_raft(&cfg, &concrete_storage, &token, &mut handles).await?; + spawn_opa_subprocess(&cfg, &token, &mut handles)?; + spawn_public_listener(&cfg, app.clone(), &token, &mut handles).await?; + spawn_internal_listener(&cfg, app.clone(), &token, &mut handles)?; + spawn_admin_listener(&cfg, app, &token, &mut handles); + + // Wait for both (or handle errors) + handles.join_all().await; + token.cancel(); + Ok(()) +} + +/// Initialize the tracing subscriber registry (stderr, and optionally a +/// rotating file appender) based on CLI verbosity and the loaded config. +/// +/// Returns the file-appender's `WorkerGuard`, if file logging is enabled. +/// The caller must keep this alive for the process lifetime — dropping it +/// stops buffered log lines from being flushed. +fn init_tracing(verbose: u8, cfg: &Config) -> Option { + let external_deps_log_level = match verbose { + 0 => LevelFilter::WARN, + 1 => LevelFilter::INFO, + _ => LevelFilter::DEBUG, + }; + let stderr_log_filter = Targets::new() + .with_default(match verbose { + 0 => LevelFilter::WARN, + 1 => LevelFilter::INFO, + 2 => LevelFilter::DEBUG, + _ => LevelFilter::TRACE, + }) + .with_target("h2", external_deps_log_level) + .with_target("rustls", external_deps_log_level) + .with_target("tower", external_deps_log_level) + .with_target("openraft", external_deps_log_level) + .with_target("lsm_tree", external_deps_log_level); + + // Build the stderr log layer. + let stderr_layer = tracing_subscriber::fmt::layer() + .with_writer(io::stderr) + .with_filter(stderr_log_filter) + .boxed(); + + let mut log_layers = Vec::new(); + + if cfg.default.use_stderr { + log_layers.push(stderr_layer); + } + + let mut guard = None; + + if let Some(log_dir) = &cfg.default.log_dir { + // create a file appender that rotates hourly + let file_appender = tracing_appender::rolling::never(log_dir, "keystone.log"); + // make the file appender non-blocking; the guard must outlive the + // registry to make sure buffered logs get flushed to output. + let (non_blocking, file_guard) = tracing_appender::non_blocking(file_appender); + guard = Some(file_guard); + + let log_file_filter = Targets::new() + .with_default(if cfg.default.debug { + LevelFilter::DEBUG + } else { + LevelFilter::INFO + }) + .with_target("h2", external_deps_log_level) + .with_target("rustls", external_deps_log_level) + .with_target("tower", external_deps_log_level) + .with_target("openraft", external_deps_log_level) + .with_target("lsm_tree", external_deps_log_level); + log_layers.push( + tracing_subscriber::fmt::layer() + // No colors in the log file + .with_ansi(false) + .with_writer(non_blocking) + .with_filter(log_file_filter) + .boxed(), + ); + } + // build the tracing registry + tracing_subscriber::registry() + .with(ErrorLayer::default()) + .with(log_layers) + .init(); + + guard +} + +/// Load or generate the persisted audit HMAC key-encryption-key (KEK), +/// derive the per-node signing key, build the `AuditDispatcher`, replay any +/// events spooled by a previous run (at-least-once delivery), and spawn the +/// background spool writers for both QoS channels. See ADR 0023 / ADR +/// 0016-v2 §3.1. +async fn init_audit(cfg: &Config) -> Result, Report> { let audit_cfg = cfg.audit.clone(); let spool_dir = audit_cfg.spool_dir.clone(); std::fs::create_dir_all(&spool_dir).wrap_err("failed to create audit spool directory")?; @@ -282,9 +364,10 @@ async fn main() -> Result<(), Report> { // used as the HMAC signing key — a per-node key is derived from it via // HKDF-Expand (see `derive_audit_hmac_key`). Storing the KEK means a // restart can re-derive the same per-node key and replay the spool. - const AUDIT_HMAC_KEY_VERSION: u64 = 1; let kek_file = spool_dir.join("hmac-key.bin"); - let audit_kek: Vec = match std::fs::read(&kek_file) { + // `SecretBox` zeroizes the KEK bytes on drop, so the key-encryption-key + // does not linger in memory beyond `init_audit`'s scope. + let audit_kek: SecretBox> = SecretBox::new(Box::new(match std::fs::read(&kek_file) { Ok(bytes) => { if bytes.len() != 32 { return Err(eyre::eyre!( @@ -323,17 +406,17 @@ async fn main() -> Result<(), Report> { Err(e) => { // File exists but can't be read — permissions error or similar. // Fall back to regenerating (the old file will be silently left). - return Err(e) - .wrap_err("failed to read audit KEK; fix permissions or delete the file")?; + return Err(e).wrap_err("failed to read audit KEK; fix permissions or delete the file"); } - }; + })); // Derive the per-node signing key: // HKDF-Expand(KEK, info="keystone-audit-hmac-v1:{node_id}", L=32) // Per ADR 0023 / ADR 0016-v2 §3.1: per-node derivation ensures a // compromised node cannot forge records attributed to other nodes. - let audit_hmac_key: Arc<[u8]> = - Arc::from(derive_audit_hmac_key(&audit_kek, audit_cfg.node_id.as_str()).as_slice()); + let audit_hmac_key: Arc<[u8]> = Arc::from( + derive_audit_hmac_key(audit_kek.expose_secret(), audit_cfg.node_id.as_str()).as_slice(), + ); let (audit_dispatcher, audit_receivers) = AuditDispatcher::new( audit_cfg.node_id.as_str(), @@ -383,35 +466,14 @@ async fn main() -> Result<(), Report> { audit_cfg.node_id.clone(), )); - let shared_state = Arc::new( - KeystoneServiceState::new( - cfg_mgr, - conn, - provider, - Arc::new(policy), - audit_dispatcher, - storage_for_service, - ) - .await?, - ); - - spawn(cleanup(cloned_token, shared_state.clone())); - - // API Key (SCIM ingress) janitor: proactive inactivity disablement and - // tombstone purge (ADR 0021 §6.F). Runs on every node; gated to actually - // do work only on the current Raft leader. - api_key_janitor::spawn(shared_state.clone()); - - // Reset the dummy-password-hash cache whenever the configuration is - // hot-reloaded. The cache is keyed by (algorithm, rounds); if the operator - // changes `password_hashing_algorithm` or `password_hash_rounds` at runtime, - // stale entries would otherwise keep being served and reintroduce the very - // timing side-channel the dummy hash exists to close. - spawn(reset_dummy_hash_on_reload( - token.clone(), - shared_state.clone(), - )); + Ok(audit_dispatcher) +} +/// Subscribe all provider event hooks (application-credential, assignment, +/// catalog, federation, identity, ID-mapping, k8s-auth, resource, revoke, +/// role, token, trust) plus the CADF audit hook to `shared_state`'s event +/// dispatcher. +async fn subscribe_event_hooks(shared_state: &ServiceState) { shared_state .event_dispatcher .subscribe(Arc::new(ApplicationCredentialHook::new( @@ -470,7 +532,33 @@ async fn main() -> Result<(), Report> { &shared_state.audit_dispatcher, )))) .await; +} +/// Assemble the full Axum application: merges the `OpenAPI`-generated +/// routes, metrics endpoint, optional `WebAuthN` extension, and SCIM +/// ingress sub-router; layers on request-id/tracing/compression +/// middleware; then wraps the result in `NormalizePathLayer` with Swagger +/// UI mounted outside the normalization boundary. +/// +/// Serving a path with or without a trailing slash from the same handler +/// (matches Python Keystone, see issue #734) requires `NormalizePathLayer` +/// to rewrite the request URI *before* routing, so it must wrap the Router +/// from the outside, not be added via `Router::layer()` (which runs *after* +/// route matching, by which point "/v3/users/" has already failed to match +/// the "/v3/users" route). No HTTP redirect is involved. +/// +/// +/// SwaggerUi is deliberately merged in *after* normalization and kept out +/// of the normalized service: SwaggerUi's own handler issues an internal +/// redirect between "/swagger-ui" and "/swagger-ui/", and trimming the +/// trailing slash before that handler runs turns the redirect into an +/// infinite loop. +async fn build_router( + shared_state: &ServiceState, + token: &CancellationToken, + main_router: Router, + openapi: utoipa::openapi::OpenApi, +) -> Result { let x_request_id = HeaderName::from_static("x-openstack-request-id"); let sensitive_headers: Arc<[_]> = vec![ header::AUTHORIZATION, @@ -559,32 +647,24 @@ async fn main() -> Result<(), Report> { // `/v3`/`/v4` so that only these routes accept API-Key bearer tokens. app = app.nest("/SCIM/v2", scim::router().with_state(shared_state.clone())); - app = app - .merge(SwaggerUi::new("/swagger-ui").url("/api-docs/openapi.json", openapi)) - .layer(middleware); + app = app.layer(middleware); - // Serve a path with or without a trailing slash from the same handler - // (matches Python Keystone, see issue #734). NormalizePathLayer rewrites - // the request URI *before* routing, so it must wrap the Router from the - // outside, not be added via Router::layer() (which runs *after* route - // matching, by which point "/v3/users/" has already failed to match the - // "/v3/users" route). No HTTP redirect is involved. - // https://docs.rs/tower-http/latest/tower_http/normalize_path/index.html - let app = NormalizePathLayer::trim_trailing_slash().layer(app); - - // Shutdown watcher - let global_shutdown_token = token.clone(); - let signal_state = shared_state.clone(); - tokio::spawn(async move { - // Your existing handler that takes Arc - // Instead of calling handle.graceful_shutdown, just cancel the token - shutdown_signal(signal_state).await; - global_shutdown_token.cancel(); - }); + let normalized_app = NormalizePathLayer::trim_trailing_slash().layer(app); + let app = Router::new() + .merge(SwaggerUi::new("/swagger-ui").url("/api-docs/openapi.json", openapi)) + .fallback_service(normalized_app); - let mut handles = tokio::task::JoinSet::new(); + Ok(app) +} - // Raft +/// Start the Raft gRPC listener and join the cluster, when distributed +/// storage is configured. +async fn start_raft( + cfg: &Config, + concrete_storage: &Option>, + token: &CancellationToken, + handles: &mut tokio::task::JoinSet<()>, +) -> Result<(), Report> { if cfg.distributed_storage.is_some() { let raft_cancel_token = token.clone(); let raft_config = cfg.clone(); @@ -603,8 +683,16 @@ async fn main() -> Result<(), Report> { }); raft_grpc::ensure_raft_initialized(raft_storage_init, cfg.clone(), raft_bound_rx).await?; } + Ok(()) +} - // OPA Subprocess +/// Launch the local OPA subprocess when `api_policy.opa_policies_path` is +/// configured, and watch it for unexpected exit / cancellation. +fn spawn_opa_subprocess( + cfg: &Config, + token: &CancellationToken, + handles: &mut tokio::task::JoinSet<()>, +) -> Result<(), Report> { if let Some(policies_path) = &cfg.api_policy.opa_policies_path { let opa_url = &cfg.api_policy.opa_base_url; let addr = match opa_url.scheme() { @@ -655,19 +743,26 @@ async fn main() -> Result<(), Report> { } } } + Ok(()) +} - // Start the public interface listener +/// Start the public HTTP REST API listener. +async fn spawn_public_listener( + cfg: &Config, + app: Router, + token: &CancellationToken, + handles: &mut tokio::task::JoinSet<()>, +) -> Result<(), Report> { match cfg.interface_public.listener { ListenerConfig::Http => { info!("Starting Rest API at {}", cfg.interface_public.tcp_address); let listener = TcpListener::bind(&cfg.interface_public.tcp_address).await?; let rest_cancel_token = token.clone(); - let rest_app = app.clone(); + let rest_app = app; handles.spawn(async move { - // `rest_app` is `NormalizePath` (issue #734 wraps the - // Router from the outside), which has no - // `Router::into_make_service_with_connect_info`; use axum's - // `ServiceExt` (blanket-impl'd for any `Service`) with an + // `rest_app` is a `Router` whose fallback is the + // `NormalizePath`-wrapped API service (issue #734, #1467); use + // axum's `ServiceExt` (blanket-impl'd for any `Service`) with an // explicit request type to satisfy inference (E0284). // // `into_make_service_with_connect_info::` stores the @@ -678,7 +773,8 @@ async fn main() -> Result<(), Report> { // layer (mirroring oslo_middleware's `enable_proxy_headers_parsing`, // off by default) is a deliberate follow-up before this is used // for any IP-based login control. See issue #358. - axum::serve( + let cancel_token = rest_cancel_token.clone(); + if let Err(e) = axum::serve( listener, ServiceExt::::into_make_service_with_connect_info::< SocketAddr, @@ -688,7 +784,10 @@ async fn main() -> Result<(), Report> { rest_cancel_token.cancelled().await; }) .await - .unwrap(); + { + error!("Public REST API listener error: {:#}", e); + cancel_token.cancel(); + } }); } _ => { @@ -696,14 +795,26 @@ async fn main() -> Result<(), Report> { error!("only HTTP is supported for public interface"); } } + Ok(()) +} - // Start listener on the internal interface when necessary +/// Start the SPIFFE mTLS listener on the internal interface, when configured. +/// +/// Returns an error if the internal interface is configured with a listener +/// type other than SPIFFE — that is a startup-time misconfiguration, not a +/// condition to silently continue past. +fn spawn_internal_listener( + cfg: &Config, + app: Router, + token: &CancellationToken, + handles: &mut tokio::task::JoinSet<()>, +) -> Result<(), Report> { if let Some(internal_if) = &cfg.interface_internal { match &internal_if.listener { ListenerConfig::Spiffe(spiffe) => { // Spiffe listener let rest_addr = internal_if.tcp_address; - let rest_app = app.clone(); + let rest_app = app; let rest_cancel_token = token.clone(); let rest_spiffe_trust_domains = spiffe.trust_domains.clone(); @@ -724,15 +835,27 @@ async fn main() -> Result<(), Report> { }); } _ => { - error!("only SPIFFE is supported for internal interface"); + return Err(eyre::eyre!( + "only SPIFFE is supported for internal interface" + )); } } } + Ok(()) +} +/// Start the SPIFFE mTLS listener on the admin Unix-domain-socket interface, +/// when configured. +fn spawn_admin_listener( + cfg: &Config, + app: Router, + token: &CancellationToken, + handles: &mut tokio::task::JoinSet<()>, +) { if let Some(admin_if) = &cfg.interface_admin { // admin spiffe UDS listener let socket_path = admin_if.listener.socket_path.clone(); - let rest_app = app.clone(); + let rest_app = app; let rest_cancel_token = token.clone(); let rest_spiffe_trust_domains = admin_if.listener.trust_domains.clone(); let peer_uid = admin_if.listener.peer_uid; @@ -758,11 +881,6 @@ async fn main() -> Result<(), Report> { } }); } - - // Wait for both (or handle errors) - handles.join_all().await; - token.cancel(); - Ok(()) } /// Prometheus scrape endpoint — returns the three audit counters in text @@ -869,3 +987,145 @@ async fn shutdown_signal(state: ServiceState) { () = terminate => {state.terminate().await.ok();}, } } + +#[cfg(test)] +mod tests { + use std::path::PathBuf; + + use http_body_util::BodyExt as _; + use sea_orm::DatabaseConnection; + use tower::ServiceExt as _; + + use openstack_keystone_core::keystone::Service; + use openstack_keystone_core::policy::MockPolicy; + use openstack_keystone_core::provider::Provider; + + use super::*; + + /// Build a `ServiceState` with a disconnected DB and mocked + /// provider/policy/audit components — mirrors + /// `openstack_keystone_core::tests::get_mocked_state`, which is + /// `#[cfg(test)]`-gated inside the `core` crate and therefore not + /// visible from this crate. + async fn test_state(cfg: Config) -> ServiceState { + Arc::new( + Service::new( + ConfigManager::not_watched(cfg), + DatabaseConnection::Disconnected, + Provider::mocked_builder().build().unwrap(), + Arc::new(MockPolicy::default()), + AuditDispatcher::noop(), + None, + ) + .await + .unwrap(), + ) + } + + fn test_config(spool_dir: PathBuf) -> Config { + let mut cfg = Config::default(); + cfg.audit.spool_dir = spool_dir; + cfg.audit.node_id = "test-node".into(); + cfg + } + + // Regression test for https://github.com/juhaku/utoipa/issues/1467: + // wrapping SwaggerUi in `NormalizePathLayer` turns its internal + // "/swagger-ui" -> "/swagger-ui/" redirect into an infinite loop, because + // the layer strips the trailing slash the redirect just added before + // SwaggerUi's own router ever sees it. `build_router` keeps SwaggerUi + // outside the normalized service (mounted via `fallback_service`), so a + // direct request to "/swagger-ui/" must resolve without another redirect. + #[tokio::test] + async fn build_router_serves_swagger_ui_without_redirect_loop() { + let tmp = tempfile::tempdir().unwrap(); + let state = test_state(test_config(tmp.path().to_path_buf())).await; + let token = CancellationToken::new(); + let (main_router, _main_api) = api::openapi_router().split_for_parts(); + let openapi = api::ApiDoc::openapi(); + + let app = build_router(&state, &token, main_router, openapi) + .await + .expect("router assembly succeeds"); + + let response = app + .oneshot( + Request::builder() + .uri("/swagger-ui/") + .body(axum::body::Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + + assert_eq!( + response.status(), + StatusCode::OK, + "expected swagger-ui to serve directly, not redirect (utoipa#1467 regression)" + ); + } + + // A path handled by NormalizePathLayer's remit (issue #734): a route + // registered without a trailing slash must still resolve when the client + // requests it with one, and vice versa, since the layer wraps everything + // except SwaggerUi. + #[tokio::test] + async fn build_router_normalizes_trailing_slash_on_api_routes() { + let tmp = tempfile::tempdir().unwrap(); + let state = test_state(test_config(tmp.path().to_path_buf())).await; + let token = CancellationToken::new(); + let (main_router, _main_api) = api::openapi_router().split_for_parts(); + let openapi = api::ApiDoc::openapi(); + + let app = build_router(&state, &token, main_router, openapi) + .await + .expect("router assembly succeeds"); + + let response = app + .oneshot( + Request::builder() + .uri("/metrics/") + .body(axum::body::Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + + // "/metrics" is registered without a trailing slash; NormalizePathLayer + // must still route "/metrics/" to it rather than 404ing. + assert_eq!(response.status(), StatusCode::OK); + let body = response.into_body().collect().await.unwrap().to_bytes(); + assert!(!body.is_empty()); + } + + #[tokio::test] + async fn init_audit_generates_and_reuses_kek() { + let tmp = tempfile::tempdir().unwrap(); + let cfg = test_config(tmp.path().to_path_buf()); + + init_audit(&cfg).await.expect("first init generates a KEK"); + let kek_file = tmp.path().join("hmac-key.bin"); + let generated = std::fs::read(&kek_file).expect("KEK file was written"); + assert_eq!(generated.len(), 32); + + // A second init on the same spool_dir must reuse the persisted KEK + // rather than silently regenerating it (which would invalidate any + // spooled events signed with the old key). + init_audit(&cfg).await.expect("second init reuses the KEK"); + let reused = std::fs::read(&kek_file).unwrap(); + assert_eq!(generated, reused); + } + + #[tokio::test] + async fn init_audit_rejects_wrong_length_kek() { + let tmp = tempfile::tempdir().unwrap(); + let cfg = test_config(tmp.path().to_path_buf()); + std::fs::create_dir_all(&cfg.audit.spool_dir).unwrap(); + std::fs::write(cfg.audit.spool_dir.join("hmac-key.bin"), b"too-short").unwrap(); + + match init_audit(&cfg).await { + Ok(_) => panic!("expected init_audit to reject a wrong-length KEK"), + Err(e) => assert!(e.to_string().contains("expected 32")), + } + } +} diff --git a/crates/keystone/src/server/listener/spiffe_tls.rs b/crates/keystone/src/server/listener/spiffe_tls.rs index 827d32e52..4ee270062 100644 --- a/crates/keystone/src/server/listener/spiffe_tls.rs +++ b/crates/keystone/src/server/listener/spiffe_tls.rs @@ -21,7 +21,6 @@ use spiffe_rustls_tokio::TlsAcceptor; use tokio::net::TcpListener; use tokio_util::sync::CancellationToken; use tower::Service; -use tower_http::normalize_path::NormalizePath; use tracing::info; use openstack_keystone_core::common::SpiffeId as CoreSpiffeId; @@ -35,7 +34,7 @@ use crate::server::listener::spiffe_common; /// the SPIFFE workload API. pub async fn start_axum_app( addr: std::net::SocketAddr, - app: NormalizePath, + app: Router, token: CancellationToken, trust_domains: Vec, interface: Interface, diff --git a/crates/keystone/src/server/listener/spiffe_tls_uds.rs b/crates/keystone/src/server/listener/spiffe_tls_uds.rs index 5f7dd6f61..333bc0c08 100644 --- a/crates/keystone/src/server/listener/spiffe_tls_uds.rs +++ b/crates/keystone/src/server/listener/spiffe_tls_uds.rs @@ -27,7 +27,6 @@ use tokio::net::UnixListener; use tokio_rustls::TlsAcceptor; use tokio_util::sync::CancellationToken; use tower::Service; -use tower_http::normalize_path::NormalizePath; use tracing::info; use crate::config::Interface; @@ -75,7 +74,7 @@ fn verify_peer_credentials( /// before the TLS handshake when `peer_uid` or `peer_gid` are configured. pub async fn start_axum_app( socket_path: &Path, - app: NormalizePath, + app: Router, token: CancellationToken, trust_domains: Vec, interface: Interface,