diff --git a/.env_integration_tests.example b/.env_integration_tests.example index e8c7a77..f91620c 100644 --- a/.env_integration_tests.example +++ b/.env_integration_tests.example @@ -34,6 +34,17 @@ AICORE_MODEL=anthropic--claude-3-5-haiku AICORE_FILTER_TEST_MODEL=sap/gpt-4o-mini AICORE_FILTER_TEST_SELF_HARM_PROMPT= +# AI CORE fallback integration tests (tests/aicore/integration/fallback.feature) +# AICORE_FALLBACK_TEST_PRIMARY_MODEL: sap/* model name that the orchestration +# server reports as unsupported in your deployed region. Picks the canonical +# way to force the fallback path without depending on transient 5xx errors. +# A genuinely nonexistent name like sap/this-model-does-not-exist works. +# AICORE_FALLBACK_TEST_FALLBACK_MODEL: sap/* model that your resource group +# CAN call. Used as the single fallback preference. +# When either var is unset, the fallback BDD scenarios skip cleanly. +AICORE_FALLBACK_TEST_PRIMARY_MODEL=sap/this-model-does-not-exist +AICORE_FALLBACK_TEST_FALLBACK_MODEL=sap/mistralai--mistral-small-instruct + # AUDITLOG CLOUD_SDK_CFG_AUDITLOG_DEFAULT_URL=https://your-auditlog-api-url-here CLOUD_SDK_CFG_AUDITLOG_DEFAULT_UAA='{"url":"https://your-auth-url","clientid":"your-client-id","clientsecret":"your-client-secret"}' diff --git a/src/sap_cloud_sdk/aicore/__init__.py b/src/sap_cloud_sdk/aicore/__init__.py index d92ad83..61aa9da 100644 --- a/src/sap_cloud_sdk/aicore/__init__.py +++ b/src/sap_cloud_sdk/aicore/__init__.py @@ -13,6 +13,7 @@ from sap_cloud_sdk.core.telemetry.metrics_decorator import record_metrics from sap_cloud_sdk.core.telemetry.module import Module from sap_cloud_sdk.core.telemetry.operation import Operation +from .fallback import FallbackConfig, FallbackModel, set_fallbacks from .filtering import ( AzureContentFilter, ContentFilter, @@ -134,6 +135,11 @@ def set_aicore_config(instance_name: str = "aicore-instance") -> None: call :func:`set_filtering` afterward. 
Use :func:`disable_filtering` to turn filtering off at runtime, or set ``AICORE_FILTER_ENABLED=false`` to keep it off entirely. + + Model fallback is **opt-in** and is NOT activated by this function. To + enable it, call :func:`set_fallbacks` programmatically (or set + ``AICORE_FALLBACK_ENABLED=true`` and any of ``AICORE_FALLBACK_MODELS`` / + ``AICORE_FALLBACK_CONFIG`` and call ``set_fallbacks()`` with no args). """ # Load secrets client_id = _get_secret("AICORE_CLIENT_ID", "clientid", instance_name=instance_name) @@ -189,4 +195,7 @@ def set_aicore_config(instance_name: str = "aicore-instance") -> None: "ContentFilteredError", "OrchestrationError", "extract_filter_blocked", + "set_fallbacks", + "FallbackConfig", + "FallbackModel", ] diff --git a/src/sap_cloud_sdk/aicore/fallback/__init__.py b/src/sap_cloud_sdk/aicore/fallback/__init__.py new file mode 100644 index 0000000..3bd443c --- /dev/null +++ b/src/sap_cloud_sdk/aicore/fallback/__init__.py @@ -0,0 +1,9 @@ +"""Model-fallback subpackage for SAP AI Core Orchestration v2. + +Re-exports the public surface defined in :mod:`.fallback`. Users should import +flat from :mod:`sap_cloud_sdk.aicore`; this package is the source of truth. +""" + +from .fallback import FallbackConfig, FallbackModel, set_fallbacks + +__all__ = ["FallbackModel", "FallbackConfig", "set_fallbacks"] diff --git a/src/sap_cloud_sdk/aicore/fallback/_patch.py b/src/sap_cloud_sdk/aicore/fallback/_patch.py new file mode 100644 index 0000000..7b3782c --- /dev/null +++ b/src/sap_cloud_sdk/aicore/fallback/_patch.py @@ -0,0 +1,199 @@ +"""LiteLLM transport patch that adds model-fallback support on top of filtering. + +Patches ``litellm.GenAIHubOrchestrationConfig`` with a subclass of the +filtering patch (:class:`sap_cloud_sdk.aicore.filtering._patch.FilteringOrchestrationConfig`) +that adds the fallback-side hooks: + +- ``transform_request``: + 1. Injects ``fallback_sap_modules`` into ``optional_params`` before super + reads it. 
LiteLLM's ``GenAIHubOrchestrationConfig.transform_request`` + pops that key to build ``body["config"]["modules"]`` as a list. + 2. After super returns, copies the primary module's prompt template into + every fallback module entry — litellm builds the primary template from + ``messages`` but defaults each fallback's template to ``[]``, which the + orchestration server rejects with + ``"config.modules[N].prompt_templating.prompt.template should be non-empty"``. + 3. When filtering is active, broadcasts the filtering configuration across + every module entry (primary + every fallback). The filtering parent + class only injects on ``modules[0]``; the broadcast here keeps the + same filter set applied for every preference the server might pick. + +- ``transform_response``: after super has handled filter-rejection detection, + attaches ``intermediate_failures`` (the per-preference failure list) from + the 200 response body onto the returned :class:`ModelResponse` so callers + can inspect which preferences were skipped. ``None`` when the primary + succeeded. Non-streaming only in v1. + +The two patches share the monkeypatch slot. :func:`_install_fallback` +installs this subclass (which still does filtering thanks to inheritance); +clearing fallback restores the filtering-only class (or the original) by +calling :func:`sap_cloud_sdk.aicore.filtering._patch._install` with the +filtering side's current state — that path knows nothing about fallback, +so the filtering module never imports this one. Idempotent. +""" + +from __future__ import annotations + +import logging +from typing import Any + +import litellm +from litellm.types.utils import ModelResponse + +from ..filtering import _patch as _filter_patch +from ..filtering._patch import FilteringOrchestrationConfig + +logger = logging.getLogger(__name__) + + +# Module-level fallback state. 
``None`` means fallback is inactive; the +# filtering module is the source of truth for the installed class in that +# case (see :func:`_install_fallback`). +_active_fallback_cfg: Any = None # FallbackConfig | None + + +class OrchestrationPatchConfig(FilteringOrchestrationConfig): + """Adds model-fallback request/response hooks to the filtering patch. + + Inherits filtering injection + rejection handling from + :class:`FilteringOrchestrationConfig`. Adds, in order: + + - ``fallback_sap_modules`` injection (so litellm builds ``modules`` as a + list of preference dicts). + - Prompt-template broadcast to every fallback module entry. + - Filtering broadcast across every module entry (overriding the parent's + primary-only injection). + - ``intermediate_failures`` attachment on the returned ``ModelResponse``. + """ + + def transform_request( + self, + model: str, + messages: list, + optional_params: dict, + litellm_params: dict, + headers: dict, + ) -> dict: + # Inject fallback into optional_params BEFORE super reads it. + # LiteLLM's transform_request copies optional_params and pops + # ``"fallback_sap_modules"`` to build the modules list. + if _active_fallback_cfg is not None: + optional_params["fallback_sap_modules"] = ( + _active_fallback_cfg.to_litellm_kwarg() + ) + + body = super().transform_request( + model=model, + messages=messages, + optional_params=optional_params, + litellm_params=litellm_params, + headers=headers, + ) + + modules = body["config"]["modules"] + # No fallback => single dict, nothing else to do here. + if not isinstance(modules, list) or len(modules) <= 1: + return body + + # Broadcast the primary's prompt template to every fallback entry. + # litellm only builds the primary's template from ``messages``; + # fallback entries get whatever was popped from their dict's + # ``"messages"`` key (litellm transformation.py L371), which is + # ``[]`` for ``FallbackModel.to_dict()``. 
Without this copy, the + # server rejects with + # "config.modules[N].prompt_templating.prompt.template should be + # non-empty". + primary_template = ( + modules[0].get("prompt_templating", {}).get("prompt", {}).get("template") + ) + if primary_template: + for entry in modules[1:]: + entry.setdefault("prompt_templating", {}).setdefault("prompt", {})[ + "template" + ] = primary_template + + # Broadcast filtering across every module entry. The filtering parent + # installed it on ``modules[0]`` only; broadcasting keeps the same + # filter set applied for every preference the server might pick. + # To opt a fallback out of filtering, call ``disable_filtering()`` + # before the call. + if _filter_patch._active_cfg is not None: + filtering_dict = _filter_patch._active_cfg.to_dict() + if filtering_dict: + for entry in modules[1:]: + entry["filtering"] = filtering_dict + + return body + + def transform_response( + self, + model: str, + raw_response: Any, + model_response: ModelResponse, + logging_obj: Any, + request_data: dict, + messages: list, + optional_params: dict, + litellm_params: dict, + encoding: Any, + api_key: str | None = None, + json_mode: bool | None = None, + ) -> ModelResponse: + # Let the filtering parent handle filter-rejection detection first + # (it raises ``ContentFilteredError`` before falling through to + # super-super). If it raises, we never reach the attach below. + result = super().transform_response( + model=model, + raw_response=raw_response, + model_response=model_response, + logging_obj=logging_obj, + request_data=request_data, + messages=messages, + optional_params=optional_params, + litellm_params=litellm_params, + encoding=encoding, + api_key=api_key, + json_mode=json_mode, + ) + + # Surface ``intermediate_failures`` on the returned ``ModelResponse`` + # so callers can see which preferences were skipped. Only present on + # non-streaming 200 responses — streaming surfacing is deferred. 
+ if raw_response.status_code == 200: + try: + payload = raw_response.json() + except ValueError: + return result + failures = payload.get("intermediate_failures") + if failures is not None: + # ``ModelResponse`` uses pydantic ``extra="allow"`` so dynamic + # attribute assignment is supported at runtime. ``setattr`` + # keeps the static type checker happy. + setattr(result, "intermediate_failures", failures) + + return result + + +def _install_fallback(cfg: Any) -> None: # cfg: FallbackConfig | None + """Set the active fallback config and refresh the installed patch class. + + When ``cfg`` is non-``None``, installs :class:`OrchestrationPatchConfig` + (which inherits filtering, so filtering still works when active). + + When ``cfg`` is ``None``, defers to the filtering module: re-runs its + ``_install`` with whatever filtering state is currently active, which + restores either ``FilteringOrchestrationConfig`` (filtering on) or + ``_ORIGINAL_CONFIG`` (both off). + + Idempotent — repeated calls with the same value are safe. + """ + global _active_fallback_cfg + _active_fallback_cfg = cfg + if cfg is None: + # Hand back control to the filtering installer so it restores the + # correct class for the current filtering state. + _filter_patch._install(_filter_patch._active_cfg) + logger.debug("model fallback disabled") + else: + litellm.GenAIHubOrchestrationConfig = OrchestrationPatchConfig + logger.info("model fallback active (OrchestrationPatchConfig)") diff --git a/src/sap_cloud_sdk/aicore/fallback/fallback.py b/src/sap_cloud_sdk/aicore/fallback/fallback.py new file mode 100644 index 0000000..716541c --- /dev/null +++ b/src/sap_cloud_sdk/aicore/fallback/fallback.py @@ -0,0 +1,227 @@ +"""Public model-fallback API for SAP AI Core Orchestration v2. 
+ +Orchestration v2 supports module-configuration fallbacks: when the primary +configuration fails (model unsupported in region, 429, 408, or 5xx — and only +unsupported-model for streams), orchestration retries with the next preference. +See ``context/fallback.md``. + +The litellm SAP provider already supports this: passing ``fallback_sap_modules`` +through ``optional_params`` builds ``body["config"]["modules"]`` as a list. +This module is the SDK-side ergonomic layer: typed ``FallbackModel`` / +``FallbackConfig`` dataclasses, an env-driven ``from_env()`` builder, and the +``set_fallbacks()`` entry point that installs them into the shared +``OrchestrationPatchConfig`` patch (alongside any active filtering config). + +Fallback is **opt-in**: ``set_aicore_config()`` does not enable it. Developers +must either call ``set_fallbacks(...)`` programmatically or set +``AICORE_FALLBACK_ENABLED=true`` and call ``set_fallbacks()`` (with no args). + +The companion ``intermediate_failures`` field from the orchestration response +is surfaced as an attribute on the returned ``ModelResponse``. Non-streaming +only in v1 — streaming surfacing is deferred. +""" + +from __future__ import annotations + +import json +import logging +import os +from dataclasses import dataclass, field + +from sap_cloud_sdk.core.telemetry.metrics_decorator import record_metrics +from sap_cloud_sdk.core.telemetry.module import Module +from sap_cloud_sdk.core.telemetry.operation import Operation + +from ._patch import _install_fallback + +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Env-var helpers (kept local — small, simple, no dependency on filtering) +# --------------------------------------------------------------------------- + +_TRUTHY = frozenset({"true", "1", "yes"}) + + +def _read_env_str(key: str, default: str = "") -> str: + """Read a string env var. Trims whitespace. 
Returns ``default`` if absent.""" + raw = os.environ.get(key) + return raw.strip() if raw is not None else default + + +def _read_env_bool(key: str, default: bool = False) -> bool: + """Read a boolean env var. + + ``true``/``1``/``yes`` (case-insensitive) are True; anything else is False. + Returns ``default`` if the variable is absent. + """ + raw = os.environ.get(key) + return (raw.strip().lower() in _TRUTHY) if raw is not None else default + + +# --------------------------------------------------------------------------- +# Public dataclasses +# --------------------------------------------------------------------------- + + +@dataclass +class FallbackModel: + """A single fallback preference. + + Args: + model: Model name passed to orchestration (e.g. ``"sap/gpt-4o"``). + params: Per-model params (``max_tokens``, ``temperature``, …). Optional. + When omitted, the orchestration server falls back to its defaults + for the model — it does NOT inherit the primary call's params. + model_version: Specific model version. Defaults to ``"latest"`` on + the server side when omitted. + """ + + model: str + params: dict | None = None + model_version: str | None = None + + def to_dict(self) -> dict: + """Wire shape consumed by litellm's ``_build_prompt_module``. + + litellm pops ``model`` and ``model_version`` from the dict and treats + everything else as model params. We keep this shape minimal. + """ + result: dict = {"model": self.model} + if self.model_version is not None: + result["model_version"] = self.model_version + if self.params: + result.update(self.params) + return result + + +@dataclass +class FallbackConfig: + """Ordered list of fallback preferences. + + The orchestration server tries preferences in order; the first to succeed + wins. Empty lists are accepted but have no effect (equivalent to no + fallback). + + Args: + models: Ordered list of :class:`FallbackModel` instances. Element 0 + is tried first after the primary call fails. 
+ """ + + models: list[FallbackModel] = field(default_factory=list) + + def to_litellm_kwarg(self) -> list[dict]: + """Build the list passed to litellm as ``fallback_sap_modules``.""" + return [m.to_dict() for m in self.models] + + @classmethod + def from_env(cls) -> "FallbackConfig | None": + """Build from ``AICORE_FALLBACK_*`` environment variables. + + Returns ``None`` when ``AICORE_FALLBACK_ENABLED`` is not truthy, or + when enabled but neither ``AICORE_FALLBACK_CONFIG`` nor + ``AICORE_FALLBACK_MODELS`` is set (treated as disabled — a warning is + logged). + + Reads: + AICORE_FALLBACK_ENABLED (bool, default false) — opt-in switch + AICORE_FALLBACK_CONFIG (JSON string) — full per-model config, + shape ``[{"model": ..., "params": {...}, "model_version": ...}]``. + Takes precedence over MODELS when set. Malformed JSON raises. + AICORE_FALLBACK_MODELS (comma list) — simple model-only form. + Each entry becomes ``FallbackModel(model=name)``. + + Raises: + ValueError: If ``AICORE_FALLBACK_CONFIG`` is set but not valid + JSON, or does not decode to a list of objects. 
+ """ + if not _read_env_bool("AICORE_FALLBACK_ENABLED", default=False): + return None + + config_raw = _read_env_str("AICORE_FALLBACK_CONFIG") + if config_raw: + try: + parsed = json.loads(config_raw) + except ValueError as e: + raise ValueError( + f"AICORE_FALLBACK_CONFIG must be valid JSON, got: {config_raw!r}" + ) from e + if not isinstance(parsed, list): + raise ValueError( + f"AICORE_FALLBACK_CONFIG must decode to a list, got " + f"{type(parsed).__name__}" + ) + models = [ + FallbackModel( + model=entry["model"], + params=entry.get("params"), + model_version=entry.get("model_version"), + ) + for entry in parsed + ] + return cls(models=models) + + models_raw = _read_env_str("AICORE_FALLBACK_MODELS") + if models_raw: + names = [n.strip() for n in models_raw.split(",") if n.strip()] + return cls(models=[FallbackModel(model=n) for n in names]) + + logger.warning( + "AICORE_FALLBACK_ENABLED is true but neither AICORE_FALLBACK_CONFIG " + "nor AICORE_FALLBACK_MODELS is set; fallback remains inactive" + ) + return None + + +# --------------------------------------------------------------------------- +# Entry point +# --------------------------------------------------------------------------- + + +@record_metrics(Module.AICORE, Operation.AICORE_SET_FALLBACKS) +def set_fallbacks(config: FallbackConfig | None = None) -> None: + """Install a model-fallback configuration. + + Fallback is **opt-in**. ``set_aicore_config()`` does NOT activate it; + the developer must call this function (or set the + ``AICORE_FALLBACK_*`` env vars and call this function with no args). + + Args: + config: A :class:`FallbackConfig` to install. If ``None`` (the + default), reads ``AICORE_FALLBACK_*`` env vars via + :meth:`FallbackConfig.from_env`. Pass ``None`` after an earlier + call to clear an installed fallback at runtime. 
+ + Examples: + Programmatic:: + + from sap_cloud_sdk.aicore import ( + FallbackConfig, FallbackModel, set_fallbacks, + ) + + set_fallbacks(FallbackConfig([ + FallbackModel( + model="sap/mistralai--mistral-small-instruct", + params={"temperature": 0.7, "max_tokens": 300}, + ), + ])) + + From environment:: + + import os + from sap_cloud_sdk.aicore import set_fallbacks + + os.environ["AICORE_FALLBACK_ENABLED"] = "true" + os.environ["AICORE_FALLBACK_MODELS"] = ( + "sap/mistralai--mistral-small-instruct" + ) + set_fallbacks() + """ + if config is None: + _install_fallback(FallbackConfig.from_env()) + return + _install_fallback(config) + + +__all__ = ["FallbackModel", "FallbackConfig", "set_fallbacks"] diff --git a/src/sap_cloud_sdk/aicore/fallback/py.typed b/src/sap_cloud_sdk/aicore/fallback/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/src/sap_cloud_sdk/aicore/filtering/_patch.py b/src/sap_cloud_sdk/aicore/filtering/_patch.py index 5cec892..8c2b726 100644 --- a/src/sap_cloud_sdk/aicore/filtering/_patch.py +++ b/src/sap_cloud_sdk/aicore/filtering/_patch.py @@ -160,9 +160,33 @@ def _install(cfg: Any) -> None: # cfg: ContentFiltering | None """Patch litellm.GenAIHubOrchestrationConfig. Idempotent. cfg=None restores the original config and disables filtering. + + When the model-fallback module has an active config installed, this + function only updates ``_active_cfg`` and leaves the installed class + (:class:`sap_cloud_sdk.aicore.fallback._patch.OrchestrationPatchConfig`) + in place — that class inherits from :class:`FilteringOrchestrationConfig` + and reads ``_active_cfg`` at request time, so the filtering toggle still + takes effect. Lazy import of the fallback patch module avoids a circular + dependency at package import time. """ global _active_cfg _active_cfg = cfg + + # Defer to fallback when it has installed its own subclass — clobbering + # the class here would silently disable model fallback. 
+ try: + from ..fallback._patch import _active_fallback_cfg + except ImportError: + _active_fallback_cfg = None + if _active_fallback_cfg is not None: + if cfg is None: + logger.debug("content filtering disabled (fallback patch still installed)") + else: + logger.info( + "content filtering active (delegated through OrchestrationPatchConfig)" + ) + return + if cfg is None: litellm.GenAIHubOrchestrationConfig = _ORIGINAL_CONFIG logger.debug("content filtering disabled") diff --git a/src/sap_cloud_sdk/aicore/user-guide.md b/src/sap_cloud_sdk/aicore/user-guide.md index e40a54e..0e22c3c 100644 --- a/src/sap_cloud_sdk/aicore/user-guide.md +++ b/src/sap_cloud_sdk/aicore/user-guide.md @@ -58,6 +58,9 @@ The `set_aicore_config()` function: 4. **Sets resource group** (defaults to "default" if not specified) 5. **Activates content filtering** — Azure Content Safety + prompt shield enabled by default *(new in 0.28.0)* +Model fallback is **not** auto-activated — it is opt-in via +[`set_fallbacks()`](#model-fallback-opt-in). + --- ## Content Filtering (enabled by default from 0.28.0) @@ -256,6 +259,103 @@ Env vars also renamed: `ORCH_FILTER_*` → `AICORE_FILTER_*`. The --- +## Model Fallback (opt-in) + +Orchestration v2 supports fallback configurations: when the primary model +fails (unsupported in region, 429 Too Many Requests, 408 Request Timeout, or +any 5xx — and only unsupported-in-region for streaming requests), the server +automatically retries with the next preference in your list. + +Unlike content filtering, **fallback is opt-in**. `set_aicore_config()` does +not enable it. The developer must call `set_fallbacks()` (or set the +`AICORE_FALLBACK_*` env vars and call `set_fallbacks()` with no args). 
+ +### Programmatic configuration + +```python +from sap_cloud_sdk.aicore import ( + FallbackConfig, FallbackModel, set_aicore_config, set_fallbacks, +) +from litellm import completion + +set_aicore_config() +set_fallbacks(FallbackConfig([ + FallbackModel( + model="sap/mistralai--mistral-small-instruct", + params={"temperature": 0.7, "max_tokens": 300}, + ), +])) + +response = completion( + model="sap/gpt-4o", + messages=[{"role": "user", "content": "Translate 'hello' to German."}], +) +``` + +The orchestration server tries the primary model first. If it fails for a +fallback-eligible reason, the server transparently uses each fallback in +order. The first to succeed wins. + +When a fallback is used, the returned response carries an +`intermediate_failures` attribute listing the reasons each higher-preference +model was skipped: + +```python +failures = getattr(response, "intermediate_failures", None) +if failures: + for f in failures: + print(f"skipped preference: {f.get('code')} — {f.get('message')}") +``` + +The SDK attaches `intermediate_failures` only when the orchestration response +contains that field. Otherwise (for example, when the primary succeeded) the +attribute is not set at all, so read it with +`getattr(response, "intermediate_failures", None)` rather than plain attribute +access — a `None` result is a quick check that no fallback was exercised. +This field is currently surfaced for non-streaming responses only. + +### Configure via environment + +Set these **before** calling `set_fallbacks()`: + +| Variable | Default | Description | +|---|---|---| +| `AICORE_FALLBACK_ENABLED` | `false` | Opt-in switch. | +| `AICORE_FALLBACK_MODELS` | `""` | Comma-separated list of model names. Each becomes a fallback with no params. Simple form. | +| `AICORE_FALLBACK_CONFIG` | `""` | JSON: `[{"model": "...", "params": {...}, "model_version": "..."}, ...]`. Takes precedence over `MODELS`.
| + +```bash +AICORE_FALLBACK_ENABLED=true +AICORE_FALLBACK_MODELS=sap/mistralai--mistral-small-instruct,sap/anthropic--claude-4.5-sonnet +``` + +```python +from sap_cloud_sdk.aicore import set_fallbacks +set_fallbacks() # reads the env vars +``` + +### Filtering composes with fallback + +If filtering is also active, the same filtering configuration applies to the +primary model AND every fallback preference. The filter set is broadcast +across all module entries on the wire. Filtering cannot be switched off for +individual preferences: to run a fallback without filtering, explicitly call +`disable_filtering()` before the request, which turns filtering off for the +primary model as well (filtering is on by default after +`set_aicore_config()`). + +### Clearing at runtime + +There is no `disable_fallbacks()` function. `set_fallbacks(None)` behaves +exactly like `set_fallbacks()`: it re-reads the `AICORE_FALLBACK_*` env vars. +It therefore clears a previously-installed fallback configuration only when +`AICORE_FALLBACK_ENABLED` is unset or not truthy (or when no fallback models +are configured); if the env vars still describe a fallback, that configuration +is installed instead. Most applications enable +fallback once at startup and leave it on. + +### Error responses + +If every preference fails, orchestration returns an error response listing the +failure for each attempted preference. This surfaces in user code the same way +any orchestration error does — LiteLLM raises one of its exception types +(`APIConnectionError`, etc.). The exception message contains the per-preference +error list.
+ +--- + ### Credentials Loaded The function loads and configures these credentials: diff --git a/src/sap_cloud_sdk/core/telemetry/operation.py b/src/sap_cloud_sdk/core/telemetry/operation.py index 235a1ec..f11c999 100644 --- a/src/sap_cloud_sdk/core/telemetry/operation.py +++ b/src/sap_cloud_sdk/core/telemetry/operation.py @@ -144,6 +144,7 @@ class Operation(str, Enum): AICORE_SET_FILTERING = "set_filtering" AICORE_DISABLE_FILTERING = "disable_filtering" AICORE_EXTRACT_FILTER_BLOCKED = "extract_filter_blocked" + AICORE_SET_FALLBACKS = "set_fallbacks" # Print Operations PRINT_LIST_QUEUES = "list_queues" diff --git a/tests/aicore/fallback/__init__.py b/tests/aicore/fallback/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/aicore/fallback/unit/__init__.py b/tests/aicore/fallback/unit/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/aicore/fallback/unit/test_fallback_config.py b/tests/aicore/fallback/unit/test_fallback_config.py new file mode 100644 index 0000000..4f34416 --- /dev/null +++ b/tests/aicore/fallback/unit/test_fallback_config.py @@ -0,0 +1,170 @@ +"""Unit tests for FallbackModel, FallbackConfig, and FallbackConfig.from_env.""" + +from __future__ import annotations + +import logging +import os + +import pytest + +from sap_cloud_sdk.aicore.fallback.fallback import FallbackConfig, FallbackModel + + +# --------------------------------------------------------------------------- +# FallbackModel +# --------------------------------------------------------------------------- + + +class TestFallbackModelToDict: + def test_to_dict_minimal_has_only_model(self): + m = FallbackModel(model="sap/x") + assert m.to_dict() == {"model": "sap/x"} + + def test_to_dict_with_params_merges_them(self): + m = FallbackModel(model="sap/x", params={"temperature": 0.7, "max_tokens": 300}) + assert m.to_dict() == { + "model": "sap/x", + "temperature": 0.7, + "max_tokens": 300, + } + + def 
test_to_dict_with_model_version_includes_key(self): + m = FallbackModel(model="sap/x", model_version="v2") + assert m.to_dict() == {"model": "sap/x", "model_version": "v2"} + + def test_to_dict_with_empty_params_omits_them(self): + m = FallbackModel(model="sap/x", params={}) + assert m.to_dict() == {"model": "sap/x"} + + def test_to_dict_all_fields_set(self): + m = FallbackModel( + model="sap/x", + params={"temperature": 0.5}, + model_version="v3", + ) + assert m.to_dict() == { + "model": "sap/x", + "model_version": "v3", + "temperature": 0.5, + } + + +# --------------------------------------------------------------------------- +# FallbackConfig +# --------------------------------------------------------------------------- + + +class TestFallbackConfigToLitellmKwarg: + def test_preserves_order(self): + cfg = FallbackConfig( + [ + FallbackModel(model="sap/a"), + FallbackModel(model="sap/b"), + FallbackModel(model="sap/c"), + ] + ) + assert [m["model"] for m in cfg.to_litellm_kwarg()] == [ + "sap/a", + "sap/b", + "sap/c", + ] + + def test_empty_list_returns_empty(self): + cfg = FallbackConfig([]) + assert cfg.to_litellm_kwarg() == [] + + def test_default_factory_produces_empty_list(self): + cfg = FallbackConfig() + assert cfg.models == [] + assert cfg.to_litellm_kwarg() == [] + + def test_per_model_params_propagated(self): + cfg = FallbackConfig( + [ + FallbackModel(model="sap/a", params={"temperature": 0.1}), + FallbackModel(model="sap/b", params={"max_tokens": 100}), + ] + ) + out = cfg.to_litellm_kwarg() + assert out[0] == {"model": "sap/a", "temperature": 0.1} + assert out[1] == {"model": "sap/b", "max_tokens": 100} + + +# --------------------------------------------------------------------------- +# FallbackConfig.from_env +# --------------------------------------------------------------------------- + + +@pytest.fixture(autouse=True) +def clean_fallback_env(monkeypatch): + """Clear every AICORE_FALLBACK_* variable before each test.""" + for key in 
list(os.environ): + if key.startswith("AICORE_FALLBACK"): + monkeypatch.delenv(key, raising=False) + yield + + +class TestFromEnv: + def test_returns_none_when_enabled_absent(self): + assert FallbackConfig.from_env() is None + + def test_returns_none_when_enabled_false(self, monkeypatch): + monkeypatch.setenv("AICORE_FALLBACK_ENABLED", "false") + monkeypatch.setenv("AICORE_FALLBACK_MODELS", "sap/x") + assert FallbackConfig.from_env() is None + + def test_returns_none_when_enabled_true_but_nothing_set(self, monkeypatch, caplog): + monkeypatch.setenv("AICORE_FALLBACK_ENABLED", "true") + with caplog.at_level(logging.WARNING): + assert FallbackConfig.from_env() is None + assert any("fallback remains inactive" in r.message for r in caplog.records) + + def test_parses_models_csv(self, monkeypatch): + monkeypatch.setenv("AICORE_FALLBACK_ENABLED", "true") + monkeypatch.setenv("AICORE_FALLBACK_MODELS", "sap/a, sap/b ,sap/c") + cfg = FallbackConfig.from_env() + assert cfg is not None + assert [m.model for m in cfg.models] == ["sap/a", "sap/b", "sap/c"] + # No params or version inherited from env in the simple form. 
+ assert all(m.params is None and m.model_version is None for m in cfg.models) + + def test_parses_models_csv_skips_empty_entries(self, monkeypatch): + monkeypatch.setenv("AICORE_FALLBACK_ENABLED", "true") + monkeypatch.setenv("AICORE_FALLBACK_MODELS", ",sap/a,,sap/b,") + cfg = FallbackConfig.from_env() + assert cfg is not None + assert [m.model for m in cfg.models] == ["sap/a", "sap/b"] + + def test_parses_config_json(self, monkeypatch): + monkeypatch.setenv("AICORE_FALLBACK_ENABLED", "true") + monkeypatch.setenv( + "AICORE_FALLBACK_CONFIG", + '[{"model":"sap/a","params":{"temperature":0.7}},' + ' {"model":"sap/b","model_version":"v2"}]', + ) + cfg = FallbackConfig.from_env() + assert cfg is not None + assert cfg.models[0].model == "sap/a" + assert cfg.models[0].params == {"temperature": 0.7} + assert cfg.models[1].model == "sap/b" + assert cfg.models[1].model_version == "v2" + + def test_config_takes_precedence_over_models(self, monkeypatch): + monkeypatch.setenv("AICORE_FALLBACK_ENABLED", "true") + monkeypatch.setenv("AICORE_FALLBACK_MODELS", "sap/from-models") + monkeypatch.setenv("AICORE_FALLBACK_CONFIG", '[{"model":"sap/from-config"}]') + cfg = FallbackConfig.from_env() + assert cfg is not None + assert [m.model for m in cfg.models] == ["sap/from-config"] + + def test_malformed_json_raises(self, monkeypatch): + monkeypatch.setenv("AICORE_FALLBACK_ENABLED", "true") + monkeypatch.setenv("AICORE_FALLBACK_CONFIG", "{not json") + with pytest.raises(ValueError, match="valid JSON"): + FallbackConfig.from_env() + + def test_non_list_json_raises(self, monkeypatch): + monkeypatch.setenv("AICORE_FALLBACK_ENABLED", "true") + monkeypatch.setenv("AICORE_FALLBACK_CONFIG", '{"model": "sap/x"}') + with pytest.raises(ValueError, match="decode to a list"): + FallbackConfig.from_env() diff --git a/tests/aicore/fallback/unit/test_patch.py b/tests/aicore/fallback/unit/test_patch.py new file mode 100644 index 0000000..8361b80 --- /dev/null +++ 
b/tests/aicore/fallback/unit/test_patch.py @@ -0,0 +1,499 @@ +"""Unit tests for OrchestrationPatchConfig — the fallback-side concerns. + +Filtering-side coverage lives in :mod:`tests.aicore.filtering.unit.test_patch`. +This file targets: + +- ``transform_request`` injects ``fallback_sap_modules`` into ``optional_params`` + before delegating to super. +- Filtering broadcasts to every module entry when both filtering and fallback + are active (the behaviour change vs. the original modules[0]-only logic). +- ``transform_response`` attaches ``intermediate_failures`` on the returned + ``ModelResponse`` (and only when present). +""" + +from __future__ import annotations + +from typing import Any +from unittest.mock import MagicMock, patch + +import httpx +import pytest + +from sap_cloud_sdk.aicore.fallback._patch import ( + OrchestrationPatchConfig, + _install_fallback, +) +from sap_cloud_sdk.aicore.fallback.fallback import FallbackConfig, FallbackModel +from sap_cloud_sdk.aicore.filtering._models import ( + AzureContentFilter, + ContentFiltering, + InputFiltering, + OutputFiltering, +) +from sap_cloud_sdk.aicore.filtering._patch import _install as _install_filter +from sap_cloud_sdk.aicore.filtering.exceptions import ContentFilteredError + + +@pytest.fixture(autouse=True) +def restore_litellm_config(): + """Each test starts with a clean patch state and ends the same way.""" + _install_filter(None) + _install_fallback(None) + yield + _install_filter(None) + _install_fallback(None) + + +def _stub_response(status: int, body: dict) -> httpx.Response: + return httpx.Response(status, json=body) + + +def _default_filtering() -> ContentFiltering: + return ContentFiltering( + input_filtering=InputFiltering(filters=[AzureContentFilter()]), + output_filtering=OutputFiltering(filters=[AzureContentFilter()]), + ) + + +# --------------------------------------------------------------------------- +# transform_request — fallback injection +# 
--------------------------------------------------------------------------- + + +class TestTransformRequestFallback: + @staticmethod + def _list_modules_body() -> dict: + """Body shape litellm produces when fallback is active.""" + return { + "config": { + "modules": [ + { + "prompt_templating": { + "prompt": {"template": []}, + "model": { + "name": "anthropic--claude-4.5-sonnet", + "params": {}, + "version": "latest", + }, + } + }, + { + "prompt_templating": { + "prompt": {"template": []}, + "model": { + "name": "mistral-small", + "params": {}, + "version": "latest", + }, + } + }, + ] + } + } + + @staticmethod + def _dict_modules_body() -> dict: + """Body shape litellm produces with no fallback.""" + return { + "config": { + "modules": { + "prompt_templating": { + "prompt": {"template": []}, + "model": { + "name": "anthropic--claude-4.5-sonnet", + "params": {}, + "version": "latest", + }, + } + } + } + } + + @staticmethod + def _realistic_list_modules_body() -> dict: + """Body shape litellm actually produces — primary has a real template, + fallback entries have ``template: []`` because litellm only converts + the top-level ``messages`` for the primary module. The SDK is + responsible for broadcasting the primary's template to every + fallback entry; without that the orchestration server rejects with + ``config.modules[N].prompt_templating.prompt.template should be + non-empty`` (the exact failure that reached integration tests). 
+ """ + primary_template = [{"role": "user", "content": "Reply with 'ok'."}] + return { + "config": { + "modules": [ + { + "prompt_templating": { + "prompt": {"template": primary_template}, + "model": { + "name": "anthropic--claude-4.5-sonnet", + "params": {}, + "version": "latest", + }, + } + }, + { + "prompt_templating": { + "prompt": {"template": []}, + "model": { + "name": "mistral-small", + "params": {}, + "version": "latest", + }, + } + }, + ] + } + } + + def test_fallback_injected_into_optional_params_before_super(self): + _install_fallback(FallbackConfig([FallbackModel(model="sap/mistral-small")])) + optional_params: dict = {} + captured: dict = {} + + def fake_super_transform(**kwargs): + captured.update(kwargs) + return self._list_modules_body() + + with patch( + "sap_cloud_sdk.aicore.filtering._patch." + "GenAIHubOrchestrationConfig.transform_request", + side_effect=fake_super_transform, + ): + OrchestrationPatchConfig().transform_request( + model="sap/anthropic--claude-4.5-sonnet", + messages=[], + optional_params=optional_params, + litellm_params={}, + headers={}, + ) + + assert captured["optional_params"]["fallback_sap_modules"] == [ + {"model": "sap/mistral-small"} + ] + + def test_no_fallback_injection_when_inactive(self): + # Filtering inactive too — but inactive _is_ the default; this test + # asserts the injection is opt-in. + optional_params: dict = {} + with patch( + "sap_cloud_sdk.aicore.filtering._patch." 
+ "GenAIHubOrchestrationConfig.transform_request", + return_value=self._dict_modules_body(), + ): + OrchestrationPatchConfig().transform_request( + model="sap/anthropic--claude-4.5-sonnet", + messages=[], + optional_params=optional_params, + litellm_params={}, + headers={}, + ) + assert "fallback_sap_modules" not in optional_params + + def test_filtering_broadcasts_to_every_module_entry(self): + _install_filter(_default_filtering()) + _install_fallback(FallbackConfig([FallbackModel(model="sap/mistral-small")])) + + with patch( + "sap_cloud_sdk.aicore.filtering._patch." + "GenAIHubOrchestrationConfig.transform_request", + return_value=self._list_modules_body(), + ): + body = OrchestrationPatchConfig().transform_request( + model="sap/anthropic--claude-4.5-sonnet", + messages=[], + optional_params={}, + litellm_params={}, + headers={}, + ) + + modules = body["config"]["modules"] + assert isinstance(modules, list) + assert len(modules) == 2 + # Every entry — primary AND fallback — carries filtering. + for entry in modules: + assert "filtering" in entry + assert "input" in entry["filtering"] + assert "output" in entry["filtering"] + + def test_filtering_on_dict_modules_unchanged_when_no_fallback(self): + _install_filter(_default_filtering()) + + with patch( + "sap_cloud_sdk.aicore.filtering._patch." + "GenAIHubOrchestrationConfig.transform_request", + return_value=self._dict_modules_body(), + ): + body = OrchestrationPatchConfig().transform_request( + model="sap/anthropic--claude-4.5-sonnet", + messages=[], + optional_params={}, + litellm_params={}, + headers={}, + ) + + modules = body["config"]["modules"] + assert isinstance(modules, dict) + assert "filtering" in modules + + def test_fallback_only_no_filtering_keys(self): + _install_fallback(FallbackConfig([FallbackModel(model="sap/mistral-small")])) + + with patch( + "sap_cloud_sdk.aicore.filtering._patch." 
+ "GenAIHubOrchestrationConfig.transform_request", + return_value=self._list_modules_body(), + ): + body = OrchestrationPatchConfig().transform_request( + model="sap/anthropic--claude-4.5-sonnet", + messages=[], + optional_params={}, + litellm_params={}, + headers={}, + ) + + # No filtering installed, so no entry should carry one. + for entry in body["config"]["modules"]: + assert "filtering" not in entry + + def test_primary_template_broadcasts_to_fallback_entries(self): + # Regression: previously fallback entries went out with + # ``prompt.template == []`` and the orchestration server rejected with + # ``config.modules[1].prompt_templating.prompt.template should be + # non-empty``. The patch now copies the primary's template across. + _install_fallback(FallbackConfig([FallbackModel(model="sap/mistral-small")])) + + with patch( + "sap_cloud_sdk.aicore.filtering._patch." + "GenAIHubOrchestrationConfig.transform_request", + return_value=self._realistic_list_modules_body(), + ): + body = OrchestrationPatchConfig().transform_request( + model="sap/anthropic--claude-4.5-sonnet", + messages=[{"role": "user", "content": "Reply with 'ok'."}], + optional_params={}, + litellm_params={}, + headers={}, + ) + + modules = body["config"]["modules"] + primary_template = modules[0]["prompt_templating"]["prompt"]["template"] + assert primary_template, "primary template should be non-empty" + for entry in modules[1:]: + assert entry["prompt_templating"]["prompt"]["template"] == primary_template + + def test_template_broadcast_noop_for_single_module_body(self): + # No fallback installed → litellm emits a single dict (not a list); + # the broadcast must not touch it. (Also: nothing to broadcast to.) + with patch( + "sap_cloud_sdk.aicore.filtering._patch." 
+ "GenAIHubOrchestrationConfig.transform_request", + return_value=self._dict_modules_body(), + ): + body = OrchestrationPatchConfig().transform_request( + model="sap/anthropic--claude-4.5-sonnet", + messages=[], + optional_params={}, + litellm_params={}, + headers={}, + ) + + modules = body["config"]["modules"] + assert isinstance(modules, dict) + # Untouched — same empty template the fixture started with. + assert modules["prompt_templating"]["prompt"]["template"] == [] + + def test_template_broadcast_skipped_when_primary_template_empty(self): + # Defensive: if the primary itself somehow has no template, do not + # propagate the empty value (no point) — leave fallback entries alone. + _install_fallback(FallbackConfig([FallbackModel(model="sap/mistral-small")])) + + with patch( + "sap_cloud_sdk.aicore.filtering._patch." + "GenAIHubOrchestrationConfig.transform_request", + return_value=self._list_modules_body(), + ): + body = OrchestrationPatchConfig().transform_request( + model="sap/anthropic--claude-4.5-sonnet", + messages=[], + optional_params={}, + litellm_params={}, + headers={}, + ) + + modules = body["config"]["modules"] + for entry in modules: + assert entry["prompt_templating"]["prompt"]["template"] == [] + + +# --------------------------------------------------------------------------- +# transform_response — intermediate_failures attachment +# --------------------------------------------------------------------------- + + +_SUCCESS_BODY_WITH_FAILURES = { + "request_id": "req-fallback", + "intermediate_results": {}, + "final_result": { + "id": "x", + "object": "chat.completion", + "model": "mistralai--mistral-small-instruct", + "choices": [ + { + "index": 0, + "message": {"role": "assistant", "content": "Servus!"}, + "finish_reason": "stop", + } + ], + "usage": {"completion_tokens": 5, "prompt_tokens": 10, "total_tokens": 15}, + }, + "intermediate_failures": [ + { + "code": 400, + "message": "Model gpt-4o not supported.", + "location": "Request Body", + } 
+ ], +} + +_SUCCESS_BODY_NO_FAILURES = { + "request_id": "req-primary", + "intermediate_results": {}, + "final_result": { + "id": "x", + "object": "chat.completion", + "model": "gpt-4o", + "choices": [ + { + "index": 0, + "message": {"role": "assistant", "content": "Hi!"}, + "finish_reason": "stop", + } + ], + "usage": {"completion_tokens": 5, "prompt_tokens": 10, "total_tokens": 15}, + }, +} + +_OUTPUT_FILTER_BODY = { + "request_id": "req-blocked", + "intermediate_results": { + "output_filtering": { + "data": {"choices": [{"index": 0, "azure_content_safety": {"Sexual": 4}}]} + } + }, + "final_result": { + "id": "x", + "model": "m", + "choices": [ + { + "index": 0, + "message": {"role": "assistant", "content": ""}, + "finish_reason": "content_filter", + } + ], + "usage": {"completion_tokens": 0, "prompt_tokens": 10, "total_tokens": 10}, + }, +} + + +class TestTransformResponseIntermediateFailures: + def _call(self, response: httpx.Response): + from litellm.types.utils import ModelResponse + + with patch( + "sap_cloud_sdk.aicore.filtering._patch." 
+ "GenAIHubOrchestrationConfig.transform_response", + return_value=ModelResponse(), + ): + return OrchestrationPatchConfig().transform_response( + model="sap/anthropic--claude-4.5-sonnet", + raw_response=response, + model_response=ModelResponse(), + logging_obj=MagicMock(), + request_data={}, + messages=[], + optional_params={}, + litellm_params={}, + encoding=None, + ) + + def test_intermediate_failures_attached_when_present(self): + result = self._call(_stub_response(200, _SUCCESS_BODY_WITH_FAILURES)) + failures = getattr(result, "intermediate_failures", None) + assert failures is not None + assert len(failures) == 1 + assert failures[0]["code"] == 400 + assert "gpt-4o" in failures[0]["message"] + + def test_intermediate_failures_absent_when_key_missing(self): + result = self._call(_stub_response(200, _SUCCESS_BODY_NO_FAILURES)) + assert getattr(result, "intermediate_failures", None) is None + + def test_output_filter_still_raises_with_intermediate_failures(self): + # Even if the body carries intermediate_failures, output-filter rejection + # must take precedence — the response is still a filter block. + body: dict[str, Any] = dict(_OUTPUT_FILTER_BODY) + body["intermediate_failures"] = [ + {"code": 429, "message": "rate limited", "location": "LLM"} + ] + with pytest.raises(ContentFilteredError) as ei: + self._call(_stub_response(200, body)) + assert ei.value.direction == "output" + + +# --------------------------------------------------------------------------- +# Cross-concern install lifecycle (filtering + fallback in one patch) +# --------------------------------------------------------------------------- + + +class TestInstallComposition: + def test_patch_installed_when_only_filtering(self): + import litellm + + from sap_cloud_sdk.aicore.filtering._patch import ( + FilteringOrchestrationConfig, + _ORIGINAL_CONFIG, + ) + + # Filtering-only installs the filtering class. 
``OrchestrationPatchConfig`` + # (the combined subclass) is reserved for when fallback is active. + _install_filter(_default_filtering()) + assert litellm.GenAIHubOrchestrationConfig is FilteringOrchestrationConfig + _install_filter(None) + assert litellm.GenAIHubOrchestrationConfig is _ORIGINAL_CONFIG + + def test_patch_installed_when_only_fallback(self): + import litellm + + from sap_cloud_sdk.aicore.filtering._patch import _ORIGINAL_CONFIG + + _install_fallback(FallbackConfig([FallbackModel(model="sap/x")])) + assert litellm.GenAIHubOrchestrationConfig is OrchestrationPatchConfig + _install_fallback(None) + assert litellm.GenAIHubOrchestrationConfig is _ORIGINAL_CONFIG + + def test_patch_stays_when_one_concern_cleared_other_active(self): + import litellm + + _install_filter(_default_filtering()) + _install_fallback(FallbackConfig([FallbackModel(model="sap/x")])) + assert litellm.GenAIHubOrchestrationConfig is OrchestrationPatchConfig + + # Clear only filtering — patch stays because fallback is still active. + _install_filter(None) + assert litellm.GenAIHubOrchestrationConfig is OrchestrationPatchConfig + + # Clear fallback — now both inactive, original restored. 
+ _install_fallback(None) + from sap_cloud_sdk.aicore.filtering._patch import _ORIGINAL_CONFIG + + assert litellm.GenAIHubOrchestrationConfig is _ORIGINAL_CONFIG + + def test_install_fallback_idempotent(self): + import litellm + + cfg = FallbackConfig([FallbackModel(model="sap/x")]) + _install_fallback(cfg) + _install_fallback(cfg) + assert litellm.GenAIHubOrchestrationConfig is OrchestrationPatchConfig diff --git a/tests/aicore/fallback/unit/test_set_fallbacks.py b/tests/aicore/fallback/unit/test_set_fallbacks.py new file mode 100644 index 0000000..d123b31 --- /dev/null +++ b/tests/aicore/fallback/unit/test_set_fallbacks.py @@ -0,0 +1,72 @@ +"""Unit tests for :func:`set_fallbacks` lifecycle and env-driven activation.""" + +from __future__ import annotations + +import os + +import litellm +import pytest + +from sap_cloud_sdk.aicore.fallback import _patch as _fallback_patch +from sap_cloud_sdk.aicore.fallback._patch import ( + OrchestrationPatchConfig, + _install_fallback, +) +from sap_cloud_sdk.aicore.fallback.fallback import ( + FallbackConfig, + FallbackModel, + set_fallbacks, +) +from sap_cloud_sdk.aicore.filtering._patch import ( + _ORIGINAL_CONFIG, + _install as _install_filter, +) + + +@pytest.fixture(autouse=True) +def clean_state(monkeypatch): + """Clear env and patch state before/after each test.""" + for key in list(os.environ): + if key.startswith("AICORE_FALLBACK"): + monkeypatch.delenv(key, raising=False) + _install_filter(None) + _install_fallback(None) + yield + _install_filter(None) + _install_fallback(None) + + +class TestSetFallbacks: + def test_with_explicit_config_installs_patch(self): + set_fallbacks(FallbackConfig([FallbackModel(model="sap/x")])) + assert litellm.GenAIHubOrchestrationConfig is OrchestrationPatchConfig + assert _fallback_patch._active_fallback_cfg is not None + + def test_with_none_no_env_clears(self): + set_fallbacks(FallbackConfig([FallbackModel(model="sap/x")])) + set_fallbacks(None) + assert 
_fallback_patch._active_fallback_cfg is None + assert litellm.GenAIHubOrchestrationConfig is _ORIGINAL_CONFIG + + def test_with_none_reads_env_when_enabled(self, monkeypatch): + monkeypatch.setenv("AICORE_FALLBACK_ENABLED", "true") + monkeypatch.setenv("AICORE_FALLBACK_MODELS", "sap/a,sap/b") + set_fallbacks(None) + assert _fallback_patch._active_fallback_cfg is not None + assert [m.model for m in _fallback_patch._active_fallback_cfg.models] == [ + "sap/a", + "sap/b", + ] + assert litellm.GenAIHubOrchestrationConfig is OrchestrationPatchConfig + + def test_with_none_env_disabled_keeps_inactive(self): + # AICORE_FALLBACK_ENABLED unset → from_env returns None → install None. + set_fallbacks(None) + assert _fallback_patch._active_fallback_cfg is None + assert litellm.GenAIHubOrchestrationConfig is _ORIGINAL_CONFIG + + def test_idempotent(self): + cfg = FallbackConfig([FallbackModel(model="sap/x")]) + set_fallbacks(cfg) + set_fallbacks(cfg) + assert litellm.GenAIHubOrchestrationConfig is OrchestrationPatchConfig diff --git a/tests/aicore/integration/conftest.py b/tests/aicore/integration/conftest.py index b6184e4..d8993b5 100644 --- a/tests/aicore/integration/conftest.py +++ b/tests/aicore/integration/conftest.py @@ -1,4 +1,7 @@ -"""Pytest configuration and fixtures for AI Core filtering integration tests.""" +"""Pytest configuration and fixtures for AI Core integration tests. + +Covers both filtering and fallback BDD suites. +""" import os from pathlib import Path @@ -6,10 +9,11 @@ import pytest from dotenv import load_dotenv -from sap_cloud_sdk.aicore import disable_filtering, set_aicore_config +from sap_cloud_sdk.aicore import disable_filtering, set_aicore_config, set_fallbacks -_REQUIRED_VARS = [ +# Filtering integration vars — required for the filtering.feature scenarios. +_FILTERING_VARS = [ "AICORE_CLIENT_ID", "AICORE_CLIENT_SECRET", "AICORE_AUTH_URL", @@ -18,6 +22,15 @@ "AICORE_FILTER_TEST_MODEL", ] +# Core credentials shared by both suites. 
+_CORE_CREDS = _FILTERING_VARS[:-1] + +# Fallback integration vars — required only for fallback.feature scenarios. +_FALLBACK_VARS = [ + "AICORE_FALLBACK_TEST_PRIMARY_MODEL", + "AICORE_FALLBACK_TEST_FALLBACK_MODEL", +] + def _load_env() -> None: """Load .env_integration_tests from the repo root if present.""" @@ -28,28 +41,55 @@ def _load_env() -> None: @pytest.fixture(scope="session", autouse=True) def aicore_configured(): - """Load env, configure AI Core, restore unfiltered state on teardown.""" + """Load env, configure AI Core, restore unfiltered state on teardown. + + Skips the whole module when AI Core credentials are missing. Fallback-only + scenarios additionally skip themselves when AICORE_FALLBACK_TEST_* vars + are missing (see ``fallback_models`` fixture). + """ _load_env() - missing = [k for k in _REQUIRED_VARS if not os.environ.get(k)] + missing = [k for k in _CORE_CREDS if not os.environ.get(k)] if missing: - pytest.skip(f"Missing env vars for filtering integration tests: {missing}") + pytest.skip(f"Missing core env vars for AI Core integration tests: {missing}") set_aicore_config() yield disable_filtering() + set_fallbacks(None) @pytest.fixture(scope="session") def test_model() -> str: - """Model name to use in live completion calls.""" - return os.environ["AICORE_FILTER_TEST_MODEL"] + """Model name for the filtering integration scenarios.""" + model = os.environ.get("AICORE_FILTER_TEST_MODEL") + if not model: + pytest.skip("AICORE_FILTER_TEST_MODEL not set") + return model + + +@pytest.fixture(scope="session") +def fallback_models() -> tuple[str, str]: + """(primary, fallback) model names for the fallback integration scenarios. + + The "primary" should be a model name known to be unsupported in the + deployed region so that fallback fires; the "fallback" must be supported. 
+ """ + missing = [k for k in _FALLBACK_VARS if not os.environ.get(k)] + if missing: + pytest.skip(f"Missing env vars for fallback integration tests: {missing}") + return ( + os.environ["AICORE_FALLBACK_TEST_PRIMARY_MODEL"], + os.environ["AICORE_FALLBACK_TEST_FALLBACK_MODEL"], + ) @pytest.fixture(autouse=True) -def reset_filtering_between_tests(): +def reset_aicore_state_between_tests(): """Each scenario opts in/out via its Given step.""" disable_filtering() + set_fallbacks(None) yield disable_filtering() + set_fallbacks(None) def pytest_configure(config): diff --git a/tests/aicore/integration/fallback.feature b/tests/aicore/integration/fallback.feature new file mode 100644 index 0000000..7c595b8 --- /dev/null +++ b/tests/aicore/integration/fallback.feature @@ -0,0 +1,32 @@ +Feature: Model fallback with SAP AI Core Orchestration v2 + As an SDK user + I want orchestration to transparently retry with a fallback model when the primary fails + So that my application is resilient to transient errors and region-unsupported models + + Background: + Given AI Core credentials are configured + And primary and fallback test models are configured + + Scenario: Fallback OFF — primary call succeeds with no intermediate_failures + Given fallback is disabled + When I send a benign prompt to the fallback test model + Then the response should contain a non-empty completion + And the response has no intermediate_failures + + Scenario: Primary model unsupported — fallback model is used + Given fallback is configured with the test fallback model + When I send a benign prompt to the unsupported primary model + Then the response should contain a non-empty completion + And the response has a non-empty intermediate_failures list + + Scenario: Filtering composes with fallback — call succeeds, no filter rejection + Given fallback is configured with the test fallback model + And filtering is enabled with default thresholds + When I send a benign prompt to the unsupported primary model + Then 
the response should contain a non-empty completion + And no ContentFilteredError is raised + + Scenario: Streaming with fallback — fallback fires when primary unsupported + Given fallback is configured with the test fallback model + When I send a benign streaming prompt to the unsupported primary model + Then the streamed response should contain non-empty content diff --git a/tests/aicore/integration/test_fallback_bdd.py b/tests/aicore/integration/test_fallback_bdd.py new file mode 100644 index 0000000..a00893e --- /dev/null +++ b/tests/aicore/integration/test_fallback_bdd.py @@ -0,0 +1,179 @@ +"""BDD step definitions for fallback integration tests. + +Run against a live AI Core orchestration deployment: + + AICORE_CLIENT_ID=... AICORE_CLIENT_SECRET=... AICORE_AUTH_URL=... \\ + AICORE_BASE_URL=... AICORE_RESOURCE_GROUP=... \\ + AICORE_FALLBACK_TEST_PRIMARY_MODEL=sap/unsupported-in-region \\ + AICORE_FALLBACK_TEST_FALLBACK_MODEL=sap/mistralai--mistral-small-instruct \\ + uv run python -m pytest tests/aicore/integration/test_fallback_bdd.py -v + +The conftest skips fallback scenarios cleanly when the AICORE_FALLBACK_TEST_* +env vars are missing. + +The "primary" model should be one that the orchestration server reports as +unsupported in the deployed region (the canonical way to force fallback +without relying on transient 5xx errors). The "fallback" must be a supported +model that the resource group can call. +""" + +from __future__ import annotations + +from typing import Any, Optional + +import pytest +from litellm import completion +from pytest_bdd import given, scenarios, then, when + +from sap_cloud_sdk.aicore import ( + ContentFilteredError, + FallbackConfig, + FallbackModel, + extract_filter_blocked, + set_fallbacks, + set_filtering, +) + +scenarios("fallback.feature") + + +BENIGN_PROMPT = "Reply with 'ok' in English." 
+ + +class ScenarioContext: + """Per-scenario state.""" + + def __init__(self) -> None: + self.response: Any = None + self.streamed_content: str = "" + self.error: Optional[Exception] = None + + +@pytest.fixture +def ctx() -> ScenarioContext: + return ScenarioContext() + + +# ---------------- Background ---------------- + + +@given("AI Core credentials are configured") +def creds_configured(): + """Background — covered by the session-scoped fixture in conftest.""" + + +@given("primary and fallback test models are configured") +def models_configured(fallback_models: tuple[str, str]): + """Background — assert the fallback fixture resolved (else it skips).""" + primary, fallback = fallback_models + assert primary and fallback + + +# ---------------- Given (fallback / filtering state) ---------------- + + +@given("fallback is disabled") +def fallback_off(): + set_fallbacks(None) + + +@given("fallback is configured with the test fallback model") +def fallback_on(fallback_models: tuple[str, str]): + _primary, fallback = fallback_models + set_fallbacks(FallbackConfig([FallbackModel(model=fallback)])) + + +@given("filtering is enabled with default thresholds") +def filtering_default(): + set_filtering() + + +# ---------------- When (send prompt) ---------------- + + +def _capture_completion(ctx: ScenarioContext, model: str, prompt: str) -> None: + """Send a non-streaming completion and capture the response or error.""" + try: + ctx.response = completion( + model=model, + messages=[{"role": "user", "content": prompt}], + ) + except ContentFilteredError as e: + ctx.error = e + except Exception as e: + # LiteLLM may wrap input-filter rejections in APIConnectionError. 
+ if blocked := extract_filter_blocked(e): + ctx.error = blocked + else: + ctx.error = e + + +@when("I send a benign prompt to the fallback test model") +def send_to_fallback_model(ctx: ScenarioContext, fallback_models: tuple[str, str]): + _primary, fallback = fallback_models + _capture_completion(ctx, fallback, BENIGN_PROMPT) + + +@when("I send a benign prompt to the unsupported primary model") +def send_to_primary(ctx: ScenarioContext, fallback_models: tuple[str, str]): + primary, _fallback = fallback_models + _capture_completion(ctx, primary, BENIGN_PROMPT) + + +@when("I send a benign streaming prompt to the unsupported primary model") +def send_streaming_to_primary(ctx: ScenarioContext, fallback_models: tuple[str, str]): + primary, _fallback = fallback_models + try: + stream = completion( + model=primary, + messages=[{"role": "user", "content": BENIGN_PROMPT}], + stream=True, + ) + parts: list[str] = [] + for chunk in stream: + delta = chunk.choices[0].delta.content + if delta: + parts.append(delta) + ctx.streamed_content = "".join(parts) + except Exception as e: + ctx.error = e + + +# ---------------- Then (assertions) ---------------- + + +@then("the response should contain a non-empty completion") +def response_non_empty(ctx: ScenarioContext): + assert ctx.response is not None, f"no response (error={ctx.error})" + content = ctx.response.choices[0].message.content + assert isinstance(content, str) and content.strip(), ( + f"expected non-empty completion, got {content!r}" + ) + + +@then("the response has no intermediate_failures") +def no_intermediate_failures(ctx: ScenarioContext): + assert ctx.response is not None + assert getattr(ctx.response, "intermediate_failures", None) is None + + +@then("the response has a non-empty intermediate_failures list") +def has_intermediate_failures(ctx: ScenarioContext): + assert ctx.response is not None + failures = getattr(ctx.response, "intermediate_failures", None) + assert failures, f"expected non-empty 
intermediate_failures, got {failures!r}" + + +@then("no ContentFilteredError is raised") +def no_filter_error(ctx: ScenarioContext): + assert not isinstance(ctx.error, ContentFilteredError), ( + f"unexpected ContentFilteredError: {ctx.error}" + ) + + +@then("the streamed response should contain non-empty content") +def streamed_non_empty(ctx: ScenarioContext): + assert ctx.error is None, f"streaming failed: {ctx.error}" + assert ctx.streamed_content.strip(), ( + f"expected non-empty streamed content, got {ctx.streamed_content!r}" + ) diff --git a/tests/core/unit/telemetry/test_operation.py b/tests/core/unit/telemetry/test_operation.py index 83e0970..06da49a 100644 --- a/tests/core/unit/telemetry/test_operation.py +++ b/tests/core/unit/telemetry/test_operation.py @@ -211,6 +211,6 @@ def test_operation_count(self): """Test that we have the expected number of operations.""" all_operations = list(Operation) # 3 auditlog + 11 destination + 10 certificate + 10 fragment + 8 objectstore - # + 2 extensibility + 5 aicore + 23 dms + 4 agentgateway + 13 agent_memory - # + 5 data_anonymization + 52 adms + 6 print = 152 - assert len(all_operations) == 152 + # + 2 extensibility + 6 aicore + 23 dms + 4 agentgateway + 13 agent_memory + # + 5 data_anonymization + 52 adms + 6 print = 153 + assert len(all_operations) == 153