diff --git a/.env_integration_tests.example b/.env_integration_tests.example
index e8c7a77..f91620c 100644
--- a/.env_integration_tests.example
+++ b/.env_integration_tests.example
@@ -34,6 +34,17 @@ AICORE_MODEL=anthropic--claude-3-5-haiku
 AICORE_FILTER_TEST_MODEL=sap/gpt-4o-mini
 AICORE_FILTER_TEST_SELF_HARM_PROMPT=
 
+# AI CORE fallback integration tests (tests/aicore/integration/fallback.feature)
+# AICORE_FALLBACK_TEST_PRIMARY_MODEL: sap/* model name that the orchestration
+#   server reports as unsupported in your deployed region. This is the
+#   canonical way to force the fallback path without depending on transient
+#   5xx errors. A nonexistent name like sap/this-model-does-not-exist works.
+# AICORE_FALLBACK_TEST_FALLBACK_MODEL: sap/* model that your resource group
+#   CAN call. Used as the single fallback preference.
+# When either var is unset, the fallback BDD scenarios skip cleanly.
+AICORE_FALLBACK_TEST_PRIMARY_MODEL=sap/this-model-does-not-exist
+AICORE_FALLBACK_TEST_FALLBACK_MODEL=sap/mistralai--mistral-small-instruct
+
 # AUDITLOG
 CLOUD_SDK_CFG_AUDITLOG_DEFAULT_URL=https://your-auditlog-api-url-here
 CLOUD_SDK_CFG_AUDITLOG_DEFAULT_UAA='{"url":"https://your-auth-url","clientid":"your-client-id","clientsecret":"your-client-secret"}'
diff --git a/src/sap_cloud_sdk/aicore/__init__.py b/src/sap_cloud_sdk/aicore/__init__.py
index d92ad83..61aa9da 100644
--- a/src/sap_cloud_sdk/aicore/__init__.py
+++ b/src/sap_cloud_sdk/aicore/__init__.py
@@ -13,6 +13,7 @@
 from sap_cloud_sdk.core.telemetry.metrics_decorator import record_metrics
 from sap_cloud_sdk.core.telemetry.module import Module
 from sap_cloud_sdk.core.telemetry.operation import Operation
+from .fallback import FallbackConfig, FallbackModel, set_fallbacks
 from .filtering import (
     AzureContentFilter,
     ContentFilter,
@@ -134,6 +135,11 @@ def set_aicore_config(instance_name: str = "aicore-instance") -> None:
     call :func:`set_filtering` afterward. Use :func:`disable_filtering`
     to turn filtering off at runtime, or set ``AICORE_FILTER_ENABLED=false``
     to keep it off entirely.
+
+    Model fallback is **opt-in** and is NOT activated by this function. To
+    enable it, call :func:`set_fallbacks` with a config, or set
+    ``AICORE_FALLBACK_ENABLED=true`` plus ``AICORE_FALLBACK_MODELS`` or
+    ``AICORE_FALLBACK_CONFIG`` and call ``set_fallbacks()`` with no args.
     """
     # Load secrets
     client_id = _get_secret("AICORE_CLIENT_ID", "clientid", instance_name=instance_name)
@@ -189,4 +195,7 @@ def set_aicore_config(instance_name: str = "aicore-instance") -> None:
     "ContentFilteredError",
     "OrchestrationError",
     "extract_filter_blocked",
+    "set_fallbacks",
+    "FallbackConfig",
+    "FallbackModel",
 ]
diff --git a/src/sap_cloud_sdk/aicore/fallback/__init__.py b/src/sap_cloud_sdk/aicore/fallback/__init__.py
new file mode 100644
index 0000000..3bd443c
--- /dev/null
+++ b/src/sap_cloud_sdk/aicore/fallback/__init__.py
@@ -0,0 +1,9 @@
+"""Model-fallback subpackage for SAP AI Core Orchestration v2.
+
+Re-exports the public surface defined in :mod:`.fallback`. Users should import
+flat from :mod:`sap_cloud_sdk.aicore`; this package is the source of truth.
+"""
+
+from .fallback import FallbackConfig, FallbackModel, set_fallbacks
+
+__all__ = ["FallbackModel", "FallbackConfig", "set_fallbacks"]
diff --git a/src/sap_cloud_sdk/aicore/fallback/_patch.py b/src/sap_cloud_sdk/aicore/fallback/_patch.py
new file mode 100644
index 0000000..7b3782c
--- /dev/null
+++ b/src/sap_cloud_sdk/aicore/fallback/_patch.py
@@ -0,0 +1,199 @@
+"""LiteLLM transport patch that adds model-fallback support on top of filtering.
+
+Patches ``litellm.GenAIHubOrchestrationConfig`` with a subclass of the
+filtering patch (:class:`sap_cloud_sdk.aicore.filtering._patch.FilteringOrchestrationConfig`)
+that adds the fallback-side hooks:
+
+- ``transform_request``:
+  1. Injects ``fallback_sap_modules`` into ``optional_params`` before super
+     reads it. LiteLLM's ``GenAIHubOrchestrationConfig.transform_request``
+     pops that key to build ``body["config"]["modules"]`` as a list.
+  2. After super returns, copies the primary module's prompt template into
+     every fallback module entry — litellm builds the primary template from
+     ``messages`` but defaults each fallback's template to ``[]``, which the
+     orchestration server rejects with
+     ``"config.modules[N].prompt_templating.prompt.template should be non-empty"``.
+  3. When filtering is active, broadcasts the filtering configuration across
+     every module entry (primary + every fallback). The filtering parent
+     class only injects on ``modules[0]``; the broadcast here keeps the
+     same filter set applied for every preference the server might pick.
+
+- ``transform_response``: after super has handled filter-rejection detection,
+  attaches ``intermediate_failures`` (the per-preference failure list) from
+  the 200 response body onto the returned :class:`ModelResponse` so callers
+  can inspect which preferences were skipped. Not set when the body omits
+  the field (or it is null). Non-streaming only in v1.
+
+The two patches share the monkeypatch slot. :func:`_install_fallback`
+installs this subclass (which still does filtering thanks to inheritance);
+clearing fallback restores the filtering-only class (or the original) by
+calling :func:`sap_cloud_sdk.aicore.filtering._patch._install` with the
+filtering side's current state — that path knows nothing about fallback,
+so the filtering module never imports this one. Idempotent.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Any
+
+import litellm
+from litellm.types.utils import ModelResponse
+
+from ..filtering import _patch as _filter_patch
+from ..filtering._patch import FilteringOrchestrationConfig
+
+logger = logging.getLogger(__name__)
+
+
+# Module-level fallback state. ``None`` means fallback is inactive; the
+# filtering module is the source of truth for the installed class in that
+# case (see :func:`_install_fallback`).
+_active_fallback_cfg: Any = None  # FallbackConfig | None
+
+
+class OrchestrationPatchConfig(FilteringOrchestrationConfig):
+    """Adds model-fallback request/response hooks to the filtering patch.
+
+    Inherits filtering injection + rejection handling from
+    :class:`FilteringOrchestrationConfig`. Adds, in order:
+
+    - ``fallback_sap_modules`` injection (so litellm builds ``modules`` as a
+      list of preference dicts).
+    - Prompt-template broadcast to every fallback module entry.
+    - Filtering broadcast across every module entry (overriding the parent's
+      primary-only injection).
+    - ``intermediate_failures`` attachment on the returned ``ModelResponse``.
+    """
+
+    def transform_request(
+        self,
+        model: str,
+        messages: list,
+        optional_params: dict,
+        litellm_params: dict,
+        headers: dict,
+    ) -> dict:
+        # Inject fallback into optional_params BEFORE super reads it.
+        # LiteLLM's transform_request copies optional_params and pops
+        # ``"fallback_sap_modules"`` to build the modules list.
+        if _active_fallback_cfg is not None:
+            optional_params["fallback_sap_modules"] = (
+                _active_fallback_cfg.to_litellm_kwarg()
+            )
+
+        body = super().transform_request(
+            model=model,
+            messages=messages,
+            optional_params=optional_params,
+            litellm_params=litellm_params,
+            headers=headers,
+        )
+
+        modules = body["config"]["modules"]
+        # No fallback => single dict, nothing else to do here.
+        if not isinstance(modules, list) or len(modules) <= 1:
+            return body
+
+        # Broadcast the primary's prompt template to every fallback entry.
+        # litellm only builds the primary's template from ``messages``;
+        # fallback entries get whatever was popped from their dict's
+        # ``"messages"`` key (litellm transformation.py L371), which is
+        # ``[]`` for ``FallbackModel.to_dict()``. Without this copy, the
+        # server rejects with
+        # "config.modules[N].prompt_templating.prompt.template should be
+        # non-empty".
+        primary_template = (
+            modules[0].get("prompt_templating", {}).get("prompt", {}).get("template")
+        )
+        if primary_template:
+            for entry in modules[1:]:
+                entry.setdefault("prompt_templating", {}).setdefault("prompt", {})[
+                    "template"
+                ] = primary_template
+
+        # Broadcast filtering across every module entry. The filtering parent
+        # installed it on ``modules[0]`` only; broadcasting keeps the same
+        # filter set applied for every preference the server might pick.
+        # To opt a fallback out of filtering, call ``disable_filtering()``
+        # before the call.
+        if _filter_patch._active_cfg is not None:
+            filtering_dict = _filter_patch._active_cfg.to_dict()
+            if filtering_dict:
+                for entry in modules[1:]:
+                    entry["filtering"] = filtering_dict
+
+        return body
+
+    def transform_response(
+        self,
+        model: str,
+        raw_response: Any,
+        model_response: ModelResponse,
+        logging_obj: Any,
+        request_data: dict,
+        messages: list,
+        optional_params: dict,
+        litellm_params: dict,
+        encoding: Any,
+        api_key: str | None = None,
+        json_mode: bool | None = None,
+    ) -> ModelResponse:
+        # Let the filtering parent handle filter-rejection detection first
+        # (it raises ``ContentFilteredError`` before falling through to
+        # super-super). If it raises, we never reach the attach below.
+        result = super().transform_response(
+            model=model,
+            raw_response=raw_response,
+            model_response=model_response,
+            logging_obj=logging_obj,
+            request_data=request_data,
+            messages=messages,
+            optional_params=optional_params,
+            litellm_params=litellm_params,
+            encoding=encoding,
+            api_key=api_key,
+            json_mode=json_mode,
+        )
+
+        # Surface ``intermediate_failures`` on the returned ``ModelResponse``
+        # so callers can see which preferences were skipped. Only present on
+        # non-streaming 200 responses — streaming surfacing is deferred.
+        if raw_response.status_code == 200:
+            try:
+                payload = raw_response.json()
+            except ValueError:
+                return result
+            failures = payload.get("intermediate_failures")
+            if failures is not None:
+                # ``ModelResponse`` uses pydantic ``extra="allow"`` so dynamic
+                # attribute assignment is supported at runtime. ``setattr``
+                # keeps the static type checker happy.
+                setattr(result, "intermediate_failures", failures)
+
+        return result
+
+
+def _install_fallback(cfg: Any) -> None:  # cfg: FallbackConfig | None
+    """Set the active fallback config and refresh the installed patch class.
+
+    When ``cfg`` is non-``None``, installs :class:`OrchestrationPatchConfig`
+    (which inherits filtering, so filtering still works when active).
+
+    When ``cfg`` is ``None``, defers to the filtering module: re-runs its
+    ``_install`` with whatever filtering state is currently active, which
+    restores either ``FilteringOrchestrationConfig`` (filtering on) or
+    ``_ORIGINAL_CONFIG`` (both off).
+
+    Idempotent — repeated calls with the same value are safe.
+    """
+    global _active_fallback_cfg
+    _active_fallback_cfg = cfg
+    if cfg is None:
+        # Hand back control to the filtering installer so it restores the
+        # correct class for the current filtering state.
+        _filter_patch._install(_filter_patch._active_cfg)
+        logger.debug("model fallback disabled")
+    else:
+        litellm.GenAIHubOrchestrationConfig = OrchestrationPatchConfig
+        logger.info("model fallback active (OrchestrationPatchConfig)")
diff --git a/src/sap_cloud_sdk/aicore/fallback/fallback.py b/src/sap_cloud_sdk/aicore/fallback/fallback.py
new file mode 100644
index 0000000..716541c
--- /dev/null
+++ b/src/sap_cloud_sdk/aicore/fallback/fallback.py
@@ -0,0 +1,227 @@
+"""Public model-fallback API for SAP AI Core Orchestration v2.
+
+Orchestration v2 supports module-configuration fallbacks: when the primary
+configuration fails (model unsupported in region, 429, 408, or 5xx — and only
+unsupported-model for streams), orchestration retries with the next preference.
+See ``context/fallback.md``.
+
+The litellm SAP provider already supports this: passing ``fallback_sap_modules``
+through ``optional_params`` builds ``body["config"]["modules"]`` as a list.
+This module is the SDK-side ergonomic layer: typed ``FallbackModel`` /
+``FallbackConfig`` dataclasses, an env-driven ``from_env()`` builder, and the
+``set_fallbacks()`` entry point that installs them into the shared
+``OrchestrationPatchConfig`` patch (alongside any active filtering config).
+
+Fallback is **opt-in**: ``set_aicore_config()`` does not enable it. Developers
+must either call ``set_fallbacks(...)`` programmatically or set
+``AICORE_FALLBACK_ENABLED=true`` and call ``set_fallbacks()`` (with no args).
+
+The companion ``intermediate_failures`` field from the orchestration response
+is surfaced as an attribute on the returned ``ModelResponse``. Non-streaming
+only in v1 — streaming surfacing is deferred.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+from dataclasses import dataclass, field
+
+from sap_cloud_sdk.core.telemetry.metrics_decorator import record_metrics
+from sap_cloud_sdk.core.telemetry.module import Module
+from sap_cloud_sdk.core.telemetry.operation import Operation
+
+from ._patch import _install_fallback
+
+logger = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Env-var helpers (kept local — small, simple, no dependency on filtering)
+# ---------------------------------------------------------------------------
+
+_TRUTHY = frozenset({"true", "1", "yes"})
+
+
+def _read_env_str(key: str, default: str = "") -> str:
+    """Read a string env var. Trims whitespace. Returns ``default`` if absent."""
+    raw = os.environ.get(key)
+    return raw.strip() if raw is not None else default
+
+
+def _read_env_bool(key: str, default: bool = False) -> bool:
+    """Read a boolean env var.
+
+    ``true``/``1``/``yes`` (case-insensitive) are True; anything else is False.
+    Returns ``default`` if the variable is absent.
+    """
+    raw = os.environ.get(key)
+    return (raw.strip().lower() in _TRUTHY) if raw is not None else default
+
+
+# ---------------------------------------------------------------------------
+# Public dataclasses
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class FallbackModel:
+    """A single fallback preference.
+
+    Args:
+        model: Model name passed to orchestration (e.g. ``"sap/gpt-4o"``).
+        params: Per-model params (``max_tokens``, ``temperature``, …). Optional.
+            When omitted, the orchestration server falls back to its defaults
+            for the model — it does NOT inherit the primary call's params.
+        model_version: Specific model version. Defaults to ``"latest"`` on
+            the server side when omitted.
+    """
+
+    model: str
+    params: dict | None = None
+    model_version: str | None = None
+
+    def to_dict(self) -> dict:
+        """Wire shape consumed by litellm's ``_build_prompt_module``.
+
+        litellm pops ``model`` and ``model_version`` from the dict and treats
+        everything else as model params. We keep this shape minimal.
+        """
+        result: dict = {"model": self.model}
+        if self.model_version is not None:
+            result["model_version"] = self.model_version
+        if self.params:
+            result.update(self.params)
+        return result
+
+
+@dataclass
+class FallbackConfig:
+    """Ordered list of fallback preferences.
+
+    The orchestration server tries preferences in order; the first to succeed
+    wins. Empty lists are accepted but have no effect (equivalent to no
+    fallback).
+
+    Args:
+        models: Ordered list of :class:`FallbackModel` instances. Element 0
+            is tried first after the primary call fails.
+    """
+
+    models: list[FallbackModel] = field(default_factory=list)
+
+    def to_litellm_kwarg(self) -> list[dict]:
+        """Build the list passed to litellm as ``fallback_sap_modules``."""
+        return [m.to_dict() for m in self.models]
+
+    @classmethod
+    def from_env(cls) -> "FallbackConfig | None":
+        """Build from ``AICORE_FALLBACK_*`` environment variables.
+
+        Returns ``None`` when ``AICORE_FALLBACK_ENABLED`` is not truthy, or
+        when enabled but neither ``AICORE_FALLBACK_CONFIG`` nor
+        ``AICORE_FALLBACK_MODELS`` is set (treated as disabled — a warning is
+        logged).
+
+        Reads:
+            AICORE_FALLBACK_ENABLED  (bool, default false) — opt-in switch
+            AICORE_FALLBACK_CONFIG   (JSON string) — full per-model config,
+                shape ``[{"model": ..., "params": {...}, "model_version": ...}]``.
+                Takes precedence over MODELS when set. Malformed JSON raises.
+            AICORE_FALLBACK_MODELS   (comma list) — simple model-only form.
+                Each entry becomes ``FallbackModel(model=name)``.
+
+        Raises:
+            ValueError: If ``AICORE_FALLBACK_CONFIG`` is set but is not valid
+                JSON or does not decode to a list (entries are not validated).
+        """
+        if not _read_env_bool("AICORE_FALLBACK_ENABLED", default=False):
+            return None
+
+        config_raw = _read_env_str("AICORE_FALLBACK_CONFIG")
+        if config_raw:
+            try:
+                parsed = json.loads(config_raw)
+            except ValueError as e:
+                raise ValueError(
+                    f"AICORE_FALLBACK_CONFIG must be valid JSON, got: {config_raw!r}"
+                ) from e
+            if not isinstance(parsed, list):
+                raise ValueError(
+                    f"AICORE_FALLBACK_CONFIG must decode to a list, got "
+                    f"{type(parsed).__name__}"
+                )
+            models = [
+                FallbackModel(
+                    model=entry["model"],
+                    params=entry.get("params"),
+                    model_version=entry.get("model_version"),
+                )
+                for entry in parsed
+            ]
+            return cls(models=models)
+
+        models_raw = _read_env_str("AICORE_FALLBACK_MODELS")
+        if models_raw:
+            names = [n.strip() for n in models_raw.split(",") if n.strip()]
+            return cls(models=[FallbackModel(model=n) for n in names])
+
+        logger.warning(
+            "AICORE_FALLBACK_ENABLED is true but neither AICORE_FALLBACK_CONFIG "
+            "nor AICORE_FALLBACK_MODELS is set; fallback remains inactive"
+        )
+        return None
+
+
+# ---------------------------------------------------------------------------
+# Entry point
+# ---------------------------------------------------------------------------
+
+
+@record_metrics(Module.AICORE, Operation.AICORE_SET_FALLBACKS)
+def set_fallbacks(config: FallbackConfig | None = None) -> None:
+    """Install a model-fallback configuration.
+
+    Fallback is **opt-in**. ``set_aicore_config()`` does NOT activate it;
+    the developer must call this function (or set the
+    ``AICORE_FALLBACK_*`` env vars and call this function with no args).
+
+    Args:
+        config: A :class:`FallbackConfig` to install. If ``None`` (the
+            default), reads ``AICORE_FALLBACK_*`` env vars via
+            :meth:`FallbackConfig.from_env`; when that yields ``None``, any
+            previously installed fallback is cleared.
+
+    Examples:
+        Programmatic::
+
+            from sap_cloud_sdk.aicore import (
+                FallbackConfig, FallbackModel, set_fallbacks,
+            )
+
+            set_fallbacks(FallbackConfig([
+                FallbackModel(
+                    model="sap/mistralai--mistral-small-instruct",
+                    params={"temperature": 0.7, "max_tokens": 300},
+                ),
+            ]))
+
+        From environment::
+
+            import os
+            from sap_cloud_sdk.aicore import set_fallbacks
+
+            os.environ["AICORE_FALLBACK_ENABLED"] = "true"
+            os.environ["AICORE_FALLBACK_MODELS"] = (
+                "sap/mistralai--mistral-small-instruct"
+            )
+            set_fallbacks()
+    """
+    if config is None:
+        _install_fallback(FallbackConfig.from_env())
+        return
+    _install_fallback(config)
+
+
+__all__ = ["FallbackModel", "FallbackConfig", "set_fallbacks"]
diff --git a/src/sap_cloud_sdk/aicore/fallback/py.typed b/src/sap_cloud_sdk/aicore/fallback/py.typed
new file mode 100644
index 0000000..e69de29
diff --git a/src/sap_cloud_sdk/aicore/filtering/_patch.py b/src/sap_cloud_sdk/aicore/filtering/_patch.py
index 5cec892..8c2b726 100644
--- a/src/sap_cloud_sdk/aicore/filtering/_patch.py
+++ b/src/sap_cloud_sdk/aicore/filtering/_patch.py
@@ -160,9 +160,33 @@ def _install(cfg: Any) -> None:  # cfg: ContentFiltering | None
     """Patch litellm.GenAIHubOrchestrationConfig. Idempotent.
 
     cfg=None restores the original config and disables filtering.
+
+    When the model-fallback module has an active config installed, this
+    function only updates ``_active_cfg`` and leaves the installed class
+    (:class:`sap_cloud_sdk.aicore.fallback._patch.OrchestrationPatchConfig`)
+    in place — that class inherits from :class:`FilteringOrchestrationConfig`
+    and reads ``_active_cfg`` at request time, so the filtering toggle still
+    takes effect. Lazy import of the fallback patch module avoids a circular
+    dependency at package import time.
     """
     global _active_cfg
     _active_cfg = cfg
+
+    # Defer to fallback when it has installed its own subclass — clobbering
+    # the class here would silently disable model fallback.
+    try:
+        from ..fallback._patch import _active_fallback_cfg
+    except ImportError:
+        _active_fallback_cfg = None
+    if _active_fallback_cfg is not None:
+        if cfg is None:
+            logger.debug("content filtering disabled (fallback patch still installed)")
+        else:
+            logger.info(
+                "content filtering active (delegated through OrchestrationPatchConfig)"
+            )
+        return
+
     if cfg is None:
         litellm.GenAIHubOrchestrationConfig = _ORIGINAL_CONFIG
         logger.debug("content filtering disabled")
diff --git a/src/sap_cloud_sdk/aicore/user-guide.md b/src/sap_cloud_sdk/aicore/user-guide.md
index e40a54e..0e22c3c 100644
--- a/src/sap_cloud_sdk/aicore/user-guide.md
+++ b/src/sap_cloud_sdk/aicore/user-guide.md
@@ -58,6 +58,9 @@ The `set_aicore_config()` function:
 4. **Sets resource group** (defaults to "default" if not specified)
 5. **Activates content filtering** — Azure Content Safety + prompt shield enabled by default *(new in 0.28.0)*
 
+Model fallback is **not** auto-activated — it is opt-in via
+[`set_fallbacks()`](#model-fallback-opt-in).
+
 ---
 
 ## Content Filtering (enabled by default from 0.28.0)
@@ -256,6 +259,103 @@ Env vars also renamed: `ORCH_FILTER_*` → `AICORE_FILTER_*`. The
 
 ---
 
+## Model Fallback (opt-in)
+
+Orchestration v2 supports fallback configurations: when the primary model
+fails (unsupported in region, 429 Too Many Requests, 408 Request Timeout, or
+any 5xx — and only unsupported-in-region for streaming requests), the server
+automatically retries with the next preference in your list.
+
+Unlike content filtering, **fallback is opt-in**. `set_aicore_config()` does
+not enable it. The developer must call `set_fallbacks()` (or set the
+`AICORE_FALLBACK_*` env vars and call `set_fallbacks()` with no args).
+
+### Programmatic configuration
+
+```python
+from sap_cloud_sdk.aicore import (
+    FallbackConfig, FallbackModel, set_aicore_config, set_fallbacks,
+)
+from litellm import completion
+
+set_aicore_config()
+set_fallbacks(FallbackConfig([
+    FallbackModel(
+        model="sap/mistralai--mistral-small-instruct",
+        params={"temperature": 0.7, "max_tokens": 300},
+    ),
+]))
+
+response = completion(
+    model="sap/gpt-4o",
+    messages=[{"role": "user", "content": "Translate 'hello' to German."}],
+)
+```
+
+The orchestration server tries the primary model first. If it fails for a
+fallback-eligible reason, the server transparently uses each fallback in
+order. The first to succeed wins.
+
+When a fallback is used, the returned response carries an
+`intermediate_failures` attribute listing the reasons each higher-preference
+model was skipped:
+
+```python
+failures = getattr(response, "intermediate_failures", None)
+if failures:
+    for f in failures:
+        print(f"skipped preference: {f.get('code')} — {f.get('message')}")
+```
+
+The attribute is set only when the response carries `intermediate_failures`
+(hence the `getattr` default above), so it doubles as a quick check for
+whether a fallback was exercised. Currently non-streaming responses only.
+
+### Configure via environment
+
+Set these **before** calling `set_fallbacks()`:
+
+| Variable | Default | Description |
+|---|---|---|
+| `AICORE_FALLBACK_ENABLED` | `false` | Opt-in switch. |
+| `AICORE_FALLBACK_MODELS` | `""` | Comma-separated list of model names. Each becomes a fallback with no params. Simple form. |
+| `AICORE_FALLBACK_CONFIG` | `""` | JSON: `[{"model": "...", "params": {...}, "model_version": "..."}, ...]`. Takes precedence over `MODELS`. |
+
+```bash
+AICORE_FALLBACK_ENABLED=true
+AICORE_FALLBACK_MODELS=sap/mistralai--mistral-small-instruct,sap/anthropic--claude-4.5-sonnet
+```
+
+```python
+from sap_cloud_sdk.aicore import set_fallbacks
+set_fallbacks()   # reads the env vars
+```
+
+### Filtering composes with fallback
+
+If filtering is also active, the same filtering configuration applies to the
+primary model AND every fallback preference. The filter set is broadcast
+across all module entries on the wire. To run a fallback without filtering,
+explicitly `disable_filtering()` before the call (filtering is on by default
+after `set_aicore_config()`).
+
+### Clearing at runtime
+
+There is no `disable_fallbacks()` function. `set_fallbacks(None)` re-reads the
+`AICORE_FALLBACK_*` env vars, so it clears a previously installed fallback
+when `AICORE_FALLBACK_ENABLED` is unset or not truthy. Most applications
+enable fallback once at startup and leave it on.
+
+### Error responses
+
+If every preference fails, orchestration returns an error response listing the
+failure for each attempted preference. This surfaces in user code the same way
+any orchestration error does — LiteLLM raises one of its exception types
+(`APIConnectionError`, etc.). The exception message contains the per-preference
+error list.
+
+---
+
 ### Credentials Loaded
 
 The function loads and configures these credentials:
diff --git a/src/sap_cloud_sdk/core/telemetry/operation.py b/src/sap_cloud_sdk/core/telemetry/operation.py
index 235a1ec..f11c999 100644
--- a/src/sap_cloud_sdk/core/telemetry/operation.py
+++ b/src/sap_cloud_sdk/core/telemetry/operation.py
@@ -144,6 +144,7 @@ class Operation(str, Enum):
     AICORE_SET_FILTERING = "set_filtering"
     AICORE_DISABLE_FILTERING = "disable_filtering"
     AICORE_EXTRACT_FILTER_BLOCKED = "extract_filter_blocked"
+    AICORE_SET_FALLBACKS = "set_fallbacks"
 
     # Print Operations
     PRINT_LIST_QUEUES = "list_queues"
diff --git a/tests/aicore/fallback/__init__.py b/tests/aicore/fallback/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/aicore/fallback/unit/__init__.py b/tests/aicore/fallback/unit/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/aicore/fallback/unit/test_fallback_config.py b/tests/aicore/fallback/unit/test_fallback_config.py
new file mode 100644
index 0000000..4f34416
--- /dev/null
+++ b/tests/aicore/fallback/unit/test_fallback_config.py
@@ -0,0 +1,170 @@
+"""Unit tests for FallbackModel, FallbackConfig, and FallbackConfig.from_env."""
+
+from __future__ import annotations
+
+import logging
+import os
+
+import pytest
+
+from sap_cloud_sdk.aicore.fallback.fallback import FallbackConfig, FallbackModel
+
+
+# ---------------------------------------------------------------------------
+# FallbackModel
+# ---------------------------------------------------------------------------
+
+
+class TestFallbackModelToDict:
+    def test_to_dict_minimal_has_only_model(self):
+        m = FallbackModel(model="sap/x")
+        assert m.to_dict() == {"model": "sap/x"}
+
+    def test_to_dict_with_params_merges_them(self):
+        m = FallbackModel(model="sap/x", params={"temperature": 0.7, "max_tokens": 300})
+        assert m.to_dict() == {
+            "model": "sap/x",
+            "temperature": 0.7,
+            "max_tokens": 300,
+        }
+
+    def test_to_dict_with_model_version_includes_key(self):
+        m = FallbackModel(model="sap/x", model_version="v2")
+        assert m.to_dict() == {"model": "sap/x", "model_version": "v2"}
+
+    def test_to_dict_with_empty_params_omits_them(self):
+        m = FallbackModel(model="sap/x", params={})
+        assert m.to_dict() == {"model": "sap/x"}
+
+    def test_to_dict_all_fields_set(self):
+        m = FallbackModel(
+            model="sap/x",
+            params={"temperature": 0.5},
+            model_version="v3",
+        )
+        assert m.to_dict() == {
+            "model": "sap/x",
+            "model_version": "v3",
+            "temperature": 0.5,
+        }
+
+
+# ---------------------------------------------------------------------------
+# FallbackConfig
+# ---------------------------------------------------------------------------
+
+
+class TestFallbackConfigToLitellmKwarg:
+    def test_preserves_order(self):
+        cfg = FallbackConfig(
+            [
+                FallbackModel(model="sap/a"),
+                FallbackModel(model="sap/b"),
+                FallbackModel(model="sap/c"),
+            ]
+        )
+        assert [m["model"] for m in cfg.to_litellm_kwarg()] == [
+            "sap/a",
+            "sap/b",
+            "sap/c",
+        ]
+
+    def test_empty_list_returns_empty(self):
+        cfg = FallbackConfig([])
+        assert cfg.to_litellm_kwarg() == []
+
+    def test_default_factory_produces_empty_list(self):
+        cfg = FallbackConfig()
+        assert cfg.models == []
+        assert cfg.to_litellm_kwarg() == []
+
+    def test_per_model_params_propagated(self):
+        cfg = FallbackConfig(
+            [
+                FallbackModel(model="sap/a", params={"temperature": 0.1}),
+                FallbackModel(model="sap/b", params={"max_tokens": 100}),
+            ]
+        )
+        out = cfg.to_litellm_kwarg()
+        assert out[0] == {"model": "sap/a", "temperature": 0.1}
+        assert out[1] == {"model": "sap/b", "max_tokens": 100}
+
+
+# ---------------------------------------------------------------------------
+# FallbackConfig.from_env
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture(autouse=True)
+def clean_fallback_env(monkeypatch):
+    """Clear every AICORE_FALLBACK_* variable before each test."""
+    for key in list(os.environ):
+        if key.startswith("AICORE_FALLBACK"):
+            monkeypatch.delenv(key, raising=False)
+    yield
+
+
+class TestFromEnv:
+    def test_returns_none_when_enabled_absent(self):
+        assert FallbackConfig.from_env() is None
+
+    def test_returns_none_when_enabled_false(self, monkeypatch):
+        monkeypatch.setenv("AICORE_FALLBACK_ENABLED", "false")
+        monkeypatch.setenv("AICORE_FALLBACK_MODELS", "sap/x")
+        assert FallbackConfig.from_env() is None
+
+    def test_returns_none_when_enabled_true_but_nothing_set(self, monkeypatch, caplog):
+        monkeypatch.setenv("AICORE_FALLBACK_ENABLED", "true")
+        with caplog.at_level(logging.WARNING):
+            assert FallbackConfig.from_env() is None
+        assert any("fallback remains inactive" in r.message for r in caplog.records)
+
+    def test_parses_models_csv(self, monkeypatch):
+        monkeypatch.setenv("AICORE_FALLBACK_ENABLED", "true")
+        monkeypatch.setenv("AICORE_FALLBACK_MODELS", "sap/a, sap/b ,sap/c")
+        cfg = FallbackConfig.from_env()
+        assert cfg is not None
+        assert [m.model for m in cfg.models] == ["sap/a", "sap/b", "sap/c"]
+        # No params or version inherited from env in the simple form.
+        assert all(m.params is None and m.model_version is None for m in cfg.models)
+
+    def test_parses_models_csv_skips_empty_entries(self, monkeypatch):
+        monkeypatch.setenv("AICORE_FALLBACK_ENABLED", "true")
+        monkeypatch.setenv("AICORE_FALLBACK_MODELS", ",sap/a,,sap/b,")
+        cfg = FallbackConfig.from_env()
+        assert cfg is not None
+        assert [m.model for m in cfg.models] == ["sap/a", "sap/b"]
+
+    def test_parses_config_json(self, monkeypatch):
+        monkeypatch.setenv("AICORE_FALLBACK_ENABLED", "true")
+        monkeypatch.setenv(
+            "AICORE_FALLBACK_CONFIG",
+            '[{"model":"sap/a","params":{"temperature":0.7}},'
+            ' {"model":"sap/b","model_version":"v2"}]',
+        )
+        cfg = FallbackConfig.from_env()
+        assert cfg is not None
+        assert cfg.models[0].model == "sap/a"
+        assert cfg.models[0].params == {"temperature": 0.7}
+        assert cfg.models[1].model == "sap/b"
+        assert cfg.models[1].model_version == "v2"
+
+    def test_config_takes_precedence_over_models(self, monkeypatch):
+        monkeypatch.setenv("AICORE_FALLBACK_ENABLED", "true")
+        monkeypatch.setenv("AICORE_FALLBACK_MODELS", "sap/from-models")
+        monkeypatch.setenv("AICORE_FALLBACK_CONFIG", '[{"model":"sap/from-config"}]')
+        cfg = FallbackConfig.from_env()
+        assert cfg is not None
+        assert [m.model for m in cfg.models] == ["sap/from-config"]
+
+    def test_malformed_json_raises(self, monkeypatch):
+        monkeypatch.setenv("AICORE_FALLBACK_ENABLED", "true")
+        monkeypatch.setenv("AICORE_FALLBACK_CONFIG", "{not json")
+        with pytest.raises(ValueError, match="valid JSON"):
+            FallbackConfig.from_env()
+
+    def test_non_list_json_raises(self, monkeypatch):
+        monkeypatch.setenv("AICORE_FALLBACK_ENABLED", "true")
+        monkeypatch.setenv("AICORE_FALLBACK_CONFIG", '{"model": "sap/x"}')
+        with pytest.raises(ValueError, match="decode to a list"):
+            FallbackConfig.from_env()
diff --git a/tests/aicore/fallback/unit/test_patch.py b/tests/aicore/fallback/unit/test_patch.py
new file mode 100644
index 0000000..8361b80
--- /dev/null
+++ b/tests/aicore/fallback/unit/test_patch.py
@@ -0,0 +1,499 @@
+"""Unit tests for OrchestrationPatchConfig — the fallback-side concerns.
+
+Filtering-side coverage lives in :mod:`tests.aicore.filtering.unit.test_patch`.
+This file targets:
+
+- ``transform_request`` injects ``fallback_sap_modules`` into ``optional_params``
+  before delegating to super.
+- Filtering broadcasts to every module entry when both filtering and fallback
+  are active (the behaviour change vs. the original modules[0]-only logic).
+- ``transform_response`` attaches ``intermediate_failures`` on the returned
+  ``ModelResponse`` (and only when present).
+"""
+
+from __future__ import annotations
+
+from typing import Any
+from unittest.mock import MagicMock, patch
+
+import httpx
+import pytest
+
+from sap_cloud_sdk.aicore.fallback._patch import (
+    OrchestrationPatchConfig,
+    _install_fallback,
+)
+from sap_cloud_sdk.aicore.fallback.fallback import FallbackConfig, FallbackModel
+from sap_cloud_sdk.aicore.filtering._models import (
+    AzureContentFilter,
+    ContentFiltering,
+    InputFiltering,
+    OutputFiltering,
+)
+from sap_cloud_sdk.aicore.filtering._patch import _install as _install_filter
+from sap_cloud_sdk.aicore.filtering.exceptions import ContentFilteredError
+
+
+@pytest.fixture(autouse=True)
+def restore_litellm_config():
+    """Each test starts with a clean patch state and ends the same way."""
+    _install_filter(None)
+    _install_fallback(None)
+    yield
+    _install_filter(None)
+    _install_fallback(None)
+
+
+def _stub_response(status: int, body: dict) -> httpx.Response:
+    return httpx.Response(status, json=body)
+
+
+def _default_filtering() -> ContentFiltering:
+    return ContentFiltering(
+        input_filtering=InputFiltering(filters=[AzureContentFilter()]),
+        output_filtering=OutputFiltering(filters=[AzureContentFilter()]),
+    )
+
+
+# ---------------------------------------------------------------------------
+# transform_request — fallback injection
+# ---------------------------------------------------------------------------
+
+
+class TestTransformRequestFallback:
+    @staticmethod
+    def _list_modules_body() -> dict:
+        """Body shape litellm produces when fallback is active."""
+        return {
+            "config": {
+                "modules": [
+                    {
+                        "prompt_templating": {
+                            "prompt": {"template": []},
+                            "model": {
+                                "name": "anthropic--claude-4.5-sonnet",
+                                "params": {},
+                                "version": "latest",
+                            },
+                        }
+                    },
+                    {
+                        "prompt_templating": {
+                            "prompt": {"template": []},
+                            "model": {
+                                "name": "mistral-small",
+                                "params": {},
+                                "version": "latest",
+                            },
+                        }
+                    },
+                ]
+            }
+        }
+
+    @staticmethod
+    def _dict_modules_body() -> dict:
+        """Body shape litellm produces with no fallback."""
+        return {
+            "config": {
+                "modules": {
+                    "prompt_templating": {
+                        "prompt": {"template": []},
+                        "model": {
+                            "name": "anthropic--claude-4.5-sonnet",
+                            "params": {},
+                            "version": "latest",
+                        },
+                    }
+                }
+            }
+        }
+
+    @staticmethod
+    def _realistic_list_modules_body() -> dict:
+        """Body shape litellm actually produces — primary has a real template,
+        fallback entries have ``template: []`` because litellm only converts
+        the top-level ``messages`` for the primary module. The SDK is
+        responsible for broadcasting the primary's template to every
+        fallback entry; without that the orchestration server rejects with
+        ``config.modules[N].prompt_templating.prompt.template should be
+        non-empty`` (the exact failure that reached integration tests).
+        """
+        primary_template = [{"role": "user", "content": "Reply with 'ok'."}]
+        return {
+            "config": {
+                "modules": [
+                    {
+                        "prompt_templating": {
+                            "prompt": {"template": primary_template},
+                            "model": {
+                                "name": "anthropic--claude-4.5-sonnet",
+                                "params": {},
+                                "version": "latest",
+                            },
+                        }
+                    },
+                    {
+                        "prompt_templating": {
+                            "prompt": {"template": []},
+                            "model": {
+                                "name": "mistral-small",
+                                "params": {},
+                                "version": "latest",
+                            },
+                        }
+                    },
+                ]
+            }
+        }
+
+    def test_fallback_injected_into_optional_params_before_super(self):
+        _install_fallback(FallbackConfig([FallbackModel(model="sap/mistral-small")]))
+        optional_params: dict = {}
+        captured: dict = {}
+
+        def fake_super_transform(**kwargs):
+            captured.update(kwargs)
+            return self._list_modules_body()
+
+        with patch(
+            "sap_cloud_sdk.aicore.filtering._patch."
+            "GenAIHubOrchestrationConfig.transform_request",
+            side_effect=fake_super_transform,
+        ):
+            OrchestrationPatchConfig().transform_request(
+                model="sap/anthropic--claude-4.5-sonnet",
+                messages=[],
+                optional_params=optional_params,
+                litellm_params={},
+                headers={},
+            )
+
+        assert captured["optional_params"]["fallback_sap_modules"] == [
+            {"model": "sap/mistral-small"}
+        ]
+
+    def test_no_fallback_injection_when_inactive(self):
+        # Filtering inactive too — but inactive _is_ the default; this test
+        # asserts the injection is opt-in.
+        optional_params: dict = {}
+        with patch(
+            "sap_cloud_sdk.aicore.filtering._patch."
+            "GenAIHubOrchestrationConfig.transform_request",
+            return_value=self._dict_modules_body(),
+        ):
+            OrchestrationPatchConfig().transform_request(
+                model="sap/anthropic--claude-4.5-sonnet",
+                messages=[],
+                optional_params=optional_params,
+                litellm_params={},
+                headers={},
+            )
+        assert "fallback_sap_modules" not in optional_params
+
+    def test_filtering_broadcasts_to_every_module_entry(self):
+        _install_filter(_default_filtering())
+        _install_fallback(FallbackConfig([FallbackModel(model="sap/mistral-small")]))
+
+        with patch(
+            "sap_cloud_sdk.aicore.filtering._patch."
+            "GenAIHubOrchestrationConfig.transform_request",
+            return_value=self._list_modules_body(),
+        ):
+            body = OrchestrationPatchConfig().transform_request(
+                model="sap/anthropic--claude-4.5-sonnet",
+                messages=[],
+                optional_params={},
+                litellm_params={},
+                headers={},
+            )
+
+        modules = body["config"]["modules"]
+        assert isinstance(modules, list)
+        assert len(modules) == 2
+        # Every entry — primary AND fallback — carries filtering.
+        for entry in modules:
+            assert "filtering" in entry
+            assert "input" in entry["filtering"]
+            assert "output" in entry["filtering"]
+
+    def test_filtering_on_dict_modules_unchanged_when_no_fallback(self):
+        _install_filter(_default_filtering())
+
+        with patch(
+            "sap_cloud_sdk.aicore.filtering._patch."
+            "GenAIHubOrchestrationConfig.transform_request",
+            return_value=self._dict_modules_body(),
+        ):
+            body = OrchestrationPatchConfig().transform_request(
+                model="sap/anthropic--claude-4.5-sonnet",
+                messages=[],
+                optional_params={},
+                litellm_params={},
+                headers={},
+            )
+
+        modules = body["config"]["modules"]
+        assert isinstance(modules, dict)
+        assert "filtering" in modules
+
+    def test_fallback_only_no_filtering_keys(self):
+        _install_fallback(FallbackConfig([FallbackModel(model="sap/mistral-small")]))
+
+        with patch(
+            "sap_cloud_sdk.aicore.filtering._patch."
+            "GenAIHubOrchestrationConfig.transform_request",
+            return_value=self._list_modules_body(),
+        ):
+            body = OrchestrationPatchConfig().transform_request(
+                model="sap/anthropic--claude-4.5-sonnet",
+                messages=[],
+                optional_params={},
+                litellm_params={},
+                headers={},
+            )
+
+        # No filtering installed, so no entry should carry one.
+        for entry in body["config"]["modules"]:
+            assert "filtering" not in entry
+
+    def test_primary_template_broadcasts_to_fallback_entries(self):
+        # Regression: previously fallback entries went out with
+        # ``prompt.template == []`` and the orchestration server rejected with
+        # ``config.modules[1].prompt_templating.prompt.template should be
+        # non-empty``. The patch now copies the primary's template across.
+        _install_fallback(FallbackConfig([FallbackModel(model="sap/mistral-small")]))
+
+        with patch(
+            "sap_cloud_sdk.aicore.filtering._patch."
+            "GenAIHubOrchestrationConfig.transform_request",
+            return_value=self._realistic_list_modules_body(),
+        ):
+            body = OrchestrationPatchConfig().transform_request(
+                model="sap/anthropic--claude-4.5-sonnet",
+                messages=[{"role": "user", "content": "Reply with 'ok'."}],
+                optional_params={},
+                litellm_params={},
+                headers={},
+            )
+
+        modules = body["config"]["modules"]
+        primary_template = modules[0]["prompt_templating"]["prompt"]["template"]
+        assert primary_template, "primary template should be non-empty"
+        assert len(modules) == 2, "expected the primary plus one fallback entry"
+        assert modules[1]["prompt_templating"]["prompt"]["template"] == primary_template
+
+    def test_template_broadcast_noop_for_single_module_body(self):
+        # No fallback installed → litellm emits a single dict (not a list);
+        # the broadcast must not touch it. (Also: nothing to broadcast to.)
+        with patch(
+            "sap_cloud_sdk.aicore.filtering._patch."
+            "GenAIHubOrchestrationConfig.transform_request",
+            return_value=self._dict_modules_body(),
+        ):
+            body = OrchestrationPatchConfig().transform_request(
+                model="sap/anthropic--claude-4.5-sonnet",
+                messages=[],
+                optional_params={},
+                litellm_params={},
+                headers={},
+            )
+
+        modules = body["config"]["modules"]
+        assert isinstance(modules, dict)
+        # Untouched — same empty template the fixture started with.
+        assert modules["prompt_templating"]["prompt"]["template"] == []
+
+    def test_template_broadcast_skipped_when_primary_template_empty(self):
+        # Defensive: if the primary itself somehow has no template, do not
+        # propagate the empty value (no point) — leave fallback entries alone.
+        _install_fallback(FallbackConfig([FallbackModel(model="sap/mistral-small")]))
+
+        with patch(
+            "sap_cloud_sdk.aicore.filtering._patch."
+            "GenAIHubOrchestrationConfig.transform_request",
+            return_value=self._list_modules_body(),
+        ):
+            body = OrchestrationPatchConfig().transform_request(
+                model="sap/anthropic--claude-4.5-sonnet",
+                messages=[],
+                optional_params={},
+                litellm_params={},
+                headers={},
+            )
+
+        modules = body["config"]["modules"]
+        for entry in modules:
+            assert entry["prompt_templating"]["prompt"]["template"] == []
+
+
+# ---------------------------------------------------------------------------
+# transform_response — intermediate_failures attachment
+# ---------------------------------------------------------------------------
+
+
+_SUCCESS_BODY_WITH_FAILURES = {
+    "request_id": "req-fallback",
+    "intermediate_results": {},
+    "final_result": {
+        "id": "x",
+        "object": "chat.completion",
+        "model": "mistralai--mistral-small-instruct",
+        "choices": [
+            {
+                "index": 0,
+                "message": {"role": "assistant", "content": "Servus!"},
+                "finish_reason": "stop",
+            }
+        ],
+        "usage": {"completion_tokens": 5, "prompt_tokens": 10, "total_tokens": 15},
+    },
+    "intermediate_failures": [
+        {
+            "code": 400,
+            "message": "Model gpt-4o not supported.",
+            "location": "Request Body",
+        }
+    ],
+}
+
+_SUCCESS_BODY_NO_FAILURES = {
+    "request_id": "req-primary",
+    "intermediate_results": {},
+    "final_result": {
+        "id": "x",
+        "object": "chat.completion",
+        "model": "gpt-4o",
+        "choices": [
+            {
+                "index": 0,
+                "message": {"role": "assistant", "content": "Hi!"},
+                "finish_reason": "stop",
+            }
+        ],
+        "usage": {"completion_tokens": 5, "prompt_tokens": 10, "total_tokens": 15},
+    },
+}
+
+_OUTPUT_FILTER_BODY = {
+    "request_id": "req-blocked",
+    "intermediate_results": {
+        "output_filtering": {
+            "data": {"choices": [{"index": 0, "azure_content_safety": {"Sexual": 4}}]}
+        }
+    },
+    "final_result": {
+        "id": "x",
+        "model": "m",
+        "choices": [
+            {
+                "index": 0,
+                "message": {"role": "assistant", "content": ""},
+                "finish_reason": "content_filter",
+            }
+        ],
+        "usage": {"completion_tokens": 0, "prompt_tokens": 10, "total_tokens": 10},
+    },
+}
+
+
+class TestTransformResponseIntermediateFailures:
+    def _call(self, response: httpx.Response):
+        from litellm.types.utils import ModelResponse
+
+        with patch(
+            "sap_cloud_sdk.aicore.filtering._patch."
+            "GenAIHubOrchestrationConfig.transform_response",
+            return_value=ModelResponse(),
+        ):
+            return OrchestrationPatchConfig().transform_response(
+                model="sap/anthropic--claude-4.5-sonnet",
+                raw_response=response,
+                model_response=ModelResponse(),
+                logging_obj=MagicMock(),
+                request_data={},
+                messages=[],
+                optional_params={},
+                litellm_params={},
+                encoding=None,
+            )
+
+    def test_intermediate_failures_attached_when_present(self):
+        result = self._call(_stub_response(200, _SUCCESS_BODY_WITH_FAILURES))
+        failures = getattr(result, "intermediate_failures", None)
+        assert failures is not None
+        assert len(failures) == 1
+        assert failures[0]["code"] == 400
+        assert "gpt-4o" in failures[0]["message"]
+
+    def test_intermediate_failures_absent_when_key_missing(self):
+        result = self._call(_stub_response(200, _SUCCESS_BODY_NO_FAILURES))
+        assert getattr(result, "intermediate_failures", None) is None
+
+    def test_output_filter_still_raises_with_intermediate_failures(self):
+        # A filter-blocked body must raise even if it lists intermediate_failures.
+        failure = {"code": 429, "message": "rate limited", "location": "LLM"}
+        body: dict[str, Any] = {
+            **_OUTPUT_FILTER_BODY,
+            "intermediate_failures": [failure],
+        }
+        with pytest.raises(ContentFilteredError) as ei:
+            self._call(_stub_response(200, body))
+        assert ei.value.direction == "output"
+
+
+# ---------------------------------------------------------------------------
+# Cross-concern install lifecycle (filtering + fallback in one patch)
+# ---------------------------------------------------------------------------
+
+
+class TestInstallComposition:
+    def test_patch_installed_when_only_filtering(self):
+        import litellm
+
+        from sap_cloud_sdk.aicore.filtering._patch import (
+            FilteringOrchestrationConfig,
+            _ORIGINAL_CONFIG,
+        )
+
+        # Filtering-only installs the filtering class. ``OrchestrationPatchConfig``
+        # (the combined subclass) is reserved for when fallback is active.
+        _install_filter(_default_filtering())
+        assert litellm.GenAIHubOrchestrationConfig is FilteringOrchestrationConfig
+        _install_filter(None)
+        assert litellm.GenAIHubOrchestrationConfig is _ORIGINAL_CONFIG
+
+    def test_patch_installed_when_only_fallback(self):
+        import litellm
+
+        from sap_cloud_sdk.aicore.filtering._patch import _ORIGINAL_CONFIG
+
+        _install_fallback(FallbackConfig([FallbackModel(model="sap/x")]))
+        assert litellm.GenAIHubOrchestrationConfig is OrchestrationPatchConfig
+        _install_fallback(None)
+        assert litellm.GenAIHubOrchestrationConfig is _ORIGINAL_CONFIG
+
+    def test_patch_stays_when_one_concern_cleared_other_active(self):
+        import litellm
+
+        from sap_cloud_sdk.aicore.filtering._patch import _ORIGINAL_CONFIG
+
+        _install_filter(_default_filtering())
+        _install_fallback(FallbackConfig([FallbackModel(model="sap/x")]))
+        assert litellm.GenAIHubOrchestrationConfig is OrchestrationPatchConfig
+
+        # Dropping filtering alone keeps the patch: fallback is still active.
+        _install_filter(None)
+        assert litellm.GenAIHubOrchestrationConfig is OrchestrationPatchConfig
+
+        # Dropping fallback too leaves nothing active; the original is restored.
+        _install_fallback(None)
+        assert litellm.GenAIHubOrchestrationConfig is _ORIGINAL_CONFIG
+
+    def test_install_fallback_idempotent(self):
+        import litellm
+
+        cfg = FallbackConfig([FallbackModel(model="sap/x")])
+        _install_fallback(cfg)
+        _install_fallback(cfg)
+        assert litellm.GenAIHubOrchestrationConfig is OrchestrationPatchConfig
diff --git a/tests/aicore/fallback/unit/test_set_fallbacks.py b/tests/aicore/fallback/unit/test_set_fallbacks.py
new file mode 100644
index 0000000..d123b31
--- /dev/null
+++ b/tests/aicore/fallback/unit/test_set_fallbacks.py
@@ -0,0 +1,72 @@
+"""Unit tests for :func:`set_fallbacks` lifecycle and env-driven activation."""
+
+from __future__ import annotations
+
+import os
+
+import litellm
+import pytest
+
+from sap_cloud_sdk.aicore.fallback import _patch as _fallback_patch
+from sap_cloud_sdk.aicore.fallback._patch import (
+    OrchestrationPatchConfig,
+    _install_fallback,
+)
+from sap_cloud_sdk.aicore.fallback.fallback import (
+    FallbackConfig,
+    FallbackModel,
+    set_fallbacks,
+)
+from sap_cloud_sdk.aicore.filtering._patch import (
+    _ORIGINAL_CONFIG,
+    _install as _install_filter,
+)
+
+
+@pytest.fixture(autouse=True)
+def clean_state(monkeypatch):
+    """Isolate each test: drop AICORE_FALLBACK* env vars and uninstall patches."""
+    stale = [name for name in os.environ if name.startswith("AICORE_FALLBACK")]
+    for name in stale:
+        monkeypatch.delenv(name, raising=False)
+    _install_filter(None)
+    _install_fallback(None)
+    yield
+    _install_filter(None)
+    _install_fallback(None)
+
+
+class TestSetFallbacks:
+    def test_with_explicit_config_installs_patch(self):
+        set_fallbacks(FallbackConfig([FallbackModel(model="sap/x")]))
+        assert litellm.GenAIHubOrchestrationConfig is OrchestrationPatchConfig
+        assert _fallback_patch._active_fallback_cfg is not None
+
+    def test_with_none_no_env_clears(self):
+        set_fallbacks(FallbackConfig([FallbackModel(model="sap/x")]))
+        set_fallbacks(None)
+        assert _fallback_patch._active_fallback_cfg is None
+        assert litellm.GenAIHubOrchestrationConfig is _ORIGINAL_CONFIG
+
+    def test_with_none_reads_env_when_enabled(self, monkeypatch):
+        monkeypatch.setenv("AICORE_FALLBACK_ENABLED", "true")
+        monkeypatch.setenv("AICORE_FALLBACK_MODELS", "sap/a,sap/b")
+        set_fallbacks(None)
+        assert _fallback_patch._active_fallback_cfg is not None
+        assert [m.model for m in _fallback_patch._active_fallback_cfg.models] == [
+            "sap/a",
+            "sap/b",
+        ]
+        assert litellm.GenAIHubOrchestrationConfig is OrchestrationPatchConfig
+
+    def test_with_none_env_disabled_keeps_inactive(self):
+        # AICORE_FALLBACK_ENABLED unset → from_env returns None → install None.
+        set_fallbacks(None)
+        assert _fallback_patch._active_fallback_cfg is None
+        assert litellm.GenAIHubOrchestrationConfig is _ORIGINAL_CONFIG
+
+    def test_idempotent(self):
+        cfg = FallbackConfig([FallbackModel(model="sap/x")])
+        set_fallbacks(cfg)
+        set_fallbacks(cfg)
+        assert litellm.GenAIHubOrchestrationConfig is OrchestrationPatchConfig
diff --git a/tests/aicore/integration/conftest.py b/tests/aicore/integration/conftest.py
index b6184e4..d8993b5 100644
--- a/tests/aicore/integration/conftest.py
+++ b/tests/aicore/integration/conftest.py
@@ -1,4 +1,7 @@
-"""Pytest configuration and fixtures for AI Core filtering integration tests."""
+"""Pytest configuration and fixtures for AI Core integration tests.
+
+Covers both filtering and fallback BDD suites.
+"""
 
 import os
 from pathlib import Path
@@ -6,10 +9,11 @@
 import pytest
 from dotenv import load_dotenv
 
-from sap_cloud_sdk.aicore import disable_filtering, set_aicore_config
+from sap_cloud_sdk.aicore import disable_filtering, set_aicore_config, set_fallbacks
 
 
-_REQUIRED_VARS = [
+# Filtering integration vars — required for the filtering.feature scenarios.
+_FILTERING_VARS = [
     "AICORE_CLIENT_ID",
     "AICORE_CLIENT_SECRET",
     "AICORE_AUTH_URL",
@@ -18,6 +22,15 @@
     "AICORE_FILTER_TEST_MODEL",
 ]
 
+# Core credentials shared by both suites.
+_CORE_CREDS = _FILTERING_VARS[:-1]
+
+# Fallback integration vars — required only for fallback.feature scenarios.
+_FALLBACK_VARS = [
+    "AICORE_FALLBACK_TEST_PRIMARY_MODEL",
+    "AICORE_FALLBACK_TEST_FALLBACK_MODEL",
+]
+
 
 def _load_env() -> None:
     """Load .env_integration_tests from the repo root if present."""
@@ -28,28 +41,55 @@ def _load_env() -> None:
 
 @pytest.fixture(scope="session", autouse=True)
 def aicore_configured():
-    """Load env, configure AI Core, restore unfiltered state on teardown."""
+    """Load env, configure AI Core, clear filtering and fallbacks on teardown.
+
+    Skips the whole module when AI Core credentials are missing. Fallback-only
+    scenarios additionally skip themselves when AICORE_FALLBACK_TEST_* vars
+    are missing (see ``fallback_models`` fixture).
+    """
     _load_env()
-    missing = [k for k in _REQUIRED_VARS if not os.environ.get(k)]
+    missing = [k for k in _CORE_CREDS if not os.environ.get(k)]
     if missing:
-        pytest.skip(f"Missing env vars for filtering integration tests: {missing}")
+        pytest.skip(f"Missing core env vars for AI Core integration tests: {missing}")
     set_aicore_config()
     yield
     disable_filtering()
+    set_fallbacks(None)
 
 
 @pytest.fixture(scope="session")
 def test_model() -> str:
-    """Model name to use in live completion calls."""
-    return os.environ["AICORE_FILTER_TEST_MODEL"]
+    """Model name for the filtering integration scenarios."""
+    model = os.environ.get("AICORE_FILTER_TEST_MODEL")
+    if not model:
+        pytest.skip("AICORE_FILTER_TEST_MODEL not set")
+    return model
+
+
+@pytest.fixture(scope="session")
+def fallback_models() -> tuple[str, str]:
+    """Return the ``(primary, fallback)`` model names for fallback scenarios.
+
+    The primary is meant to be a name the deployed region reports as
+    unsupported, so that fallback fires; the fallback must be supported.
+    Skips the requesting test when either env var is unset or empty.
+    """
+    unset = [name for name in _FALLBACK_VARS if not os.environ.get(name)]
+    if unset:
+        pytest.skip(f"Missing env vars for fallback integration tests: {unset}")
+    primary = os.environ["AICORE_FALLBACK_TEST_PRIMARY_MODEL"]
+    fallback = os.environ["AICORE_FALLBACK_TEST_FALLBACK_MODEL"]
+    return primary, fallback
 
 
 @pytest.fixture(autouse=True)
-def reset_filtering_between_tests():
+def reset_aicore_state_between_tests():
     """Each scenario opts in/out via its Given step."""
     disable_filtering()
+    set_fallbacks(None)
     yield
     disable_filtering()
+    set_fallbacks(None)
 
 
 def pytest_configure(config):
diff --git a/tests/aicore/integration/fallback.feature b/tests/aicore/integration/fallback.feature
new file mode 100644
index 0000000..7c595b8
--- /dev/null
+++ b/tests/aicore/integration/fallback.feature
@@ -0,0 +1,32 @@
+Feature: Model fallback with SAP AI Core Orchestration v2
+  As an SDK user
+  I want orchestration to transparently retry with a fallback model when the primary fails
+  So that my application is resilient to transient errors and region-unsupported models
+
+  Background:
+    Given AI Core credentials are configured
+    And primary and fallback test models are configured
+
+  Scenario: Fallback OFF — primary call succeeds with no intermediate_failures
+    Given fallback is disabled
+    When I send a benign prompt to the fallback test model
+    Then the response should contain a non-empty completion
+    And the response has no intermediate_failures
+
+  Scenario: Primary model unsupported — fallback model is used
+    Given fallback is configured with the test fallback model
+    When I send a benign prompt to the unsupported primary model
+    Then the response should contain a non-empty completion
+    And the response has a non-empty intermediate_failures list
+
+  Scenario: Filtering composes with fallback — call succeeds, no filter rejection
+    Given fallback is configured with the test fallback model
+    And filtering is enabled with default thresholds
+    When I send a benign prompt to the unsupported primary model
+    Then the response should contain a non-empty completion
+    And no ContentFilteredError is raised
+
+  Scenario: Streaming with fallback — fallback fires when primary unsupported
+    Given fallback is configured with the test fallback model
+    When I send a benign streaming prompt to the unsupported primary model
+    Then the streamed response should contain non-empty content
diff --git a/tests/aicore/integration/test_fallback_bdd.py b/tests/aicore/integration/test_fallback_bdd.py
new file mode 100644
index 0000000..a00893e
--- /dev/null
+++ b/tests/aicore/integration/test_fallback_bdd.py
@@ -0,0 +1,179 @@
+"""BDD step definitions for fallback integration tests.
+
+Run against a live AI Core orchestration deployment:
+
+    AICORE_CLIENT_ID=...  AICORE_CLIENT_SECRET=...  AICORE_AUTH_URL=... \\
+    AICORE_BASE_URL=...   AICORE_RESOURCE_GROUP=... \\
+    AICORE_FALLBACK_TEST_PRIMARY_MODEL=sap/unsupported-in-region \\
+    AICORE_FALLBACK_TEST_FALLBACK_MODEL=sap/mistralai--mistral-small-instruct \\
+    uv run python -m pytest tests/aicore/integration/test_fallback_bdd.py -v
+
+The conftest skips fallback scenarios cleanly when the AICORE_FALLBACK_TEST_*
+env vars are missing.
+
+The "primary" model should be one that the orchestration server reports as
+unsupported in the deployed region (the canonical way to force fallback
+without relying on transient 5xx errors). The "fallback" must be a supported
+model that the resource group can call.
+"""
+
+from __future__ import annotations
+
+from typing import Any, Optional
+
+import pytest
+from litellm import completion
+from pytest_bdd import given, scenarios, then, when
+
+from sap_cloud_sdk.aicore import (
+    ContentFilteredError,
+    FallbackConfig,
+    FallbackModel,
+    extract_filter_blocked,
+    set_fallbacks,
+    set_filtering,
+)
+
+scenarios("fallback.feature")
+
+
+BENIGN_PROMPT = "Reply with 'ok' in English."
+
+
+class ScenarioContext:
+    """Mutable per-scenario state: the response, streamed text and any error."""
+
+    def __init__(self) -> None:
+        self.response: Any = None
+        self.streamed_content: str = ""
+        self.error: Optional[Exception] = None
+
+
+@pytest.fixture
+def ctx() -> ScenarioContext:
+    return ScenarioContext()
+
+
+# ---------------- Background ----------------
+
+
+@given("AI Core credentials are configured")
+def creds_configured():
+    """Background — covered by the session-scoped fixture in conftest."""
+
+
+@given("primary and fallback test models are configured")
+def models_configured(fallback_models: tuple[str, str]):
+    """Background — assert the fallback fixture resolved (else it skips)."""
+    primary, fallback = fallback_models
+    assert primary and fallback
+
+
+# ---------------- Given (fallback / filtering state) ----------------
+
+
+@given("fallback is disabled")
+def fallback_off():
+    set_fallbacks(None)
+
+
+@given("fallback is configured with the test fallback model")
+def fallback_on(fallback_models: tuple[str, str]):
+    _primary, fallback = fallback_models
+    set_fallbacks(FallbackConfig([FallbackModel(model=fallback)]))
+
+
+@given("filtering is enabled with default thresholds")
+def filtering_default():
+    set_filtering()
+
+
+# ---------------- When (send prompt) ----------------
+
+
+def _capture_completion(ctx: ScenarioContext, model: str, prompt: str) -> None:
+    """Run one non-streaming completion and record the outcome on ``ctx``.
+
+    Success sets ``ctx.response``; any exception is stored in ``ctx.error``.
+    """
+    try:
+        ctx.response = completion(
+            model=model,
+            messages=[{"role": "user", "content": prompt}],
+        )
+    except ContentFilteredError as filtered:
+        ctx.error = filtered
+    except Exception as exc:
+        # LiteLLM may wrap input-filter rejections in APIConnectionError.
+        ctx.error = extract_filter_blocked(exc) or exc
+
+
+@when("I send a benign prompt to the fallback test model")
+def send_to_fallback_model(ctx: ScenarioContext, fallback_models: tuple[str, str]):
+    _primary, fallback = fallback_models
+    _capture_completion(ctx, fallback, BENIGN_PROMPT)
+
+
+@when("I send a benign prompt to the unsupported primary model")
+def send_to_primary(ctx: ScenarioContext, fallback_models: tuple[str, str]):
+    primary, _fallback = fallback_models
+    _capture_completion(ctx, primary, BENIGN_PROMPT)
+
+
+@when("I send a benign streaming prompt to the unsupported primary model")
+def send_streaming_to_primary(ctx: ScenarioContext, fallback_models: tuple[str, str]):
+    """Stream from the primary model, storing received text and any error on ctx."""
+    parts: list[str] = []
+    try:
+        stream = completion(
+            model=fallback_models[0],
+            messages=[{"role": "user", "content": BENIGN_PROMPT}],
+            stream=True,
+        )
+        for chunk in stream:
+            # Defensive: tolerate a chunk that arrives with no ``choices``.
+            if chunk.choices and chunk.choices[0].delta.content:
+                parts.append(chunk.choices[0].delta.content)
+    except Exception as e:
+        ctx.error = e
+    ctx.streamed_content = "".join(parts)
+
+
+# ---------------- Then (assertions) ----------------
+
+
+@then("the response should contain a non-empty completion")
+def response_non_empty(ctx: ScenarioContext):
+    """Require a captured response whose first choice carries visible text."""
+    assert ctx.response is not None, f"no response (error={ctx.error})"
+    text = ctx.response.choices[0].message.content
+    has_text = isinstance(text, str) and bool(text.strip())
+    assert has_text, f"expected non-empty completion, got {text!r}"
+
+
+@then("the response has no intermediate_failures")
+def no_intermediate_failures(ctx: ScenarioContext):
+    assert ctx.response is not None
+    assert getattr(ctx.response, "intermediate_failures", None) is None
+
+
+@then("the response has a non-empty intermediate_failures list")
+def has_intermediate_failures(ctx: ScenarioContext):
+    assert ctx.response is not None
+    failures = getattr(ctx.response, "intermediate_failures", None)
+    assert failures, f"expected non-empty intermediate_failures, got {failures!r}"
+
+
+@then("no ContentFilteredError is raised")
+def no_filter_error(ctx: ScenarioContext):
+    assert not isinstance(ctx.error, ContentFilteredError), (
+        f"unexpected ContentFilteredError: {ctx.error}"
+    )
+
+
+@then("the streamed response should contain non-empty content")
+def streamed_non_empty(ctx: ScenarioContext):
+    """Require an error-free stream that produced non-whitespace text."""
+    assert ctx.error is None, f"streaming failed: {ctx.error}"
+    received = ctx.streamed_content
+    assert received.strip(), f"expected non-empty streamed content, got {received!r}"
diff --git a/tests/core/unit/telemetry/test_operation.py b/tests/core/unit/telemetry/test_operation.py
index 83e0970..06da49a 100644
--- a/tests/core/unit/telemetry/test_operation.py
+++ b/tests/core/unit/telemetry/test_operation.py
@@ -211,6 +211,6 @@ def test_operation_count(self):
         """Test that we have the expected number of operations."""
         all_operations = list(Operation)
         # 3 auditlog + 11 destination + 10 certificate + 10 fragment + 8 objectstore
-        # + 2 extensibility + 5 aicore + 23 dms + 4 agentgateway + 13 agent_memory
-        # + 5 data_anonymization + 52 adms + 6 print = 152
-        assert len(all_operations) == 152
+        # + 2 extensibility + 6 aicore + 23 dms + 4 agentgateway + 13 agent_memory
+        # + 5 data_anonymization + 52 adms + 6 print = 153
+        assert len(all_operations) == 153