diff --git a/.github/workflows/collectivex-sweep.yml b/.github/workflows/collectivex-sweep.yml
index b56bc285a..91bb3c727 100644
--- a/.github/workflows/collectivex-sweep.yml
+++ b/.github/workflows/collectivex-sweep.yml
@@ -1,154 +1,851 @@
 # CollectiveX Sweep — one structured run instead of thousands of dispatches.
 #
-# Shape (mirrors the InferenceX CI tracker): setup -> sweep (a MATRIX job = "a job with other jobs
-# in it") -> aggregate (the collector "at the end"). The matrix unit is a SHARD = one allocation that
-# sweeps many cases sharing (sku, backend, mode, resource) — generate_matrix's own grouping, chunked
-# so no cell exceeds the job budget. Each cell emits a handful of per-case JSONs; the aggregate job
-# collects every shard into ONE line-delimited file (results/aggregate/*.ndjson) so there aren't
-# thousands of individual result files. Run once per backend (deepep / uccl / flashinfer /
-# deepep-hybrid / nccl-ep, + deepep_v2) for full parity.
+# Shape: sweep/probe runs setup -> GPU cells; publish-v1 and refresh-v1 stay on disposable
+# GitHub-hosted storage and skip GPU jobs. This file is registered on the default
+# branch, so its collectivex branch revision can be dispatched with --ref.
 name: CollectiveX Sweep
+permissions:
+  actions: read
+  contents: read
 on:
   workflow_dispatch:
     inputs:
+      operation:
+        description: Operation to execute
+        type: choice
+        default: sweep
+        options: [sweep, probe-precision, publish-v1, refresh-v1]
       backend:
-        description: EP library to sweep (deepep matrix is remapped onto the others, capability-filtered)
+        description: "EP library to sweep — 'all' runs every EP backend in one matrix"
         type: choice
-        default: deepep
-        options: [deepep, uccl, flashinfer, deepep-hybrid, nccl-ep]
-      deepep_v2:
-        description: DeepEP V2 from-source kernels (kernel_gen=v2; deepep backend only)
-        type: boolean
-        default: false
+        default: all
+        options: [all, deepep, deepep-v2, uccl, deepep-hybrid, mori, nccl-ep]
       suites:
         description: "'all' or comma-list of suite names"
         type: string
         default: all
       only_sku:
-        description: Restrict to one SKU (h100-dgxc|h200|b300|b200-dgxc|gb200|gb300|mi355x); blank = all
+        description: Restrict to one GHA runner pool (h100-dgxc|h200-dgxc|b300|b200-dgxc|gb200|gb300|mi325x|mi355x); blank = all
+        type: string
+        default: ''
+      min_nodes:
+        description: Keep only shards with at least this node/tray count (2 keeps every EP16 and GB EP8; blank = all)
+        type: string
+        default: ''
+      max_nodes:
+        description: Keep only shards with at most this node/tray count (1 keeps non-GB EP8; blank = all)
         type: string
         default: ''
       max_cases:
-        description: Max cases per shard cell (chunk larger shards)
+        description: Max cases per shard cell before chunking into another GHA job (128 = no chunking for current suites)
         type: string
-        default: '14'
-
+        default: '128'
+      release_tag:
+        description: Publication gate; unversioned runs are diagnostic and cannot be published
+        type: choice
+        default: unversioned
+        options: [unversioned, v1]
+      qualification_index:
+        description: Deterministic execution-order index; V1 qualification uses 1, then 2, then 3
+        type: choice
+        default: '1'
+        options: ['1', '2', '3']
+      publish_run_ids:
+        description: For publish-v1, exactly three successful V1 sweep run IDs
+        type: string
+        default: ''
+      refresh_run_id:
+        description: For refresh-v1, source publication workflow run ID
+        type: string
+        default: ''
+      refresh_digest:
+        description: For refresh-v1, exact publication dataset SHA-256
+        type: string
+        default: ''
 concurrency:
-  group: cx-sweep-${{ github.ref }}-${{ inputs.backend }}-${{ inputs.deepep_v2 }}-${{ inputs.only_sku }}
+  group: cx-${{ inputs.operation }}-${{ github.ref }}-${{ inputs.release_tag }}-${{ inputs.backend }}-${{ inputs.only_sku }}
   cancel-in-progress: false
 
 jobs:
   # ---- setup: resolve the suites into the shard matrix (the "pending jobs" node) ----
   setup:
+    if: ${{ inputs.operation == 'sweep' || inputs.operation == 'probe-precision' }}
     runs-on: ubuntu-latest
     outputs:
       matrix: ${{ steps.gen.outputs.matrix }}
       n: ${{ steps.gen.outputs.n }}
+      max_parallel: ${{ steps.gen.outputs.max_parallel }}
     steps:
       - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v5.0.0
-        with: { clean: true }
-      - run: pip install --quiet pyyaml
+        with: { clean: true, persist-credentials: false }
+      - name: Install matrix dependencies
+        run: python3 -m pip install --quiet PyYAML==6.0.2
       - id: gen
         working-directory: experimental/CollectiveX
+        env:
+          INPUT_BACKEND: ${{ inputs.backend }}
+          INPUT_OPERATION: ${{ inputs.operation }}
+          INPUT_SUITES: ${{ inputs.suites }}
+          INPUT_ONLY_SKU: ${{ inputs.only_sku }}
+          INPUT_MIN_NODES: ${{ inputs.min_nodes }}
+          INPUT_MAX_NODES: ${{ inputs.max_nodes }}
+          INPUT_MAX_CASES: ${{ inputs.max_cases }}
+          INPUT_QUALIFICATION_INDEX: ${{ inputs.qualification_index }}
+          CX_QUALIFICATION_INDEX: ${{ inputs.qualification_index }}
+          COLLECTIVEX_SOURCE_SHA: ${{ github.sha }}
+          COLLECTIVEX_ARTIFACT_NAME: cxunsupported-${{ github.run_id }}-${{ github.run_attempt }}
+          COLLECTIVEX_EXECUTION_ID: ${{ github.run_id }}_${{ github.run_attempt }}_unsupported
         run: |
           set -euo pipefail
-          ov=""; [ "${{ inputs.backend }}" != "deepep" ] && ov="--backend ${{ inputs.backend }}"
-          v2=""; [ "${{ inputs.deepep_v2 }}" = "true" ] && v2="--deepep-v2"
-          os=""; [ -n "${{ inputs.only_sku }}" ] && os="--only-sku ${{ inputs.only_sku }}"
-          # full matrix (with cases) -> artifact for the cells; slim (no cases) -> the strategy output.
-          python3 sweep_matrix.py --suites "${{ inputs.suites }}" --max-cases "${{ inputs.max_cases }}" $ov $v2 $os --out matrix_full.json >/dev/null
-          SLIM=$(python3 -c "import json;m=json.load(open('matrix_full.json'));print(json.dumps({'include':[{k:v for k,v in x.items() if k!='cases'} for x in m['include']]}))")
-          echo "matrix=$SLIM" >> "$GITHUB_OUTPUT"
-          echo "n=$(python3 -c "import json;print(len(json.load(open('matrix_full.json'))['include']))")" >> "$GITHUB_OUTPUT"
-          python3 -c "import json;m=json.load(open('matrix_full.json'));print('shard-cells:',len(m['include']),'cases:',sum(x['n'] for x in m['include']))"
+          if [ "$INPUT_OPERATION" = probe-precision ] && [ "${{ inputs.release_tag }}" != unversioned ]; then  # probes must stay unversioned
+            echo 'precision probes cannot carry a V1 release tag' >&2
+            exit 1
+          fi
+          if [ "$INPUT_OPERATION" = sweep ] && [ "${{ inputs.release_tag }}" = v1 ]; then  # V1 sweeps: three gates follow, each aborts the step
+            [[ "$INPUT_QUALIFICATION_INDEX" =~ ^[123]$ ]] || {
+              echo 'V1 sweeps require qualification_index 1, 2, or 3' >&2
+              exit 1
+            }
+            [ "$INPUT_BACKEND" = all ] && [ "$INPUT_SUITES" = all ] \
+              && [ -z "$INPUT_ONLY_SKU" ] && [ -z "$INPUT_MIN_NODES" ] \
+              && [ -z "$INPUT_MAX_NODES" ] && [ "$INPUT_MAX_CASES" = 128 ] || {
+                echo 'V1 sweeps require the exact unfiltered full matrix' >&2
+                exit 1
+              }
+            python3 - <<'PY'
+          import capability
+
+          if capability.provisional_precision_targets():
+              raise SystemExit("V1 sweeps require every precision capability cell to be resolved")
+          PY
+          fi
+          if [ "$INPUT_OPERATION" = probe-precision ]; then  # probe plan comes from tests/probe_precision.py
+            args=(--workflow-plan --backend "$INPUT_BACKEND" --out matrix_full.json)
+            [ -n "$INPUT_ONLY_SKU" ] && args+=(--only-sku "$INPUT_ONLY_SKU")
+            python3 tests/probe_precision.py "${args[@]}"
+          else  # sweep plan comes from sweep_matrix.py; optional filters are appended only when set
+            args=(--suites "$INPUT_SUITES" --max-cases "$INPUT_MAX_CASES")
+            case "$INPUT_BACKEND" in
+              all) args+=(--backends all) ;;
+              *) args+=(--backend "$INPUT_BACKEND") ;;
+            esac
+            [ -n "$INPUT_ONLY_SKU" ] && args+=(--only-sku "$INPUT_ONLY_SKU")
+            [ -n "$INPUT_MIN_NODES" ] && args+=(--min-nodes "$INPUT_MIN_NODES")
+            [ -n "$INPUT_MAX_NODES" ] && args+=(--max-nodes "$INPUT_MAX_NODES")
+            python3 sweep_matrix.py "${args[@]}" --out matrix_full.json >/dev/null  # stdout discarded; both branches write matrix_full.json
+          fi
+          python3 artifact_safety.py matrix_full.json
+          # Capture each value on its own line: a failing $(...) nested inside `echo "..."` is hidden from set -e.
+          SLIM=$(python3 -c "import json;m=json.load(open('matrix_full.json'));print(json.dumps({'include':[{k:v for k,v in x.items() if k!='case_ids'} for x in m['include']]}))")
+          n=$(python3 -c "import json;print(len(json.load(open('matrix_full.json'))['include']))")
+          source_backends=$(python3 -c "import json;m=json.load(open('matrix_full.json'));print(' '.join(sorted({x['backend'] for x in m['include']} & {'deepep-v2','deepep-hybrid'})))")
+          # An empty or zero-weight matrix falls back to weight 1; the operation is read from the environment, not spliced into the code.
+          max_parallel=$(python3 -c "import json,os;m=json.load(open('matrix_full.json'));w=max((x['execution_weight'] for x in m['include']),default=1);b=64 if os.environ['INPUT_OPERATION']=='probe-precision' else 4096;print(max(1,min(10,b//max(1,w))))")
+          printf '%s\n' "matrix=$SLIM" "n=$n" "source_backends=$source_backends" "max_parallel=$max_parallel" >> "$GITHUB_OUTPUT"
+          unsupported_n=0  # only sweeps count unsupported requested cases; probes always report 0
+          if [ "$INPUT_OPERATION" = sweep ]; then
+            unsupported_n=$(python3 -c "import json;m=json.load(open('matrix_full.json'));print(sum(x['disposition']=='unsupported' for x in m['requested_cases']))")
+          fi
+          echo "unsupported_n=$unsupported_n" >> "$GITHUB_OUTPUT"
+          if [ "$unsupported_n" -gt 0 ]; then  # write the unsupported outcome files that later steps validate and upload
+            python3 sweep_matrix.py --emit-unsupported-from matrix_full.json \
+              --out-dir unsupported
+          fi
+          python3 -c "import json;m=json.load(open('matrix_full.json'));print('execution-cells:',len(m['include']))"
+      - name: Prepare pinned backend source archive  # runs only when the matrix contains deepep-v2 and/or deepep-hybrid cells
+        if: ${{ steps.gen.outputs.source_backends != '' }}
+        working-directory: experimental/CollectiveX
+        env:
+          SOURCE_BACKENDS: ${{ steps.gen.outputs.source_backends }}
+          COLLECTIVEX_EXECUTION_ID: ${{ github.run_id }}_${{ github.run_attempt }}_sources
+        run: |
+          set -euo pipefail
+          source runtime/common.sh
+          work="$RUNNER_TEMP/collectivex-backend-sources"
+          archive="$RUNNER_TEMP/collectivex-backend-sources.tar"
+          rm -rf -- "$work" "$archive"
+          umask 077  # everything created below is owner-only
+          mkdir -m 700 "$work"
+          mkdir -p "$work/experimental/CollectiveX"
+          read -r -a backends <<< "$SOURCE_BACKENDS"  # space-separated backend names -> array
+          [ "${#backends[@]}" -gt 0 ]  # aborts the step (set -e) if the list turned out empty
+          for backend in "${backends[@]}"; do
+            cx_prepare_backend_source "$work" "$backend"
+          done
+          cx_cleanup_private_logs 0  # NOTE(review): presumably defined in runtime/common.sh; semantics of the 0 argument not visible here
+          tar --sort=name --mtime='@1' --owner=0 --group=0 --numeric-owner \
+            -C "$work/experimental/CollectiveX" -cf "$archive" .cx_sources
+          sha256sum "$archive"  # prints the archive digest to the job log
+          rm -rf -- "$work"
+      - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+        if: ${{ steps.gen.outputs.source_backends != '' }}
+        with:
+          name: cxbackend-sources-${{ github.run_id }}-${{ github.run_attempt }}
+          path: ${{ runner.temp }}/collectivex-backend-sources.tar
+          if-no-files-found: error
+          retention-days: ${{ inputs.release_tag == 'v1' && 90 || 3 }}
       - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
         with:
-          name: cxsweep-matrix-${{ github.run_id }}
+          name: cxsweep-matrix-${{ github.run_id }}-${{ github.run_attempt }}
           path: experimental/CollectiveX/matrix_full.json
           if-no-files-found: error
+          retention-days: ${{ inputs.release_tag == 'v1' && 90 || 3 }}
+      - name: Create V1 release marker
+        if: ${{ inputs.operation == 'sweep' && inputs.release_tag == 'v1' }}
+        env:
+          EXPECTED_MATRIX_SHA256: f1ca85f9689922b90edd5767b9ff2a902f6b896f32f68b2ca086dde3fd2157d0
+          RUN_ID: ${{ github.run_id }}
+          RUN_ATTEMPT: ${{ github.run_attempt }}
+          SOURCE_SHA: ${{ github.sha }}
+          QUALIFICATION_INDEX: ${{ inputs.qualification_index }}
+        run: |
+          set -euo pipefail
+          destination="$RUNNER_TEMP/collectivex-release"
+          install -d -m 700 "$destination"
+          python3 - "$destination/release.json" <<'PY'
+          import hashlib
+          import json
+          import os
+          import pathlib
+          import sys
+
+          sys.path.insert(0, str(pathlib.Path("experimental/CollectiveX").resolve()))
+          import sweep_matrix
+
+          matrix = pathlib.Path("experimental/CollectiveX/matrix_full.json").read_bytes()
+          matrix_sha256 = hashlib.sha256(matrix).hexdigest()
+          if matrix_sha256 != os.environ["EXPECTED_MATRIX_SHA256"]:
+              raise SystemExit("V1 release tag requires the locked full matrix")
+          qualification_index = int(os.environ["QUALIFICATION_INDEX"])
+          execution_plan_sha256 = sweep_matrix.qualification_execution_plan_sha256(
+              json.loads(matrix), qualification_index
+          )
+          marker = {
+              "execution_plan_sha256": execution_plan_sha256,
+              "format": "collectivex.release-tag.v1",
+              "matrix_sha256": matrix_sha256,
+              "qualification_index": qualification_index,
+              "release_tag": "v1",
+              "run_attempt": os.environ["RUN_ATTEMPT"],
+              "run_id": os.environ["RUN_ID"],
+              "source_sha": os.environ["SOURCE_SHA"],
+          }
+          pathlib.Path(sys.argv[1]).write_text(
+              json.dumps(marker, sort_keys=True, separators=(",", ":")) + "\n"
+          )
+          PY
+          python3 experimental/CollectiveX/artifact_safety.py "$destination/release.json"
+      - name: Upload V1 release marker
+        if: ${{ inputs.operation == 'sweep' && inputs.release_tag == 'v1' }}
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+        with:
+          name: cxrelease-v1-${{ github.run_id }}-${{ github.run_attempt }}
+          path: ${{ runner.temp }}/collectivex-release/release.json
+          if-no-files-found: error
+          retention-days: 90
+      - name: Validate unsupported artifact safety
+        id: unsupported_safety  # the != '' guard stops fromJSON('') from raising when gen failed or was skipped before writing unsupported_n
+        if: ${{ always() && steps.gen.outputs.unsupported_n != '' && fromJSON(steps.gen.outputs.unsupported_n) > 0 }}
+        run: |
+          python3 experimental/CollectiveX/artifact_safety.py experimental/CollectiveX/unsupported/*.json
+      - name: Validate unsupported outcomes
+        id: unsupported_contracts  # runs only after the safety check above succeeded
+        if: ${{ always() && steps.gen.outputs.unsupported_n != '' && fromJSON(steps.gen.outputs.unsupported_n) > 0 && steps.unsupported_safety.outcome == 'success' }}
+        env:
+          COLLECTIVEX_ARTIFACT_NAME: cxunsupported-${{ github.run_id }}-${{ github.run_attempt }}
+          COLLECTIVEX_EXECUTION_ID: ${{ github.run_id }}_${{ github.run_attempt }}_unsupported
+        run: |
+          python3 experimental/CollectiveX/contracts.py validate-delivery \
+            --source experimental/CollectiveX/matrix_full.json \
+            --disposition unsupported \
+            experimental/CollectiveX/unsupported/*.json
+      - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+        if: ${{ always() && steps.gen.outputs.unsupported_n != '' && fromJSON(steps.gen.outputs.unsupported_n) > 0 && steps.unsupported_contracts.outcome == 'success' && steps.unsupported_safety.outcome == 'success' }}
+        with:
+          name: cxunsupported-${{ github.run_id }}-${{ github.run_attempt }}
+          path: experimental/CollectiveX/unsupported/*.json
+          if-no-files-found: error
+          retention-days: ${{ inputs.release_tag == 'v1' && 90 || 3 }}  # V1 artifacts are kept 90 days, diagnostic runs 3
 
   # ---- sweep: ONE matrix cell per shard (the parent job with child jobs) ----
   sweep:
     needs: setup
-    if: ${{ fromJSON(needs.setup.outputs.n) > 0 }}
+    if: ${{ (inputs.operation == 'sweep' || inputs.operation == 'probe-precision') && fromJSON(needs.setup.outputs.n) > 0 }}
     strategy:
       fail-fast: false
-      max-parallel: 10            # don't saturate the ~20-runner fleet; cells queue as slots free
+      max-parallel: ${{ fromJSON(needs.setup.outputs.max_parallel) }}
       matrix: ${{ fromJSON(needs.setup.outputs.matrix) }}
-    # h200 label spans two clusters; pin to the validated dgxc pool (mirrors collectivex-experimental).
-    runs-on: ${{ matrix.sku == 'h200' && 'h200-dgxc' || matrix.sku }}
+    runs-on: ${{ matrix.sku }}
     timeout-minutes: 350
     env:
       CX_BENCH: ${{ matrix.backend }}
-      CX_DEEPEP_V2: ${{ matrix.deepep_v2 && '1' || '' }}
       CX_NODES: ${{ matrix.nodes }}
-      CX_SHARD_FILE: results/.shard_${{ matrix.id }}.json
+      CX_GPUS_PER_NODE: ${{ matrix.gpus_per_node }}
+      CX_SCALE_UP_DOMAIN: ${{ matrix.scale_up_domain }}
+      CX_SHARD_FILE: .shards/${{ matrix.id }}.json
+      CX_SHARD_SKU: ${{ matrix.sku }}
+      CX_PRECISION_PROBE: ${{ inputs.operation == 'probe-precision' && '1' || '0' }}
+      COLLECTIVEX_CANONICAL_GHA: '1'
       COLLECTIVEX_SOURCE_SHA: ${{ github.sha }}
-      CX_NODELIST: ${{ matrix.sku == 'mi355x' && 'mia1-p01-g10,mia1-p01-g15' || '' }}
-      CX_STAGE_DIR: ${{ matrix.sku == 'gb200' && '/mnt/lustre01/users-public/sa-shared/cx-stage' || '' }}
+      COLLECTIVEX_ARTIFACT_NAME: ${{ inputs.operation == 'probe-precision' && format('cxprobe-{0}-{1}-{2}', matrix.id, github.run_id, github.run_attempt) || format('cxshard-{0}-{1}-{2}', matrix.id, github.run_id, github.run_attempt) }}
+      # Consolidated shards run one bounded build-group in one Slurm allocation, so
+      # the launcher's default 45-min --time is too short. 300 min covers a cold
+      # compute-node image import plus the shard. The allocation releases early
+      # when the shard finishes, so short shards don't waste it.
+      CX_TIME: ${{ inputs.operation == 'probe-precision' && '90' || '300' }}
+      COLLECTIVEX_EXECUTION_ID: ${{ github.run_id }}_${{ github.run_attempt }}_${{ matrix.id }}
+      CX_QUALIFICATION_INDEX: ${{ inputs.qualification_index }}
+      CX_JOB_ROOT: /tmp/inferencex-collectivex-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.id }}
+      CX_SOURCE_ROOT: /tmp/inferencex-collectivex-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.id }}/source
+      HOME: /tmp/inferencex-collectivex-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.id }}/home
     steps:
-      - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v5.0.0
-        with: { clean: true }
+      - name: Prepare isolated source  # prunes stale job roots, creates this job's private root under /tmp, and fetches the exact commit into it
+        id: source
+        env:
+          COLLECTIVEX_REPOSITORY: ${{ github.repository }}
+        run: |
+          set -euo pipefail
+          python3 - <<'PY'
+          import os
+          import re
+          import shutil
+          import stat
+          import time
+
+          pattern = re.compile(r"inferencex-collectivex-[0-9]+-[0-9]+-[A-Za-z0-9._-]+")
+          cutoff = time.time() - 86400  # only entries last modified more than 24 h ago are candidates
+          for entry in os.scandir("/tmp"):
+              if not pattern.fullmatch(entry.name):
+                  continue
+              try:
+                  metadata = entry.stat(follow_symlinks=False)
+              except FileNotFoundError:
+                  continue
+              if (
+                  not stat.S_ISDIR(metadata.st_mode)
+                  or metadata.st_uid != os.getuid()
+                  or stat.S_IMODE(metadata.st_mode) != 0o700
+                  or metadata.st_mtime >= cutoff
+              ):
+                  continue  # keep anything that is not an old, 0700, real directory owned by this user
+              marked = False
+              for marker_name in ("cleanup-safe", "cleanup-unsafe"):
+                  try:
+                      marker = os.stat(
+                          os.path.join(entry.path, marker_name), follow_symlinks=False
+                      )
+                  except FileNotFoundError:
+                      continue
+                  marked = (
+                      stat.S_ISREG(marker.st_mode)
+                      and marker.st_uid == os.getuid()
+                      and stat.S_IMODE(marker.st_mode) == 0o600
+                  )
+                  if marked:
+                      break
+              if marked:
+                  shutil.rmtree(entry.path)  # deleted only when one marker is a 0600 regular file owned by this user
+          PY
+          [[ "$CX_JOB_ROOT" =~ ^/tmp/inferencex-collectivex-[0-9]+-[0-9]+-[A-Za-z0-9._-]+$ ]] \
+            || { echo "CollectiveX isolated root is invalid" >&2; exit 1; }
+          [ "$CX_SOURCE_ROOT" = "$CX_JOB_ROOT/source" ] \
+            || { echo "CollectiveX source root is invalid" >&2; exit 1; }
+          if [ -e "$CX_JOB_ROOT" ] || [ -L "$CX_JOB_ROOT" ]; then  # never reuse a pre-existing path, including a dangling symlink
+            echo "CollectiveX isolated root already exists" >&2
+            exit 1
+          fi
+          umask 077
+          mkdir -m 700 -- "$CX_JOB_ROOT"
+          trap 'rc=$?; [ "$rc" = 0 ] || rm -rf -- "$CX_JOB_ROOT"; exit "$rc"' EXIT  # if this step fails from here on, remove the partial root
+          mkdir -m 700 -- "$HOME" "$CX_JOB_ROOT/control" "$CX_JOB_ROOT/artifact" "$CX_SOURCE_ROOT"
+          : > "$CX_JOB_ROOT/cleanup-safe"  # marker file that the later cleanup steps require before deleting this root
+          if ! {
+            GIT_CONFIG_NOSYSTEM=1 GIT_CONFIG_GLOBAL=/dev/null git init -q "$CX_SOURCE_ROOT"
+            GIT_CONFIG_NOSYSTEM=1 GIT_CONFIG_GLOBAL=/dev/null \
+              git -C "$CX_SOURCE_ROOT" remote add origin \
+                "https://github.com/${COLLECTIVEX_REPOSITORY}.git"
+            GIT_CONFIG_NOSYSTEM=1 GIT_CONFIG_GLOBAL=/dev/null \
+              git -C "$CX_SOURCE_ROOT" -c credential.helper= -c protocol.version=2 \
+                fetch -q --no-tags --depth=1 origin "$COLLECTIVEX_SOURCE_SHA"
+            GIT_CONFIG_NOSYSTEM=1 GIT_CONFIG_GLOBAL=/dev/null \
+              git -C "$CX_SOURCE_ROOT" -c advice.detachedHead=false \
+                checkout -q --detach FETCH_HEAD
+            [ "$(git -C "$CX_SOURCE_ROOT" rev-parse HEAD)" = "$COLLECTIVEX_SOURCE_SHA" ]  # set -e is off inside an if-condition, so this final check decides the group's status
+          } </dev/null >/dev/null 2>&1; then  # all git output is discarded; only the generic message below is reported
+            echo "CollectiveX source preparation failed" >&2
+            exit 1
+          fi
+          [ "$(stat -c '%a' "$CX_JOB_ROOT")" = 700 ] \
+            || { echo "CollectiveX isolated root has unsafe permissions" >&2; exit 1; }
+          echo 'prepared=true' >> "$GITHUB_OUTPUT"  # later always() steps run only when this output is 'true'
+          trap - EXIT  # success: keep the prepared root for the following steps
       - uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
         with:
-          name: cxsweep-matrix-${{ github.run_id }}
-          path: experimental/CollectiveX
-      - name: Extract this shard's cases (stdlib only — no runner deps)
-        working-directory: experimental/CollectiveX
+          name: cxsweep-matrix-${{ github.run_id }}-${{ github.run_attempt }}
+          path: ${{ env.CX_JOB_ROOT }}/control
+      - name: Download pinned backend source archive
+        if: ${{ matrix.backend == 'deepep-v2' || matrix.backend == 'deepep-hybrid' }}
+        uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
+        with:
+          name: cxbackend-sources-${{ github.run_id }}-${{ github.run_attempt }}
+          path: ${{ env.CX_JOB_ROOT }}/control
+      - name: Install pinned backend source seed
+        if: ${{ matrix.backend == 'deepep-v2' || matrix.backend == 'deepep-hybrid' }}
+        env:
+          EXPECTED_BACKEND: ${{ matrix.backend }}
         run: |
           set -euo pipefail
-          python3 -c "
-          import json
-          m=json.load(open('matrix_full.json'))
-          s=[x for x in m['include'] if x['id']=='${{ matrix.id }}']
-          assert s, 'shard ${{ matrix.id }} not in matrix'
-          s=s[0]
-          json.dump({'id':s['id'],'sku':s['sku'],'backend':s['backend'],'nodes':s['nodes'],'deepep_v2':s['deepep_v2'],'cases':s['cases']}, open('results/.shard_${{ matrix.id }}.json','w'))
-          print('shard ${{ matrix.id }}:', len(s['cases']), 'cases')
-          "
-      - name: Sweep shard ${{ matrix.id }} (${{ matrix.n }} cases, one allocation)
+          archive="$CX_JOB_ROOT/control/collectivex-backend-sources.tar"
+          destination="$CX_SOURCE_ROOT/experimental/CollectiveX"
+          seed_root="$destination/.cx_sources"  # guards below end in an explicit ||: set -e ignores a failing non-final test of an && list
+          [ -f "$archive" ] && [ ! -e "$seed_root" ] && [ ! -L "$seed_root" ] || { echo "CollectiveX backend source seed precondition failed" >&2; exit 1; }
+          source "$destination/runtime/common.sh"
+          source_path="$(cx_backend_source_path "$seed_root" "$EXPECTED_BACKEND")"
+          source_basename="${source_path#"$seed_root/"}"  # must be a single non-empty path component directly under seed_root
+          [ -n "$source_basename" ] \
+            && [ "$source_path" = "$seed_root/$source_basename" ] \
+            && [[ "$source_basename" != */* ]] || { echo "CollectiveX backend source path is invalid" >&2; exit 1; }
+          python3 "$destination/source_archive.py" \
+            "$archive" "$destination" "$source_basename"
+          cx_backend_source_is_valid "$EXPECTED_BACKEND" "$source_path"  # a non-zero status aborts the step via set -e
+          printf 'CX_BACKEND_SOURCE_SEED_ROOT=%s\n' "$seed_root" >> "$GITHUB_ENV"  # exported to the remaining steps of this job
+      - name: Extract and validate this execution control
+        run: |
+          set -euo pipefail
+          cd "$CX_SOURCE_ROOT/experimental/CollectiveX" 2>/dev/null \
+            || { echo "CollectiveX source is unavailable" >&2; exit 1; }
+          if [ '${{ inputs.operation }}' = probe-precision ]; then
+            python3 tests/probe_precision.py \
+              --extract-from "$CX_JOB_ROOT/control/matrix_full.json" \
+              --probe-id '${{ matrix.id }}' \
+              --expect-sku '${{ matrix.sku }}' \
+              --expect-backend '${{ matrix.backend }}' \
+              --expect-nodes '${{ matrix.nodes }}' \
+              --out '${{ env.CX_SHARD_FILE }}'
+          else
+            python3 sweep_matrix.py \
+              --extract-from "$CX_JOB_ROOT/control/matrix_full.json" \
+              --shard-id '${{ matrix.id }}' \
+              --expect-sku '${{ matrix.sku }}' \
+              --expect-backend '${{ matrix.backend }}' \
+              --expect-nodes '${{ matrix.nodes }}' \
+              --out '${{ env.CX_SHARD_FILE }}' >/dev/null
+          fi
+      - name: Execute ${{ inputs.operation }} cell ${{ matrix.id }}
+        id: sweep_shard
         env:
-          RUNNER_NAME: ${{ runner.name }}
-        run: bash "experimental/CollectiveX/launchers/launch_${RUNNER_NAME%%_*}.sh"
+          COLLECTIVEX_OPERATOR_CONFIG_CONTENT: ${{ secrets.COLLECTIVEX_OPERATOR_CONFIG_V1 }}
+          COLLECTIVEX_OPERATOR_CONFIG_REQUIRED: '1'
+        run: |
+          set -euo pipefail
+          umask 077
+          : > "$CX_JOB_ROOT/cleanup-unsafe"
+          rm -f -- "$CX_JOB_ROOT/cleanup-safe"
+          cd "$CX_SOURCE_ROOT" 2>/dev/null \
+            || { echo "CollectiveX source is unavailable" >&2; exit 1; }
+          bash "experimental/CollectiveX/launchers/launch_${{ matrix.launcher }}.sh"
+      - name: Confirm allocation cleanup  # passes only if cleanup-safe exists and cleanup-unsafe is absent under the job root
+        id: allocation_cleanup  # NOTE(review): the execute step swaps cleanup-safe for cleanup-unsafe before launching; presumably the launcher restores it - confirm
+        if: ${{ always() && steps.source.outputs.prepared == 'true' }}
+        run: |
+          set -euo pipefail
+          [ -f "$CX_JOB_ROOT/cleanup-safe" ] && [ ! -e "$CX_JOB_ROOT/cleanup-unsafe" ] \
+            || { echo "CollectiveX allocation cleanup was not confirmed" >&2; exit 1; }
+      - name: Validate shard artifact safety  # sweeps are checked even if the execute step failed; probes only if it succeeded
+        id: artifact_safety
+        if: ${{ always() && steps.allocation_cleanup.outcome == 'success' && (inputs.operation != 'probe-precision' || steps.sweep_shard.outcome == 'success') }}
+        run: |
+          cd "$CX_SOURCE_ROOT" 2>/dev/null \
+            || { echo "CollectiveX source is unavailable" >&2; exit 1; }
+          python3 experimental/CollectiveX/artifact_safety.py experimental/CollectiveX/results/*.json
+      - name: Validate shard delivery completeness  # probes: manifest validation; sweeps: delivery validated against this cell's shard file
+        id: delivery_contracts
+        if: ${{ always() && steps.artifact_safety.outcome == 'success' }}
+        run: |
+          cd "$CX_SOURCE_ROOT" 2>/dev/null \
+            || { echo "CollectiveX source is unavailable" >&2; exit 1; }
+          if [ '${{ inputs.operation }}' = probe-precision ]; then
+            python3 experimental/CollectiveX/tests/probe_precision.py \
+              --validate-manifest experimental/CollectiveX/results/*.json
+          else
+            python3 experimental/CollectiveX/contracts.py validate-delivery \
+              --source "experimental/CollectiveX/${CX_SHARD_FILE}" \
+              experimental/CollectiveX/results/*.json
+          fi
       - name: Shard summary
-        if: always()
-        run: python3 experimental/CollectiveX/summarize.py --results-dir experimental/CollectiveX/results --markdown >> "$GITHUB_STEP_SUMMARY" || true
+        if: ${{ inputs.operation == 'sweep' && always() && steps.artifact_safety.outcome == 'success' && steps.delivery_contracts.outcome == 'success' }}
+        run: |
+          cd "$CX_SOURCE_ROOT" 2>/dev/null \
+            || { echo "CollectiveX source is unavailable" >&2; exit 1; }
+          python3 experimental/CollectiveX/summarize.py \
+            --results-dir experimental/CollectiveX/results --markdown >> "$GITHUB_STEP_SUMMARY" || true
+      - name: Stage shard artifact
+        id: stage_artifact
+        if: ${{ always() && steps.delivery_contracts.outcome == 'success' && steps.artifact_safety.outcome == 'success' }}
+        run: |
+          set -euo pipefail
+          cd "$CX_SOURCE_ROOT" 2>/dev/null \
+            || { echo "CollectiveX source is unavailable" >&2; exit 1; }
+          cp -- experimental/CollectiveX/results/*.json "$CX_JOB_ROOT/artifact/"
       - name: Upload shard results
-        if: always()
+        id: upload_artifact
+        if: always() && steps.stage_artifact.outcome == 'success' && steps.delivery_contracts.outcome == 'success' && steps.artifact_safety.outcome == 'success'
         uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
         with:
-          name: cxshard-${{ matrix.id }}-${{ github.run_id }}
-          path: experimental/CollectiveX/results/*.json   # glob skips the hidden .shard_*.json
-          if-no-files-found: warn
-
-  # ---- aggregate: collect every shard into ONE ndjson (the "result aggregator at the end") ----
-  aggregate:
-    needs: sweep
-    if: always()
+          name: ${{ inputs.operation == 'probe-precision' && format('cxprobe-{0}-{1}-{2}', matrix.id, github.run_id, github.run_attempt) || format('cxshard-{0}-{1}-{2}', matrix.id, github.run_id, github.run_attempt) }}
+          path: |
+            ${{ env.CX_JOB_ROOT }}/artifact/*.json
+          if-no-files-found: error
+          retention-days: ${{ inputs.release_tag == 'v1' && 90 || 3 }}
+      - name: Cleanup isolated workspace  # deletes the per-job root, but only after re-validating its path and the cleanup markers
+        if: ${{ always() && steps.source.outputs.prepared == 'true' }}
+        run: |
+          set -euo pipefail
+          [[ "$CX_JOB_ROOT" =~ ^/tmp/inferencex-collectivex-[0-9]+-[0-9]+-[A-Za-z0-9._-]+$ ]] \
+            || { echo "CollectiveX cleanup root is invalid" >&2; exit 1; }
+          [ "$CX_SOURCE_ROOT" = "$CX_JOB_ROOT/source" ] \
+            || { echo "CollectiveX cleanup source is invalid" >&2; exit 1; }
+          [ -f "$CX_JOB_ROOT/cleanup-safe" ] && [ ! -e "$CX_JOB_ROOT/cleanup-unsafe" ] \
+            || { echo "CollectiveX allocation cleanup was not confirmed; retaining isolated files" >&2; exit 1; }
+          if [ '${{ steps.sweep_shard.outcome }}' = success ] \
+              && [ '${{ steps.allocation_cleanup.outcome }}' = success ] \
+              && [ '${{ steps.artifact_safety.outcome }}' = success ] \
+              && [ '${{ steps.delivery_contracts.outcome }}' = success ] \
+              && [ '${{ steps.stage_artifact.outcome }}' = success ] \
+              && [ '${{ steps.upload_artifact.outcome }}' = success ] \
+              && [ -f "$CX_SOURCE_ROOT/experimental/CollectiveX/runtime/common.sh" ]; then
+            # shellcheck source=/dev/null
+            if source "$CX_SOURCE_ROOT/experimental/CollectiveX/runtime/common.sh" \
+                >/dev/null 2>&1; then
+              cx_cleanup_private_logs 0  # reached only when every earlier step of the cell succeeded; NOTE(review): helper presumably comes from common.sh
+            fi
+          fi
+          rm -rf -- "$CX_JOB_ROOT"  # reached only when the path and marker checks above all passed
+
+  probe-summary:  # Validates the precision-probe artifacts of this run attempt and re-uploads them as one bundle.
+    needs: [setup, sweep]
+    if: ${{ always() && inputs.operation == 'probe-precision' && needs.sweep.result == 'success' }}  # probe-precision runs only, and only after a successful sweep job
     runs-on: ubuntu-latest
+    timeout-minutes: 15
     steps:
       - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v5.0.0
-        with: { clean: true }
+        with: { clean: true, persist-credentials: false }
       - uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
         with:
-          pattern: cxshard-*-${{ github.run_id }}
-          path: _shards
+          name: cxsweep-matrix-${{ github.run_id }}-${{ github.run_attempt }}  # control artifact; the validation step reads matrix_full.json from it
+          path: ${{ runner.temp }}/collectivex-probes/control
+      - uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
+        with:
+          pattern: cxprobe-*-${{ github.run_id }}-${{ github.run_attempt }}  # probe artifacts of this run attempt only
           merge-multiple: true
-      - name: Aggregate shards -> one ndjson
-        working-directory: experimental/CollectiveX
+          path: ${{ runner.temp }}/collectivex-probes/results
+      - name: Validate exact precision probe coverage
+        run: |  # Fails when no manifest was downloaded, then runs the safety check and the probe validator.
+          set -euo pipefail
+          plan="$RUNNER_TEMP/collectivex-probes/control/matrix_full.json"
+          shopt -s nullglob  # an unmatched glob expands to nothing, so the emptiness check below is reliable
+          manifests=("$RUNNER_TEMP"/collectivex-probes/results/*.json)
+          [ "${#manifests[@]}" -gt 0 ] || {
+            echo 'precision probe artifacts are empty' >&2
+            exit 1
+          }
+          python3 experimental/CollectiveX/artifact_safety.py "$plan" "${manifests[@]}"
+          python3 experimental/CollectiveX/tests/probe_precision.py \
+            --validate-bundle "$plan" --validate-manifest "${manifests[@]}"
+      - name: Upload validated precision probe bundle
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+        with:
+          name: cxprecision-probes-${{ github.run_id }}-${{ github.run_attempt }}
+          path: ${{ runner.temp }}/collectivex-probes  # uploads both the control and results subdirectories
+          if-no-files-found: error
+          retention-days: 30
+
+  publish:  # publish-v1: revalidate three tagged sweep runs, ingest and promote them, then upload one dataset file.
+    if: ${{ inputs.operation == 'publish-v1' }}
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    env:
+      GH_TOKEN: ${{ github.token }}  # read by the gh CLI calls in the steps below
+      RUN_IDS: ${{ inputs.publish_run_ids }}
+    steps:
+      - name: Verify source runs
+        id: runs
+        env:
+          REPOSITORY: ${{ github.repository }}
+        run: |  # Requires exactly three unique run IDs that are successful first-attempt runs of this workflow on one SHA.
+          set -euo pipefail
+          IFS=',' read -r -a run_ids <<< "$RUN_IDS"  # comma-separated list taken from the publish_run_ids input
+          [ "${#run_ids[@]}" -eq 3 ] || {
+            echo 'publish_run_ids must contain exactly three IDs' >&2
+            exit 1
+          }
+          [ "$(printf '%s\n' "${run_ids[@]}" | sort -u | wc -l)" -eq 3 ] || {
+            echo 'publish_run_ids must be unique' >&2
+            exit 1
+          }
+
+          : > "$RUNNER_TEMP/collectivex-runs.tsv"  # truncate; one "run_id<TAB>attempt<TAB>sha" row is appended per verified run
+          source_sha=''  # set from the first run; every later run must match it
+          for run_id in "${run_ids[@]}"; do
+            [[ "$run_id" =~ ^[1-9][0-9]*$ ]] || {
+              echo 'publish_run_ids contains a non-decimal ID' >&2
+              exit 1
+            }
+            metadata=$(gh api "repos/$REPOSITORY/actions/runs/$run_id")
+            name=$(jq -r '.name' <<< "$metadata")
+            path=$(jq -r '.path' <<< "$metadata")
+            branch=$(jq -r '.head_branch' <<< "$metadata")
+            status=$(jq -r '.status' <<< "$metadata")
+            conclusion=$(jq -r '.conclusion' <<< "$metadata")
+            sha=$(jq -r '.head_sha' <<< "$metadata")
+            attempt=$(jq -r '.run_attempt' <<< "$metadata")
+            [ "$name" = 'CollectiveX Sweep' ] \
+              && [ "$path" = '.github/workflows/collectivex-sweep.yml' ] \
+              && [ "$branch" = 'collectivex' ] \
+              && [ "$status" = 'completed' ] \
+              && [ "$conclusion" = 'success' ] \
+              && [ "$attempt" = 1 ] \
+              && [[ "$sha" =~ ^[0-9a-f]{40}$ ]] || {
+                echo "run $run_id is not an eligible first-attempt V1 sweep" >&2
+                exit 1
+              }
+            if [ -z "$source_sha" ]; then
+              source_sha="$sha"
+            else
+              [ "$sha" = "$source_sha" ] || {
+                echo 'source runs do not share one source SHA' >&2
+                exit 1
+              }
+            fi
+            printf '%s\t%s\t%s\n' "$run_id" "$attempt" "$sha" \
+              >> "$RUNNER_TEMP/collectivex-runs.tsv"
+          done
+          echo "source_sha=$source_sha" >> "$GITHUB_OUTPUT"  # consumed as the checkout ref in the next step
+
+      - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v5.0.0
+        with:
+          ref: ${{ steps.runs.outputs.source_sha }}  # publisher code comes from the commit shared by the three source runs
+          clean: true
+          persist-credentials: false
+
+      - name: Install publisher dependencies
+        run: python3 -m pip install --quiet -r experimental/CollectiveX/requirements.txt
+
+      - name: Build promoted publication
+        env:
+          REPOSITORY: ${{ github.repository }}
         run: |
           set -euo pipefail
-          tag="${{ inputs.backend }}${{ inputs.deepep_v2 && '-v2' || '' }}"
-          python3 aggregate_results.py --in-dir ../../_shards --out "results/aggregate/collectivex_${tag}_${{ github.run_id }}.ndjson"
+          store="$RUNNER_TEMP/collectivex-publisher"
+          downloads="$RUNNER_TEMP/collectivex-downloads"
+          output="$RUNNER_TEMP/collectivex-publication"
+          umask 027  # files created below get no permissions for "other"
+          mkdir -m 750 "$store" "$downloads" "$output"
+          : > "$RUNNER_TEMP/collectivex-bundles.txt"  # one bundle ID per ingested run
+          : > "$RUNNER_TEMP/collectivex-qualification-indices.txt"  # one qualification index per validated marker
+
+          while IFS=$'\t' read -r run_id attempt source_sha; do
+            run_dir="$downloads/$run_id"
+            mkdir -m 750 "$run_dir"
+            gh run download "$run_id" --repo "$REPOSITORY" --dir "$run_dir"  # no --name, so each artifact lands in its own subdirectory
+            matrix="$run_dir/cxsweep-matrix-$run_id-$attempt/matrix_full.json"
+            marker="$run_dir/cxrelease-v1-$run_id-$attempt/release.json"
+            [ -f "$matrix" ] || {
+              echo "run $run_id is missing its exact matrix artifact" >&2
+              exit 1
+            }
+            [ -f "$marker" ] || {
+              echo "run $run_id is not tagged for V1 publication" >&2
+              exit 1
+            }
+            matrix_sha=$(sha256sum "$matrix" | cut -d' ' -f1)  # keep the digest column only
+            qualification_index=$(jq -r '.qualification_index' "$marker")  # used below before the marker is validated; a non-integer makes int() fail and aborts the step
+            execution_plan_sha=$(
+              PYTHONPATH=experimental/CollectiveX python3 -c \
+                'import json,sys,sweep_matrix as s; print(s.qualification_execution_plan_sha256(json.load(open(sys.argv[1], encoding="utf-8")), int(sys.argv[2])))' \
+                "$matrix" "$qualification_index"
+            )
+            jq -e \
+              --arg run_id "$run_id" \
+              --arg attempt "$attempt" \
+              --arg source_sha "$source_sha" \
+              --arg matrix_sha "$matrix_sha" \
+              --arg execution_plan_sha "$execution_plan_sha" \
+              'keys == ["execution_plan_sha256","format","matrix_sha256","qualification_index","release_tag","run_attempt","run_id","source_sha"]
+               and .format == "collectivex.release-tag.v1"
+               and .release_tag == "v1"
+               and .run_id == $run_id
+               and .run_attempt == $attempt
+               and .source_sha == $source_sha
+               and (.qualification_index == 1 or .qualification_index == 2 or .qualification_index == 3)
+               and .matrix_sha256 == $matrix_sha
+               and .execution_plan_sha256 == $execution_plan_sha' \
+              "$marker" >/dev/null || {
+                echo "run $run_id has an invalid V1 release marker" >&2
+                exit 1
+              }  # --arg binds strings, so run_id and run_attempt must be stored as JSON strings in the marker
+            jq -r '.qualification_index' "$marker" \
+              >> "$RUNNER_TEMP/collectivex-qualification-indices.txt"
+
+            mapfile -t artifacts < <(
+              find "$run_dir" -mindepth 1 -maxdepth 1 -type d \
+                \( -name "cxshard-*-$run_id-$attempt" \
+                -o -name "cxunsupported-$run_id-$attempt" \) -print | sort
+            )
+            [ "${#artifacts[@]}" -gt 0 ] || {
+              echo "run $run_id has no result artifacts" >&2
+              exit 1
+            }
+            artifact_args=()
+            for artifact in "${artifacts[@]}"; do
+              artifact_args+=(--artifact "$artifact")
+            done
+            result=$(
+              python3 experimental/CollectiveX/publisher.py --store-root "$store" ingest \
+                --matrix "$matrix" \
+                "${artifact_args[@]}" \
+                --repository "$REPOSITORY" \
+                --run-id "$run_id" \
+                --run-attempt "$attempt" \
+                --qualification-index "$qualification_index" \
+                --source-sha "$source_sha"
+            )
+            bundle_id=$(jq -er '.bundle_id' <<< "$result")  # -e makes a missing or null bundle_id fail the step
+            printf '%s\n' "$bundle_id" >> "$RUNNER_TEMP/collectivex-bundles.txt"
+          done < "$RUNNER_TEMP/collectivex-runs.tsv"
+
+          [ "$(sort -n "$RUNNER_TEMP/collectivex-qualification-indices.txt" | tr '\n' ' ')" = '1 2 3 ' ] || {  # trailing space: tr also replaces the final newline
+            echo 'source runs must contain qualification indices 1, 2, and 3 exactly once' >&2
+            exit 1
+          }
+
+          mapfile -t bundle_ids < "$RUNNER_TEMP/collectivex-bundles.txt"
+          promote_args=()
+          for bundle_id in "${bundle_ids[@]}"; do
+            promote_args+=(--bundle "$bundle_id")
+          done
+          result=$(
+            python3 experimental/CollectiveX/publisher.py --store-root "$store" promote \
+              "${promote_args[@]}"
+          )
+          dataset_id=$(jq -er '.dataset_sha256' <<< "$result")
+          dataset="$store/public/datasets/$dataset_id/dataset.json"
+          [ -f "$dataset" ] || {
+            echo 'publisher did not install the promoted dataset' >&2
+            exit 1
+          }
+          publication="$output/collectivex_public_v1_$dataset_id.ndjson"
+          cp -- "$dataset" "$publication"  # the uploaded artifact is a copy of dataset.json under the .ndjson name
+          python3 experimental/CollectiveX/artifact_safety.py "$publication"
+          python3 experimental/CollectiveX/publisher.py --store-root "$store" verify \
+            --channel dev-latest "${promote_args[@]}"
+          [ "$(stat -c %s "$publication")" -le 33554432 ] || {  # 33554432 bytes = 32 MiB
+            echo 'publication exceeds the 32 MiB frontend limit' >&2
+            exit 1
+          }
+          sha256sum "$publication"
           {
-            echo "## CollectiveX sweep aggregate (${tag})"
-            echo '```'
-            wc -l results/aggregate/*.ndjson 2>/dev/null || echo "no ndjson"
-            echo '```'
+            echo '## CollectiveX V1 publication'
+            echo
+            echo "Dataset: \`$dataset_id\`"
+            echo
+            echo 'Source runs:'
+            sed 's/^/- `/' "$RUNNER_TEMP/collectivex-runs.tsv" | sed 's/$/`/'  # each TSV row becomes a Markdown bullet in backticks
           } >> "$GITHUB_STEP_SUMMARY"
-      - name: Upload aggregate
+
+      - name: Upload JIT publication artifact
         uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
         with:
-          name: cxsweep-aggregate-${{ inputs.backend }}${{ inputs.deepep_v2 && '-v2' || '' }}-${{ github.run_id }}
-          path: experimental/CollectiveX/results/aggregate/*.ndjson
-          if-no-files-found: warn
+          name: cxpublication-v1-${{ github.run_id }}-${{ github.run_attempt }}  # same name pattern the refresh job downloads by
+          path: ${{ runner.temp }}/collectivex-publication/*.ndjson
+          if-no-files-found: error
+          retention-days: 90
+
+  refresh:  # refresh-v1: re-download one published dataset, revalidate it byte-for-byte, and upload it again.
+    if: ${{ inputs.operation == 'refresh-v1' }}
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    env:
+      GH_TOKEN: ${{ github.token }}  # read by the gh CLI calls below
+      SOURCE_RUN_ID: ${{ inputs.refresh_run_id }}
+      EXPECTED_DIGEST: ${{ inputs.refresh_digest }}
+    steps:
+      - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v5.0.0
+        with: { clean: true, persist-credentials: false }
+
+      - name: Revalidate promoted publication
+        env:
+          REPOSITORY: ${{ github.repository }}
+        run: |  # Checks inputs, the source run, then the file's name, digest, size, record shape, and promotion state.
+          set -euo pipefail
+          [[ "$SOURCE_RUN_ID" =~ ^[1-9][0-9]*$ ]] || {
+            echo 'refresh_run_id must be a decimal workflow run ID' >&2
+            exit 1
+          }
+          [[ "$EXPECTED_DIGEST" =~ ^[0-9a-f]{64}$ ]] || {
+            echo 'refresh_digest must be a SHA-256 digest' >&2
+            exit 1
+          }
+          metadata=$(gh api "repos/$REPOSITORY/actions/runs/$SOURCE_RUN_ID")
+          path=$(jq -r '.path' <<< "$metadata")
+          branch=$(jq -r '.head_branch' <<< "$metadata")
+          status=$(jq -r '.status' <<< "$metadata")
+          conclusion=$(jq -r '.conclusion' <<< "$metadata")
+          attempt=$(jq -r '.run_attempt' <<< "$metadata")
+          [ "$path" = '.github/workflows/collectivex-sweep.yml' ] \
+            && [ "$branch" = 'collectivex' ] \
+            && [ "$status" = completed ] \
+            && [ "$conclusion" = success ] \
+            && [[ "$attempt" =~ ^[1-9][0-9]*$ ]] || {
+              echo 'refresh source is not an eligible CollectiveX workflow run' >&2
+              exit 1
+            }
+
+          source="$RUNNER_TEMP/collectivex-refresh-source"
+          output="$RUNNER_TEMP/collectivex-publication"
+          install -d -m 750 "$source" "$output"
+          gh run download "$SOURCE_RUN_ID" --repo "$REPOSITORY" \
+            --name "cxpublication-v1-$SOURCE_RUN_ID-$attempt" --dir "$source"
+          mapfile -t files < <(find "$source" -mindepth 1 -maxdepth 1 -type f -print)
+          [ "${#files[@]}" -eq 1 ] || {
+            echo 'refresh source must contain exactly one root file' >&2
+            exit 1
+          }
+          expected_name="collectivex_public_v1_${EXPECTED_DIGEST}.ndjson"
+          [ "$(basename "${files[0]}")" = "$expected_name" ] || {
+            echo 'refresh source filename differs from its requested digest' >&2
+            exit 1
+          }
+          [ "$(sha256sum "${files[0]}" | cut -d' ' -f1)" = "$EXPECTED_DIGEST" ] || {
+            echo 'refresh source bytes differ from their requested digest' >&2
+            exit 1
+          }
+          [ "$(stat -c %s "${files[0]}")" -le 33554432 ] || {  # 33554432 bytes = 32 MiB
+            echo 'refresh source exceeds the 32 MiB frontend limit' >&2
+            exit 1
+          }
+          python3 - "${files[0]}" <<'PY'
+          import json
+          import pathlib
+          import sys
+
+          path = pathlib.Path(sys.argv[1])
+          text = path.read_text(encoding="utf-8")
+          if not text.endswith("\n") or text.count("\n") != 1:
+              raise SystemExit("publication must contain exactly one NDJSON record")
+          document = json.loads(text)  # may be any JSON value, so check the type before calling .get
+          if not isinstance(document, dict) or document.get("format") != "collectivex.public.v1":
+              raise SystemExit("publication format is invalid")
+          if not isinstance(document.get("promotion"), dict) or document["promotion"].get("status") != "promoted":
+              raise SystemExit("publication is not promoted")
+          PY
+          python3 experimental/CollectiveX/artifact_safety.py "${files[0]}"
+          cp -- "${files[0]}" "$output/$expected_name"
+
+      - name: Upload refreshed JIT publication artifact
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+        with:
+          name: cxpublication-v1-${{ github.run_id }}-${{ github.run_attempt }}
+          path: ${{ runner.temp }}/collectivex-publication/*.ndjson
+          if-no-files-found: error
+          retention-days: 90
diff --git a/AGENTS.md b/AGENTS.md
index 6a385a5c1..b49f547e2 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -2,6 +2,8 @@
 
 Guidance for AI agents working with InferenceX.
 
+> **Temporary language override:** Until the user explicitly removes this override, ignore all other repository instructions that require Simplified Chinese or bilingual output. Work in English only: do not create or update `_zh.md` files, Chinese documentation, translations, or Chinese text in commits, pull requests, issues, reviews, or comments. Leave existing Chinese content untouched.
+
 > **Mandatory reading: [`CONTRIBUTING.md`](CONTRIBUTING.md)** — read it before opening or reviewing any PR. It covers the full PR review flow, the CODEOWNER sign-off process, the `/reuse-sweep-run` merge path, post-merge responsibilities, and critical cluster rules (e.g. never leaving root-owned files on AMD runners).
 
 > **PR and GitHub-issue titles & descriptions must be bilingual — include a Simplified Chinese version in addition to English.** Title format: `<English title> / <中文标题>`. In the PR/issue body, follow the English content with its Chinese translation (e.g. a `## 中文说明` section mirroring the summary; don't translate code blocks, logs, or stack traces — summarize around them). **PR comments must include a Chinese translation too** — conversation comments, review summaries, and inline review comments alike: short comments as a single `<English> / <中文>` line, longer ones with the Chinese translation as a trailing paragraph (`中文：...`). Exception: the CODEOWNER sign-off template stays English-verbatim (the sign-off verifier triggers on its exact phrase); bot-generated comments follow their own workflow templates. This applies to every PR and every issue, matching the bilingual docs rule in Code Conventions.
diff --git a/experimental/CollectiveX/.gitignore b/experimental/CollectiveX/.gitignore
new file mode 100644
index 000000000..56b307215
--- /dev/null
+++ b/experimental/CollectiveX/.gitignore
@@ -0,0 +1,15 @@
+__pycache__/
+*.pyc
+results/
+unsupported/
+.shards/
+.cx_workloads/
+.cx_backend/
+/matrix_full.json
+gpucore.*
+
+# Local plans and infrastructure inventory.
+goal.md
+notes.md
+configs/platforms.yaml
+private-infra.md
diff --git a/experimental/CollectiveX/README.md b/experimental/CollectiveX/README.md
new file mode 100644
index 000000000..b58a5af61
--- /dev/null
+++ b/experimental/CollectiveX/README.md
@@ -0,0 +1,168 @@
+# CollectiveX
+
+<div align="center">
+
+**English** | [中文](./README_zh.md)
+
+</div>
+
+CollectiveX is an experimental MoE expert-parallel communication benchmark. It measures dispatch,
+combine, and paired roundtrip latency across EP libraries and accelerator systems.
+
+> Publication hold: historical schema 3-5 data is diagnostic. No current dataset is approved for
+> rankings, recommendations, or regression baselines.
+
+> Development status: the sections below document the implemented BF16 pre-V1 baseline, not the
+> final V1 qualification contract. Precision profiles, full point-level publication, and branch-only
+> publication are under active implementation; case counts and digests are not frozen.
+
+## Implemented Pre-V1 Execution Profile
+
+Every scheduled case is BF16 with backend-tuned resources and packed placement. The explicit mode
+selects one of two contracts:
+
+- Normal mode uses `layout-and-dispatch-v1`, rank-deduplicated token payloads, and activation-only
+  combine. Uniform core coverage and one Zipf sensitivity remain; EPLB is measured only as the Zipf
+  remedy.
+- Low-latency mode uses `expert-packed-weighted-combine-v1`, token-expert payloads, and gate-weighted
+  combine through genuine DeepEP V1 or UCCL low-latency APIs. It is decode-only and never shares a
+  ranking cohort with normal mode. Other backends are explicitly unsupported for this suite.
+
+Both modes use `fixed-512-v1`: 64 trials x 8 timed iterations with 32 synchronized full roundtrip
+warmups before each measured component at every trial/point. Roundtrip is measured first; each
+iteration takes the cross-rank maximum before nearest-rank p50/p90/p95/p99, and roundtrip p99 is the
+headline latency. A stdlib integer counter produces byte-identical routing and gate weights.
+
+The implemented baseline matrix covers H100, H200, B200, B300, GB200, GB300, MI325X, and MI355X.
+It requests
+608 cases / 1,600 token points: 364 runnable cases / 940 points, emitted as 58 executable workflow
+shards/allocation cells, plus 244 explicit unsupported cases / 660 points. `sweep_matrix.py`
+materializes every token ladder and rejects missing, stale, malformed, or altered shard controls.
+Shards are emitted round-robin by SKU so the bounded GHA matrix uses every runner pool early.
+
+| Systems | EP8 | EP16 |
+|---|---|---|
+| H100/H200/B200/B300 | 1x8 NVLink, scale-up | 2x8 NVLink + RDMA, scale-out |
+| MI325X/MI355X | 1x8 XGMI, scale-up | 2x8 XGMI + RDMA, scale-out |
+| GB200/GB300 | 2x4 MNNVL, scale-up | 4x4 MNNVL, scale-up |
+
+Physical host count does not determine scope: both GB topologies stay inside one 72-GPU MNNVL
+scale-up domain.
+
+| Backend | Current scope |
+|---|---|
+| DeepEP V1 | Image-pinned `deep_ep.Buffer`: normal and native low-latency APIs; upstream v1.2.1 on x86 and the image's GB fork on arm64 |
+| DeepEP V2 | PR #605 `ElasticBuffer` plus #630: LSA for scale-up and GIN for x86 EP16 scale-out; source/SASS-bound reproducible JIT |
+| DeepEP Hybrid | Pinned `HybridEPBuffer`: x86 EP16 multi-domain RDMA/DOCA; GB EP8/EP16 in one MNNVL communication domain |
+| UCCL | Pinned 0.1.1 wheel and wrapper with normal and native low-latency APIs on Hopper; Blackwell is explicitly unsupported |
+| NCCL/RCCL A2A | Portable rank-deduplicated payload plus expert/routing-metadata reference |
+| MoRI | EP8 uses MI325X AsyncLL or MI355X IntraNode; EP16 pins InterNodeV1 over 2x8 XGMI + RDMA |
+
+FlashInfer is outside v1 because its exercised EP path failed intermittently at runtime. It is not
+misreported as a platform capability limitation and can return after a stable pinned path is proven.
+
+DeepEP V2 means the `ElasticBuffer` implementation introduced by
+[DeepEP PR #605](https://github.com/deepseek-ai/DeepEP/pull/605), not a newer legacy `Buffer` build.
+The pinned source is the minimal upstream [PR #630](https://github.com/deepseek-ai/DeepEP/pull/630)
+follow-up: its parent is the #605 merge tree and its only source change fixes pure scale-up
+initialization when GIN is unavailable. Scale-up cases request NCCL Device API LSA and fail closed
+unless the realized LSA team covers the full EP world. x86 EP16 scale-out cases instead require the
+hybrid path with GIN, two logical scale-out domains represented by two physical RDMA ranks, and eight
+scale-up ranks per domain; GB EP16 remains MNNVL scale-up and therefore uses LSA. The isolated build
+records the API, source, loaded libraries, generated JIT source, executable SASS, and raw CUBIN
+diagnostics. The current H100 runner pool is explicitly unsupported for V2 because NCCL 2.30.4
+reports that its EP8 communicator lacks Device API symmetric-memory support; re-enabling that pool
+requires an all-rank CUDA P2P/LSA-capable runtime. Other NVIDIA SKUs remain unvalidated until their
+GPU outcomes pass the native correctness and publication gates.
+
+Axes not implemented in this baseline include cached-layout `[cl]`, runtime-visible `[rv]`, FP8,
+quantized combine,
+extra routing distributions, activation profiles, uneven allocation, placement permutations, model
+envelopes, and scaling studies.
+
+## Workflow And Artifacts
+
+`.github/workflows/collectivex-sweep.yml` generates a public-SKU matrix, extracts a strict ignored
+`.shards/<id>.json` control, executes one allocation per shard, privacy-checks result JSON, and uploads
+raw GitHub artifacts. Runs default to `release_tag=unversioned` and are diagnostic-only. A V1 run
+must explicitly select `release_tag=v1`; setup then requires the locked full-matrix digest and emits
+a run/attempt/source-bound `cxrelease-v1-*` marker. Partial and filtered runs cannot receive it.
+
+The main-registered `.github/workflows/collectivex-sweep.yml` provides `sweep`, `probe-precision`,
+`publish-v1`, and `refresh-v1` operations, so its branch revision can be dispatched with `--ref collectivex` without
+a standalone branch-only workflow. Publication accepts exactly three successful first-attempt tagged
+sweep run IDs from one source SHA, revalidates their metadata and release markers, and runs
+`publisher.py` in a disposable runner-local workspace. Refresh revalidates and reuploads only the
+exact content-addressed sanitized dataset. Raw artifacts and the private publisher workspace are
+never exposed to the frontend.
+
+There is no results server, attached store, Vercel storage, GCP, Neon, managed database, or managed
+object store. With the existing server-side `GITHUB_TOKEN`, the frontend discovers the latest
+successful version-scoped publication workflow, downloads its NDJSON artifact just in time, verifies
+the ZIP layout, UTF-8/NDJSON shape, schema, promotion state, and SHA-256, then serves versioned channel
+and immutable dataset URLs. The UI keeps an explicit benchmark-version selector; benchmark V2 and
+later versions must use separate release tags and publication identities. The full validation contract is
+in [docs/methodology.md](docs/methodology.md).
+
+## Runner Configuration
+
+Runner-local Slurm and storage values use a strict per-SKU JSON document at
+`$XDG_CONFIG_HOME/inferencex/collectivex.json` or `COLLECTIVEX_OPERATOR_CONFIG`. The mode-0600,
+same-owner, non-symlink file is outside the checkout and never uploaded. Unknown runners, fields,
+duplicate keys, endpoint literals, unsafe paths, and non-JSON input fail closed; configuration is
+never evaluated as shell. GHA passes encrypted `COLLECTIVEX_OPERATOR_CONFIG_V1` content only to the
+launcher, which validates it, exports the selected SKU's allowlisted values, and deletes the
+temporary copy before allocation. Required JSON fields are:
+
+| SKU | Required fields |
+|---|---|
+| `h100-dgxc`, `b200-dgxc` | `partition`, `account`, `squash_dir`, `stage_dir` |
+| `h200-dgxc` | `partition`, `squash_dir`, `stage_dir` |
+| `b300` | `partition`, `account`, `squash_dir`, `stage_dir` |
+| `gb200` | `partition`, `account`, ordered `storage_roots` |
+| `gb300` | `partition`, `account`, `squash_dir`, `stage_dir`, `enroot_cache_path` |
+| `mi325x`, `mi355x` | `partition`, `squash_dir`, `stage_dir` |
+
+Every selected non-MNNVL EP16 placement additionally requires `socket_ifname` and `rdma_devices`
+for its operator-approved fabric; optional
+`ib_gid_index` and `rdma_service_level` are also allowlisted. CollectiveX does not heuristically
+select a management route or HCA. After allocation, every non-MNNVL scale-out node must prove that
+all configured interfaces and active HCA ports exist before backend setup. Scale-up and MNNVL jobs
+clear these overrides. Scale-out NCCL/RCCL is pinned to `IB` with exact-match HCA selectors so a
+socket fallback fails instead of being mislabeled as RDMA.
+
+`stage_dir` is a pre-existing, runner-owned, non-symlinked base outside the checkout and workflow
+workspace. It is not group- or world-writable and is visible at the same path on the runner and every
+allocated node. Jobs create only a marked mode-0700 execution child, prove cross-node read/write
+visibility, and remove that exact child after allocation teardown; they never mount the runner
+checkout or create a stage beneath image storage on AMD.
+
+Before import, each Docker Hub tag is resolved with bounded registry requests and must match its
+pinned digest; digest-qualified overrides are rejected. Enroot imports use a fixed filesystem epoch
+and a versioned, registry-digest-bound cache key. Every mounted squash is freshly hashed. The
+verified registry digest and local squash hash are both recorded. Image-provided DeepEP is checked
+against exact wheel and installed-file fingerprints; source-built backends use pinned commits and
+runtime-verified GPU targets. DeepEP V2's mode-0700 cluster-local build cache is keyed by a versioned
+build recipe, verified image, architecture, upstream trees, and dependency pins; only its fixed
+`/cx-cache` mount reaches the container, and it never enters result artifacts.
+Pinned V2 and Hybrid sources are fetched once per workflow. Each job validates the complete archive,
+extracts only its exact backend root, permits only contained relative leaf symlinks to archived
+regular files, and revalidates the Git tree and submodule pins before staging.
+Compute containers receive an explicit environment allowlist. Private host, address, device, NIC,
+credential, workspace, and path data stays in encrypted config, ignored operator notes, or bounded
+mode-0600 runner logs; it is never uploaded.
+
+## Local Checks
+
+```bash
+uv run --with-requirements experimental/CollectiveX/requirements.txt \
+  python -m unittest discover experimental/CollectiveX/tests -p 'test_*.py'
+uv run --with-requirements experimental/CollectiveX/requirements.txt \
+  python experimental/CollectiveX/sweep_matrix.py --backends all --out /tmp/cx-matrix.json >/dev/null
+uv run --with-requirements experimental/CollectiveX/requirements.txt \
+  python experimental/CollectiveX/publisher.py --store-root "$COLLECTIVEX_STORE_ROOT" verify
+bash -n experimental/CollectiveX/runtime/*.sh experimental/CollectiveX/launchers/*.sh
+```
+
+Core paths are `capability.py`, `configs/`, `contracts.py`, `schemas/`, `sweep_matrix.py`,
+`publisher.py`, `runtime/`, `launchers/`, and `tests/`.
diff --git a/experimental/CollectiveX/README_zh.md b/experimental/CollectiveX/README_zh.md
new file mode 100644
index 000000000..fb32369b4
--- /dev/null
+++ b/experimental/CollectiveX/README_zh.md
@@ -0,0 +1,154 @@
+# CollectiveX
+
+<div align="center">
+
+[English](./README.md) | **中文**
+
+</div>
+
+CollectiveX 是实验性的 MoE 专家并行通信基准，用于测量不同 EP 库和加速器系统的
+dispatch、combine 及配对 roundtrip 延迟。
+
+> 发布暂停：历史 schema 3-5 数据仅供诊断。目前没有数据集获准用于排名、推荐或回归基线。
+
+## v1 执行配置
+
+每个调度用例均采用 BF16、后端调优资源和 packed placement。显式指定的 mode 选择以下两个
+契约之一：
+
+- Normal mode 使用 `layout-and-dispatch-v1`、按 rank 去重的 token payload 和 activation-only
+  combine。核心覆盖使用 uniform routing，并保留一个 Zipf 敏感性场景；EPLB 只作为 Zipf
+  的修正方案测量。
+- Low-latency mode 使用 `expert-packed-weighted-combine-v1`、token-expert payload 和
+  gate-weighted combine，并且只调用真正的 DeepEP V1 或 UCCL low-latency API。该模式仅覆盖
+  解码，绝不与 normal mode 共用排名 cohort。其他后端在此 suite 中均显式标为 unsupported。
+
+两种模式统一使用 `fixed-512-v1`：64 trials x 8 timed iterations；每个 trial/point 的每个被测
+组件前执行 32 次同步完整 roundtrip warmup。先测 roundtrip；每次 iteration 先取跨 rank 最大值，
+再按 nearest-rank 计算 p50/p90/p95/p99，主要延迟指标为 roundtrip p99。stdlib 整数计数器
+生成逐字节一致的 routing 和 gate weights。
+
+规范矩阵覆盖 H100、H200、B200、B300、GB200、GB300、MI325X 和 MI355X。矩阵请求
+608 个 cases / 1,600 个 token points：364 个可运行 cases / 940 个 points，并形成 58 个可执行
+workflow shards/allocation cells；另有 244 个显式 unsupported cases / 660 个 points。
+`sweep_matrix.py` 物化每个 token ladder，并拒绝缺失、过期、格式错误或被修改的 shard controls。
+分片按 SKU round-robin 发出，使受限的 GHA matrix 尽早使用所有 runner pools。
+
+| 系统 | EP8 | EP16 |
+|---|---|---|
+| H100/H200/B200/B300 | 1x8 NVLink，scale-up | 2x8 NVLink + RDMA，scale-out |
+| MI325X/MI355X | 1x8 XGMI，scale-up | 2x8 XGMI + RDMA，scale-out |
+| GB200/GB300 | 2x4 MNNVL，scale-up | 4x4 MNNVL，scale-up |
+
+物理主机数量不能决定通信范围：两种 GB 拓扑都位于同一个 72-GPU MNNVL scale-up domain 内。
+
+| 后端 | 当前范围 |
+|---|---|
+| DeepEP V1 | 镜像固定的 `deep_ep.Buffer`：提供 normal 和原生 low-latency API；x86 使用 upstream v1.2.1，arm64 使用镜像内 GB fork |
+| DeepEP V2 | PR #605 `ElasticBuffer` 加 #630：scale-up 使用 LSA，x86 EP16 scale-out 使用 GIN；JIT 可复现并绑定 source/SASS |
+| DeepEP Hybrid | 固定的 `HybridEPBuffer`：x86 EP16 使用 multi-domain RDMA/DOCA；GB EP8/EP16 位于同一个 MNNVL communication domain |
+| UCCL | Hopper 上固定的 0.1.1 wheel 和 wrapper，提供 normal 和原生 low-latency API；Blackwell 显式标为 unsupported |
+| NCCL/RCCL A2A | 可移植的 rank-deduplicated payload 加 expert/routing-metadata reference |
+| MoRI | EP8 使用 MI325X AsyncLL 或 MI355X IntraNode；EP16 固定使用 2x8 XGMI + RDMA 上的 InterNodeV1 |
+
+FlashInfer 不在 v1 范围内，因为已测试的 EP path 在运行时存在间歇性失败。该问题不会被误报为
+平台能力限制；在证明有稳定的固定实现后可重新加入。
+
+DeepEP V2 指 [DeepEP PR #605](https://github.com/deepseek-ai/DeepEP/pull/605) 引入的
+`ElasticBuffer` 实现，而不是较新的 legacy `Buffer` build。固定 source 使用最小化的 upstream
+[PR #630](https://github.com/deepseek-ai/DeepEP/pull/630) 后续修复：其 parent 是 #605 merge
+tree，唯一 source 变更是修复 GIN 不可用时的纯 scale-up 初始化。Scale-up cases 请求 NCCL
+Device API LSA；若实际建立的 LSA team 未覆盖整个 EP world，则直接失败。x86 EP16 scale-out
+cases 必须使用启用 GIN 的 hybrid path，其精确拓扑为两个逻辑 scale-out domains（由两个物理
+RDMA ranks 表示）、每个 domain 八个 scale-up ranks；GB EP16 仍是 MNNVL scale-up，因此继续
+使用 LSA。隔离构建会记录 API、source、loaded libraries、generated JIT source、executable
+SASS 与 raw CUBIN diagnostics。当前 H100 runner pool 被明确标记为 V2 unsupported，因为 NCCL
+2.30.4 报告其 EP8 communicator 不具备 Device API symmetric-memory 支持；只有该 pool 的
+runtime 支持全 rank CUDA P2P/LSA 后才能重新启用。其他 NVIDIA SKU 在 GPU outcome 通过 native
+correctness 和 publication gates 前仍为 unvalidated。
+
+v1 已移除的轴包括 cached-layout `[cl]`、runtime-visible `[rv]`、FP8、quantized combine、
+额外 routing distributions、activation profiles、uneven allocation、placement permutations、
+model envelopes 和 scaling studies。
+
+## Workflow 与产物
+
+`.github/workflows/collectivex-sweep.yml` 生成 public-SKU matrix，提取严格且被忽略的
+`.shards/<id>.json` control，每个 shard 执行一次 allocation，对结果 JSON 做隐私检查并上传
+raw GitHub artifacts。运行默认使用 `release_tag=unversioned`，仅供诊断。V1 运行必须显式选择
+`release_tag=v1`；setup 随后要求固定的完整 matrix digest，并生成绑定 run、attempt 与 source 的
+`cxrelease-v1-*` marker。Partial 或 filtered 运行无法获得该 marker。
+
+`.github/workflows/collectivex-publish.yml` 是显式的 V1 gate。它只接受三个来自同一 source SHA、
+成功且带 V1 tag 的 sweep run IDs，重新校验 GitHub metadata 与 release markers，并在 runner 本地
+可丢弃工作区中执行 `publisher.py`。只有完整通过 promotion、隐私检查和内容寻址的数据集才会以
+`cxpublication-v1-*` 上传；raw artifacts 与 publisher private workspace 永不暴露给前端。
+
+系统不需要 results server、attached store、Vercel storage、GCP、Neon、managed database 或
+managed object store。前端使用已有的 server-side `GITHUB_TOKEN`，即时发现最新成功且按版本隔离
+的 publication workflow，下载其 NDJSON artifact，校验 ZIP layout、UTF-8/NDJSON 结构、schema、
+promotion 状态与 SHA-256，随后提供带版本的 channel URL 和 immutable dataset URL。UI 保留显式
+benchmark-version selector；V2 及后续版本必须使用独立的 release tag 与 publication identity。
+完整 validation contract 见 [docs/methodology_zh.md](docs/methodology_zh.md)。
+
+## Runner 配置
+
+Runner 本地 Slurm 和 storage 值使用严格的 per-SKU JSON 文档，路径为
+`$XDG_CONFIG_HOME/inferencex/collectivex.json` 或 `COLLECTIVEX_OPERATOR_CONFIG`。该 mode-0600、
+同 owner、非 symlink 文件位于 checkout 外且永不上传。未知 runners、fields、duplicate keys、
+endpoint literals、unsafe paths 和非 JSON 输入均 fail closed；配置绝不作为 shell 执行。GHA
+仅将加密的 `COLLECTIVEX_OPERATOR_CONFIG_V1` 内容传给 launcher；launcher 验证后只导出所选
+SKU 的 allowlisted values，并在 allocation 前删除临时副本。必需 JSON fields 如下：
+
+| SKU | 变量 |
+|---|---|
+| `h100-dgxc`, `b200-dgxc` | `partition`, `account`, `squash_dir`, `stage_dir` |
+| `h200-dgxc` | `partition`, `squash_dir`, `stage_dir` |
+| `b300` | `partition`, `account`, `squash_dir`, `stage_dir` |
+| `gb200` | `partition`, `account`, 有序 `storage_roots` |
+| `gb300` | `partition`, `account`, `squash_dir`, `stage_dir`, `enroot_cache_path` |
+| `mi325x`, `mi355x` | `partition`, `squash_dir`, `stage_dir` |
+
+每个已选中的非 MNNVL EP16 placement 还必须提供 `socket_ifname` 和 `rdma_devices`，用来指定
+operator 审核过的 fabric；还可配置 allowlisted
+`ib_gid_index` 与 `rdma_service_level`。CollectiveX 不会通过启发式规则选择 management route 或
+HCA。Allocation 完成后，每个非 MNNVL scale-out 节点都必须证明所有已配置 interface 与 active
+HCA port 存在，之后才允许初始化 backend。Scale-up 和 MNNVL job 会清除这些 overrides。
+Scale-out NCCL/RCCL 固定使用 `IB` 与精确匹配的 HCA selectors；如果无法使用 RDMA，job 会失败，
+而不会回退到 socket 后仍被错误标记为 RDMA。
+
+`stage_dir` 必须是 checkout 与 workflow workspace 外预创建且由 runner owner 持有的 base，
+不能经过 symlink，group 和 world 都不能写入，并且 runner 与所有 allocation 节点必须以相同路径
+访问。Job 只创建带 marker 的 mode-0700 execution child，验证跨节点读写可见性，并在
+allocation teardown 后只删除该 child；不会挂载 runner checkout，也不会在 AMD image storage
+下创建 stage。
+
+导入前，每个 Docker Hub tag 都通过有界 registry requests 解析，并且必须匹配固定 digest；拒绝
+digest-qualified overrides。Enroot imports 使用固定 filesystem epoch 和带版本、绑定 registry
+digest 的 cache key。每个已挂载 squash 都重新计算 hash，同时记录 verified registry digest 和
+local squash hash。镜像提供的 DeepEP 会按精确 wheel 和 installed-file fingerprints 检查；
+source-built backends 使用固定 commits 和 runtime-verified GPU targets。DeepEP V2 的 mode-0700
+cluster-local build cache 由版本化 build recipe、verified image、architecture、upstream
+trees 和 dependency pins 共同寻址；container 只看到固定的 `/cx-cache` mount，且该 cache 永不
+进入 result artifacts。
+固定的 V2 与 Hybrid source 在每个 workflow 中只获取一次。每个 job 都会验证完整 archive，仅
+提取自身精确 backend root，只允许指向 archive 内 regular file 的受限相对 leaf symlink，并在
+staging 前重新核对 Git tree 与 submodule pins。
+Compute containers 仅接收显式 environment allowlist。Private host、address、device、NIC、
+credential、workspace 和 path 数据只保留在加密配置、忽略的 operator notes 或有界 mode-0600
+runner logs 中，永不上传。
+
+## 本地检查
+
+```bash
+uv run --with-requirements experimental/CollectiveX/requirements.txt \
+  python -m unittest discover experimental/CollectiveX/tests -p 'test_*.py'
+uv run --with-requirements experimental/CollectiveX/requirements.txt \
+  python experimental/CollectiveX/sweep_matrix.py --backends all --out /tmp/cx-matrix.json >/dev/null
+uv run --with-requirements experimental/CollectiveX/requirements.txt \
+  python experimental/CollectiveX/publisher.py --store-root "$COLLECTIVEX_STORE_ROOT" verify
+bash -n experimental/CollectiveX/runtime/*.sh experimental/CollectiveX/launchers/*.sh
+```
+
+核心路径为 `capability.py`、`configs/`、`contracts.py`、`schemas/`、`sweep_matrix.py`、
+`publisher.py`、`runtime/`、`launchers/` 和 `tests/`。
diff --git a/experimental/CollectiveX/artifact_safety.py b/experimental/CollectiveX/artifact_safety.py
new file mode 100644
index 000000000..83d522fba
--- /dev/null
+++ b/experimental/CollectiveX/artifact_safety.py
@@ -0,0 +1,212 @@
+#!/usr/bin/env python3
+"""Fail-closed privacy check for CollectiveX public result documents."""
+from __future__ import annotations
+
+import argparse
+import ipaddress
+import json
+import os
+import re
+import stat
+
+
+# Normalized key names that assert_publication_safe rejects outright, at any nesting depth.
+SENSITIVE_FIELDS = frozenset({
+    "environment", "env", "host", "hostname", "uuid", "gpu_uuid", "device_uuid",
+    "pci_bus_id", "ip_address", "ip_addresses", "master_addr", "ssh", "ssh_target",
+    "nodelist", "node_list", "nic_guid", "ib_guid", "topology_matrix", "rdma_devices",
+    "user", "username", "password", "passwd", "secret", "token", "access_token",
+    "api_token", "auth_token", "api_key", "private_key", "credential", "credentials",
+    "address", "addresses", "ip", "ips",
+})
+# The same names with underscores removed, so a key such as "ipaddress" is caught as well.
+SENSITIVE_FIELDS_COMPACT = frozenset(item.replace("_", "") for item in SENSITIVE_FIELDS)
+# A normalized key ending in any of these suffixes is rejected whatever its prefix.
+SENSITIVE_FIELD_SUFFIXES = (
+    "_host", "_hostname", "_address", "_addresses", "_path", "_paths", "_ip", "_ips",
+    "_password", "_passwd", "_secret", "_token", "_credential", "_credentials",
+    "_uuid", "_guid", "_bus_id",
+)
+# (rule name, compiled regex) pairs. _sensitive_value_rule searches them in this order and
+# reports the name of the first pattern that matches anywhere in the string.
+SENSITIVE_VALUE_PATTERNS = (
+    ("private-path", re.compile(
+        r"(?<![A-Za-z0-9_.-])/(?:home|mnt|workspace|root|users|tmp|data|it-share|lustre|raid|nvme_home|scratch|gpfs|fsx)(?:/|$)",
+        re.I,
+    )),
+    ("ipv4-address", re.compile(r"(?<!\d)(?:\d{1,3}\.){3}\d{1,3}(?!\d)")),
+    ("pci-address", re.compile(r"\b[0-9a-f]{4}:[0-9a-f]{2}:[0-9a-f]{2}\.[0-7]\b", re.I)),
+    ("hardware-address", re.compile(
+        r"\b(?:[0-9a-f]{2}[:-]){5}(?:[0-9a-f]{2})\b|"
+        r"\b(?:[0-9a-f]{2}:){7}(?:[0-9a-f]{2})\b|\b0x[0-9a-f]{16}\b",
+        re.I,
+    )),
+    ("uuid", re.compile(
+        r"\b(?:GPU-|MIG-)?[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b",
+        re.I,
+    )),
+    ("ssh-target", re.compile(r"(?:ssh://|\bssh\s+[^\s/@]+@[^\s/]+)", re.I)),
+    ("host-identifier", re.compile(
+        r"\b(?:host(?:name)?|master[_-]?(?:addr|address)|node[_-]?list)\s*(?:=|:)\s*[^\s,;]+",
+        re.I,
+    )),
+    ("private-hostname", re.compile(
+        r"\b(?:[a-z0-9-]+\.)+(?:cluster|corp|internal|lan|local)\b|"
+        r"\b(?:compute|gpu|head|login|node|worker)[-_]?[0-9][a-z0-9_.-]*\b|"
+        r"\bdgx-[a-z0-9-]+-[0-9]+\b|\bip-(?:[0-9]{1,3}-){3}[0-9]{1,3}\b",
+        re.I,
+    )),
+    ("secret-token", re.compile(
+        r"(?:gh[pousr]_[A-Za-z0-9]{20,}|github_pat_[A-Za-z0-9_]{20,}|"
+        r"glpat-[A-Za-z0-9_-]{20,}|xox[baprs]-[A-Za-z0-9-]{20,}|"
+        r"(?:AKIA|ASIA)[0-9A-Z]{16}|AIza[0-9A-Za-z_-]{35}|"
+        r"(?:sk-(?:proj|svcacct)-[A-Za-z0-9_-]{20,}|sk-[A-Za-z0-9]{32,}|"
+        r"sk_(?:live|test)_[A-Za-z0-9]{20,}|hf_[A-Za-z0-9]{20,})|"
+        r"npm_[A-Za-z0-9]{20,}|"
+        r"pypi-[A-Za-z0-9_-]{20,}|dckr_pat_[A-Za-z0-9_-]{20,}|"
+        r"Bearer\s+[A-Za-z0-9._~+/-]{16,}|Basic\s+[A-Za-z0-9+/=]{16,}|"
+        r"eyJ[A-Za-z0-9_-]{8,}\.[A-Za-z0-9_-]{8,}\.[A-Za-z0-9_-]{8,}|"
+        r"-----BEGIN(?: [A-Z]+)? PRIVATE KEY-----)",
+        re.I,
+    )),
+    ("secret-assignment", re.compile(
+        r"\b(?:api[_-]?key|access[_-]?token|auth[_-]?token|client[_-]?secret|"
+        r"password|passwd|secret|accountkey)\s*(?:=|:)\s*[\"']?"
+        r"[A-Za-z0-9+/_=.~-]{8,}",
+        re.I,
+    )),
+)
+# Loose capture of hex/colon runs (optionally bracketed, optionally with a %zone suffix).
+# A capture only counts as IPv6 once ipaddress.ip_address() parses it as a version-6 address.
+IPV6_CANDIDATE = re.compile(
+    r"(?<![0-9A-Za-z])\[?([0-9A-Fa-f:]{2,}(?:%[0-9A-Za-z_.-]+)?)\]?"
+)
+# Rules skipped when _sensitive_value_rule is called with contextual=False, which
+# assert_publication_safe does for string values stored under a "ref" key.
+CONTEXTUAL_VALUE_RULES = frozenset({"ssh-target", "host-identifier", "private-hostname"})
+# Largest result file load_documents will read (64 MiB).
+MAX_INPUT_BYTES = 64 * 1024 * 1024
+
+
+class ArtifactSafetyError(ValueError):
+    """Raised when a result file is unusable or a document holds data barred from publication."""
+
+
+def _normalized_field(value: object) -> str:
+    """Return ``value`` as a lower snake_case key name.
+
+    The value is stringified and stripped, an underscore is inserted at acronym and
+    camelCase boundaries ("HTTPServer" -> "HTTP_Server", "gpuUUID" -> "gpu_UUID"),
+    hyphens become underscores, and the result is lower-cased.
+    """
+    acronym_split = re.sub(r"([A-Z]+)([A-Z][a-z])", r"\1_\2", str(value).strip())
+    camel_split = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", acronym_split)
+    return camel_split.replace("-", "_").lower()
+
+
+def _sensitive_value_rule(value: str, *, contextual: bool = True) -> str | None:
+    """Return the name of the first rule that flags ``value``, or None when it is clean.
+
+    The regex rules in SENSITIVE_VALUE_PATTERNS are tried in declaration order; rules
+    named in CONTEXTUAL_VALUE_RULES are skipped when ``contextual`` is false. If no
+    regex matches, each IPV6_CANDIDATE capture is parsed with ``ipaddress`` and a
+    version-6 address yields "ipv6-address".
+    """
+    for name, pattern in SENSITIVE_VALUE_PATTERNS:
+        if not contextual and name in CONTEXTUAL_VALUE_RULES:
+            continue
+        if pattern.search(value):
+            return name
+    for candidate in IPV6_CANDIDATE.findall(value):
+        # Drop any %zone suffix, which ipaddress would otherwise be asked to parse.
+        zone_free = candidate.partition("%")[0]
+        try:
+            parsed = ipaddress.ip_address(zone_free)
+        except ValueError:
+            continue
+        if parsed.version == 6:
+            return "ipv6-address"
+    return None
+
+
+def assert_publication_safe(docs: list[dict]) -> None:
+    """Raise ArtifactSafetyError if any document holds a forbidden key or string value.
+
+    Every entry of ``docs`` must be a dict. Mapping keys are normalized and compared
+    with the forbidden names and suffixes, then scanned with the value rules; string
+    leaves are scanned with the value rules. Other scalar types are not inspected.
+    Messages name the document index and rule only, never the offending text.
+    """
+    def scan(node, doc_index: int, parent_field: str | None = None) -> None:
+        if isinstance(node, str):
+            # Strings stored under a "ref" key skip the contextual host-style rules.
+            rule = _sensitive_value_rule(node, contextual=parent_field != "ref")
+            if rule:
+                raise ArtifactSafetyError(
+                    f"artifact safety: doc[{doc_index}] contains forbidden {rule} value"
+                )
+        elif isinstance(node, list):
+            # List items are judged under the key of the mapping that holds the list.
+            for item in node:
+                scan(item, doc_index, parent_field)
+        elif isinstance(node, dict):
+            for key, child in node.items():
+                field = _normalized_field(key)
+                forbidden_name = (
+                    field in SENSITIVE_FIELDS
+                    or field.replace("_", "") in SENSITIVE_FIELDS_COMPACT
+                    or field.endswith(SENSITIVE_FIELD_SUFFIXES)
+                )
+                if forbidden_name:
+                    raise ArtifactSafetyError(
+                        f"artifact safety: doc[{doc_index}] contains forbidden private field"
+                    )
+                # The raw key text is also scanned like a value (paths, addresses, tokens).
+                key_rule = _sensitive_value_rule(str(key))
+                if key_rule:
+                    raise ArtifactSafetyError(
+                        f"artifact safety: doc[{doc_index}] contains forbidden {key_rule} key"
+                    )
+                scan(child, doc_index, field)
+
+    for index, doc in enumerate(docs):
+        if not isinstance(doc, dict):
+            raise ArtifactSafetyError(f"artifact safety: doc[{index}] is not a JSON object")
+        scan(doc, index)
+
+
+def load_documents(paths: list[str]) -> list[dict]:
+    """Read result files into parsed JSON documents, failing closed on anything suspect.
+
+    Each path must be a non-empty regular file owned by the current user and no larger
+    than MAX_INPUT_BYTES. It is opened without following symlinks and re-checked with
+    ``fstat`` so a swap between ``lstat`` and ``open`` is detected. Files ending in
+    ``.ndjson`` contribute one document per non-blank line; any other file contributes
+    exactly one document.
+
+    JSON objects with duplicate keys are rejected. ``json`` keeps only the last value
+    of a repeated key, so an earlier private value would stay in the uploaded file
+    without ever reaching the scanner.
+
+    Returns the parsed documents in input order (not yet checked to be dicts).
+    Raises ArtifactSafetyError for unavailable, unreadable, malformed, too deeply
+    nested, or duplicate-key input, and when no document was found at all.
+    """
+    def unique_object(pairs: list[tuple[str, object]]) -> dict:
+        # object_pairs_hook: build the dict, refusing silently shadowed keys.
+        parsed = dict(pairs)
+        if len(parsed) != len(pairs):
+            raise ArtifactSafetyError("artifact safety: duplicate JSON object key")
+        return parsed
+
+    docs: list[dict] = []
+    for path in paths:
+        try:
+            metadata = os.lstat(path)
+        except OSError as exc:
+            raise ArtifactSafetyError("artifact safety: result file is unavailable") from exc
+        if (
+            not stat.S_ISREG(metadata.st_mode)
+            or metadata.st_uid != os.getuid()
+            or metadata.st_size <= 0
+            or metadata.st_size > MAX_INPUT_BYTES
+        ):
+            raise ArtifactSafetyError("artifact safety: result file is unavailable")
+        descriptor = -1
+        try:
+            descriptor = os.open(path, os.O_RDONLY | getattr(os, "O_NOFOLLOW", 0))
+            opened = os.fstat(descriptor)
+            if (
+                not stat.S_ISREG(opened.st_mode)
+                or (opened.st_dev, opened.st_ino, opened.st_size)
+                != (metadata.st_dev, metadata.st_ino, metadata.st_size)
+            ):
+                raise ArtifactSafetyError("artifact safety: result file changed during open")
+            with os.fdopen(descriptor, encoding="utf-8") as fh:
+                # The file object owns the descriptor now; skip the manual close below.
+                descriptor = -1
+                if path.endswith(".ndjson"):
+                    for line_number, line in enumerate(fh, 1):
+                        if not line.strip():
+                            continue
+                        try:
+                            docs.append(json.loads(line, object_pairs_hook=unique_object))
+                        except json.JSONDecodeError as exc:
+                            raise ArtifactSafetyError(
+                                f"artifact safety: malformed NDJSON at input line {line_number}"
+                            ) from exc
+                else:
+                    docs.append(json.load(fh, object_pairs_hook=unique_object))
+        except json.JSONDecodeError as exc:
+            raise ArtifactSafetyError("artifact safety: malformed JSON input") from exc
+        except RecursionError as exc:
+            # Deeply nested input would otherwise escape as a raw traceback.
+            raise ArtifactSafetyError("artifact safety: JSON nesting is too deep") from exc
+        except (OSError, UnicodeError) as exc:
+            raise ArtifactSafetyError("artifact safety: result file is unreadable") from exc
+        finally:
+            if descriptor >= 0:
+                os.close(descriptor)
+    if not docs:
+        raise ArtifactSafetyError("artifact safety: no public result documents found")
+    return docs
+
+
+def main() -> int:
+    """Command-line entry point: load every given path and run the publication check.
+
+    Returns 0 after printing the number of documents that passed. A failed check is
+    reported through ``ArgumentParser.error``, which prints the message and exits.
+    """
+    cli = argparse.ArgumentParser(description="Check CollectiveX result artifacts for private data")
+    cli.add_argument("paths", nargs="+")
+    paths = cli.parse_args().paths
+    try:
+        docs = load_documents(paths)
+        assert_publication_safe(docs)
+    except ArtifactSafetyError as exc:
+        cli.error(str(exc))
+    print(f"artifact safety: {len(docs)} public document(s) passed")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/experimental/CollectiveX/capability.py b/experimental/CollectiveX/capability.py
new file mode 100644
index 000000000..346e2ad14
--- /dev/null
+++ b/experimental/CollectiveX/capability.py
@@ -0,0 +1,569 @@
+#!/usr/bin/env python3
+"""Public runner and backend capability registry for CollectiveX v1."""
+
+from __future__ import annotations
+
+import re
+from typing import Any
+
+import identity
+
+
+# Pinned DeepEP commit recorded as the "commit" of the deepep-v2 entry in BACKENDS.
+DEEPEP_V2_COMMIT = "fa8a9b16898204afd347c663b89e65ef87dc6ce6"
+# Per-SKU schedulability of deepep-v2 with the basis for each decision. _resolve_base
+# rejects any SKU whose entry has "schedulable": False and quotes its "basis".
+DEEPEP_V2_SKU_CAPABILITIES = {
+    "h100-dgxc": {
+        "schedulable": False,
+        "basis": "current-runner-nccl-device-api-symmetric-memory-unavailable",
+    },
+    "h200-dgxc": {"schedulable": True, "basis": "upstream-sm90-requirement"},
+    "b200-dgxc": {"schedulable": True, "basis": "upstream-sm100-result"},
+    "gb200": {"schedulable": True, "basis": "upstream-sm100-result"},
+    "b300": {"schedulable": True, "basis": "pinned-pr605-pr630-sm103-maps-sm100f"},
+    "gb300": {"schedulable": True, "basis": "pinned-pr605-pr630-sm103-maps-sm100f"},
+    "mi325x": {"schedulable": False, "basis": "nvidia-only"},
+    "mi355x": {"schedulable": False, "basis": "nvidia-only"},
+}
+
+
+def _topologies(
+    product: str, *, gpus_per_node: int, scale_up_domain: int, scale_up_transport: str
+) -> dict[int, dict[str, Any]]:
+    """Build the EP8 and EP16 topology records for one product.
+
+    EP8 is always recorded as scale-up. EP16 is scale-up only when
+    ``scale_up_domain`` is at least 16; otherwise it is scale-out over RDMA and its
+    transport and topology class gain an "-rdma" suffix. Node counts are the EP
+    degree divided by ``gpus_per_node``.
+    """
+    if scale_up_transport == "mnnvl":
+        scale_up_class = f"{product}-nvl72-mnnvl"
+    elif scale_up_transport == "xgmi":
+        scale_up_class = f"{product}-xgmi"
+    else:
+        scale_up_class = f"{product}-{scale_up_transport}-island"
+
+    def cell(ep: int, *, within_domain: bool) -> dict[str, Any]:
+        # One record; within_domain selects the scale-up or the RDMA scale-out variant.
+        rdma_transport = f"{scale_up_transport}-rdma"
+        return {
+            "nodes": ep // gpus_per_node,
+            "gpus_per_node": gpus_per_node,
+            "scale_up_domain": scale_up_domain,
+            "scope": "scale-up" if within_domain else "scale-out",
+            "scale_up_transport": scale_up_transport,
+            "scale_out_transport": None if within_domain else "rdma",
+            "transport": scale_up_transport if within_domain else rdma_transport,
+            "topology_class": (
+                scale_up_class if within_domain else f"{product}-{rdma_transport}"
+            ),
+        }
+
+    return {
+        8: cell(8, within_domain=True),
+        16: cell(16, within_domain=scale_up_domain >= 16),
+    }
+
+
+def _platform(
+    *, vendor: str, arch: str, machine: str, product: str, gpus_per_node: int,
+    scale_up_domain: int, scale_up_transport: str, launcher: str,
+) -> dict[str, Any]:
+    """Build one PLATFORMS record: identity fields plus the per-EP topology table.
+
+    ``ep_degrees`` is the tuple of EP degrees returned by ``_topologies`` and
+    ``topologies`` is that mapping itself.
+    """
+    per_ep = _topologies(
+        product,
+        gpus_per_node=gpus_per_node,
+        scale_up_domain=scale_up_domain,
+        scale_up_transport=scale_up_transport,
+    )
+    record: dict[str, Any] = dict(vendor=vendor, arch=arch, machine=machine, product=product)
+    # EP8 defaults remain while downstream readers migrate to per-EP records.
+    record["transport"] = per_ep[8]["transport"]
+    record["topology_class"] = per_ep[8]["topology_class"]
+    record.update(
+        gpus_per_node=gpus_per_node,
+        scale_up_domain=scale_up_domain,
+        ep_degrees=tuple(per_ep),
+        topologies=per_ep,
+        launcher=launcher,
+    )
+    return record
+
+
+# Registry of known runners keyed by SKU label. Each value is a _platform() record
+# holding vendor, arch, machine, product, launcher, and the EP8/EP16 topologies.
+PLATFORMS = {
+    "h100-dgxc": _platform(
+        vendor="nvidia", arch="sm90", machine="amd64", product="h100",
+        gpus_per_node=8, scale_up_domain=8, scale_up_transport="nvlink",
+        launcher="single-slurm",
+    ),
+    "h200-dgxc": _platform(
+        vendor="nvidia", arch="sm90", machine="amd64", product="h200",
+        gpus_per_node=8, scale_up_domain=8, scale_up_transport="nvlink",
+        launcher="single-slurm",
+    ),
+    "b200-dgxc": _platform(
+        vendor="nvidia", arch="sm100", machine="amd64", product="b200",
+        gpus_per_node=8, scale_up_domain=8, scale_up_transport="nvlink",
+        launcher="single-slurm",
+    ),
+    "b300": _platform(
+        vendor="nvidia", arch="sm103", machine="amd64", product="b300",
+        gpus_per_node=8, scale_up_domain=8, scale_up_transport="nvlink",
+        launcher="single-slurm",
+    ),
+    "gb200": _platform(
+        vendor="nvidia", arch="sm100", machine="arm64", product="gb200",
+        gpus_per_node=4, scale_up_domain=72, scale_up_transport="mnnvl",
+        launcher="gb-nv",
+    ),
+    "gb300": _platform(
+        vendor="nvidia", arch="sm103", machine="arm64", product="gb300",
+        gpus_per_node=4, scale_up_domain=72, scale_up_transport="mnnvl",
+        launcher="gb-nv",
+    ),
+    "mi325x": _platform(
+        vendor="amd", arch="gfx942", machine="amd64", product="mi325x",
+        gpus_per_node=8, scale_up_domain=8, scale_up_transport="xgmi",
+        launcher="mi-amds",
+    ),
+    "mi355x": _platform(
+        vendor="amd", arch="gfx950", machine="amd64", product="mi355x",
+        gpus_per_node=8, scale_up_domain=8, scale_up_transport="xgmi",
+        launcher="mi-amds",
+    ),
+}
+
+# Backend registry. _resolve_base requires "vendors" and consults the optional
+# "machines", "excluded_skus", and "sku_capabilities" keys; the remaining deepep-v2
+# keys are descriptive metadata that the code visible here does not read.
+BACKENDS = {
+    "deepep": {"vendors": {"nvidia"}},
+    "deepep-v2": {
+        "vendors": {"nvidia"},
+        "implementation": "deep_ep.ElasticBuffer",
+        "source": "deepseek-ai/DeepEP#605+#630",
+        "commit": DEEPEP_V2_COMMIT,
+        "communication_backend": "nccl-device-lsa",
+        "torch": "2.10.0+cu130",
+        "nccl": "2.30.4",
+        "sku_capabilities": DEEPEP_V2_SKU_CAPABILITIES,
+    },
+    "uccl": {
+        "vendors": {"nvidia"},
+        "machines": {"amd64"},
+        "excluded_skus": {"b200-dgxc", "b300"},
+    },
+    "deepep-hybrid": {"vendors": {"nvidia"}},
+    "mori": {"vendors": {"amd"}},
+    "nccl-ep": {"vendors": {"nvidia", "amd"}},
+}
+# Backend names in declaration order.
+SWEEP_BACKENDS = tuple(BACKENDS)
+
+# Disposition vocabulary for precision rules.
+# NOTE(review): not enforced by _precision_rule in the code visible here - confirm where.
+PRECISION_DISPOSITIONS = {
+    "supported", "unsupported", "not-applicable", "provisional",
+}
+# SKU groups reused by the precision rules below.
+_NVIDIA_SKUS = (
+    "h100-dgxc", "h200-dgxc", "b200-dgxc", "b300", "gb200", "gb300",
+)
+# Same as _NVIDIA_SKUS without h100-dgxc, which DEEPEP_V2_SKU_CAPABILITIES marks unschedulable.
+_DEEPEP_V2_PRECISION_SKUS = (
+    "h200-dgxc", "b200-dgxc", "b300", "gb200", "gb300",
+)
+_HOPPER_UCCL_SKUS = ("h100-dgxc", "h200-dgxc")
+
+
+def _precision_rule(
+    *,
+    backend: str,
+    skus: tuple[str, ...],
+    ep_degrees: tuple[int, ...],
+    mode: str,
+    basis: str,
+    disposition: str = "provisional",
+) -> dict[str, Any]:
+    """Bundle one precision capability rule into a plain dict.
+
+    The arguments are stored unchanged under keys of the same name, in the order
+    backend, skus, ep_degrees, mode, disposition, basis.
+    """
+    rule: dict[str, Any] = dict(backend=backend, skus=skus, ep_degrees=ep_degrees, mode=mode)
+    rule["disposition"] = disposition
+    rule["basis"] = basis
+    return rule
+
+
+# Precision profile identifiers, used as the keys of PRECISION_CAPABILITIES.
+# NOTE(review): the "d-..." / "c-..." halves presumably name the dispatch and combine
+# formats - confirm against the profile definitions.
+_NORMAL_E4M3FN_PROFILE = "d-fp8-e4m3fn-b128-f32-prequantized.c-bf16"
+_NORMAL_E4M3FNUZ_PROFILE = "d-fp8-e4m3fnuz-b128-f32-prequantized.c-bf16"
+_LL_FP8_PROFILE = "d-fp8-e4m3fn-b128-f32-fused.c-bf16"
+_LL_LOGFMT_PROFILE = "d-bf16.c-logfmt10-dynamic64"
+_LL_FP8_LOGFMT_PROFILE = (
+    "d-fp8-e4m3fn-b128-f32-fused.c-logfmt10-dynamic64"
+)
+_MORI_E4M3FN_DIRECT_PROFILE = "d-bf16.c-fp8-e4m3fn-direct-cast-noscale"
+_MORI_E4M3FN_BOTH_PROFILE = (
+    "d-fp8-e4m3fn-b128-f32-prequantized.c-fp8-e4m3fn-direct-cast-noscale"
+)
+_MORI_E4M3FNUZ_DIRECT_PROFILE = "d-bf16.c-fp8-e4m3fnuz-direct-cast-noscale"
+_MORI_E4M3FNUZ_BOTH_PROFILE = (
+    "d-fp8-e4m3fnuz-b128-f32-prequantized.c-fp8-e4m3fnuz-direct-cast-noscale"
+)
+
+# Maps each precision profile name to its rules; precision_targets expands every rule
+# into one target per (sku, ep) pair.
+# These are native-path candidates, not executable claims. A cell must be changed
+# from provisional to supported or unsupported after its pinned runtime probe.
+PRECISION_CAPABILITIES: dict[str, tuple[dict[str, Any], ...]] = {
+    _NORMAL_E4M3FN_PROFILE: (
+        _precision_rule(
+            backend="deepep", skus=_NVIDIA_SKUS, ep_degrees=(8, 16), mode="normal",
+            basis="deepep-v1-normal-prequantized-e4m3fn-block128-f32-scale",
+        ),
+        _precision_rule(
+            backend="deepep-v2", skus=_DEEPEP_V2_PRECISION_SKUS,
+            ep_degrees=(8, 16), mode="normal",
+            basis="deepep-v2-normal-prequantized-e4m3fn-block128-f32-scale",
+        ),
+        _precision_rule(
+            backend="deepep-hybrid", skus=_NVIDIA_SKUS,
+            ep_degrees=(8, 16), mode="normal",
+            basis="deepep-hybrid-normal-uint8-e4m3fn-block128-f32-scale",
+        ),
+        _precision_rule(
+            backend="uccl", skus=_HOPPER_UCCL_SKUS, ep_degrees=(8, 16), mode="normal",
+            basis="uccl-deepep-api-normal-prequantized-e4m3fn-block128-f32-scale",
+        ),
+        _precision_rule(
+            backend="mori", skus=("mi355x",), ep_degrees=(8, 16), mode="normal",
+            basis="mori-gfx950-normal-prequantized-ocp-e4m3fn-block128-f32-scale",
+        ),
+    ),
+    _NORMAL_E4M3FNUZ_PROFILE: (
+        _precision_rule(
+            backend="mori", skus=("mi325x",), ep_degrees=(8, 16), mode="normal",
+            basis="mori-gfx942-normal-prequantized-e4m3fnuz-block128-f32-scale",
+        ),
+    ),
+    _LL_FP8_PROFILE: (
+        _precision_rule(
+            backend="deepep", skus=_NVIDIA_SKUS, ep_degrees=(8, 16),
+            mode="low-latency",
+            basis="deepep-v1-low-latency-fused-e4m3fn-block128-f32-scale",
+        ),
+        _precision_rule(
+            backend="uccl", skus=_HOPPER_UCCL_SKUS, ep_degrees=(8, 16),
+            mode="low-latency",
+            basis="uccl-deepep-api-low-latency-fused-e4m3fn-block128-f32-scale",
+        ),
+    ),
+    _LL_LOGFMT_PROFILE: (
+        _precision_rule(
+            backend="deepep", skus=_NVIDIA_SKUS, ep_degrees=(8, 16),
+            mode="low-latency",
+            basis="deepep-v1-low-latency-logfmt10-dynamic-per64-combine",
+        ),
+        _precision_rule(
+            backend="uccl", skus=_HOPPER_UCCL_SKUS, ep_degrees=(8, 16),
+            mode="low-latency",
+            basis="uccl-deepep-api-low-latency-logfmt10-dynamic-per64-combine",
+        ),
+    ),
+    _LL_FP8_LOGFMT_PROFILE: (
+        _precision_rule(
+            backend="deepep", skus=_NVIDIA_SKUS, ep_degrees=(8, 16),
+            mode="low-latency",
+            basis="deepep-v1-low-latency-fused-e4m3fn-dispatch-logfmt10-combine",
+        ),
+        _precision_rule(
+            backend="uccl", skus=_HOPPER_UCCL_SKUS, ep_degrees=(8, 16),
+            mode="low-latency",
+            basis="uccl-deepep-api-low-latency-fused-e4m3fn-dispatch-logfmt10-combine",
+        ),
+    ),
+    _MORI_E4M3FN_DIRECT_PROFILE: (
+        _precision_rule(
+            backend="mori", skus=("mi355x",), ep_degrees=(8,), mode="normal",
+            basis="mori-gfx950-ep8-intranode-e4m3fn-direct-cast-combine",
+        ),
+    ),
+    _MORI_E4M3FN_BOTH_PROFILE: (
+        _precision_rule(
+            backend="mori", skus=("mi355x",), ep_degrees=(8,), mode="normal",
+            basis="mori-gfx950-ep8-intranode-e4m3fn-dispatch-and-direct-cast-combine",
+        ),
+    ),
+    _MORI_E4M3FNUZ_DIRECT_PROFILE: (
+        _precision_rule(
+            backend="mori", skus=("mi325x",), ep_degrees=(8,), mode="normal",
+            basis="mori-gfx942-ep8-asyncll-e4m3fnuz-direct-cast-combine",
+        ),
+    ),
+    _MORI_E4M3FNUZ_BOTH_PROFILE: (
+        _precision_rule(
+            backend="mori", skus=("mi325x",), ep_degrees=(8,), mode="normal",
+            basis="mori-gfx942-ep8-asyncll-e4m3fnuz-dispatch-and-direct-cast-combine",
+        ),
+    ),
+}
+
+
+def runtime_identity_issues(
+    sku: str, *, vendor: str, arch: str, machine: str, device_name: str,
+    device_count: int, world_size: int,
+) -> list[str]:
+    """List mismatches between the observed runtime and the platform registered for ``sku``.
+
+    Compares vendor, arch, and machine exactly, requires the registered product to
+    appear as a letters+digits token in ``device_name``, requires ``device_count`` to
+    equal the registered GPUs per node, and requires ``world_size`` to be a registered
+    EP degree. Returns an empty list when everything matches and a single-entry list
+    when ``sku`` is unknown.
+    """
+    expected = PLATFORMS.get(sku)
+    if expected is None:
+        return [f"unknown runner identity {sku!r}"]
+    observed_identity = {"vendor": vendor, "arch": arch, "machine": machine}
+    problems = [
+        f"{field}={observed!r}, expected {expected[field]!r}"
+        for field, observed in observed_identity.items()
+        if observed != expected[field]
+    ]
+    # Product-like tokens (letters, digits, optional letters), e.g. "h100" or "mi325x".
+    name_tokens = re.findall(r"[a-z]+\d+[a-z]*", device_name.lower())
+    if expected["product"] not in name_tokens:
+        problems.append(
+            f"device product {device_name!r} does not identify {expected['product']}"
+        )
+    if device_count != expected["gpus_per_node"]:
+        problems.append(
+            f"visible GPUs={device_count}, expected {expected['gpus_per_node']} per node"
+        )
+    if world_size not in expected["ep_degrees"]:
+        problems.append(f"EP{world_size} is not registered for {sku}")
+    return problems
+
+
+def topology_for(sku: str, ep: int) -> dict[str, Any] | None:
+    """Look up the topology record registered for ``sku`` at EP degree ``ep``.
+
+    Returns None when the SKU is unknown or has no record for that degree.
+    """
+    platform = PLATFORMS.get(sku)
+    return None if platform is None else platform["topologies"].get(ep)
+
+
+def _resolve_base(
+    sku: str,
+    backend: str,
+    *,
+    ep: int | None = None,
+    nodes: int | None = None,
+    routing: str = "uniform",
+    eplb: bool = False,
+    mode: str = "normal",
+) -> tuple[bool, str]:
+    """Decide whether a SKU/backend cell is schedulable, ignoring precision profiles.
+
+    Returns ``(True, "ok")`` or ``(False, reason)``. Checks run in a fixed order and the
+    first failure wins: known SKU, known backend, known mode, low-latency support, EP
+    and node resolution, routing/EPLB combination, vendor, per-SKU capability, machine,
+    and excluded SKUs. When ``ep`` is omitted it is the platform's first EP degree, or
+    the single degree whose topology uses ``nodes`` nodes if ``nodes`` is given.
+    """
+    platform = PLATFORMS.get(sku)
+    if platform is None:
+        return False, f"unknown GHA runner label {sku!r}"
+    implementation = BACKENDS.get(backend)
+    if implementation is None:
+        return False, f"unknown backend {backend!r}"
+    if mode not in {"normal", "low-latency"}:
+        return False, f"unknown benchmark mode {mode!r}"
+    if mode == "low-latency" and backend not in {"deepep", "uccl"}:
+        return False, f"{backend} has no distinct low-latency API"
+
+    if ep is None and nodes is None:
+        ep = platform["ep_degrees"][0]
+    elif ep is None:
+        # Infer the EP degree from the node count; it must identify exactly one degree.
+        candidates = [
+            degree for degree, record in platform["topologies"].items()
+            if record["nodes"] == nodes
+        ]
+        if len(candidates) != 1:
+            return False, f"{sku} does not register a unique {nodes}-node EP degree"
+        ep = candidates[0]
+    topology = platform["topologies"].get(ep)
+    if topology is None or nodes not in (None, topology["nodes"]):
+        return False, f"{sku} does not register EP{ep} on {nodes} nodes"
+
+    if routing not in {"uniform", "zipf"} or (eplb and routing != "zipf"):
+        return False, "v1 routing is uniform or zipf, with EPLB only on zipf"
+    vendor, machine = platform["vendor"], platform["machine"]
+    if vendor not in implementation["vendors"]:
+        return False, f"{backend} does not support {vendor}"
+    sku_capability = implementation.get("sku_capabilities", {}).get(sku)
+    if sku_capability is not None and not sku_capability["schedulable"]:
+        return False, f"{backend} is unsupported on {sku}: {sku_capability['basis']}"
+    # A backend without a "machines" entry accepts every machine type.
+    if machine not in implementation.get("machines", {machine}):
+        return False, f"{backend} does not support {machine}"
+    if sku in implementation.get("excluded_skus", set()):
+        return False, f"{backend} is unavailable on {sku}"
+    return True, "ok"
+
+
+def precision_targets(
+    profile_names: tuple[str, ...] | list[str] | None = None,
+) -> list[dict[str, Any]]:
+    """Expand exact native precision candidates into deterministic target cells."""
+    names = list(PRECISION_CAPABILITIES) if profile_names is None else list(profile_names)  # None: all profiles
+    unknown = sorted(set(names) - set(PRECISION_CAPABILITIES))  # sorted for a stable error message
+    if unknown:
+        raise ValueError(f"unknown precision capability profiles {unknown}")
+    targets: list[dict[str, Any]] = []
+    seen: set[tuple[str, str, str, int, str]] = set()  # (profile, backend, sku, ep, mode) keys emitted so far
+    for profile_name in names:
+        for rule in PRECISION_CAPABILITIES[profile_name]:
+            for sku in rule["skus"]:  # each rule expands to its skus x ep_degrees product
+                for ep in rule["ep_degrees"]:
+                    key = (profile_name, rule["backend"], sku, ep, rule["mode"])
+                    if key in seen:  # two rules claiming one cell is an error, never merged
+                        raise RuntimeError(f"duplicate precision capability target {key}")
+                    seen.add(key)
+                    targets.append({
+                        "precision_profile": profile_name,
+                        "backend": rule["backend"],
+                        "sku": sku,
+                        "ep": ep,
+                        "mode": rule["mode"],
+                        "disposition": rule["disposition"],  # copied from the rule, shared by all its cells
+                        "basis": rule["basis"],
+                    })
+    return targets  # ordered by profile, then rule, then sku, then ep iteration order
+
+
+def provisional_precision_targets(
+    profile_names: tuple[str, ...] | list[str] | None = None,
+) -> list[dict[str, Any]]:
+    """Return the expanded precision targets whose disposition is "provisional"."""
+    def pending(cell: dict[str, Any]) -> bool:
+        return cell["disposition"] == "provisional"
+
+    return list(filter(pending, precision_targets(profile_names)))
+
+
+def precision_target_declared(
+    precision_profile: str,
+    *,
+    sku: str,
+    backend: str,
+    ep: int,
+    mode: str,
+) -> bool:
+    """Report whether the profile's expanded targets contain exactly this cell."""
+    fields = ("precision_profile", "sku", "backend", "ep", "mode")
+    wanted = (precision_profile, sku, backend, ep, mode)
+    for candidate in precision_targets([precision_profile]):
+        # Element-wise tuple equality performs the same five comparisons,
+        # in the same order, as checking each field one by one.
+        if tuple(candidate[name] for name in fields) == wanted:
+            return True
+    return False
+
+
+def resolve_disposition(
+    sku: str,
+    backend: str,
+    *,
+    ep: int | None = None,
+    nodes: int | None = None,
+    routing: str = "uniform",
+    eplb: bool = False,
+    mode: str = "normal",
+    precision_profile: str | None = None,
+) -> tuple[str, str]:
+    """Resolve a baseline or exact precision cell to its capability disposition."""
+    base_ok, base_detail = _resolve_base(  # baseline verdict is computed for every call
+        sku,
+        backend,
+        ep=ep,
+        nodes=nodes,
+        routing=routing,
+        eplb=eplb,
+        mode=mode,
+    )
+    if precision_profile is None or precision_profile == identity.V1_CONTROL_PRECISION_PROFILE:  # baseline-only cell
+        return ("supported", "ok") if base_ok else ("unsupported", base_detail)
+    if precision_profile not in identity.V1_PRECISION_PROFILES:
+        return "unsupported", f"unknown precision profile {precision_profile!r}"
+    profile = identity.V1_PRECISION_PROFILES[precision_profile]
+    if mode not in profile["modes"]:  # checked before any target lookup
+        return (
+            "not-applicable",
+            f"precision profile {precision_profile} is not defined for {mode} mode",
+        )
+    if ep is None:  # derive the EP degree so the target lookup below has a concrete value
+        platform = PLATFORMS.get(sku)
+        if platform is None:
+            return "unsupported", base_detail
+        if nodes is None:
+            ep = platform["ep_degrees"][0]  # first registered degree is the default
+        else:
+            matches = [
+                degree for degree, topology in platform["topologies"].items()
+                if topology["nodes"] == nodes
+            ]
+            if len(matches) != 1:  # zero or several degrees for this node count
+                return "unsupported", base_detail
+            ep = matches[0]
+    matches = [  # rebinds `matches`: from here on it holds target dicts, not EP degrees
+        target for target in precision_targets([precision_profile])
+        if target["sku"] == sku
+        and target["backend"] == backend
+        and target["ep"] == ep
+        and target["mode"] == mode
+    ]
+    if not matches:  # tested before base_ok, so an undeclared cell is not-applicable either way
+        return (
+            "not-applicable",
+            f"{precision_profile} has no native {backend} target on {sku} EP{ep}",
+        )
+    if not base_ok:
+        return "unsupported", base_detail
+    target = matches[0]  # precision_targets raises on duplicate cells, so this is the only match
+    return target["disposition"], target["basis"]
+
+
+def resolve(
+    sku: str,
+    backend: str,
+    *,
+    ep: int | None = None,
+    nodes: int | None = None,
+    routing: str = "uniform",
+    eplb: bool = False,
+    mode: str = "normal",
+    precision_profile: str | None = None,
+) -> tuple[bool, str]:
+    """Return whether one fixed-v1 case can run on a public GHA runner label."""
+    disposition, detail = resolve_disposition(  # all arguments are forwarded unchanged
+        sku,
+        backend,
+        ep=ep,
+        nodes=nodes,
+        routing=routing,
+        eplb=eplb,
+        mode=mode,
+        precision_profile=precision_profile,
+    )
+    return disposition == "supported", detail  # any other disposition (e.g. provisional) yields False
+
+
+def _validate_precision_capabilities() -> None:  # raises RuntimeError on the first inconsistency found
+    expected = set(identity.V1_PRECISION_PROFILES) - {  # every identity profile except the control profile
+        identity.V1_CONTROL_PRECISION_PROFILE
+    }
+    if set(PRECISION_CAPABILITIES) != expected:
+        raise RuntimeError("precision capability profiles differ from the identity registry")
+    empty = sorted(  # profiles registered with an empty rule list
+        profile for profile, rules in PRECISION_CAPABILITIES.items() if not rules
+    )
+    if empty:
+        raise RuntimeError(f"precision profiles have no native targets: {empty}")
+    for target in precision_targets():  # expanding also rejects duplicate cells
+        if target["backend"] not in BACKENDS or target["sku"] not in PLATFORMS:
+            raise RuntimeError(f"unknown precision target: {target}")
+        if target["ep"] not in PLATFORMS[target["sku"]]["ep_degrees"]:  # sku is known to exist at this point
+            raise RuntimeError(f"invalid precision target EP degree: {target}")
+        if target["disposition"] not in PRECISION_DISPOSITIONS - {"not-applicable"}:  # never declared explicitly
+            raise RuntimeError(f"invalid declared precision disposition: {target}")
+        if target["mode"] not in identity.V1_PRECISION_PROFILES[
+            target["precision_profile"]
+        ]["modes"]:
+            raise RuntimeError(f"precision target mode differs from its profile: {target}")
+        topology = topology_for(target["sku"], target["ep"])
+        base_ok, base_detail = _resolve_base(  # routing and eplb keep their defaults here
+            target["sku"],
+            target["backend"],
+            ep=target["ep"],
+            nodes=topology["nodes"] if topology is not None else None,
+            mode=target["mode"],
+        )
+        if target["disposition"] in {"supported", "provisional"} and not base_ok:  # other dispositions may fail baseline
+            raise RuntimeError(
+                f"precision target exceeds its backend capability: {target}: {base_detail}"
+            )
+
+
+_validate_precision_capabilities()
diff --git a/experimental/CollectiveX/configs/suites.yaml b/experimental/CollectiveX/configs/suites.yaml
new file mode 100644
index 000000000..2f5214724
--- /dev/null
+++ b/experimental/CollectiveX/configs/suites.yaml
@@ -0,0 +1,72 @@
+# CollectiveX v1 comparison suites.
+schema_version: 1
+
+suites:
+  ep-core-v1:
+    mode: normal
+    workloads: [deepseek-v3-v1]
+    platforms: [h100-dgxc, h200-dgxc, b300, b200-dgxc, gb300, gb200, mi355x, mi325x]
+    ep_degrees: [8, 16]
+    routings: [uniform]
+    phases: [decode, prefill]
+    token_points_prefill: [256, 512]
+    required_publication: official
+
+  ep-routing-v1:
+    mode: normal
+    workloads: [deepseek-v3-v1]
+    platforms: [h100-dgxc, h200-dgxc, b300, b200-dgxc, gb300, gb200, mi355x, mi325x]
+    ep_degrees: [8, 16]
+    routings: [zipf]
+    eplb: [false, true]
+    phases: [decode, prefill]
+    token_points_decode: [128]
+    token_points_prefill: [512]
+    required_publication: comparable-experimental
+
+  ep-low-latency-v1:
+    mode: low-latency
+    backends: [deepep, uccl]  # capability.py rejects low-latency mode for every other backend
+    workloads: [deepseek-v3-v1]
+    platforms: [h100-dgxc, h200-dgxc, b300, b200-dgxc, gb300, gb200, mi355x, mi325x]
+    ep_degrees: [8, 16]
+    routings: [uniform]
+    phases: [decode]  # the only suite above the precision suites that omits prefill
+    token_points_decode: [1, 2, 4, 8, 16, 32, 64, 128]  # same points as contracts.py V1_CONDITIONING_LADDERS["decode"]
+    required_publication: official
+
+  ep-precision-normal-v1:
+    mode: normal
+    backends: [deepep, deepep-v2, uccl, deepep-hybrid, mori]
+    workloads: [deepseek-v3-v1]
+    platforms: [h100-dgxc, h200-dgxc, b300, b200-dgxc, gb300, gb200, mi355x, mi325x]
+    ep_degrees: [8, 16]
+    routings: [uniform]
+    phases: [decode, prefill]
+    token_points_decode: [128]
+    token_points_prefill: [512]
+    precision_profiles:
+      - d-fp8-e4m3fn-b128-f32-prequantized.c-bf16
+      - d-fp8-e4m3fnuz-b128-f32-prequantized.c-bf16
+      - d-bf16.c-fp8-e4m3fn-direct-cast-noscale
+      - d-fp8-e4m3fn-b128-f32-prequantized.c-fp8-e4m3fn-direct-cast-noscale
+      - d-bf16.c-fp8-e4m3fnuz-direct-cast-noscale
+      - d-fp8-e4m3fnuz-b128-f32-prequantized.c-fp8-e4m3fnuz-direct-cast-noscale
+    provisional: true
+    required_publication: comparable-experimental
+
+  ep-precision-low-latency-v1:
+    mode: low-latency
+    backends: [deepep, uccl]
+    workloads: [deepseek-v3-v1]
+    platforms: [h100-dgxc, h200-dgxc, b300, b200-dgxc, gb300, gb200, mi355x, mi325x]
+    ep_degrees: [8, 16]
+    routings: [uniform]
+    phases: [decode]
+    token_points_decode: [128]
+    precision_profiles:
+      - d-fp8-e4m3fn-b128-f32-fused.c-bf16
+      - d-bf16.c-logfmt10-dynamic64
+      - d-fp8-e4m3fn-b128-f32-fused.c-logfmt10-dynamic64
+    provisional: true
+    required_publication: comparable-experimental
diff --git a/experimental/CollectiveX/configs/workloads.yaml b/experimental/CollectiveX/configs/workloads.yaml
new file mode 100644
index 000000000..b5b68334c
--- /dev/null
+++ b/experimental/CollectiveX/configs/workloads.yaml
@@ -0,0 +1,9 @@
+# CollectiveX v1 canonical workload and phase metadata.
+schema_version: 1
+
+model_derived:
+  deepseek-v3-v1:
+    hidden: 7168
+    topk: 8
+    routed_experts: 256
+    verified_against: "deepseek-ai/DeepSeek-V3@e815299b0bcbac849fa540c768ef21845365c9eb/config.json"
diff --git a/experimental/CollectiveX/contracts.py b/experimental/CollectiveX/contracts.py
new file mode 100644
index 000000000..04357da78
--- /dev/null
+++ b/experimental/CollectiveX/contracts.py
@@ -0,0 +1,3058 @@
+#!/usr/bin/env python3
+"""Strict native attempt contracts and metric validation for CollectiveX v1."""
+from __future__ import annotations
+
+import argparse
+import datetime as dt
+from functools import lru_cache
+import hashlib
+import json
+import math
+import os
+from pathlib import Path, PurePosixPath
+import re
+import sys
+from typing import Any, Iterable
+
+import artifact_safety
+import capability
+import identity
+
+TESTS = Path(__file__).resolve().parent / "tests"
+sys.path.insert(0, str(TESTS))
+import eplb as eplb_contract  # noqa: E402
+import workload as workload_contract  # noqa: E402
+
+RAW_FORMAT = "collectivex.ep.v1"
+SAMPLES_FORMAT = "collectivex.samples.v1"
+TERMINAL_FORMAT = "collectivex.terminal.v1"
+TERMINAL_CASE_FIELDS = {
+    "backend", "canonical", "eplb", "ep", "experts", "gpus_per_node", "hidden",
+    "ladder", "mode", "nodes", "phase", "required_publication", "routing",
+    "samples_per_point", "scale_out_transport", "scale_up_domain", "scale_up_transport",
+    "scope", "suite", "timing", "topk", "topology_class", "transport",
+    "warmup_semantics", "workload",
+}
+ALLOCATION_FACTOR_FIELDS = {
+    "artifact", "execution_id", "job", "repo", "run_attempt", "run_id", "runner",
+    "source_sha", "qualification_index",
+}
+GIT_RUN_FIELDS = {
+    "artifact", "job", "qualification_index", "ref", "repo", "run_attempt", "run_id",
+    "source_sha",
+}
+PRE_EXECUTION_FAILURE_REASONS = {  # failure mode -> public failure reason
+    "setup": "launcher-setup-failed",
+    "repository-stage": "repository-staging-failed",
+    "registry-verification": "container-registry-verification-failed",
+    "scheduler-allocation": "scheduler-allocation-failed",
+    "container-import": "container-image-preparation-failed",
+    "container-hash": "container-image-identity-failed",
+    "container-launch": "container-runtime-launch-failed",
+    "backend-setup": "backend-setup-failed",
+    "artifact-collection": "artifact-collection-failed",
+}
+RUNTIME_FAILURE_REASONS = {
+    **PRE_EXECUTION_FAILURE_REASONS,  # superset: every pre-execution mode plus the four below
+    "runtime-identity": "runtime-identity-mismatch",
+    "timeout": "execution-timeout",
+    "deadlock": "execution-deadlock",
+    "execution": "distributed-command-failed",
+}
+POST_EMIT_FAILURE_REASONS = {  # the four runtime-only modes collapse to one shared reason
+    mode: "post-emit-distributed-command-failed"
+    for mode in ("runtime-identity", "timeout", "deadlock", "execution")
+}
+CAPABILITY_FAILURE_REASONS = frozenset({
+    "backend-platform-unsupported",
+    "backend-token-capacity",
+})
+RETURN_CODE_FAILURE_MODES = {  # process return code -> failure mode
+    5: "runtime-identity",
+    124: "timeout",  # NOTE(review): presumably the timeout(1) exit status; confirm against the launcher
+    137: "timeout",  # NOTE(review): presumably 128 + SIGKILL; confirm against the launcher
+}
+PERCENTILES = ("p50", "p90", "p95", "p99")
+V1_CONDITIONING_LADDERS = {
+    "decode": (1, 2, 4, 8, 16, 32, 64, 128),
+    "prefill": (1, 2, 4, 8, 16, 32, 64, 128, 256, 512),
+}
+V1_CONDITIONING_ROUNDS_PER_SHAPE = 8
+DEEPEP_V2_JIT_KERNELS = frozenset({
+    "barrier", "combine", "combine_reduce_epilogue", "dispatch",
+    "dispatch_copy_epilogue",
+})
+DEEPEP_V2_V1_PROVENANCE = {
+    "deepep_version": "2.0.0",
+    "deepep_distribution_version": "2.0.0+fa8a9b1",
+    "deepep_commit": "fa8a9b16898204afd347c663b89e65ef87dc6ce6",
+    "deepep_tree": "29809e75c5874e6609dac4804e7b651d5226959f",
+    "deepep_pr": 605,
+    "deepep_fix_pr": 630,
+    "fmt_commit": "a4c7e17133ee9cb6a2f45545f6e974dd3c393efa",
+    "torch_version": "2.10.0+cu130",
+    "nccl_package_version": "2.30.4",
+    "nccl_version": "2.30.4",
+    "nvshmem_package_version": "3.3.9",
+}
+UCCL_DEPENDENCY_VERSIONS = {
+    "intervaltree": "3.1.0",
+    "nvidia-cuda-runtime-cu12": "12.9.79",
+    "sortedcontainers": "2.4.0",
+}
+SCHEMA_DIR = Path(__file__).resolve().parent / "schemas"
+_SCHEMA_CACHE: dict[str, dict[str, Any]] = {}
+REQUIRED_BACKEND_PROVENANCE = {  # backend name -> provenance fields that must be resolved
+    "deepep": (
+        "deepep_version", "deepep_commit", "backend_lineage", "allow_mnnvl",
+        "mnnvl_comm", "mode", "num_nvl_bytes", "num_rdma_bytes",
+    ),
+    "deepep-v2": (
+        *DEEPEP_V2_V1_PROVENANCE, "api_signature_sha256", "loaded_libraries",  # unpacking a dict yields its keys
+        "jit_cubins", "jit_random_seed", "deterministic", "num_experts",
+        "tuning_num_experts", "allow_hybrid_mode", "gin_enabled",
+        "communication_backend",
+    ),
+    "deepep-hybrid": (
+        "deepep_commit", "deepep_tree", "branch", "backend_lineage",
+        "loaded_libraries", "realized_config", "jit_kernel_keys", "jit_shared_objects",
+    ),
+    "uccl": (
+        "uccl_version", "uccl_commit", "uccl_wrapper_commit", "backend_lineage",
+        "loaded_libraries", "uccl_dependency_versions", "mode", "num_nvl_bytes",
+        "num_rdma_bytes",
+    ),
+    "mori": ("mori_commit",),  # trailing comma keeps this a one-element tuple
+    "nccl-ep": ("nccl_version", "collective_library", "backend_lineage"),
+}
+PROVENANCE_KEYS = {
+    "allocated_qps", "allow_hybrid_mode", "allow_mnnvl", "allow_multiple_reduction",
+    "api", "api_signature_sha256", "backend", "backend_lineage", "block_num",
+    "block_num_floored", "block_num_target", "branch", "collective_library",
+    "combine_dtype", "combine_warps", "communication_backend", "cuda_version",
+    "deepep_commit", "deepep_distribution_version", "deepep_fix_pr", "deepep_pr", "deepep_tree",
+    "deepep_version", "deterministic", "device_cus",
+    "device_sms", "dispatch_dtype", "dispatch_warps", "enable_sdma", "fmt_commit",
+    "gin_enabled",
+    "gpus_per_node", "heap_size",
+    "impl", "jit_cache_key", "jit_cubins", "jit_kernel_keys", "jit_random_seed",
+    "jit_shared_objects", "kernel_type",
+    "loaded_libraries", "local_experts",
+    "logical_scaleout_ranks",
+    "logical_scaleup_ranks", "mapping_variant", "max_num_inp_token_per_rank",
+    "max_num_tokens", "max_total_recv_tokens", "mnnvl_comm", "mode", "mori_commit",
+    "nccl_communicator", "nccl_package_version", "nccl_version", "num_experts",
+    "nvshmem_package_version",
+    "num_max_tokens_per_rank", "num_nvl_bytes", "num_qps", "num_qps_per_rank",
+    "num_rdma_bytes", "num_sms", "path",
+    "physical_nvlink_ranks", "physical_rdma_ranks", "prefer_overlap_with_compute",
+    "rdma_block_num",
+    "realized_config", "reference_semantics", "requested_num_sms", "resource_mode", "routing_factor",
+    "routing_metadata", "sm_fraction", "top_k",
+    "torch_git_version", "torch_version", "transport", "trtllm", "tuned_source",
+    "tuning_num_experts",
+    "uccl_commit", "uccl_dependency_versions", "uccl_version", "uccl_wrapper_commit",
+    "use_external_inp_buf",
+    "workspace",
+}
+
+
+class ContractError(ValueError):  # ValueError subclass, so `except ValueError` handlers also catch it
+    """A document differs from the native v1 contract."""
+
+
+def scheduled_case_profile(case: dict[str, Any], path: str = "case") -> dict[str, Any]:
+    """Resolve an explicit scheduled mode to its immutable measurement profile."""
+    try:
+        return identity.profile_for_case(case)
+    except identity.IdentityError as exc:  # re-raised as ContractError, prefixed with the document path
+        raise ContractError(f"{path}: {exc}") from exc
+
+
+def _scheduled_case(value: Any, path: str) -> dict[str, Any]:
+    """Validate baseline or explicit-precision scheduled case fields."""
+    fields = set(TERMINAL_CASE_FIELDS)  # copy, so the module-level set is never mutated
+    if isinstance(value, dict) and "precision_profile" in value:
+        fields.add("precision_profile")  # allowed only when the case actually carries it
+    return _keys(value, fields, path)  # NOTE(review): _keys presumably enforces this key set; confirm
+
+
+def resolve_deepep_mnnvl(
+    *, requested: bool, signature_parameters: Iterable[str], deepep_commit: str | None
+) -> tuple[dict[str, bool], str]:
+    """Resolve one explicit DeepEP MNNVL API mode without signature fallbacks."""
+    if not requested:
+        return {}, "not-requested"  # no keyword arguments to pass
+    if "allow_mnnvl" in set(signature_parameters):  # the only parameter name that is accepted
+        return {"allow_mnnvl": True}, "explicit-allow-mnnvl"
+    raise ContractError(  # requested but the parameter is absent: fail instead of silently continuing
+        f"requested DeepEP MNNVL is unsupported by commit {deepep_commit or 'unknown'}"
+    )
+
+
+def collective_kernel_generation(collective_library: Any) -> str:
+    """Return collective_library when it is "nccl" or "rccl"; raise ContractError otherwise."""
+    if collective_library not in ("nccl", "rccl"):  # tuple membership: unhashable input fails cleanly
+        raise ContractError("reference collective library must be nccl or rccl")
+    return collective_library
+
+
+def project_resource_profile(provenance: dict[str, Any]) -> dict[str, Any]:
+    """Project backend provenance into the canonical cross-backend resource vocabulary."""
+    device_units = provenance.get("device_sms") or provenance.get("device_cus")  # device_cus used when device_sms is falsy
+    if provenance.get("num_sms") is not None:  # num_sms takes precedence over block_num
+        kind, configured = "sm", provenance["num_sms"]
+    elif (
+        provenance.get("block_num") is not None
+        and provenance.get("kernel_type") != "AsyncLL"  # AsyncLL block counts are not reported as cu_block
+    ):
+        kind, configured = "cu_block", provenance["block_num"]
+    else:
+        kind, configured = None, None
+    achieved = configured / device_units if configured and device_units else None  # None when either is missing or 0
+    fixed = "fixed-kernel" in str(provenance.get("tuned_source", ""))
+    source = str(provenance.get("tuned_source", ""))  # same text the `fixed` test above inspects
+    num_nvl_bytes = provenance.get("num_nvl_bytes")
+    num_rdma_bytes = provenance.get("num_rdma_bytes")
+    persistent_bytes = (
+        (num_nvl_bytes or 0) + (num_rdma_bytes or 0)  # a missing side counts as 0
+        if num_nvl_bytes is not None or num_rdma_bytes is not None
+        else provenance.get("heap_size")  # used only when both byte counts are absent
+    )
+    return {
+        "achieved_fraction": round(achieved, 4) if achieved else None,  # a zero fraction is reported as None
+        "comm_units_kind": kind,
+        "configured_units": configured,
+        "conformance_class": (  # fixed-kernel wins, then any "default" in tuned_source
+            "not-applicable" if fixed else "backend-default" if "default" in source
+            else "pinned-upstream"
+        ),
+        "device_units": device_units,
+        "fixed_kernel": fixed,
+        "nonconforming": False,  # constant in this projection
+        "pareto_eligible": False,  # constant in this projection
+        "persistent_bytes": persistent_bytes,
+        "qps_per_rank": provenance.get("num_qps_per_rank"),
+        "requested_fraction": None,  # constant in this projection
+        "resource_class": "fixed-kernel" if fixed else "fixed-profile",
+        "target_achieved_within_tol": None,  # constant in this projection
+        "tolerance": 0.10,
+        "tuned_source": provenance.get("tuned_source"),  # raw value, not the str() form used above
+        "warps_combine": provenance.get("combine_warps"),
+        "warps_dispatch": provenance.get("dispatch_warps"),
+    }
+
+
+def backend_version(provenance: dict[str, Any]) -> str | None:
+    """Return the first populated version or commit field, truncated to 160 characters."""
+    precedence = (
+        "deepep_version", "uccl_version", "nccl_version",
+        "mori_commit", "deepep_commit",
+    )
+    present = (provenance.get(name) for name in precedence)
+    rendered = (str(item) for item in present if item is not None)
+    # Blank or whitespace-only text counts as missing; a hit is returned unstripped.
+    return next((text[:160] for text in rendered if text.strip()), None)
+
+
+def public_series_config(
+    *, kernel_generation: Any, provenance: dict[str, Any],
+    resource_profile: dict[str, Any], resource_mode: Any, device_product: Any,
+) -> dict[str, Any]:
+    """Project raw implementation facts into the exact public configuration fields."""
+    generation = None if kernel_generation == "n-a" else kernel_generation  # "n-a" is published as None
+    profile = "profile-" + _sha256_json(resource_profile)[:16]  # label built from the first 16 characters of the digest
+    return {
+        "backend": {
+            "generation": generation,
+            "version": backend_version(provenance),
+        },
+        "resource": {
+            "mode": resource_mode,
+            "profile": profile,
+            "comm_units_kind": resource_profile.get("comm_units_kind"),
+            "configured_units": resource_profile.get("configured_units"),
+        },
+        "system": {"label": str(device_product)[:160]},  # stringified and capped at 160 characters
+    }
+
+
+def public_series_config_sha256(config: dict[str, Any]) -> str:
+    """Commit the canonical public configuration projection into series identity."""
+    return _sha256_json(config)  # NOTE(review): presumably a SHA-256 over a canonical JSON encoding; confirm in _sha256_json
+
+
+SOURCE_BUILT_LIBRARY_ROLES = frozenset({
+    "deepep-extension", "deepep-hybrid-extension",
+})
+
+
+def series_provenance(provenance: dict[str, Any]) -> dict[str, Any]:
+    """Project stable semantic build identity while retaining raw binaries in private evidence."""
+    projected = {  # shallow copy without the four excluded keys; the input dict is not modified
+        key: value for key, value in provenance.items()
+        if key not in {"jit_cache_key", "jit_shared_objects", "path", "sm_fraction"}
+    }
+    libraries = provenance.get("loaded_libraries")
+    if isinstance(libraries, list):  # any other type is passed through untouched
+        projected["loaded_libraries"] = [
+            {  # source-built entries keep only name and role, plus the shared source tree
+                "name": item.get("name"),
+                "role": item.get("role"),
+                "source_tree": provenance.get("deepep_tree"),
+            }
+            if isinstance(item, dict) and item.get("role") in SOURCE_BUILT_LIBRARY_ROLES
+            else item  # every other entry is kept as recorded
+            for item in libraries
+        ]
+    jit_cubins = provenance.get("jit_cubins")
+    if isinstance(jit_cubins, list):
+        projected["jit_cubins"] = [
+            {  # only these three keys survive; any other key (e.g. cubin_sha256) is dropped
+                "cache_key": item.get("cache_key"),
+                "sass_sha256": item.get("sass_sha256"),
+                "source_sha256": item.get("source_sha256"),
+            }
+            if isinstance(item, dict)
+            else item
+            for item in jit_cubins
+        ]
+    return projected
+
+
+def routing_implementation_control_sha256(implementation: dict[str, Any]) -> str:
+    """Bind routing cohorts to the same static build/generator and non-treatment configuration."""
+    provenance = implementation.get("provenance")
+    if not isinstance(provenance, dict):  # missing or malformed provenance is a contract violation
+        raise ContractError("implementation provenance is unavailable")
+    semantic = series_provenance(provenance)  # already omits jit_cache_key, jit_shared_objects, path, sm_fraction
+    treatment_fields = {  # keys excluded from the digest below
+        "jit_cache_key", "jit_cubins", "jit_kernel_keys", "jit_shared_objects",
+        "local_experts", "num_experts", "path", "realized_config", "sm_fraction",
+    }
+    return _sha256_json({
+        "kernel_generation": implementation.get("kernel_generation"),
+        "name": implementation.get("name"),
+        "provenance": {
+            key: value for key, value in semantic.items()
+            if key not in treatment_fields
+        },
+        "resource_profile": implementation.get("resource_profile"),
+    })
+
+
+def _resolved_provenance_value(field: str, value: Any) -> bool:  # True when value is a real, captured fact
+    if value is None or isinstance(value, (dict, list, tuple, set)) and not value:  # None, or an empty container
+        return False
+    text = str(value).strip().lower()  # all placeholder checks below are case-insensitive
+    if not text or text in {"unknown", "none", "null", "n/a", "?", "capture-failed"}:
+        return False
+    if "capture-failed" in text:  # also rejects the marker embedded in a longer string
+        return False
+    if field.endswith("_commit") and (  # commit fields additionally reject these name-like values
+        text in {"main", "hybrid-ep", "uccl", "pkg-uccl"}
+        or text.endswith(("-unknown", "-none", "-main", "-hybrid-ep"))
+    ):
+        return False
+    return True  # note: False and 0 stringify to non-placeholder text and count as resolved
+
+
+def _content_evidence_is_valid(value: Any, required_roles: set[str]) -> bool:  # value: list of name/role/sha256 records
+    if not isinstance(value, list) or not value:  # an empty list is invalid
+        return False
+    records: set[tuple[str, str]] = set()  # (role, name) pairs seen so far
+    roles: set[str] = set()
+    for item in value:
+        if not isinstance(item, dict) or set(item) != {"name", "role", "sha256"}:  # exactly these keys, no extras
+            return False
+        name, role, digest = item["name"], item["role"], item["sha256"]
+        if (
+            not isinstance(name, str)
+            or not re.fullmatch(r"[A-Za-z0-9][A-Za-z0-9_.+-]{0,159}", name)  # 1-160 characters
+            or not isinstance(role, str)
+            or not re.fullmatch(r"[A-Za-z0-9][A-Za-z0-9_.+-]{0,127}", role)  # 1-128 characters
+            or not isinstance(digest, str)
+            or not re.fullmatch(r"[0-9a-f]{64}", digest)  # 64 lowercase hex characters
+            or (role, name) in records  # duplicate record
+        ):
+            return False
+        records.add((role, name))
+        roles.add(role)
+    return required_roles <= roles  # every required role must appear; extra roles are allowed
+
+
+def _deepep_v2_jit_cubins_are_valid(value: Any) -> bool:  # value: list of per-kernel cubin records
+    if not isinstance(value, list) or len(value) != len(DEEPEP_V2_JIT_KERNELS):  # one record per expected kernel
+        return False
+    cache_keys = []
+    kernel_names = set()
+    for item in value:
+        if not isinstance(item, dict) or set(item) != {  # exactly these four keys
+            "cache_key", "cubin_sha256", "sass_sha256", "source_sha256",
+        }:
+            return False
+        cache_key = item["cache_key"]
+        match = (
+            re.fullmatch(r"kernel\.([A-Za-z0-9_+-]+)\.[0-9a-f]{32}", cache_key)  # kernel.<name>.<32 hex characters>
+            if isinstance(cache_key, str)
+            else None
+        )
+        if (
+            match is None
+            or any(
+                not isinstance(item[field], str)
+                or not re.fullmatch(r"[0-9a-f]{64}", item[field])  # 64 lowercase hex characters
+                for field in ("cubin_sha256", "sass_sha256", "source_sha256")
+            )
+        ):
+            return False
+        cache_keys.append(cache_key)
+        kernel_names.add(match.group(1))  # the <name> segment of the cache key
+    return (
+        cache_keys == sorted(set(cache_keys))  # keys must be unique and already in sorted order
+        and kernel_names == DEEPEP_V2_JIT_KERNELS  # exactly the expected kernel set
+    )
+
+
+HYBRID_REALIZED_CONFIG_FIELDS = {
+    "hidden_dim", "max_num_of_tokens_per_rank", "num_of_experts_per_rank",
+    "num_of_ranks_per_node", "num_of_nodes", "pad_multiple",
+    "num_of_tokens_per_chunk_preprocessing_api",
+    "num_of_threads_per_block_preprocessing_api", "num_of_blocks_preprocessing_api",
+    "num_of_blocks_permute", "num_of_blocks_unpermute", "token_data_type",
+    "num_of_stages_dispatch_api", "num_of_stages_permute_block_dispatch_api",
+    "num_of_in_flight_s2g_dispatch_api",
+    "num_of_in_flight_s2g_permute_block_dispatch_api",
+    "num_of_additional_in_flight_s2g_dispatch_api",
+    "num_of_tokens_per_chunk_dispatch_api", "num_of_blocks_dispatch_api",
+    "forward_dispatch_api", "device_side_sync_dispatch_api",
+    "num_of_stages_g2s_combine_api", "num_of_stages_s2g_combine_api",
+    "num_of_tokens_per_chunk_combine_api", "num_of_tokens_per_group_combine_api",
+    "num_of_blocks_combine_api", "num_of_additional_in_flight_s2g_combine_api",
+    "backward_combine_api", "device_side_sync_combine_api",
+}
+HYBRID_REALIZED_BOOL_FIELDS = {
+    "forward_dispatch_api", "device_side_sync_dispatch_api", "backward_combine_api",
+    "device_side_sync_combine_api",
+}
+
+
+def _hybrid_realized_config_is_valid(value: Any) -> bool:
+    """Return True only for a dict with exactly the realized-config fields, each well-typed."""
+    if not isinstance(value, dict) or set(value) != HYBRID_REALIZED_CONFIG_FIELDS:
+        return False
+    for field, field_value in value.items():
+        if field in HYBRID_REALIZED_BOOL_FIELDS:
+            valid = type(field_value) is bool
+        elif field == "token_data_type":
+            valid = field_value in ("UINT8", "UINT16")  # tuple: never hashes a list/dict value
+        else:
+            valid = type(field_value) is int and field_value >= 0  # exact int, so bool is refused
+        if not valid:
+            return False
+    positive = ("hidden_dim", "max_num_of_tokens_per_rank", "num_of_experts_per_rank",
+                "num_of_ranks_per_node", "num_of_nodes")  # these sizes must be strictly positive
+    return all(value[field] > 0 for field in positive)
+
+
+def hybrid_communication_domains(ep_size: int, scale_up_domain: int) -> tuple[int, int]:
+    """Return active ranks per fabric domain and the number of such domains."""
+    if type(ep_size) is not int or type(scale_up_domain) is not int:  # exact-type check: bool is refused too
+        raise ContractError("hybrid communication topology must be integral")
+    if ep_size <= 0 or scale_up_domain <= 0:
+        raise ContractError("hybrid communication topology must be positive")
+    domain_ranks = min(ep_size, scale_up_domain)  # an EP group smaller than the domain occupies one domain
+    if ep_size % domain_ranks:  # can only be non-zero when ep_size > scale_up_domain
+        raise ContractError("hybrid EP size does not divide into communication domains")
+    return domain_ranks, ep_size // domain_ranks
+
+
+def _hybrid_kernel_keys_are_valid(value: Any) -> bool:
+    """Return True for a list of exactly three unique, sorted, well-formed kernel-key strings."""
+    if not isinstance(value, list) or len(value) != 3:
+        return False
+    # Check element types first: set() raises TypeError on unhashable items and
+    # sorted() on mixed types, and this validator must answer False instead.
+    if not all(
+        isinstance(key, str) and re.fullmatch(r"[A-Za-z0-9][A-Za-z0-9_.+-]{0,511}", key)
+        for key in value
+    ):
+        return False
+    return len(set(value)) == 3 and value == sorted(value)
+
+
+def _hybrid_jit_evidence_is_valid(value: Any, kernel_keys: Any) -> bool:  # value: one evidence record per kernel key
+    if not _hybrid_kernel_keys_are_valid(kernel_keys) or not isinstance(value, list):
+        return False
+    if len(value) != len(kernel_keys):
+        return False
+    rank_sets = []  # rank list of each record, compared across records at the end
+    for expected_key, item in zip(kernel_keys, value):  # records must follow kernel_keys order
+        if not isinstance(item, dict) or set(item) != {"kernel_key", "rank_artifacts"}:
+            return False
+        rank_artifacts = item["rank_artifacts"]
+        if item["kernel_key"] != expected_key or not isinstance(rank_artifacts, list):
+            return False
+        ranks = []
+        for artifact in rank_artifacts:
+            if not isinstance(artifact, dict) or set(artifact) != {"bytes", "rank", "sha256"}:  # exactly these keys
+                return False
+            rank, digest, size = artifact["rank"], artifact["sha256"], artifact["bytes"]
+            if (
+                type(rank) is not int
+                or rank < 0
+                or not isinstance(digest, str)
+                or not re.fullmatch(r"[0-9a-f]{64}", digest)  # 64 lowercase hex characters
+                or type(size) is not int
+                or size <= 0  # an empty artifact is invalid
+            ):
+                return False
+            ranks.append(rank)
+        if not ranks or ranks != list(range(len(ranks))):  # ranks must be exactly 0..n-1, in order
+            return False
+        rank_sets.append(ranks)
+    return all(ranks == rank_sets[0] for ranks in rank_sets)  # every kernel covers the same ranks
+
+
+def backend_provenance_issues(backend: str, provenance: dict[str, Any]) -> list[str]:
+    """List provenance fields that are unknown or fail the backend's checks.
+
+    Args:
+        backend: Backend name selecting which rule set below applies.
+        provenance: Provenance mapping recorded for the run.
+
+    Returns:
+        The sorted, de-duplicated names of fields whose string value is
+        "unknown" (case-insensitive, surrounding whitespace ignored) or that
+        fail a required-field, type, range or exact-value check. An empty list
+        means no issue was found.
+    """
+    unknown = [
+        field for field, value in provenance.items()
+        if isinstance(value, str) and value.strip().lower() == "unknown"
+    ]
+    unresolved = [
+        field for field in REQUIRED_BACKEND_PROVENANCE.get(backend, ())
+        if not _resolved_provenance_value(field, provenance.get(field))
+    ]
+    if backend == "deepep":
+        mode = provenance.get("mnnvl_comm")
+        allow = provenance.get("allow_mnnvl")
+        # Each accepted mnnvl_comm label implies exactly one allow_mnnvl value.
+        valid_modes = {
+            "not-requested": False,
+            "explicit-allow-mnnvl": True,
+        }
+        if type(allow) is not bool or valid_modes.get(mode) is not allow:
+            unresolved.append("mnnvl_comm")
+        if provenance.get("backend_lineage") != "deepep-v1":
+            unresolved.append("backend_lineage")
+    if backend in {"deepep", "uccl"}:
+        mode = provenance.get("mode")
+        num_nvl_bytes = provenance.get("num_nvl_bytes")
+        num_rdma_bytes = provenance.get("num_rdma_bytes")
+        if mode not in {"normal", "low-latency"}:
+            unresolved.append("mode")
+        # ``type(...) is not int`` also rejects bool for the byte counts.
+        if type(num_nvl_bytes) is not int or num_nvl_bytes < 0:
+            unresolved.append("num_nvl_bytes")
+        if type(num_rdma_bytes) is not int or num_rdma_bytes < 0:
+            unresolved.append("num_rdma_bytes")
+        if mode == "normal" and (type(num_nvl_bytes) is not int or num_nvl_bytes <= 0):
+            unresolved.append("num_nvl_bytes")
+        if mode == "low-latency":
+            if num_nvl_bytes != 0:
+                unresolved.append("num_nvl_bytes")
+            if type(num_rdma_bytes) is not int or num_rdma_bytes <= 0:
+                unresolved.append("num_rdma_bytes")
+            if (
+                type(provenance.get("num_max_tokens_per_rank")) is not int
+                or provenance["num_max_tokens_per_rank"] <= 0
+            ):
+                unresolved.append("num_max_tokens_per_rank")
+            if backend == "deepep" and (
+                type(provenance.get("num_qps_per_rank")) is not int
+                or provenance["num_qps_per_rank"] <= 0
+            ):
+                unresolved.append("num_qps_per_rank")
+    if backend == "deepep-v2":
+        for field in ("num_experts", "tuning_num_experts"):
+            if type(provenance.get(field)) is not int or provenance[field] <= 0:
+                unresolved.append(field)
+        if not _deepep_v2_jit_cubins_are_valid(provenance.get("jit_cubins")):
+            unresolved.append("jit_cubins")
+        if provenance.get("jit_random_seed") != "collectivex-deepep-v2-fa8a9b1":
+            unresolved.append("jit_random_seed")
+        unresolved.extend(
+            field for field, expected in DEEPEP_V2_V1_PROVENANCE.items()
+            if provenance.get(field) != expected
+        )
+        # Only two (allow_hybrid_mode, gin_enabled, communication_backend)
+        # combinations are accepted; any other flags all three fields.
+        policy = (
+            provenance.get("allow_hybrid_mode"),
+            provenance.get("gin_enabled"),
+            provenance.get("communication_backend"),
+        )
+        if policy not in {
+            (False, False, "nccl-device-lsa"),
+            (True, True, "nccl-gin"),
+        }:
+            unresolved.extend(
+                ("allow_hybrid_mode", "gin_enabled", "communication_backend")
+            )
+    content_roles = {
+        "deepep-v2": {"deepep-extension", "nccl", "nvshmem"},
+        "deepep-hybrid": {"deepep-extension", "deepep-hybrid-extension"},
+        "uccl": {
+            "uccl-distribution", "uccl-wrapper", "intervaltree-distribution",
+            "sortedcontainers-distribution", "cuda-runtime",
+        },
+    }.get(backend)
+    if content_roles is not None and not _content_evidence_is_valid(
+        provenance.get("loaded_libraries"), content_roles
+    ):
+        unresolved.append("loaded_libraries")
+    if backend in {"deepep-v2", "deepep-hybrid"}:
+        deepep_tree = provenance.get("deepep_tree")
+        # Require a real string: coercing with str() would let a non-string
+        # value such as a 40-digit integer satisfy the hex pattern.
+        if not isinstance(deepep_tree, str) or not re.fullmatch(
+            r"[0-9a-f]{40}", deepep_tree
+        ):
+            unresolved.append("deepep_tree")
+    if backend == "deepep-hybrid" and provenance.get("backend_lineage") != "deepep-hybrid":
+        unresolved.append("backend_lineage")
+    if backend == "deepep-hybrid":
+        if not _hybrid_realized_config_is_valid(provenance.get("realized_config")):
+            unresolved.append("realized_config")
+        if not _hybrid_kernel_keys_are_valid(provenance.get("jit_kernel_keys")):
+            unresolved.append("jit_kernel_keys")
+        if not _hybrid_jit_evidence_is_valid(
+            provenance.get("jit_shared_objects"), provenance.get("jit_kernel_keys")
+        ):
+            unresolved.append("jit_shared_objects")
+    if backend == "uccl" and provenance.get("backend_lineage") != "uccl":
+        unresolved.append("backend_lineage")
+    if backend == "uccl" and provenance.get("uccl_dependency_versions") != (
+        UCCL_DEPENDENCY_VERSIONS
+    ):
+        unresolved.append("uccl_dependency_versions")
+    if backend == "nccl-ep":
+        collective = provenance.get("collective_library")
+        if collective not in {"nccl", "rccl"}:
+            unresolved.append("collective_library")
+        # The lineage must name the same collective library.
+        if provenance.get("backend_lineage") != collective:
+            unresolved.append("backend_lineage")
+    if backend == "mori" and provenance.get("kernel_type") == "InterNodeV1":
+        expected = {
+            "block_num": 96,
+            "rdma_block_num": 64,
+            "dispatch_warps": 8,
+            "combine_warps": 8,
+            "num_qps": 1,
+            "use_external_inp_buf": True,
+            "gpus_per_node": 8,
+        }
+        unresolved.extend(
+            field for field, value in expected.items()
+            if provenance.get(field) != value
+        )
+    # Backend-independent type/range checks, applied only to fields present.
+    for field, minimum in (
+        ("num_nvl_bytes", 0), ("num_rdma_bytes", 0),
+        ("num_qps_per_rank", 1),
+    ):
+        if field in provenance and (
+            type(provenance[field]) is not int or provenance[field] < minimum
+        ):
+            unresolved.append(field)
+    if "rdma_block_num" in provenance and (
+        type(provenance["rdma_block_num"]) is not int
+        or provenance["rdma_block_num"] < 0
+    ):
+        unresolved.append("rdma_block_num")
+    if "use_external_inp_buf" in provenance and type(
+        provenance["use_external_inp_buf"]
+    ) is not bool:
+        unresolved.append("use_external_inp_buf")
+    # A field may be appended several times above; the set removes repeats.
+    return sorted(set(unknown + unresolved))
+
+
+def provenance_complete(
+    provenance: dict[str, Any], backend: str, git_run: dict[str, Any] | None,
+    *, allocation_stratum_sha256: Any, image_digest: Any, image_verified: Any,
+    squash_sha256: Any,
+) -> bool:
+    """Report whether a run carries complete, well-formed provenance.
+
+    True only when ``backend_provenance_issues`` finds nothing, the allocation
+    stratum and squash digests are 64 lowercase hex characters, the image
+    digest is ``sha256:`` plus 64 lowercase hex characters, ``image_verified``
+    is exactly ``True``, and ``git_run`` is a dict with a truthy value for
+    every field in ``GIT_RUN_FIELDS``.
+
+    Digests must be real strings. Coercing them with ``str()`` would let a
+    non-string value (for example a 64-digit integer) satisfy the hex pattern.
+    """
+    def matches(candidate: Any, pattern: str) -> bool:
+        return isinstance(candidate, str) and re.fullmatch(pattern, candidate) is not None
+
+    return (
+        not backend_provenance_issues(backend, provenance)
+        and matches(allocation_stratum_sha256, r"[0-9a-f]{64}")
+        and image_verified is True
+        and matches(image_digest, r"sha256:[0-9a-f]{64}")
+        and matches(squash_sha256, r"[0-9a-f]{64}")
+        and isinstance(git_run, dict)
+        and all(git_run.get(field) for field in GIT_RUN_FIELDS)
+    )
+
+
+def strict_load(path: str | os.PathLike[str]) -> Any:
+    """Load JSON while rejecting duplicate keys and non-finite constants.
+
+    The file is always decoded as UTF-8, so the result does not depend on the
+    host locale.
+
+    Raises:
+        ContractError: if the file cannot be read, is not valid UTF-8, is not
+            valid JSON, repeats a key within one object, or contains a
+            non-finite constant.
+    """
+    def pairs(items):
+        # json would silently keep the last value of a repeated key.
+        result = {}
+        for key, value in items:
+            if key in result:
+                raise ContractError(f"duplicate JSON key {key!r}")
+            result[key] = value
+        return result
+
+    def constant(value):
+        # json calls parse_constant for "NaN", "Infinity" and "-Infinity".
+        raise ContractError(f"non-finite JSON number {value}")
+
+    try:
+        with open(path, encoding="utf-8") as handle:
+            return json.load(handle, object_pairs_hook=pairs, parse_constant=constant)
+    except (OSError, json.JSONDecodeError, UnicodeDecodeError) as exc:
+        # UnicodeDecodeError is a ValueError but not a JSONDecodeError, so it
+        # has to be listed explicitly to be reported as a contract violation.
+        raise ContractError(f"invalid JSON {path}: {exc}") from exc
+
+
+def canonical_json_bytes(value: Any) -> bytes:
+    """Serialize ``value`` to canonical UTF-8 JSON for checksums and artifacts.
+
+    Keys are sorted, separators carry no whitespace and non-ASCII text is
+    emitted unescaped. Raises ``ContractError`` when the value contains a
+    non-finite float or cannot be serialized.
+    """
+    _finite_tree(value)
+    try:
+        text = json.dumps(
+            value,
+            sort_keys=True,
+            separators=(",", ":"),
+            ensure_ascii=False,
+            allow_nan=False,
+        )
+        return text.encode("utf-8")
+    except (TypeError, ValueError) as exc:
+        raise ContractError(f"value is not canonical JSON: {exc}") from exc
+
+
+def content_manifest_evidence(
+    *, role: str, name: str, files: Iterable[tuple[str, str | os.PathLike[str]]]
+) -> dict[str, str]:
+    """Fingerprint a labeled set of files without recording their host paths.
+
+    Each ``(label, path)`` pair contributes its label, byte size and SHA-256.
+    The returned ``sha256`` is the digest of the canonical JSON of those
+    entries sorted by label. Raises ``ContractError`` for an invalid role or
+    name, a bad or repeated label, a source that is not a file, or an empty
+    file set.
+    """
+    if re.fullmatch(r"[A-Za-z0-9][A-Za-z0-9_.+-]{0,127}", role) is None:
+        raise ContractError("content evidence role is invalid")
+    if re.fullmatch(r"[A-Za-z0-9][A-Za-z0-9_.+-]{0,159}", name) is None:
+        raise ContractError("content evidence name is invalid")
+
+    def label_rejected(label: str, seen: dict[str, Any]) -> bool:
+        # Labels must be relative, free of "..", unique and printable ASCII.
+        logical = PurePosixPath(label)
+        if not label:
+            return True
+        if logical.is_absolute():
+            return True
+        if ".." in logical.parts:
+            return True
+        if label in seen:
+            return True
+        return any(ord(character) < 0x20 or ord(character) > 0x7E for character in label)
+
+    entries: dict[str, dict[str, Any]] = {}
+    for label, raw_path in files:
+        if label_rejected(label, entries):
+            raise ContractError("content evidence label is invalid or duplicated")
+        source = Path(raw_path)
+        if not source.is_file():
+            raise ContractError("content evidence source is not a file")
+        hasher = hashlib.sha256()
+        total = 0
+        with source.open("rb") as handle:
+            # Stream in 1 MiB chunks so large files are never fully in memory.
+            while True:
+                chunk = handle.read(1024 * 1024)
+                if chunk == b"":
+                    break
+                hasher.update(chunk)
+                total += len(chunk)
+        entries[label] = {"bytes": total, "label": label, "sha256": hasher.hexdigest()}
+    if not entries:
+        raise ContractError("content evidence cannot be empty")
+    ordered = [entries[label] for label in sorted(entries)]
+    fingerprint = hashlib.sha256(canonical_json_bytes(ordered)).hexdigest()
+    return {"name": name, "role": role, "sha256": fingerprint}
+
+
+def _obj(value: Any, path: str) -> dict[str, Any]:
+    """Return ``value`` unchanged if it is a dict, else raise ``ContractError``."""
+    if isinstance(value, dict):
+        return value
+    raise ContractError(f"{path} must be an object")
+
+
+def _keys(value: Any, expected: set[str], path: str) -> dict[str, Any]:
+    """Return ``value`` if it is a dict whose key set is exactly ``expected``.
+
+    Raises ``ContractError`` naming the missing and extra fields otherwise.
+    """
+    mapping = _obj(value, path)
+    present = set(mapping)
+    if present == expected:
+        return mapping
+    raise ContractError(
+        f"{path} fields differ: missing={sorted(expected - present)}, "
+        f"extra={sorted(present - expected)}"
+    )
+
+
+def _text(value: Any, path: str, *, nullable: bool = False) -> str | None:
+    """Return ``value`` if it is a non-empty string.
+
+    ``None`` is passed through only when ``nullable`` is set; anything else
+    raises ``ContractError``.
+    """
+    if value is None and nullable:
+        return None
+    if isinstance(value, str) and value:
+        return value
+    raise ContractError(f"{path} must be a non-empty string")
+
+
+def _integer(value: Any, path: str, *, minimum: int = 0) -> int:
+    """Return ``value`` if it is an ``int`` (not ``bool``) of at least ``minimum``.
+
+    Raises ``ContractError`` otherwise.
+    """
+    if type(value) is int and value >= minimum:
+        return value
+    raise ContractError(f"{path} must be an integer >= {minimum}")
+
+
+def validate_conditioning_contract(value: Any, phase: str) -> dict[str, Any]:
+    """Check a conditioning record against the fixed v1 schedule for ``phase``.
+
+    The record must have exactly the keys ``contract``, ``ladder`` and
+    ``roundtrips_per_shape``, and each must equal the value frozen for the
+    phase. Returns the validated record; raises ``ContractError`` otherwise.
+    """
+    if phase not in V1_CONDITIONING_LADDERS:
+        raise ContractError("raw conditioning phase is invalid")
+    conditioning = _keys(
+        value,
+        {"contract", "ladder", "roundtrips_per_shape"},
+        "raw.measurement.conditioning",
+    )
+    ladder = conditioning["ladder"]
+    differs = (
+        conditioning["contract"] != identity.V1_CASE_PROFILE["conditioning_contract"]
+        or type(ladder) is not list
+        or not all(type(point) is int for point in ladder)
+        or ladder != list(V1_CONDITIONING_LADDERS[phase])
+    )
+    if not differs:
+        # Only reached when the earlier checks pass; _integer raises its own
+        # ContractError for a non-integer or non-positive count.
+        rounds = _integer(
+            conditioning["roundtrips_per_shape"],
+            "raw.measurement.conditioning.roundtrips_per_shape",
+            minimum=1,
+        )
+        differs = rounds != V1_CONDITIONING_ROUNDS_PER_SHAPE
+    if differs:
+        raise ContractError(f"raw {phase} conditioning contract differs")
+    return conditioning
+
+
+def _number(value: Any, path: str, *, minimum: float | None = None) -> float:
+    """Return ``value`` as a finite float, optionally bounded below.
+
+    Accepts ``int`` and ``float`` but not ``bool``.
+
+    Raises:
+        ContractError: if ``value`` is not a number, is NaN or infinite, is an
+            integer too large to represent as a float, or is below ``minimum``.
+    """
+    if isinstance(value, bool) or not isinstance(value, (int, float)):
+        raise ContractError(f"{path} must be finite")
+    try:
+        # JSON integers are unbounded; float() raises OverflowError past ~1e308.
+        result = float(value)
+    except OverflowError as exc:
+        raise ContractError(f"{path} must be finite") from exc
+    if not math.isfinite(result):
+        raise ContractError(f"{path} must be finite")
+    if minimum is not None and result < minimum:
+        raise ContractError(f"{path} must be >= {minimum}")
+    return result
+
+
+def _finite_tree(value: Any, path: str = "$") -> None:
+    """Walk nested lists and dicts, raising ``ContractError`` at the first NaN/inf.
+
+    ``path`` is the JSON-path-like location used in the error message.
+    """
+    if isinstance(value, float):
+        if not math.isfinite(value):
+            raise ContractError(f"{path} contains a non-finite number")
+        return
+    if isinstance(value, list):
+        children = ((f"{path}[{index}]", item) for index, item in enumerate(value))
+    elif isinstance(value, dict):
+        children = ((f"{path}.{key}", item) for key, item in value.items())
+    else:
+        return
+    for child_path, child in children:
+        _finite_tree(child, child_path)
+
+
+def _typed(value: Any, kind: str, path: str) -> str:
+    """Return ``value`` if ``identity.is_typed_id`` accepts it as a ``kind`` ID.
+
+    Raises ``ContractError`` otherwise.
+    """
+    if identity.is_typed_id(value, kind):
+        return value
+    raise ContractError(f"{path} is not a {kind} ID")
+
+
+def _sha256_json(value: Any) -> str:
+    """Return the SHA-256 hex digest of ``value`` as compact, key-sorted JSON."""
+    canonical = json.dumps(
+        value,
+        sort_keys=True,
+        separators=(",", ":"),
+        ensure_ascii=False,
+        allow_nan=False,
+    )
+    return hashlib.sha256(canonical.encode()).hexdigest()
+
+
+def _precision_byte_provenance(
+    axis: dict[str, Any], logical_copies: int, hidden: int
+) -> dict[str, Any]:
+    """Compute activation and scale byte counts for a communication precision.
+
+    Reads ``communication_format``, ``scale_dtype`` and ``scale_group_size``
+    from ``axis``. Raises ``ContractError`` for an unrecognised format or
+    scale dtype.
+    """
+    bits_by_format = {
+        "bf16": 16,
+        "fp8-e4m3fn": 8,
+        "fp8-e4m3fnuz": 8,
+        "logfmt10": 10,
+    }
+    wire_format = axis["communication_format"]
+    if wire_format not in bits_by_format:
+        raise ContractError("unknown communication precision format")
+    bytes_by_scale = {None: 0, "f32": 4, "implicit-logfmt10": 0}
+    scale_dtype = axis["scale_dtype"]
+    if scale_dtype not in bytes_by_scale:
+        raise ContractError("unknown communication scale dtype")
+    group_size = axis["scale_group_size"]
+    # Without a group size there are no scale groups at all.
+    groups = 0 if group_size is None else math.ceil(hidden / group_size)
+    # Each copy's payload is rounded up to whole bytes.
+    activation = logical_copies * math.ceil(hidden * bits_by_format[wire_format] / 8)
+    scales = logical_copies * groups * bytes_by_scale[scale_dtype]
+    return {
+        "accounting_contract": "activation-data-plus-scales-v1",
+        "activation_data_bytes": activation,
+        "scale_bytes": scales,
+        "total_logical_bytes": activation + scales,
+    }
+
+
+@lru_cache(maxsize=None)
+def _expected_eplb_calibration(
+    routing: str,
+    hidden: int,
+    topk: int,
+    logical_experts: int,
+    physical_experts: int,
+    ep_size: int,
+    seed: int,
+    reference_tokens_per_rank: int,
+) -> tuple[dict[str, Any], dict[str, Any]]:
+    """Rebuild the EPLB plan and calibration descriptor for one configuration.
+
+    Tallies per-expert load over the canonical calibration trace, feeds it to
+    ``eplb_contract.build_plan`` and returns ``(plan, descriptor)``. Results
+    are memoised without a size bound, so the returned dicts are shared
+    between callers.
+    """
+    calibration = workload_contract.canonical_eplb_calibration_member(
+        routing,
+        hidden,
+        topk,
+        logical_experts,
+        ep_size,
+        reference_tokens_per_rank,
+        seed,
+    )
+    member, checksums, indices, _ = calibration
+    # One count per (token, selected expert) pair in the calibration trace.
+    load = [0] * logical_experts
+    for expert in (choice for row in indices for choice in row):
+        load[expert] += 1
+    plan = eplb_contract.build_plan(load, physical_experts, ep_size)
+    descriptor = {
+        "calibration_token_offset": workload_contract.EPLB_CALIBRATION_TOKEN_OFFSET,
+        "calibration_trace_sha256": checksums["trace"],
+        "calibration_window": workload_contract.EPLB_CALIBRATION_WINDOW,
+        "calibration_workload_id": member,
+    }
+    return plan, descriptor
+
+
+@lru_cache(maxsize=None)
+def _expected_eplb_plan(
+    routing: str,
+    topk: int,
+    logical_experts: int,
+    physical_experts: int,
+    ep_size: int,
+    seed: int,
+    reference_tokens_per_rank: int,
+    hidden: int = 7168,
+) -> dict[str, Any]:
+    """Return only the plan half of ``_expected_eplb_calibration``.
+
+    Kept as a wrapper whose signature takes ``hidden`` last, with a default.
+    """
+    return _expected_eplb_calibration(
+        routing,
+        hidden,
+        topk,
+        logical_experts,
+        physical_experts,
+        ep_size,
+        seed,
+        reference_tokens_per_rank,
+    )[0]
+
+
+@lru_cache(maxsize=None)
+def _expected_canonical_trace(
+    routing: str,
+    hidden: int,
+    topk: int,
+    logical_experts: int,
+    physical_experts: int,
+    ep_size: int,
+    tokens_per_rank: int,
+    seed: int,
+    eplb_enabled: bool,
+    reference_tokens_per_rank: int,
+) -> tuple[str, dict[str, str], str, list[list[int]], list[list[float]]]:
+    """Regenerate the canonical trace for one scheduled token count.
+
+    Returns ``(member, checksums, routing_hash, indices, weights)``. When
+    ``eplb_enabled`` is set, the expert indices are remapped through the
+    expected EPLB plan before the routing hash is taken; ``member`` and
+    ``checksums`` are returned as produced by ``canonical_member``. Results
+    are memoised without a size bound.
+    """
+    member, checksums, indices, weights = workload_contract.canonical_member(
+        routing,
+        hidden,
+        topk,
+        logical_experts,
+        ep_size,
+        tokens_per_rank,
+        seed,
+    )
+    if eplb_enabled:
+        indices = eplb_contract.remap_rows(
+            indices,
+            _expected_eplb_plan(
+                routing,
+                topk,
+                logical_experts,
+                physical_experts,
+                ep_size,
+                seed,
+                reference_tokens_per_rank,
+                hidden,
+            ),
+        )
+    trace_hash = workload_contract.trace_checksums(indices, weights)["trace"]
+    return member, checksums, trace_hash, indices, weights
+
+
+def _coefficient_of_variation(values: list[int]) -> float:
+    """Return the population standard deviation over the mean, or 0.0 for a zero mean."""
+    count = len(values)
+    mean = sum(values) / count
+    if mean == 0:
+        return 0.0
+    squared_deviation = sum((value - mean) ** 2 for value in values)
+    return (squared_deviation / count) ** 0.5 / mean
+
+
+def _expected_routing_summary(
+    indices: list[list[int]],
+    weights: list[list[float]],
+    *,
+    physical_experts: int,
+    ep_size: int,
+    tokens_per_rank: int,
+    gpus_per_node: int,
+    scale_up_domain: int,
+) -> dict[str, Any]:
+    """Recompute every published routing/load statistic without torch.
+
+    Args:
+        indices: One row of selected expert indices per token.
+        weights: Per-token routing weights; only used for the trace hash.
+        physical_experts: Number of expert slots the loads are tallied over.
+        ep_size: Number of ranks.
+        tokens_per_rank: Tokens per source rank; row ``t`` is attributed to
+            source rank ``t // tokens_per_rank``.
+        gpus_per_node: Ranks per node, used for the same-node fraction.
+        scale_up_domain: Ranks per scale-up domain, used for the same-domain
+            fraction.
+
+    Returns:
+        A dict of expert-load, per-rank load, fan-out, locality and
+        source-token statistics plus the trace hash.
+    """
+    # An expert's owning rank is ``expert // experts_per_rank``.
+    experts_per_rank = physical_experts // ep_size
+    expert_load = [0] * physical_experts
+    assignment_load = [0] * ep_size
+    payload_load = [0] * ep_size
+    fanouts: list[int] = []
+    local = same_node = same_domain = copies = 0
+    for token, row in enumerate(indices):
+        # Distinct destination ranks: a token is counted once per rank even
+        # when several of its experts live on that rank.
+        destinations = {expert // experts_per_rank for expert in row}
+        source = token // tokens_per_rank
+        fanouts.append(len(destinations))
+        for expert in row:
+            # Assignment load counts every (token, expert) pair.
+            expert_load[expert] += 1
+            assignment_load[expert // experts_per_rank] += 1
+        for destination in destinations:
+            # Payload load counts one copy per distinct destination rank.
+            payload_load[destination] += 1
+            copies += 1
+            # The comparisons are bools and add 0 or 1 to the counters.
+            local += destination == source
+            same_node += destination // gpus_per_node == source // gpus_per_node
+            same_domain += destination // scale_up_domain == source // scale_up_domain
+    # Entry i is the number of tokens that reach exactly i + 1 ranks.
+    fanout_histogram = [fanouts.count(value) for value in range(1, ep_size + 1)]
+    expert_mean = sum(expert_load) / len(expert_load)
+    return {
+        "empty_expert_count": expert_load.count(0),
+        "empty_rank_count": payload_load.count(0),
+        "expert_assignment_rank_cv": _coefficient_of_variation(assignment_load),
+        "expert_assignments_per_rank": assignment_load,
+        "expert_load_cv": _coefficient_of_variation(expert_load),
+        "expert_load_max": max(expert_load),
+        "expert_load_mean": expert_mean,
+        "expert_load_min": min(expert_load),
+        "fanout_histogram": fanout_histogram,
+        "fanout_max": max(fanouts),
+        "fanout_mean": sum(fanouts) / len(fanouts),
+        "fanout_min": min(fanouts),
+        "hash": workload_contract.trace_checksums(indices, weights)["trace"],
+        "hotspot_ratio": max(expert_load) / expert_mean if expert_mean else 0.0,
+        "locality": {
+            "placement": "packed",
+            "local_rank_fraction": local / copies,
+            "same_node_fraction": same_node / copies,
+            "same_scaleup_domain_fraction": same_domain / copies,
+            "cross_node_fraction": 1 - same_node / copies,
+            "cross_domain_fraction": 1 - same_domain / copies,
+            "gpus_per_node": gpus_per_node,
+            "scale_up_domain": scale_up_domain,
+            "copies": copies,
+        },
+        "payload_copies_per_rank": payload_load,
+        "payload_rank_cv": _coefficient_of_variation(payload_load),
+        "routed_copies": copies,
+        # Source-token statistics are derived from the arguments alone: every
+        # rank is reported as contributing exactly tokens_per_rank tokens.
+        "source_token_stats": {
+            "min": tokens_per_rank,
+            "mean": float(tokens_per_rank),
+            "max": tokens_per_rank,
+            "cv": 0.0,
+            "empty_ranks": 0,
+            "total": tokens_per_rank * ep_size,
+            "ranks": ep_size,
+        },
+    }
+
+
+def _expected_histogram(samples: list[float], bins: int = 40) -> dict[str, Any]:
+    """Bucket ``samples`` into ``bins`` equal-width bins between their min and max.
+
+    When all samples are equal, a single count is returned and ``min``/``max``
+    are left unrounded; otherwise both are rounded to three decimals.
+    """
+    total = len(samples)
+    low, high = min(samples), max(samples)
+    if high <= low:
+        return {"n": total, "min": low, "max": high, "bins": bins, "counts": [total]}
+    span = high - low
+    last = bins - 1
+    counts = [0] * bins
+    for sample in samples:
+        slot = int((sample - low) / span * bins)
+        # The maximum sample would land one past the end; clamp it into the last bin.
+        counts[min(last, slot)] += 1
+    return {
+        "n": total,
+        "min": round(low, 3),
+        "max": round(high, 3),
+        "bins": bins,
+        "counts": counts,
+    }
+
+
+def _expected_anomalies(
+    tokens: int, components: dict[str, Any]
+) -> list[dict[str, Any]]:
+    """Recompute the latency anomaly records for one token count.
+
+    Flags a round trip whose p99 exceeds three times the isolated-sum p99, and
+    a round trip whose p50 is below 95% of the largest component p50.
+    """
+    percentiles = {
+        name: components[name]["percentiles_us"]
+        for name in ("dispatch", "stage", "combine", "roundtrip", "isolated_sum")
+    }
+    dispatch, combine = percentiles["dispatch"], percentiles["combine"]
+    stage, roundtrip = percentiles["stage"], percentiles["roundtrip"]
+    isolated = percentiles["isolated_sum"]
+    found: list[dict[str, Any]] = []
+
+    if isolated is not None and roundtrip["p99"] > 3.0 * isolated["p99"]:
+        found.append({
+            "type": "roundtrip_gt_isolated_sum",
+            "T": tokens,
+            "roundtrip_p99": round(roundtrip["p99"], 2),
+            "isolated_sum_p99": round(isolated["p99"], 2),
+            "ratio": round(roundtrip["p99"] / isolated["p99"], 2),
+            "threshold": 3.0,
+        })
+
+    # The floor is only defined when both dispatch and combine percentiles exist.
+    if dispatch and combine:
+        stage_p50 = 0.0 if stage is None else stage["p50"]
+        floor = max(dispatch["p50"], combine["p50"], stage_p50)
+    else:
+        floor = None
+    if floor and roundtrip["p50"] < 0.95 * floor:
+        found.append({
+            "type": "roundtrip_lt_component_floor",
+            "T": tokens,
+            "roundtrip_p50": round(roundtrip["p50"], 2),
+            "component_floor_p50": round(floor, 2),
+        })
+    return found
+
+
+def _validate_canonical_workload(
+    workload: dict[str, Any],
+    scheduled_case: dict[str, Any],
+    rows: list[dict[str, Any]],
+    eplb: dict[str, Any],
+) -> None:
+    """Bind every canonical member and measured routing hash to its scheduled token row.
+
+    Args:
+        workload: Reported workload record; its ``members``,
+            ``manifest_checksums`` and ``workload_id`` are checked.
+        scheduled_case: Scheduled case supplying ``routing``, ``hidden``,
+            ``topk``, ``experts`` and ``ep``.
+        rows: Measured rows, each with ``tokens_per_rank`` and a
+            ``routing.hash``.
+        eplb: EPLB record supplying ``enabled``, ``num_physical_experts`` and,
+            when enabled, ``mapping_hash``.
+
+    Raises:
+        ContractError: if the EPLB mapping hash, any row's routing hash, the
+            member set or checksums, or the composite workload ID differs
+            from the recomputed value.
+    """
+    profile = identity.profile_for_case(scheduled_case)
+    if eplb["enabled"]:
+        # The reported mapping must hash to the plan rebuilt from the
+        # calibration trace.
+        plan = _expected_eplb_plan(
+            scheduled_case["routing"],
+            scheduled_case["topk"],
+            scheduled_case["experts"],
+            eplb["num_physical_experts"],
+            scheduled_case["ep"],
+            profile["seed"],
+            profile["eplb_reference_tokens_per_rank"],
+            scheduled_case["hidden"],
+        )
+        if eplb["mapping_hash"] != eplb_contract.mapping_hash(plan):
+            raise ContractError("raw EPLB mapping differs from the frozen canonical plan")
+
+    # Maps each regenerated member ID to its checksums.
+    expected: dict[str, dict[str, str]] = {}
+    for index, row in enumerate(rows):
+        member, checksums, routing_hash, _, _ = _expected_canonical_trace(
+            scheduled_case["routing"],
+            scheduled_case["hidden"],
+            scheduled_case["topk"],
+            scheduled_case["experts"],
+            eplb["num_physical_experts"],
+            scheduled_case["ep"],
+            row["tokens_per_rank"],
+            profile["seed"],
+            eplb["enabled"],
+            profile["eplb_reference_tokens_per_rank"],
+        )
+        if row["routing"]["hash"] != routing_hash:
+            raise ContractError(
+                f"raw.measurement.rows[{index}].routing.hash differs from its canonical member"
+            )
+        expected[member] = checksums
+    # A length mismatch means two rows regenerated the same member ID.
+    if (
+        len(expected) != len(rows)
+        or workload["members"] != sorted(expected)
+        or workload["manifest_checksums"] != expected
+    ):
+        raise ContractError("raw canonical member set/checksums differ from scheduled rows")
+    # The composite ID is derived from the members in sorted order.
+    expected_workload_id = identity.workload_id({
+        "members": [
+            {"checksums": expected[member], "workload_id": member}
+            for member in sorted(expected)
+        ]
+    })
+    if workload["workload_id"] != expected_workload_id:
+        raise ContractError("raw composite workload identity differs from scheduled rows")
+
+
+def _nearest_rank(samples: list[float], q: int) -> float:
+    """Return the nearest-rank ``q``-th percentile of ``samples``.
+
+    The rank ``ceil(q / 100 * n)`` is clamped into the valid index range.
+    """
+    ranked = sorted(samples)
+    last = len(ranked) - 1
+    position = math.ceil(q / 100 * len(ranked)) - 1
+    return ranked[max(0, min(last, position))]
+
+
+def _close(observed: Any, expected: float, path: str, tolerance: float = 1e-6) -> None:
+    """Require ``observed`` to be a finite number within ``tolerance`` of ``expected``.
+
+    The tolerance is applied both relatively and absolutely. Raises
+    ``ContractError`` when the values differ.
+    """
+    measured = _number(observed, path)
+    if math.isclose(measured, expected, rel_tol=tolerance, abs_tol=tolerance):
+        return
+    raise ContractError(f"{path}={measured} differs from recomputed {expected}")
+
+
+def _equivalent(
+    observed: Any, expected: Any, path: str, *, tolerance: float = 1e-6
+) -> None:
+    """Require ``observed`` to match a recomputed JSON subtree.
+
+    Dicts must have identical key sets, lists identical lengths, and floats
+    may differ by ``tolerance``. Any other value must have the same type and
+    be equal. Raises ``ContractError`` at the first difference.
+    """
+    if isinstance(expected, dict):
+        mapping = _keys(observed, set(expected), path)
+        for name in expected:
+            _equivalent(mapping[name], expected[name], f"{path}.{name}", tolerance=tolerance)
+    elif isinstance(expected, list):
+        same_shape = isinstance(observed, list) and len(observed) == len(expected)
+        if not same_shape:
+            raise ContractError(f"{path} differs from recomputed evidence")
+        for position, (seen, wanted) in enumerate(zip(observed, expected)):
+            _equivalent(seen, wanted, f"{path}[{position}]", tolerance=tolerance)
+    elif isinstance(expected, float):
+        _close(observed, expected, path, tolerance)
+    elif type(observed) is not type(expected) or observed != expected:
+        # Exact type match keeps bool/int and int/float confusion out.
+        raise ContractError(f"{path} differs from recomputed evidence")
+
+
+def _schema_equal(left: Any, right: Any) -> bool:
+    """Compare two JSON values, treating booleans as distinct from numbers.
+
+    Dicts and lists are compared element by element; every other pair falls
+    back to ``==``.
+    """
+    if any(isinstance(side, bool) for side in (left, right)):
+        return type(left) is type(right) and left == right
+    if isinstance(left, dict) and isinstance(right, dict):
+        if left.keys() != right.keys():
+            return False
+        return all(_schema_equal(item, right[key]) for key, item in left.items())
+    if isinstance(left, list) and isinstance(right, list):
+        if len(left) != len(right):
+            return False
+        return all(_schema_equal(left[index], right[index]) for index in range(len(left)))
+    return left == right
+
+
+def _schema_ref(root: dict[str, Any], reference: str) -> dict[str, Any]:
+    """Resolve a local ``#/...`` JSON-pointer reference inside ``root``.
+
+    Only object members are traversed, and the target must be an object.
+    Raises ``ContractError`` for a non-local, broken or non-object reference.
+    """
+    if not reference.startswith("#/"):
+        raise ContractError("native artifact schema contains a non-local reference")
+    node: Any = root
+    for token in reference[len("#/"):].split("/"):
+        # JSON-pointer unescaping: "~1" first, then "~0".
+        key = token.replace("~1", "/").replace("~0", "~")
+        if not (isinstance(node, dict) and key in node):
+            raise ContractError("native artifact schema contains a broken reference")
+        node = node[key]
+    if isinstance(node, dict):
+        return node
+    raise ContractError("native artifact schema reference is not an object")
+
+
+def _schema_type_matches(value: Any, expected: str) -> bool:
+    """Report whether ``value`` has the JSON Schema primitive type ``expected``.
+
+    Booleans never count as numbers. ``"number"`` accepts any ``int`` and any
+    finite ``float``; ``"integer"`` accepts any ``int`` and any finite
+    ``float`` with no fractional part.
+
+    Raises:
+        ContractError: if ``expected`` is not one of the supported type names.
+    """
+    if expected == "null":
+        return value is None
+    if expected == "boolean":
+        return type(value) is bool
+    if expected == "object":
+        return isinstance(value, dict)
+    if expected == "array":
+        return isinstance(value, list)
+    if expected == "string":
+        return isinstance(value, str)
+    if expected in ("number", "integer"):
+        if isinstance(value, bool):
+            return False
+        if isinstance(value, int):
+            # Ints are finite and integral by construction. Routing them
+            # through math.isfinite()/float() raises OverflowError once they
+            # exceed float range, which JSON integers can.
+            return True
+        if not isinstance(value, float) or not math.isfinite(value):
+            return False
+        return expected == "number" or value.is_integer()
+    raise ContractError(f"native artifact schema uses unsupported type {expected!r}")
+
+
+def _validate_schema_value(
+    value: Any, schema: dict[str, Any], root: dict[str, Any], path: str
+) -> None:
+    """Validate the bounded JSON Schema subset used by native artifact contracts.
+
+    Supported keywords: ``$ref`` (local only), ``oneOf``, ``type``, ``const``,
+    ``enum``, ``required``, ``properties``, ``additionalProperties``,
+    ``propertyNames``, ``minItems``, ``maxItems``, ``uniqueItems``, ``items``,
+    ``minLength``, ``maxLength``, ``pattern``, ``format: date-time``,
+    ``minimum`` and ``maximum``. Other keywords are ignored.
+
+    Args:
+        value: The JSON value under validation.
+        schema: The schema node that applies to ``value``.
+        root: The schema document used to resolve ``$ref``.
+        path: Location of ``value``, used in error messages.
+
+    Raises:
+        ContractError: at the first constraint ``value`` violates.
+    """
+    # A node containing $ref is replaced by its target; sibling keywords are
+    # not evaluated.
+    if "$ref" in schema:
+        _validate_schema_value(value, _schema_ref(root, schema["$ref"]), root, path)
+        return
+    # Likewise, oneOf is evaluated on its own: exactly one alternative must
+    # accept the value.
+    if "oneOf" in schema:
+        matches = 0
+        for candidate in schema["oneOf"]:
+            try:
+                _validate_schema_value(value, candidate, root, path)
+            except ContractError:
+                continue
+            matches += 1
+        if matches != 1:
+            raise ContractError(f"{path} must match exactly one native schema alternative")
+        return
+    expected_type = schema.get("type")
+    if expected_type is not None and not _schema_type_matches(value, expected_type):
+        raise ContractError(f"{path} is not a schema {expected_type}")
+    if "const" in schema and not _schema_equal(value, schema["const"]):
+        raise ContractError(f"{path} differs from its schema constant")
+    if "enum" in schema and not any(_schema_equal(value, item) for item in schema["enum"]):
+        raise ContractError(f"{path} is outside its schema enum")
+
+    if isinstance(value, dict):
+        required = set(schema.get("required", ()))
+        properties = schema.get("properties", {})
+        missing = required - set(value)
+        if missing:
+            raise ContractError(f"{path} lacks schema fields {sorted(missing)}")
+        additional = schema.get("additionalProperties", True)
+        extra = set(value) - set(properties)
+        if additional is False and extra:
+            raise ContractError(f"{path} has extra schema fields {sorted(extra)}")
+        for key, item in value.items():
+            if key in properties:
+                _validate_schema_value(item, properties[key], root, f"{path}.{key}")
+            elif isinstance(additional, dict):
+                # additionalProperties given as a schema applies to every
+                # key that is not declared in properties.
+                _validate_schema_value(item, additional, root, f"{path}.{key}")
+        property_names = schema.get("propertyNames")
+        if property_names is not None:
+            for key in value:
+                _validate_schema_value(key, property_names, root, f"{path}.<key>")
+
+    if isinstance(value, list):
+        if len(value) < schema.get("minItems", 0):
+            raise ContractError(f"{path} has too few schema items")
+        maximum = schema.get("maxItems")
+        if maximum is not None and len(value) > maximum:
+            raise ContractError(f"{path} has too many schema items")
+        if schema.get("uniqueItems") and any(
+            _schema_equal(item, prior)
+            for index, item in enumerate(value)
+            for prior in value[:index]
+        ):
+            raise ContractError(f"{path} schema items are not unique")
+        if "items" in schema:
+            for index, item in enumerate(value):
+                _validate_schema_value(item, schema["items"], root, f"{path}[{index}]")
+
+    if isinstance(value, str):
+        if len(value) < schema.get("minLength", 0):
+            raise ContractError(f"{path} is shorter than its schema minimum")
+        maximum = schema.get("maxLength")
+        if maximum is not None and len(value) > maximum:
+            raise ContractError(f"{path} is longer than its schema maximum")
+        # re.search: the pattern may match anywhere unless it is anchored.
+        if "pattern" in schema and re.search(schema["pattern"], value) is None:
+            raise ContractError(f"{path} does not match its schema pattern")
+        if schema.get("format") == "date-time":
+            try:
+                # A "Z" suffix is rewritten to an explicit UTC offset before parsing.
+                parsed = dt.datetime.fromisoformat(value.replace("Z", "+00:00"))
+            except ValueError as exc:
+                raise ContractError(f"{path} is not a schema date-time") from exc
+            if parsed.tzinfo is None:
+                raise ContractError(f"{path} schema date-time lacks a timezone")
+
+    # Bounds apply to ints and finite floats, never to bools. Ints skip
+    # math.isfinite(), which raises OverflowError for integers beyond float
+    # range; Python compares such ints with the bounds exactly.
+    if not isinstance(value, bool) and (
+        isinstance(value, int)
+        or (isinstance(value, float) and math.isfinite(value))
+    ):
+        if "minimum" in schema and value < schema["minimum"]:
+            raise ContractError(f"{path} is below its schema minimum")
+        if "maximum" in schema and value > schema["maximum"]:
+            raise ContractError(f"{path} is above its schema maximum")
+
+
+def _validate_native_schema(name: str, value: Any) -> None:
+    schema = _SCHEMA_CACHE.get(name)
+    if schema is None:
+        loaded = strict_load(SCHEMA_DIR / name)
+        if not isinstance(loaded, dict):
+            raise ContractError(f"native artifact schema {name} is not an object")
+        schema = loaded
+        _SCHEMA_CACHE[name] = schema
+    _validate_schema_value(value, schema, schema, "$")
+
+
+def validate_samples_document(document: Any) -> dict[str, Any]:
+    _validate_native_schema("samples-v1.schema.json", document)
+    doc = _keys(
+        document,
+        {"allocation_id", "attempt_id", "case_id", "format", "points",
+         "qualification_index", "sampling", "schema_version", "series_id"},
+        "samples",
+    )
+    if doc["format"] != SAMPLES_FORMAT or doc["schema_version"] != 1:
+        raise ContractError("samples format/schema differs from v1")
+    for field, kind in (
+        ("allocation_id", "allocation"), ("attempt_id", "attempt"),
+        ("case_id", "case"), ("series_id", "series"),
+    ):
+        _typed(doc[field], kind, f"samples.{field}")
+    qualification_index = _integer(
+        doc["qualification_index"], "samples.qualification_index", minimum=1
+    )
+    if qualification_index > 3:
+        raise ContractError("samples.qualification_index must be in 1..3")
+    sampling = _keys(
+        doc["sampling"], {"iterations_per_trial", "reduction", "trials"}, "samples.sampling"
+    )
+    if (
+        _integer(sampling["iterations_per_trial"], "samples.sampling.iterations_per_trial", minimum=1) != 8
+        or _integer(sampling["trials"], "samples.sampling.trials", minimum=1) != 64
+        or sampling["reduction"] != identity.V1_CASE_PROFILE["rank_reduction"]
+    ):
+        raise ContractError("samples must use the fixed 8x64 cross-rank-max contract")
+    points = doc["points"]
+    if not isinstance(points, list) or not points:
+        raise ContractError("samples.points must be non-empty")
+    seen = set()
+    for index, point_value in enumerate(points):
+        path = f"samples.points[{index}]"
+        point = _keys(
+            point_value,
+            {"components", "evidence_id", "point_id", "sample_sha256", "tokens_per_rank"},
+            path,
+        )
+        tokens = _integer(point["tokens_per_rank"], f"{path}.tokens_per_rank", minimum=1)
+        if tokens in seen:
+            raise ContractError(f"duplicate sample token point {tokens}")
+        seen.add(tokens)
+        _typed(point["point_id"], "point", f"{path}.point_id")
+        _typed(point["evidence_id"], "evidence", f"{path}.evidence_id")
+        components = _keys(
+            point["components"], {"combine", "dispatch", "roundtrip", "stage"},
+            f"{path}.components",
+        )
+        for name, component_value in components.items():
+            component = _keys(
+                component_value, {"availability", "sample_count", "trials"},
+                f"{path}.components.{name}",
+            )
+            availability = component["availability"]
+            count = _integer(component["sample_count"], f"{path}.components.{name}.sample_count")
+            trials = component["trials"]
+            if availability == "unavailable":
+                if count != 0 or trials is not None or name == "roundtrip":
+                    raise ContractError(f"{path}.components.{name} has invalid unavailability")
+                continue
+            if availability != "measured" or not isinstance(trials, list) or len(trials) != 64:
+                raise ContractError(f"{path}.components.{name} must contain 64 measured trials")
+            if any(not isinstance(trial, list) or len(trial) != 8 for trial in trials):
+                raise ContractError(f"{path}.components.{name} trials must each contain 8 samples")
+            flattened = [
+                _number(sample, f"{path}.components.{name}.trials", minimum=0.0)
+                for trial in trials for sample in trial
+            ]
+            if count != 512 or len(flattened) != 512:
+                raise ContractError(f"{path}.components.{name} must contain 512 samples")
+        sample_base = {"components": components, "tokens_per_rank": tokens}
+        if point["sample_sha256"] != _sha256_json(sample_base):
+            raise ContractError(f"{path}.sample_sha256 differs")
+    return doc
+
+
+def _validate_component(
+    component_value: Any,
+    sample_component: dict[str, Any] | None,
+    path: str,
+    *,
+    derived: bool = False,
+) -> None:
+    component = _keys(
+        component_value, {"availability", "origin", "percentiles_us", "sample_count"}, path
+    )
+    availability = component["availability"]
+    if availability == "unavailable":
+        if component != {
+            "availability": "unavailable", "origin": None,
+            "percentiles_us": None, "sample_count": 0,
+        }:
+            raise ContractError(f"{path} has invalid unavailable representation")
+        if sample_component and sample_component["availability"] != "unavailable":
+            raise ContractError(f"{path} disagrees with samples")
+        return
+    expected_availability = "derived" if derived else "measured"
+    expected_origin = "derived-percentile-sum" if derived else "measured"
+    if availability != expected_availability or component["origin"] != expected_origin:
+        raise ContractError(f"{path} has invalid availability/origin")
+    percentiles = _keys(component["percentiles_us"], set(PERCENTILES), f"{path}.percentiles_us")
+    if derived:
+        if component["sample_count"] != 0:
+            raise ContractError(f"{path}.sample_count must be zero for a derived value")
+        return
+    if sample_component is None or sample_component["availability"] != "measured":
+        raise ContractError(f"{path} lacks measured sample evidence")
+    flattened = [sample for trial in sample_component["trials"] for sample in trial]
+    if component["sample_count"] != len(flattened):
+        raise ContractError(f"{path}.sample_count differs from exact samples")
+    for name, percentile in zip(PERCENTILES, (50, 90, 95, 99), strict=True):
+        _close(percentiles[name], _nearest_rank(flattened, percentile), f"{path}.{name}")
+
+
+def _validate_oracle(
+    value: Any, path: str, profile: dict[str, Any] | None = None
+) -> dict[str, Any]:
+    profile = profile or identity.V1_NORMAL_CASE_PROFILE
+    oracle = _keys(
+        value,
+        {"atol", "checks", "combine_weight_semantics", "contract", "dispatch_sha256",
+         "max_absolute_error", "max_elementwise_relative_error", "max_relative_error",
+         "max_weight_error", "order_sha256", "ordering_contract", "passed", "receive_count",
+         "rtol"},
+        path,
+    )
+    if oracle["contract"] != profile["oracle_contract"]:
+        raise ContractError(f"{path}.contract differs")
+    checks = _keys(
+        oracle["checks"],
+        {"combine_values", "counts", "metadata", "multiplicity", "payload", "source_set",
+         "weights"},
+        f"{path}.checks",
+    )
+    if any(type(value) is not bool for value in checks.values()):
+        raise ContractError(f"{path}.checks must be boolean")
+    if type(oracle["passed"]) is not bool:
+        raise ContractError(f"{path}.passed must be boolean")
+    _integer(oracle["receive_count"], f"{path}.receive_count")
+    _text(oracle["ordering_contract"], f"{path}.ordering_contract")
+    expected_weight_semantics = (
+        "gate-weighted-sum"
+        if profile["combine_semantics"] == "gate-weighted"
+        else "unweighted-rank-sum"
+    )
+    if oracle["combine_weight_semantics"] != expected_weight_semantics:
+        raise ContractError(f"{path}.combine_weight_semantics differs from v1")
+    _close(oracle["rtol"], 5e-2, f"{path}.rtol")
+    _close(oracle["atol"], 2e-2, f"{path}.atol")
+    for field in ("dispatch_sha256", "order_sha256"):
+        digest = oracle[field]
+        if digest is not None and (
+            not isinstance(digest, str) or len(digest) != 64
+            or any(character not in "0123456789abcdef" for character in digest)
+        ):
+            raise ContractError(f"{path}.{field} is not a SHA-256 digest")
+    for field in (
+        "max_absolute_error", "max_elementwise_relative_error", "max_relative_error",
+        "max_weight_error",
+    ):
+        if oracle[field] is not None:
+            _number(oracle[field], f"{path}.{field}", minimum=0.0)
+    expected_pass = (
+        all(checks.values())
+        and oracle["max_relative_error"] is not None
+        and oracle["max_relative_error"] < 5e-2
+    )
+    if oracle["passed"] != expected_pass:
+        raise ContractError(f"{path}.passed differs from its evidence")
+    return oracle
+
+
+def _validate_precision_evidence(
+    value: Any, profile_id: str, communication_precision: dict[str, Any], path: str
+) -> dict[str, Any]:
+    precision = _keys(value, {"combine", "dispatch", "passed", "profile_id"}, path)
+    if precision["profile_id"] != profile_id or type(precision["passed"]) is not bool:
+        raise ContractError(f"{path} profile/outcome differs")
+    for direction in ("dispatch", "combine"):
+        axis_path = f"{path}.{direction}"
+        axis = _keys(
+            precision[direction],
+            {"dequantized_semantics", "encoded_payload_valid", "max_abs_error",
+             "max_rel_error", "passed", "saturation_count", "saturation_rate",
+             "scales_finite", "scales_positive"},
+            axis_path,
+        )
+        for field in ("dequantized_semantics", "encoded_payload_valid", "passed"):
+            if type(axis[field]) is not bool:
+                raise ContractError(f"{axis_path}.{field} must be boolean")
+        expects_scales = communication_precision[direction]["scale_dtype"] is not None
+        for field in ("scales_finite", "scales_positive"):
+            if expects_scales:
+                if type(axis[field]) is not bool:
+                    raise ContractError(f"{axis_path}.{field} must be boolean")
+            elif axis[field] is not None:
+                raise ContractError(f"{axis_path}.{field} must be null without scales")
+        saturation_count = _integer(
+            axis["saturation_count"], f"{axis_path}.saturation_count"
+        )
+        saturation_rate = _number(
+            axis["saturation_rate"], f"{axis_path}.saturation_rate", minimum=0.0
+        )
+        if saturation_rate > 1.0:
+            raise ContractError(f"{axis_path}.saturation_rate must be <= 1")
+        _number(axis["max_abs_error"], f"{axis_path}.max_abs_error", minimum=0.0)
+        _number(axis["max_rel_error"], f"{axis_path}.max_rel_error", minimum=0.0)
+        expected_pass = (
+            axis["encoded_payload_valid"]
+            and axis["dequantized_semantics"]
+            and (not expects_scales or (axis["scales_finite"] and axis["scales_positive"]))
+            and saturation_count >= 0
+        )
+        if axis["passed"] != bool(expected_pass):
+            raise ContractError(f"{axis_path}.passed differs from its evidence")
+    expected_pass = precision["dispatch"]["passed"] and precision["combine"]["passed"]
+    if precision["passed"] != expected_pass:
+        raise ContractError(f"{path}.passed differs from direction evidence")
+    return precision
+
+
+def validate_raw_document(document: Any, samples_document: Any) -> dict[str, Any]:
+    """Validate identities, exact samples, formulas, privacy, and the native raw shape."""
+    _validate_native_schema("raw-case-v1.schema.json", document)
+    doc = _keys(
+        document,
+        {"case", "format", "generated_at", "identity", "implementation", "measurement",
+         "outcome", "provenance", "record_type", "runtime_fingerprint", "sample_artifact",
+         "schema_version", "topology", "workload"},
+        "raw",
+    )
+    _finite_tree(doc)
+    if doc["format"] != RAW_FORMAT or doc["schema_version"] != 1 or doc["record_type"] != "case-attempt":
+        raise ContractError("raw format/schema/record type differs from v1")
+    _text(doc["generated_at"], "raw.generated_at")
+    identifiers = _keys(
+        doc["identity"],
+        {"allocation_factors", "allocation_id", "attempt_id", "attempt_ordinal", "case_factors",
+         "case_id", "series_factors", "series_id"},
+        "raw.identity",
+    )
+    for field, kind in (
+        ("allocation_id", "allocation"), ("attempt_id", "attempt"),
+        ("case_id", "case"), ("series_id", "series"),
+    ):
+        _typed(identifiers[field], kind, f"raw.identity.{field}")
+    ordinal = _integer(identifiers["attempt_ordinal"], "raw.identity.attempt_ordinal", minimum=1)
+    allocation_factors = _keys(
+        identifiers["allocation_factors"], ALLOCATION_FACTOR_FIELDS,
+        "raw.identity.allocation_factors",
+    )
+    qualification_index = _integer(
+        allocation_factors["qualification_index"],
+        "raw.identity.allocation_factors.qualification_index",
+        minimum=1,
+    )
+    if qualification_index > 3:
+        raise ContractError("raw qualification index must be in 1..3")
+    case_factors = _keys(
+        identifiers["case_factors"], {"case", "profile", "sku"},
+        "raw.identity.case_factors",
+    )
+    scheduled_case = _scheduled_case(
+        case_factors["case"], "raw.identity.case_factors.case"
+    )
+    profile = scheduled_case_profile(scheduled_case, "raw.identity.case_factors.case")
+    if case_factors["profile"] != profile:
+        raise ContractError("raw case profile differs from CollectiveX v1")
+    _text(case_factors["sku"], "raw.identity.case_factors.sku")
+    series_factors = _keys(
+        identifiers["series_factors"],
+        {"backend", "case_id", "image_digest", "implementation_contract_sha256",
+         "public_config_sha256", "routing_control_sha256",
+         "runtime_fingerprint_sha256", "source_sha", "squash_sha256", "workload_id"},
+        "raw.identity.series_factors",
+    )
+    if identity.allocation_id(identifiers["allocation_factors"]) != identifiers["allocation_id"]:
+        raise ContractError("allocation identity differs")
+    if identity.digest("case", identifiers["case_factors"]) != identifiers["case_id"]:
+        raise ContractError("case identity differs")
+    if identity.series_id(identifiers["series_factors"]) != identifiers["series_id"]:
+        raise ContractError("series identity differs")
+    if identity.attempt_id(
+        allocation=identifiers["allocation_id"], case=identifiers["case_id"], ordinal=ordinal
+    ) != identifiers["attempt_id"]:
+        raise ContractError("attempt identity differs")
+
+    samples = validate_samples_document(samples_document)
+    for field in ("allocation_id", "attempt_id", "case_id", "series_id"):
+        if samples[field] != identifiers[field]:
+            raise ContractError(f"samples.{field} differs from raw identity")
+    if samples["qualification_index"] != qualification_index:
+        raise ContractError("samples qualification index differs from raw allocation")
+    sample_by_token = {point["tokens_per_rank"]: point for point in samples["points"]}
+
+    case = _keys(
+        doc["case"],
+        {"attempt_ordinal", "backend", "eplb", "ep_size", "mode", "phase",
+         "required_publication", "resource_mode", "runner", "shape", "suite", "workload_name"},
+        "raw.case",
+    )
+    ep_size = _integer(case["ep_size"], "raw.case.ep_size", minimum=1)
+    if case["attempt_ordinal"] != ordinal:
+        raise ContractError("case attempt ordinal differs")
+    for field in ("backend", "mode", "phase", "required_publication", "resource_mode", "runner",
+                  "suite", "workload_name"):
+        _text(case[field], f"raw.case.{field}")
+    shape = _keys(
+        case["shape"],
+        {"activation_profile", "combine_precision", "dispatch_precision", "eplb", "experts",
+         "experts_per_rank", "hidden", "kernel_gen", "num_logical_experts",
+         "precision_profile", "routing", "topk"},
+        "raw.case.shape",
+    )
+    hidden = _integer(shape["hidden"], "raw.case.shape.hidden", minimum=1)
+    topk = _integer(shape["topk"], "raw.case.shape.topk", minimum=1)
+    physical_experts = _integer(
+        shape["experts"], "raw.case.shape.experts", minimum=1
+    )
+    logical_experts = _integer(
+        shape["num_logical_experts"],
+        "raw.case.shape.num_logical_experts",
+        minimum=1,
+    )
+    experts_per_rank = _integer(
+        shape["experts_per_rank"], "raw.case.shape.experts_per_rank", minimum=1
+    )
+    precision_profile_id = scheduled_case.get(
+        "precision_profile", identity.V1_CONTROL_PRECISION_PROFILE
+    )
+    communication_precision = identity.precision_profile(precision_profile_id)
+    if (
+        shape["precision_profile"] != precision_profile_id
+        or shape["dispatch_precision"] != communication_precision["dispatch"]
+        or shape["combine_precision"] != communication_precision["combine"]
+    ):
+        raise ContractError("raw communication precision differs from scheduled case")
+    eplb = _keys(
+        case["eplb"],
+        {"calibration_token_offset", "calibration_trace_sha256", "calibration_window",
+         "calibration_workload_id", "enabled", "imbalance_after", "imbalance_before",
+         "mapping_hash", "max_replicas", "num_logical_experts", "num_physical_experts",
+         "num_redundant", "planner", "reference_tokens_per_rank", "replicated_experts"},
+        "raw.case.eplb",
+    )
+    if not isinstance(eplb["enabled"], bool):
+        raise ContractError("raw.case.eplb.enabled must be boolean")
+    expected_redundant = (
+        profile["eplb_redundant_experts"] if eplb["enabled"] else 0
+    )
+    expected_physical = eplb_contract.physical_count(
+        scheduled_case["experts"], expected_redundant, ep_size
+    )
+    if (
+        shape["eplb"] != eplb["enabled"]
+        or logical_experts != scheduled_case["experts"]
+        or physical_experts != expected_physical
+        or experts_per_rank * ep_size != physical_experts
+        or eplb["num_logical_experts"] != logical_experts
+        or eplb["num_physical_experts"] != physical_experts
+        or eplb["num_redundant"] != expected_redundant
+    ):
+        raise ContractError("raw EPLB/shape dimensions differ from the frozen profile")
+    if eplb["enabled"]:
+        expected_plan, calibration_descriptor = _expected_eplb_calibration(
+            scheduled_case["routing"],
+            hidden,
+            topk,
+            logical_experts,
+            physical_experts,
+            ep_size,
+            profile["seed"],
+            profile["eplb_reference_tokens_per_rank"],
+        )
+        expected_eplb = {
+            **calibration_descriptor,
+            "enabled": True,
+            "imbalance_after": expected_plan["imbalance_after"],
+            "imbalance_before": expected_plan["imbalance_before"],
+            "mapping_hash": eplb_contract.mapping_hash(expected_plan),
+            "max_replicas": expected_plan["max_replicas"],
+            "num_logical_experts": logical_experts,
+            "num_physical_experts": physical_experts,
+            "num_redundant": expected_redundant,
+            "planner": profile["eplb_planner"],
+            "reference_tokens_per_rank": profile[
+                "eplb_reference_tokens_per_rank"
+            ],
+            "replicated_experts": expected_plan["replicated_experts"],
+        }
+    else:
+        expected_eplb = {
+            "calibration_token_offset": None,
+            "calibration_trace_sha256": None,
+            "calibration_window": None,
+            "calibration_workload_id": None,
+            "enabled": False,
+            "imbalance_after": None,
+            "imbalance_before": None,
+            "mapping_hash": None,
+            "max_replicas": None,
+            "num_logical_experts": logical_experts,
+            "num_physical_experts": physical_experts,
+            "num_redundant": 0,
+            "planner": None,
+            "reference_tokens_per_rank": None,
+            "replicated_experts": 0,
+        }
+    _equivalent(eplb, expected_eplb, "raw.case.eplb", tolerance=1e-9)
+    if case_factors["sku"] != case["runner"]:
+        raise ContractError("raw case runner differs from case identity")
+
+    workload = _keys(
+        doc["workload"],
+        {"activation_generator", "activation_identity", "activation_profile",
+         "cross_rank_consistent", "manifest_checksums", "members", "routing_generator", "source",
+         "trace_hashes", "trace_signature", "workload_id"},
+        "raw.workload",
+    )
+    if workload["source"] not in {"canonical-serialized", "seeded-runtime"}:
+        raise ContractError("raw workload source is invalid")
+    if workload["source"] == "canonical-serialized":
+        _typed(workload["workload_id"], "workload", "raw.workload.workload_id")
+        members = workload["members"]
+        checksums = workload["manifest_checksums"]
+        if (
+            not isinstance(members, list)
+            or not members
+            or members != sorted(set(members))
+            or not all(identity.is_typed_id(member, "workload") for member in members)
+            or not isinstance(checksums, dict)
+            or set(checksums) != set(members)
+        ):
+            raise ContractError("raw canonical workload members/checksums are invalid")
+        for member, values in checksums.items():
+            if (
+                not isinstance(values, dict)
+                or set(values) != {"topk_idx", "topk_weights", "trace"}
+                or any(not re.fullmatch(r"[0-9a-f]{64}", str(value)) for value in values.values())
+            ):
+                raise ContractError(f"raw canonical workload checksums differ for {member}")
+        expected_workload_id = identity.workload_id({
+            "members": [
+                {"checksums": checksums[member], "workload_id": member}
+                for member in members
+            ]
+        })
+        if workload["workload_id"] != expected_workload_id:
+            raise ContractError("raw composite workload identity differs from its members")
+    elif any(workload[field] is not None for field in ("members", "manifest_checksums", "workload_id")):
+        raise ContractError("raw seeded workload cannot claim serialized members")
+    if workload["cross_rank_consistent"] is not True:
+        raise ContractError("raw workload is not consistent across ranks")
+
+    measurement = _keys(
+        doc["measurement"],
+        {"component_order_contract", "conditioning", "contract", "execution_order_sha256",
+         "qualification_index", "rows", "sampling", "source_allocation"},
+        "raw.measurement",
+    )
+    if measurement["qualification_index"] != qualification_index:
+        raise ContractError("raw measurement qualification index differs from allocation")
+    if not isinstance(measurement["execution_order_sha256"], str) or not re.fullmatch(
+        r"[0-9a-f]{64}", measurement["execution_order_sha256"]
+    ):
+        raise ContractError("raw measurement execution order digest is invalid")
+    validate_conditioning_contract(measurement["conditioning"], case["phase"])
+    sampling = _keys(
+        measurement["sampling"],
+        {"contract", "iterations_per_trial", "percentile_method", "reduction",
+         "samples_per_component", "trials", "warmup_iterations", "warmup_semantics"},
+        "raw.measurement.sampling",
+    )
+    expected_sampling = {
+        "contract": profile["sampling_contract"], "iterations_per_trial": 8,
+        "percentile_method": profile["percentile_method"],
+        "reduction": profile["rank_reduction"],
+        "samples_per_component": 512, "trials": 64, "warmup_iterations": 32,
+        "warmup_semantics": "full-roundtrip-before-each-component-trial-point-v1",
+    }
+    if sampling != expected_sampling:
+        raise ContractError("raw sampling contract differs from fixed-512-v1")
+    if (
+        case["mode"] != profile["mode"]
+        or case["resource_mode"] != profile["resource_mode"]
+        or measurement["contract"] != profile["contract"]
+        or measurement["component_order_contract"] != profile["component_order_contract"]
+        or measurement["source_allocation"] != "even"
+        or shape["activation_profile"] != profile["activation_profile"]
+        or workload["activation_generator"] != profile["activation_generator"]
+        or workload["activation_profile"] != profile["activation_profile"]
+        or workload["routing_generator"] != profile["routing_generator"]
+    ):
+        raise ContractError("raw case differs from the frozen v1 profile")
+    expected_activation = hashlib.sha256(
+        (
+            f"counter|seed={profile['seed']}|hidden={hidden}|"
+            f"gen={profile['activation_generator']}"
+        ).encode()
+    ).hexdigest()
+    if workload["activation_identity"] != expected_activation:
+        raise ContractError("raw activation identity differs from the frozen seed/profile")
+    rows = measurement["rows"]
+    if not isinstance(rows, list) or not rows:
+        raise ContractError("raw.measurement.rows must be non-empty")
+    seen_points = set()
+    row_tokens = []
+    recomputed_anomalies = 0
+    for index, row_value in enumerate(rows):
+        path = f"raw.measurement.rows[{index}]"
+        row = _keys(
+            row_value,
+            {"anomalies", "byte_provenance", "components", "correctness", "evidence_id",
+             "global_tokens", "point_id", "receive", "routing",
+             "sample_histograms", "sample_sha256", "token_rate_at_latency_percentile",
+             "tokens_per_rank"},
+            path,
+        )
+        tokens = _integer(row["tokens_per_rank"], f"{path}.tokens_per_rank", minimum=1)
+        row_tokens.append(tokens)
+        if tokens in seen_points or tokens not in sample_by_token:
+            raise ContractError(f"{path} token point is duplicate or missing samples")
+        seen_points.add(tokens)
+        if row["global_tokens"] != tokens * ep_size:
+            raise ContractError(f"{path}.global_tokens formula differs")
+        sample_point = sample_by_token[tokens]
+        expected_point = identity.point_id(series=identifiers["series_id"], tokens_per_rank=tokens)
+        if row["point_id"] != expected_point or sample_point["point_id"] != expected_point:
+            raise ContractError(f"{path}.point_id differs")
+        expected_evidence = identity.evidence_id(
+            point=expected_point, allocation=identifiers["allocation_id"],
+            attempt=identifiers["attempt_id"], sample_sha256=sample_point["sample_sha256"],
+        )
+        if row["evidence_id"] != expected_evidence or sample_point["evidence_id"] != expected_evidence:
+            raise ContractError(f"{path}.evidence_id differs")
+        if row["sample_sha256"] != sample_point["sample_sha256"]:
+            raise ContractError(f"{path}.sample_sha256 differs")
+        components = _keys(
+            row["components"], {"combine", "dispatch", "isolated_sum", "roundtrip", "stage"},
+            f"{path}.components",
+        )
+        for name in ("combine", "dispatch", "roundtrip", "stage"):
+            _validate_component(
+                components[name], sample_point["components"][name], f"{path}.components.{name}"
+            )
+        _validate_component(
+            components["isolated_sum"], None, f"{path}.components.isolated_sum", derived=True
+        )
+        expected_stage_availability = (
+            "measured"
+            if communication_precision["dispatch"]["communication_format"] != "bf16"
+            or (case["backend"] == "mori" and shape["kernel_gen"] == "intranode")
+            else "unavailable"
+        )
+        if components["stage"]["availability"] != expected_stage_availability:
+            raise ContractError(f"{path}.components.stage differs from adapter device work")
+        _, _, _, expected_indices, expected_weights = _expected_canonical_trace(
+            scheduled_case["routing"],
+            hidden,
+            topk,
+            logical_experts,
+            physical_experts,
+            ep_size,
+            tokens,
+            profile["seed"],
+            eplb["enabled"],
+            profile["eplb_reference_tokens_per_rank"],
+        )
+        expected_routing = _expected_routing_summary(
+            expected_indices,
+            expected_weights,
+            physical_experts=physical_experts,
+            ep_size=ep_size,
+            tokens_per_rank=tokens,
+            gpus_per_node=scheduled_case["gpus_per_node"],
+            scale_up_domain=scheduled_case["scale_up_domain"],
+        )
+        _equivalent(
+            row["routing"], expected_routing, f"{path}.routing", tolerance=1e-5
+        )
+        expected_payload_counts = (
+            expected_routing["expert_assignments_per_rank"]
+            if profile["payload_unit"] == "token-expert"
+            else expected_routing["payload_copies_per_rank"]
+        )
+        throughput = _keys(
+            row["token_rate_at_latency_percentile"], set(PERCENTILES),
+            f"{path}.token_rate_at_latency_percentile",
+        )
+        for percentile in PERCENTILES:
+            latency = components["roundtrip"]["percentiles_us"][percentile]
+            if latency <= 0:
+                raise ContractError(f"{path} roundtrip latency must be positive")
+            _close(
+                throughput[percentile], row["global_tokens"] / (latency * 1e-6),
+                f"{path}.token_rate_at_latency_percentile.{percentile}", 1e-9,
+            )
+        correctness = _keys(
+            row["correctness"],
+            {"contract", "max_relative_error", "passed", "precision", "rank_evidence", "scope"},
+            f"{path}.correctness",
+        )
+        if (
+            correctness["contract"] != profile["oracle_contract"]
+            or correctness["scope"] != profile["correctness_scope"]
+            or type(correctness["passed"]) is not bool
+        ):
+            raise ContractError(f"{path}.correctness contract differs")
+        precision_evidence = _validate_precision_evidence(
+            correctness["precision"], precision_profile_id, communication_precision,
+            f"{path}.correctness.precision",
+        )
+        _number(
+            correctness["max_relative_error"],
+            f"{path}.correctness.max_relative_error",
+            minimum=0.0,
+        )
+        rank_evidence = correctness["rank_evidence"]
+        if not isinstance(rank_evidence, list) or len(rank_evidence) != ep_size:
+            raise ContractError(f"{path}.correctness.rank_evidence must cover every rank")
+        ranks = set()
+        observed_max_error = 0.0
+        evidence_passed = True
+        for evidence_index, evidence_value in enumerate(rank_evidence):
+            evidence_path = f"{path}.correctness.rank_evidence[{evidence_index}]"
+            evidence = _keys(
+                evidence_value,
+                {"input_unchanged", "order_stable", "post_timing", "pre_timing", "rank"},
+                evidence_path,
+            )
+            evidence_rank = _integer(evidence["rank"], f"{evidence_path}.rank")
+            if evidence_rank >= ep_size:
+                raise ContractError(f"{evidence_path}.rank is outside the EP group")
+            ranks.add(evidence_rank)
+            if type(evidence["input_unchanged"]) is not bool or type(evidence["order_stable"]) is not bool:
+                raise ContractError(f"{evidence_path} stability fields must be boolean")
+            pre = _validate_oracle(
+                evidence["pre_timing"], f"{evidence_path}.pre_timing", profile
+            )
+            post = _validate_oracle(
+                evidence["post_timing"], f"{evidence_path}.post_timing", profile
+            )
+            if (
+                pre["receive_count"] != expected_payload_counts[evidence_rank]
+                or post["receive_count"] != expected_payload_counts[evidence_rank]
+            ):
+                raise ContractError(
+                    f"{evidence_path}.receive_count differs from canonical routing"
+                )
+            expected_stability = all(
+                pre[field] == post[field]
+                for field in ("ordering_contract", "order_sha256", "dispatch_sha256")
+            )
+            if evidence["order_stable"] != expected_stability:
+                raise ContractError(f"{evidence_path}.order_stable differs from the evidence")
+            errors = [
+                oracle["max_relative_error"]
+                for oracle in (pre, post)
+                if oracle["max_relative_error"] is not None
+            ]
+            observed_max_error = max([observed_max_error, *errors])
+            evidence_passed = evidence_passed and all(
+                (evidence["input_unchanged"], evidence["order_stable"], pre["passed"], post["passed"])
+            )
+        evidence_passed = evidence_passed and precision_evidence["passed"]
+        if ranks != set(range(ep_size)) or correctness["passed"] != evidence_passed:
+            raise ContractError(f"{path}.correctness rank coverage or outcome differs")
+        _close(
+            correctness["max_relative_error"], observed_max_error,
+            f"{path}.correctness.max_relative_error",
+        )
+        if components["dispatch"]["availability"] == "measured":
+            for percentile in PERCENTILES:
+                expected = (
+                    components["dispatch"]["percentiles_us"][percentile]
+                    + (
+                        components["stage"]["percentiles_us"][percentile]
+                        if components["stage"]["availability"] == "measured"
+                        else 0.0
+                    )
+                    + components["combine"]["percentiles_us"][percentile]
+                )
+                _close(
+                    components["isolated_sum"]["percentiles_us"][percentile], expected,
+                    f"{path}.components.isolated_sum.{percentile}",
+                )
+        logical_copies = (
+            sum(expected_routing["expert_assignments_per_rank"])
+            if profile["payload_unit"] == "token-expert"
+            else expected_routing["routed_copies"]
+        )
+        dispatch_bytes = _precision_byte_provenance(
+            communication_precision["dispatch"], logical_copies, hidden
+        )
+        combine_bytes = _precision_byte_provenance(
+            communication_precision["combine"], logical_copies, hidden
+        )
+        stage_bytes = {
+            "accounting_contract": "activation-data-plus-scales-v1",
+            "activation_data_bytes": 0,
+            "scale_bytes": 0,
+            "total_logical_bytes": 0,
+        }
+        roundtrip_bytes = {
+            "accounting_contract": "activation-data-plus-scales-v1",
+            **{
+                field: dispatch_bytes[field] + combine_bytes[field]
+                for field in (
+                    "activation_data_bytes", "scale_bytes", "total_logical_bytes"
+                )
+            },
+        }
+        expected_byte_provenance = {
+            "combine": combine_bytes,
+            "dispatch": dispatch_bytes,
+            "roundtrip": roundtrip_bytes,
+            "stage": stage_bytes,
+        }
+        _equivalent(
+            row["byte_provenance"], expected_byte_provenance, f"{path}.byte_provenance"
+        )
+
+        max_receive = max(expected_payload_counts)
+        expected_receive = {
+            "max": max_receive,
+            "mean": sum(expected_payload_counts) / ep_size,
+            "min": min(expected_payload_counts),
+            "total": sum(expected_payload_counts),
+        }
+        _equivalent(row["receive"], expected_receive, f"{path}.receive")
+        expected_histograms = {
+            name: (
+                _expected_histogram([
+                    sample
+                    for trial in sample_point["components"][name]["trials"]
+                    for sample in trial
+                ])
+                if sample_point["components"][name]["availability"] == "measured"
+                else None
+            )
+            for name in ("dispatch", "stage", "combine", "roundtrip")
+        }
+        _equivalent(
+            row["sample_histograms"], expected_histograms, f"{path}.sample_histograms"
+        )
+        expected_anomalies = _expected_anomalies(tokens, components)
+        _equivalent(row["anomalies"], expected_anomalies, f"{path}.anomalies")
+        recomputed_anomalies += len(expected_anomalies)
+    if seen_points != set(sample_by_token):
+        raise ContractError("raw rows and sample points differ")
+    if row_tokens != sorted(row_tokens):
+        raise ContractError("raw rows must follow the scheduled token ladder")
+    expected_trace_hashes = sorted(row["routing"]["hash"] for row in rows)
+    if workload["trace_hashes"] != expected_trace_hashes:
+        raise ContractError("raw workload trace hashes differ from measured rows")
+    expected_trace_signature = hashlib.sha256(
+        "|".join(expected_trace_hashes).encode()
+    ).hexdigest()
+    if workload["trace_signature"] != expected_trace_signature:
+        raise ContractError("raw workload trace signature differs from measured rows")
+
+    implementation = _keys(
+        doc["implementation"], {"kernel_generation", "name", "provenance", "resource_profile"},
+        "raw.implementation",
+    )
+    if (
+        implementation["name"] != case["backend"]
+        or implementation["kernel_generation"] != shape["kernel_gen"]
+    ):
+        raise ContractError("raw implementation identity differs from the case")
+    provenance_fields = _obj(implementation["provenance"], "raw.implementation.provenance")
+    unknown = set(provenance_fields) - PROVENANCE_KEYS
+    if unknown:
+        raise ContractError(f"raw implementation provenance has unknown fields {sorted(unknown)}")
+    if (
+        implementation["name"] == "deepep-v2"
+        and provenance_fields.get("deterministic") is not False
+    ):
+        raise ContractError("DeepEP V2 deterministic mode differs from the v1 kernel contract")
+    if implementation["name"] == "deepep-v2" and (
+        _integer(
+            provenance_fields.get("tuning_num_experts"),
+            "raw.implementation.provenance.tuning_num_experts",
+            minimum=1,
+        ) != logical_experts
+        or _integer(
+            provenance_fields.get("num_experts"),
+            "raw.implementation.provenance.num_experts",
+            minimum=1,
+        ) != physical_experts
+    ):
+        raise ContractError("DeepEP V2 expert-count provenance differs from the case")
+    if implementation["name"] == "deepep-hybrid":
+        realized_config = provenance_fields.get("realized_config")
+        jit_kernel_keys = provenance_fields.get("jit_kernel_keys")
+        jit_shared_objects = provenance_fields.get("jit_shared_objects")
+        domain_ranks, communication_domains = hybrid_communication_domains(
+            ep_size, scheduled_case["scale_up_domain"]
+        )
+        if (
+            not _hybrid_realized_config_is_valid(realized_config)
+            or not _hybrid_jit_evidence_is_valid(jit_shared_objects, jit_kernel_keys)
+            or realized_config["hidden_dim"] != shape["hidden"]
+            or realized_config["num_of_experts_per_rank"] * ep_size != physical_experts
+            or realized_config["num_of_ranks_per_node"] != domain_ranks
+            or realized_config["num_of_nodes"] != communication_domains
+            or realized_config["token_data_type"] != "UINT16"
+            or any(
+                len(artifact["rank_artifacts"]) != ep_size
+                for artifact in jit_shared_objects
+            )
+        ):
+            raise ContractError("DeepEP Hybrid realized config/JIT evidence differs from the case")
+    if implementation["name"] == "nccl-ep" and implementation["kernel_generation"] != (
+        collective_kernel_generation(provenance_fields.get("collective_library"))
+    ):
+        raise ContractError("NCCL/RCCL kernel generation differs from collective lineage")
+    resource_profile = _obj(
+        implementation["resource_profile"], "raw.implementation.resource_profile"
+    )
+    expected_resource_profile = project_resource_profile(provenance_fields)
+    if resource_profile != expected_resource_profile:
+        raise ContractError("raw resource profile differs from implementation provenance")
+    topology = _keys(
+        doc["topology"],
+        {"device_count", "device_product", "gpus_per_node", "nodes", "placement",
+         "realized_placement", "scale_out_transport", "scale_up_domain",
+         "scale_up_transport", "scope", "topology_class", "transport", "world_size"},
+        "raw.topology",
+    )
+    for field in ("device_count", "gpus_per_node", "nodes", "scale_up_domain", "world_size"):
+        _integer(topology[field], f"raw.topology.{field}", minimum=1)
+    for field in ("scale_up_transport", "scope", "topology_class", "transport"):
+        _text(topology[field], f"raw.topology.{field}")
+    if topology["scale_out_transport"] is not None:
+        _text(topology["scale_out_transport"], "raw.topology.scale_out_transport")
+    realized = _keys(
+        topology["realized_placement"],
+        {"gpus_per_node", "nodes", "ranks_per_node", "unique_local_ranks", "valid"},
+        "raw.topology.realized_placement",
+    )
+    if realized != {
+        "gpus_per_node": topology["gpus_per_node"],
+        "nodes": topology["nodes"],
+        "ranks_per_node": topology["gpus_per_node"],
+        "unique_local_ranks": True,
+        "valid": True,
+    }:
+        raise ContractError("raw realized placement differs from requested topology")
+    if (
+        topology["world_size"] != ep_size
+        or topology["nodes"] * topology["gpus_per_node"] != ep_size
+        or topology["device_count"] != topology["gpus_per_node"]
+        or topology["placement"] != profile["placement"]
+        or (
+            topology["scope"] == "scale-up"
+            and (
+                ep_size > topology["scale_up_domain"]
+                or topology["scale_out_transport"] is not None
+            )
+        )
+        or (
+            topology["scope"] == "scale-out"
+            and (
+                ep_size <= topology["scale_up_domain"]
+                or ep_size % topology["scale_up_domain"] != 0
+                or topology["scale_out_transport"] is None
+            )
+        )
+        or topology["scope"] not in {"scale-up", "scale-out"}
+    ):
+        raise ContractError("raw topology dimensions differ from the case")
+    if implementation["name"] == "deepep-v2":
+        scale_out = scheduled_case["scope"] == "scale-out"
+        expected_policy = (
+            (True, True, "nccl-gin")
+            if scale_out
+            else (False, False, "nccl-device-lsa")
+        )
+        if (
+            provenance_fields.get("allow_hybrid_mode"),
+            provenance_fields.get("gin_enabled"),
+            provenance_fields.get("communication_backend"),
+        ) != expected_policy:
+            raise ContractError("DeepEP V2 communication policy differs from the v1 contract")
+        lsa_topology = tuple(
+            _integer(
+                provenance_fields.get(field),
+                f"raw.implementation.provenance.{field}",
+                minimum=1,
+            )
+            for field in (
+                "physical_rdma_ranks", "physical_nvlink_ranks",
+                "logical_scaleout_ranks", "logical_scaleup_ranks",
+            )
+        )
+        domains = ep_size // scheduled_case["scale_up_domain"]
+        expected_v2_topology = (
+            (
+                domains,
+                scheduled_case["scale_up_domain"],
+                domains,
+                scheduled_case["scale_up_domain"],
+            )
+            if scale_out
+            else (1, ep_size, 1, ep_size)
+        )
+        if lsa_topology != expected_v2_topology:
+            raise ContractError("DeepEP V2 realized communication domains differ from topology")
+    runtime = _keys(
+        doc["runtime_fingerprint"],
+        {"accelerator_runtime", "collective_library", "device", "driver_version", "framework",
+         "machine", "python_version", "vendor"},
+        "raw.runtime_fingerprint",
+    )
+    for field in ("machine", "python_version", "vendor"):
+        _text(runtime[field], f"raw.runtime_fingerprint.{field}")
+    runtime_device = _keys(
+        runtime["device"], {"arch", "compute_units", "memory_bytes", "product", "warp_size"},
+        "raw.runtime_fingerprint.device",
+    )
+    if topology["device_product"] != runtime_device["product"]:
+        raise ContractError("raw topology and runtime device products differ")
+    platform = capability.PLATFORMS.get(case["runner"])
+    if platform is not None:
+        identity_issues = capability.runtime_identity_issues(
+            case["runner"], vendor=runtime["vendor"], arch=runtime_device["arch"],
+            machine=runtime["machine"], device_name=runtime_device["product"],
+            device_count=topology["device_count"], world_size=topology["world_size"],
+        )
+        registered_topology = capability.topology_for(case["runner"], ep_size)
+        if identity_issues or (
+            registered_topology is None
+            or topology["gpus_per_node"] != platform["gpus_per_node"]
+            or topology["scale_up_domain"] != platform["scale_up_domain"]
+            or any(
+                topology[field] != registered_topology[field]
+                for field in (
+                    "nodes", "scope", "scale_up_transport", "scale_out_transport",
+                    "topology_class", "transport",
+                )
+            )
+        ):
+            raise ContractError(
+                "raw runtime/topology differs from the scheduled SKU: "
+                + "; ".join(identity_issues)
+            )
+    raw_provenance = _keys(
+        doc["provenance"],
+        {"allocation_stratum_sha256", "command", "distributed_launcher", "git_run",
+         "image", "redaction"},
+        "raw.provenance",
+    )
+    allocation_stratum = raw_provenance["allocation_stratum_sha256"]
+    if workload["source"] == "canonical-serialized" and not (
+        isinstance(allocation_stratum, str)
+        and re.fullmatch(r"[0-9a-f]{64}", allocation_stratum)
+    ):
+        raise ContractError("canonical raw evidence is missing its private allocation stratum")
+    image = _keys(
+        raw_provenance["image"],
+        {"arch", "digest", "digest_verified", "reference", "squash_sha256"},
+        "raw.provenance.image",
+    )
+    if (
+        image["digest_verified"] is not True
+        or not isinstance(image["digest"], str)
+        or not re.fullmatch(r"sha256:[0-9a-f]{64}", image["digest"])
+    ):
+        raise ContractError("raw image digest was not registry-verified")
+    if raw_provenance["redaction"] != "sanitized-v1":
+        raise ContractError("raw provenance redaction contract differs")
+    git_run = raw_provenance["git_run"]
+    if git_run is not None:
+        git_run = _keys(git_run, GIT_RUN_FIELDS, "raw.provenance.git_run")
+        if git_run["qualification_index"] != qualification_index:
+            raise ContractError("raw git run qualification index differs from allocation")
+    expected_provenance_complete = provenance_complete(
+        provenance_fields,
+        case["backend"],
+        git_run,
+        allocation_stratum_sha256=allocation_stratum,
+        image_digest=image["digest"],
+        image_verified=image["digest_verified"],
+        squash_sha256=image["squash_sha256"],
+    )
+
+    actual_scheduled_case = {
+        "backend": case["backend"],
+        "canonical": workload["source"] == "canonical-serialized",
+        "eplb": eplb["enabled"],
+        "ep": ep_size,
+        "experts": shape["num_logical_experts"],
+        "gpus_per_node": topology["gpus_per_node"],
+        "hidden": hidden,
+        "ladder": " ".join(map(str, row_tokens)),
+        "mode": case["mode"],
+        "nodes": topology["nodes"],
+        "phase": case["phase"],
+        "required_publication": case["required_publication"],
+        "routing": shape["routing"],
+        "samples_per_point": sampling["samples_per_component"],
+        "scale_out_transport": topology["scale_out_transport"],
+        "scale_up_domain": topology["scale_up_domain"],
+        "scale_up_transport": topology["scale_up_transport"],
+        "scope": topology["scope"],
+        "suite": case["suite"],
+        "timing": (
+            f"{sampling['iterations_per_trial']}:{sampling['trials']}:"
+            f"{sampling['warmup_iterations']}"
+        ),
+        "topk": shape["topk"],
+        "topology_class": topology["topology_class"],
+        "transport": topology["transport"],
+        "warmup_semantics": sampling["warmup_semantics"],
+        "workload": case["workload_name"],
+    }
+    if "precision_profile" in scheduled_case:
+        actual_scheduled_case["precision_profile"] = shape["precision_profile"]
+    if scheduled_case != actual_scheduled_case:
+        mismatches = sorted(
+            field for field in scheduled_case
+            if scheduled_case[field] != actual_scheduled_case[field]
+        )
+        raise ContractError(f"raw data differs from scheduled case fields {mismatches}")
+
+    if workload["source"] == "canonical-serialized":
+        _validate_canonical_workload(workload, scheduled_case, rows, eplb)
+
+    expected_series = {
+        "backend": case["backend"],
+        "case_id": identifiers["case_id"],
+        "image_digest": image["digest"],
+        "implementation_contract_sha256": _sha256_json({
+            "kernel_generation": implementation["kernel_generation"],
+            "name": implementation["name"],
+            "provenance": series_provenance(provenance_fields),
+            "resource_profile": resource_profile,
+        }),
+        "public_config_sha256": public_series_config_sha256(public_series_config(
+            kernel_generation=implementation["kernel_generation"],
+            provenance=provenance_fields,
+            resource_profile=resource_profile,
+            resource_mode=case["resource_mode"],
+            device_product=topology["device_product"],
+        )),
+        "routing_control_sha256": routing_implementation_control_sha256(implementation),
+        "runtime_fingerprint_sha256": _sha256_json(runtime),
+        "source_sha": git_run["source_sha"] if git_run is not None else None,
+        "squash_sha256": image["squash_sha256"],
+        "workload_id": workload["workload_id"] or workload["trace_signature"],
+    }
+    if series_factors != expected_series:
+        raise ContractError("raw series factors differ from measured implementation/runtime")
+    expected_allocation = {
+        "artifact": git_run["artifact"] if git_run is not None else None,
+        "execution_id": allocation_factors["execution_id"],
+        "job": git_run["job"] if git_run is not None else None,
+        "qualification_index": qualification_index,
+        "repo": git_run["repo"] if git_run is not None else None,
+        "run_attempt": git_run["run_attempt"] if git_run is not None else None,
+        "run_id": git_run["run_id"] if git_run is not None else None,
+        "runner": case["runner"],
+        "source_sha": git_run["source_sha"] if git_run is not None else None,
+    }
+    if allocation_factors != expected_allocation:
+        raise ContractError("raw allocation factors differ from provenance")
+    artifact = _keys(doc["sample_artifact"], {"bytes", "format", "path", "sha256"}, "raw.sample_artifact")
+    if artifact["format"] != SAMPLES_FORMAT or Path(artifact["path"]).name != artifact["path"]:
+        raise ContractError("raw.sample_artifact format/path is invalid")
+    if not isinstance(artifact["sha256"], str) or len(artifact["sha256"]) != 64:
+        raise ContractError("raw.sample_artifact.sha256 is invalid")
+    _integer(artifact["bytes"], "raw.sample_artifact.bytes", minimum=1)
+    outcome = _keys(doc["outcome"], {"publication_status", "reasons", "status", "validity"}, "raw.outcome")
+    if outcome["status"] not in {"success", "invalid"} or outcome["publication_status"] not in {"diagnostic", "invalid"}:
+        raise ContractError("raw outcome status is invalid")
+    if not isinstance(outcome["reasons"], list) or not all(isinstance(x, str) for x in outcome["reasons"]):
+        raise ContractError("raw outcome reasons must be strings")
+    validity = _keys(
+        outcome["validity"],
+        {"anomaly_free", "execution_status", "measurement_conformance", "provenance_complete",
+         "resource_conformance", "sampling_conformance", "semantic_correctness",
+         "workload_identity", "workload_source"},
+        "raw.outcome.validity",
+    )
+    correctness_passed = all(row["correctness"]["passed"] for row in rows)
+    workload_consistent = workload["cross_rank_consistent"] is True
+    expected_status = "success" if correctness_passed and workload_consistent else "invalid"
+    expected_publication = "diagnostic" if expected_status == "success" else "invalid"
+    if (
+        outcome["status"] != expected_status
+        or outcome["publication_status"] != expected_publication
+        or bool(outcome["reasons"]) == (expected_status == "success")
+        or validity["execution_status"] != "complete"
+        or validity["semantic_correctness"] != ("pass" if correctness_passed else "fail")
+        or validity["workload_identity"] != (
+            "consistent-across-ranks" if workload_consistent else "inconsistent"
+        )
+        or validity["workload_source"] != workload["source"]
+        or validity["measurement_conformance"] != "conformant"
+        or validity["sampling_conformance"] != "conformant"
+        or validity["resource_conformance"] != resource_profile["conformance_class"]
+        or validity["anomaly_free"] != (recomputed_anomalies == 0)
+        or validity["provenance_complete"] is not expected_provenance_complete
+    ):
+        raise ContractError("raw outcome differs from its measurement evidence")
+    artifact_safety.assert_publication_safe([doc])
+    return doc
+
+
+def make_terminal_document(
+    *,
+    allocation_factors: dict[str, Any],
+    attempt_ordinal: int,
+    case: dict[str, Any],
+    case_factors: dict[str, Any],
+    control_sha256: str | None,
+    failure_mode: str,
+    generated_at: str,
+    git_run: dict[str, Any] | None,
+    reason: str,
+    return_code: int,
+    source: str,
+    status: str,
+    expected_case_id: str | None = None,
+) -> dict[str, Any]:
+    """Assemble a terminal-outcome record for one non-success attempt and validate it.
+
+    The case, allocation, and attempt identifiers are derived from the supplied
+    factors through the ``identity`` helpers rather than accepted from the caller.
+    When ``expected_case_id`` is given it must equal the derived case identifier.
+
+    Returns:
+        Whatever ``validate_terminal_document`` returns for the assembled record.
+
+    Raises:
+        ContractError: If ``expected_case_id`` is provided and differs from the
+            identifier derived from ``case_factors``.
+    """
+    derived_case_id = identity.digest("case", case_factors)
+    case_id_conflicts = (
+        expected_case_id is not None and expected_case_id != derived_case_id
+    )
+    if case_id_conflicts:
+        raise ContractError(
+            "scheduled case ID differs from terminal factors: "
+            f"{expected_case_id} != {derived_case_id}"
+        )
+    derived_allocation_id = identity.allocation_id(allocation_factors)
+    # The attempt identifier is keyed on the allocation, the case, and the ordinal.
+    identity_section = {
+        "allocation_factors": allocation_factors,
+        "allocation_id": derived_allocation_id,
+        "attempt_id": identity.attempt_id(
+            allocation=derived_allocation_id,
+            case=derived_case_id,
+            ordinal=attempt_ordinal,
+        ),
+        "attempt_ordinal": attempt_ordinal,
+        "case_factors": case_factors,
+        "case_id": derived_case_id,
+    }
+    provenance_section = {
+        "git_run": git_run,
+        "control_sha256": control_sha256,
+        "redaction": "sanitized-v1",
+        "source": source,
+    }
+    outcome_section = {
+        "status": status,
+        "failure_mode": failure_mode,
+        "reason": reason,
+        "return_code": return_code,
+    }
+    # Self-validate so a malformed terminal record is never handed back.
+    return validate_terminal_document({
+        "format": TERMINAL_FORMAT,
+        "schema_version": 1,
+        "record_type": "terminal-outcome",
+        "generated_at": generated_at,
+        "identity": identity_section,
+        "case": case,
+        "provenance": provenance_section,
+        "outcome": outcome_section,
+    })
+
+
+def validate_terminal_document(document: Any) -> dict[str, Any]:
+    """Validate one terminal-outcome document against the v1 contract.
+
+    The checks run in this order: native JSON schema, top-level key set and
+    format markers, typed identity fields, scheduled case versus case factors,
+    qualification index range, recomputed identity digests, provenance,
+    outcome registration for the declaring source, allocation factors versus
+    the git run, and finally the publication-safety assertion.
+
+    Args:
+        document: Candidate terminal document, normally a decoded JSON object.
+
+    Returns:
+        The top-level mapping as returned by ``_keys`` once every check passes.
+
+    Raises:
+        ContractError: When any check performed directly in this function
+            fails. The helper validators (``_keys``, ``_integer``, ``_text``,
+            ...) presumably raise the same type; confirm against their
+            definitions.
+    """
+    _validate_native_schema("terminal-outcome-v1.schema.json", document)
+    doc = _keys(
+        document,
+        {"case", "format", "generated_at", "identity", "outcome", "provenance", "record_type",
+         "schema_version"},
+        "terminal",
+    )
+    # Reject anything that is not a schema-version-1 terminal-outcome record.
+    if doc["format"] != TERMINAL_FORMAT or doc["schema_version"] != 1 or doc["record_type"] != "terminal-outcome":
+        raise ContractError("terminal format/schema/record type differs from v1")
+    ids = _keys(doc["identity"], {
+        "allocation_factors", "allocation_id", "attempt_id", "attempt_ordinal",
+        "case_factors", "case_id",
+    }, "terminal.identity")
+    # _typed presumably checks that each identifier carries the expected kind tag; confirm.
+    for field, kind in (("allocation_id", "allocation"), ("attempt_id", "attempt"), ("case_id", "case")):
+        _typed(ids[field], kind, f"terminal.identity.{field}")
+    ordinal = _integer(ids["attempt_ordinal"], "terminal.identity.attempt_ordinal", minimum=1)
+    case = _scheduled_case(doc["case"], "terminal.case")
+    factors = _keys(ids["case_factors"], {"case", "profile", "sku"}, "terminal.identity.case_factors")
+    # The case factors must embed the scheduled case itself plus the profile derived from it.
+    if factors["case"] != case or factors["profile"] != scheduled_case_profile(
+        case, "terminal.case"
+    ):
+        raise ContractError("terminal case factors differ from the scheduled case/profile")
+    _text(factors["sku"], "terminal.identity.case_factors.sku")
+    allocation = _keys(
+        ids["allocation_factors"], ALLOCATION_FACTOR_FIELDS,
+        "terminal.identity.allocation_factors",
+    )
+    qualification_index = _integer(
+        allocation["qualification_index"],
+        "terminal.identity.allocation_factors.qualification_index",
+        minimum=1,
+    )
+    # The lower bound is requested through minimum=1 above; this enforces the upper bound.
+    if qualification_index > 3:
+        raise ContractError("terminal qualification index must be in 1..3")
+    # Recompute every identifier from its factors so the document cannot claim
+    # identifiers that its own factors do not produce.
+    expected_case = identity.digest("case", factors)
+    expected_allocation = identity.allocation_id(allocation)
+    expected_attempt = identity.attempt_id(
+        allocation=expected_allocation, case=expected_case, ordinal=ordinal
+    )
+    if (ids["case_id"], ids["allocation_id"], ids["attempt_id"]) != (
+        expected_case, expected_allocation, expected_attempt
+    ):
+        raise ContractError("terminal typed identities do not match their factors")
+    provenance = _keys(
+        doc["provenance"], {"git_run", "control_sha256", "redaction", "source"},
+        "terminal.provenance",
+    )
+    # git_run is optional; when present it must agree with the allocation's qualification index.
+    git_run = provenance["git_run"]
+    if git_run is not None:
+        git_run = _keys(git_run, GIT_RUN_FIELDS, "terminal.provenance.git_run")
+        if git_run["qualification_index"] != qualification_index:
+            raise ContractError(
+                "terminal git run qualification index differs from allocation"
+            )
+    # control_sha256 is optional; when present it must be 64 lowercase hexadecimal characters.
+    control = provenance["control_sha256"]
+    if control is not None and (
+        not isinstance(control, str) or len(control) != 64
+        or any(char not in "0123456789abcdef" for char in control)
+    ):
+        raise ContractError("terminal control_sha256 is invalid")
+    if provenance["redaction"] != "sanitized-v1":
+        raise ContractError("terminal redaction contract differs")
+    source = _text(provenance["source"], "terminal.provenance.source")
+    outcome = _keys(
+        doc["outcome"], {"failure_mode", "reason", "return_code", "status"}, "terminal.outcome"
+    )
+    if outcome["status"] not in {"failed", "invalid", "unsupported"}:
+        raise ContractError("terminal outcome status is invalid")
+    failure_mode = _text(outcome["failure_mode"], "terminal.outcome.failure_mode")
+    reason = _text(outcome["reason"], "terminal.outcome.reason")
+    _integer(outcome["return_code"], "terminal.outcome.return_code")
+    # Each registered source fixes the expected runner, the only allowed status,
+    # and how the reason is checked. An unregistered failure mode yields None
+    # from .get(), which can never equal the reason returned by _text above
+    # (assuming _text returns a string; confirm).
+    if source == "runtime-emitter":
+        expected_runner = factors["sku"]
+        expected_reason = RUNTIME_FAILURE_REASONS.get(failure_mode)
+        valid_outcome = outcome["status"] == "failed" and reason == expected_reason
+    elif source == "post-emit-command":
+        expected_runner = factors["sku"]
+        expected_reason = POST_EMIT_FAILURE_REASONS.get(failure_mode)
+        valid_outcome = outcome["status"] == "failed" and reason == expected_reason
+    elif source == "matrix-capability-resolver":
+        # Resolver-emitted records use a fixed runner name instead of the SKU.
+        expected_runner = "capability-resolver"
+        valid_outcome = (
+            outcome["status"] == "unsupported"
+            and failure_mode == "capability"
+            and reason in CAPABILITY_FAILURE_REASONS
+        )
+    else:
+        raise ContractError("terminal provenance source is not registered")
+    if not valid_outcome:
+        raise ContractError("terminal source and outcome are not registered")
+    # Allocation factors must be exactly what the git run (or its absence) and
+    # the source imply. execution_id is copied from the document itself, so it
+    # is not independently checked by this comparison.
+    expected_allocation = {
+        "artifact": git_run["artifact"] if git_run is not None else None,
+        "execution_id": allocation["execution_id"],
+        "job": git_run["job"] if git_run is not None else None,
+        "qualification_index": qualification_index,
+        "repo": git_run["repo"] if git_run is not None else None,
+        "run_attempt": git_run["run_attempt"] if git_run is not None else None,
+        "run_id": git_run["run_id"] if git_run is not None else None,
+        "runner": expected_runner,
+        "source_sha": git_run["source_sha"] if git_run is not None else None,
+    }
+    if allocation != expected_allocation:
+        raise ContractError("terminal allocation factors differ from provenance or source")
+    # Final gate: delegate to the artifact-safety check before returning.
+    artifact_safety.assert_publication_safe([doc])
+    return doc
+
+
+def load_raw_attempt(path: str | os.PathLike[str]) -> dict[str, Any]:
+    """Load a raw attempt and its sibling sample artifact, then validate both.
+
+    The sample file is looked up next to ``path`` under the name recorded in
+    ``sample_artifact.path``. Its size and SHA-256 digest must match the
+    recorded ``bytes`` and ``sha256`` before the pair is passed to
+    ``validate_raw_document``.
+
+    Raises:
+        ContractError: If the sample payload's length or digest differs from
+            the values recorded in the raw document.
+    """
+    document = strict_load(path)
+    attempt_location = Path(path)
+    descriptor = _obj(
+        _obj(document, "raw").get("sample_artifact"), "raw.sample_artifact"
+    )
+    sample_name = _text(descriptor.get("path"), "raw.sample_artifact.path")
+    sample_path = attempt_location.with_name(sample_name)
+    payload = sample_path.read_bytes()
+    # Size is compared first; the digest is only computed when the size matches.
+    intact = (
+        len(payload) == descriptor.get("bytes")
+        and hashlib.sha256(payload).hexdigest() == descriptor.get("sha256")
+    )
+    if not intact:
+        raise ContractError("sample artifact bytes or digest differ")
+    return validate_raw_document(document, strict_load(sample_path))
+
+
+def load_attempt(path: str | os.PathLike[str]) -> dict[str, Any]:
+    """Load one native attempt file and run the validator matching its format.
+
+    Raw attempts are delegated to ``load_raw_attempt`` (which re-reads ``path``);
+    terminal attempts are validated in place by ``validate_terminal_document``.
+
+    Raises:
+        ContractError: If the loaded value is not a mapping or its ``format``
+            is neither the raw nor the terminal format.
+    """
+    document = strict_load(path)
+    if not isinstance(document, dict):
+        raise ContractError("unknown native attempt format")
+    declared_format = document.get("format")
+    if declared_format == RAW_FORMAT:
+        return load_raw_attempt(path)
+    if declared_format == TERMINAL_FORMAT:
+        return validate_terminal_document(document)
+    raise ContractError("unknown native attempt format")
+
+
+def quarantine_invalid_attempt(path: str | os.PathLike[str]) -> bool:
+    """Move an invalid attempt and its basename-safe sample outside JSON upload globs.
+
+    The attempt at ``path`` is validated with ``load_attempt``. If validation
+    fails, the attempt file is renamed with a ``.quarantine`` suffix. Its sample
+    artifact is renamed the same way when the attempt names one by bare
+    filename and that file exists beside it.
+
+    Args:
+        path: Location of the attempt file to check.
+
+    Returns:
+        ``True`` when the attempt was invalid and has been quarantined;
+        ``False`` when ``path`` is not a regular file or the attempt validates.
+
+    Raises:
+        OSError: If renaming the attempt or its sample fails.
+    """
+    destination = Path(path)
+    if not destination.is_file():
+        return False
+    try:
+        load_attempt(destination)
+        return False
+    except (ContractError, OSError, ValueError):
+        # Best-effort re-read, only to discover the sample file name. The file
+        # is already known to be invalid, so any decoding problem must fall back
+        # to "no sample" rather than abort the quarantine.
+        try:
+            document = json.loads(destination.read_bytes())
+        except (OSError, ValueError):
+            # ValueError covers json.JSONDecodeError and also the
+            # UnicodeDecodeError that json.loads raises for bytes that are not
+            # valid UTF-8/16/32. Catching only JSONDecodeError let a
+            # binary-garbage attempt escape quarantine with an exception.
+            document = {}
+        artifact = document.get("sample_artifact") if isinstance(document, dict) else None
+        sample_name = artifact.get("path") if isinstance(artifact, dict) else None
+        # Only a bare file name is honoured, so an untrusted document cannot
+        # direct the rename at a file outside the attempt's directory.
+        if isinstance(sample_name, str) and Path(sample_name).name == sample_name:
+            sample_path = destination.with_name(sample_name)
+            if sample_path.is_file():
+                os.replace(sample_path, sample_path.with_name(sample_path.name + ".quarantine"))
+        os.replace(destination, destination.with_name(destination.name + ".quarantine"))
+        return True
+
+
+def normalize_attempt(document: dict[str, Any]) -> dict[str, Any]:
+    """Project a natively validated attempt onto the flat publisher-facing record.
+
+    Raw attempts contribute their measurement rows, runtime fingerprint, and
+    series ID. Terminal attempts carry none of these, so the same fields are
+    ``[]``, ``None``, and ``None`` respectively.
+
+    Args:
+        document: a raw or terminal attempt document.
+    Raises:
+        ContractError: if ``format`` is neither the raw nor the terminal format.
+    """
+    kind = document.get("format")
+    if kind != RAW_FORMAT and kind != TERMINAL_FORMAT:
+        raise ContractError("unknown attempt format")
+    is_raw = kind == RAW_FORMAT
+    identifiers = document["identity"]
+    # Keys are inserted in the same order for both formats.
+    return {
+        "allocation_id": identifiers["allocation_id"],
+        "attempt_id": identifiers["attempt_id"],
+        "case": document["case"],
+        "case_id": identifiers["case_id"],
+        "generated_at": document["generated_at"],
+        "outcome": document["outcome"],
+        "points": document["measurement"]["rows"] if is_raw else [],
+        "runtime_fingerprint": document["runtime_fingerprint"] if is_raw else None,
+        "series_id": identifiers["series_id"] if is_raw else None,
+    }
+
+
+def _env_integer(name: str, default: int) -> int:  # int(env[name]); `default` if unset or not an integer
+    try:
+        return int(os.environ.get(name, str(default)))
+    except ValueError:
+        return default  # unparsable text (including an empty value) falls back silently
+
+
+def _env_enabled(name: str) -> bool:  # True only for "1", "true", or "yes", compared after lower()
+    return os.environ.get(name, "").lower() in {"1", "true", "yes"}
+
+
+def _terminal_case_from_environment(backend: str, phase: str) -> dict[str, Any]:  # case dict from CX_* variables
+    ep = _env_integer("CX_EP", _env_integer("CX_NGPUS", 1))  # CX_EP, else CX_NGPUS, else 1
+    gpus_per_node = _env_integer("CX_GPUS_PER_NODE", ep)  # defaults to the EP size
+    ladder = os.environ.get("CX_TOKENS_LADDER", "") or (  # unset or empty selects the per-phase default
+        "1 2 4 8 16 32 64 128"
+        if phase == "decode"
+        else "128 256 512 1024 2048 4096"
+    )
+    case = {
+        "suite": os.environ.get("CX_SUITE") or "manual",
+        "workload": os.environ.get("CX_WORKLOAD_NAME") or "manual",
+        "required_publication": os.environ.get("CX_REQUIRED_PUBLICATION") or "diagnostic",
+        "backend": backend,
+        "mode": os.environ.get("CX_MODE", "normal"),
+        "routing": os.environ.get("CX_ROUTING", "uniform"),
+        "phase": phase,
+        "ep": ep,
+        "eplb": _env_enabled("CX_EPLB"),
+        "hidden": _env_integer("CX_HIDDEN", 7168),
+        "topk": _env_integer("CX_TOPK", 8),
+        "experts": _env_integer("CX_EXPERTS", 256),
+        "samples_per_point": _env_integer("CX_SAMPLES_PER_POINT", 512),
+        "warmup_semantics": os.environ.get(
+            "CX_WARMUP_SEMANTICS",
+            "full-roundtrip-before-each-component-trial-point-v1",
+        ),
+        "ladder": ladder,
+        "timing": (  # formatted as "<iters>:<trials>:<warmup>"
+            f'{_env_integer("CX_ITERS", 8)}:{_env_integer("CX_TRIALS", 64)}:'
+            f'{_env_integer("CX_WARMUP", 32)}'
+        ),
+        "canonical": _env_enabled("CX_CANONICAL"),
+        "nodes": _env_integer("CX_NODES", _env_integer("SLURM_NNODES", 1)),  # CX_NODES, else SLURM_NNODES, else 1
+        "gpus_per_node": gpus_per_node,
+        "scale_up_domain": _env_integer("CX_SCALE_UP_DOMAIN", gpus_per_node),
+        "scope": os.environ.get("CX_SCOPE", "scale-up"),
+        "topology_class": os.environ.get("CX_TOPO", "manual"),
+        "transport": os.environ.get("CX_TRANSPORT", "unknown"),
+        "scale_up_transport": os.environ.get("CX_SCALE_UP_TRANSPORT", "unknown"),
+        "scale_out_transport": os.environ.get("CX_SCALE_OUT_TRANSPORT") or None,  # empty string becomes None
+    }
+    precision_profile = os.environ.get("CX_PRECISION_PROFILE") or None
+    if precision_profile is not None:  # the key is omitted entirely when unset or empty
+        case["precision_profile"] = precision_profile
+    return case
+
+
+def _git_run_from_environment() -> dict[str, Any] | None:
+    """Collect workflow-run provenance from the environment; None when every field is absent."""
+    read = os.environ.get
+    # An empty string is treated exactly like an unset variable.
+    record: dict[str, Any] = {
+        "run_id": read("GITHUB_RUN_ID") or None,
+        "run_attempt": read("GITHUB_RUN_ATTEMPT") or None,
+        "ref": read("GITHUB_REF_NAME") or read("GITHUB_REF") or None,
+        "source_sha": read("COLLECTIVEX_SOURCE_SHA") or read("GITHUB_SHA") or None,
+        "repo": read("GITHUB_REPOSITORY") or None,
+        "job": read("GITHUB_JOB") or None,
+        "artifact": read("COLLECTIVEX_ARTIFACT_NAME") or None,
+    }
+    if all(field is None for field in record.values()):
+        return None
+    record["qualification_index"] = _env_integer("CX_QUALIFICATION_INDEX", 1)
+    return record
+
+
+def _allocation_factors_from_environment(
+    runner: str, git_run: dict[str, Any] | None
+) -> dict[str, Any]:  # allocation factors; every run-derived field is None when git_run is None
+    return {
+        "artifact": git_run["artifact"] if git_run is not None else None,
+        "execution_id": os.environ.get("COLLECTIVEX_EXECUTION_ID") or None,  # empty string counts as unset
+        "job": git_run["job"] if git_run is not None else None,
+        "qualification_index": _env_integer("CX_QUALIFICATION_INDEX", 1),  # read from the environment, not git_run
+        "repo": git_run["repo"] if git_run is not None else None,
+        "run_attempt": git_run["run_attempt"] if git_run is not None else None,
+        "run_id": git_run["run_id"] if git_run is not None else None,
+        "runner": runner,
+        "source_sha": git_run["source_sha"] if git_run is not None else None,
+    }
+
+
+def make_terminal_from_environment(
+    *, backend: str, phase: str, return_code: int, failure_mode: str | None = None
+) -> dict[str, Any]:
+    """Build a failed terminal document from the same exported case coordinates as run_ep."""
+    mode = failure_mode or RETURN_CODE_FAILURE_MODES.get(return_code, "execution")  # non-empty failure_mode wins
+    reason = RUNTIME_FAILURE_REASONS.get(mode)
+    if reason is None:
+        raise ContractError("runtime failure mode is not registered")
+    runner = os.environ.get("CX_RUNNER", "")  # empty string when CX_RUNNER is unset
+    case = _terminal_case_from_environment(backend, phase)
+    case_factors = {
+        "case": case,
+        "profile": scheduled_case_profile(case, "runtime case"),
+        "sku": runner,  # the runner name doubles as the SKU factor
+    }
+    git_run = _git_run_from_environment()
+    control = os.environ.get("COLLECTIVEX_CONTROL_SHA256") or None
+    return make_terminal_document(
+        allocation_factors=_allocation_factors_from_environment(runner, git_run),
+        attempt_ordinal=_env_integer("CX_ATTEMPT_ID", 1),
+        case=case,
+        case_factors=case_factors,
+        control_sha256=control,
+        failure_mode=mode,
+        generated_at=dt.datetime.now(dt.timezone.utc).isoformat(),  # current UTC time, ISO 8601
+        git_run=git_run,
+        reason=reason,
+        return_code=return_code,
+        source="runtime-emitter",
+        status="failed",
+        expected_case_id=os.environ.get("CX_CASE_ID") or None,
+    )
+
+
+def _write_document(path: str | os.PathLike[str], document: dict[str, Any]) -> None:  # JSON via temp file + rename
+    destination = Path(path)
+    destination.parent.mkdir(parents=True, exist_ok=True)
+    temporary = destination.with_name(destination.name + ".tmp")  # sibling file in the same directory
+    temporary.write_text(json.dumps(document, indent=2, sort_keys=True) + "\n")
+    os.replace(temporary, destination)  # swap the finished file into place
+
+
+def demote_raw_attempt(path: str | os.PathLike[str], return_code: int) -> dict[str, Any]:
+    """Replace a rank-zero raw result with a failed terminal document when the distributed command later fails."""
+    destination = Path(path)
+    raw = strict_load(destination)
+    if not isinstance(raw, dict) or raw.get("format") != RAW_FORMAT:
+        raise ContractError("only a native raw attempt can be demoted")
+    ids = _obj(raw.get("identity"), "raw.identity")
+    required = {
+        "allocation_factors", "allocation_id", "attempt_id", "attempt_ordinal",
+        "case_factors", "case_id",
+    }
+    if not required.issubset(ids):
+        raise ContractError("raw identity lacks terminal factors")
+    mode = RETURN_CODE_FAILURE_MODES.get(return_code, "execution")  # unmapped return codes become "execution"
+    git_run = _obj(raw.get("provenance"), "raw.provenance").get("git_run")
+    if git_run is not None:
+        git_run = _keys(git_run, GIT_RUN_FIELDS, "raw.provenance.git_run")
+    terminal = make_terminal_document(  # identity factors are carried over from the raw document
+        allocation_factors=ids["allocation_factors"],
+        attempt_ordinal=ids["attempt_ordinal"],
+        case=ids["case_factors"]["case"],
+        case_factors=ids["case_factors"],
+        control_sha256=os.environ.get("COLLECTIVEX_CONTROL_SHA256") or None,
+        failure_mode=mode,
+        generated_at=dt.datetime.now(dt.timezone.utc).isoformat(),
+        git_run=git_run,
+        reason=POST_EMIT_FAILURE_REASONS[mode],  # NOTE(review): unguarded lookup; confirm every mode is registered
+        return_code=return_code,
+        source="post-emit-command",
+        status="failed",
+        expected_case_id=ids["case_id"],
+    )
+    artifact = raw.get("sample_artifact") or {}
+    sample_name = artifact.get("path")
+    if isinstance(sample_name, str) and Path(sample_name).name == sample_name:  # bare basename only
+        destination.with_name(sample_name).unlink(missing_ok=True)  # drop the paired sample file
+    _write_document(destination, terminal)  # overwrite the raw document at the same path
+    return terminal
+
+
+def validate_attempt_paths(paths: list[str]) -> int:
+    """Fully validate a result directory's attempts and paired sample artifacts; return the attempt count."""
+    if not paths or len(paths) != len(set(paths)):  # uniqueness is checked on the strings as given
+        raise ContractError("validate-many requires unique result paths")
+    sample_paths: set[Path] = set()  # sample files present in `paths`
+    referenced_samples: set[Path] = set()  # sample files named by raw attempts
+    attempt_count = 0
+    for raw_path in paths:
+        path = Path(raw_path).resolve()
+        document = strict_load(path)
+        if isinstance(document, dict) and document.get("format") == RAW_FORMAT:
+            document = load_raw_attempt(path)
+            referenced_samples.add(path.with_name(document["sample_artifact"]["path"]))
+            attempt_count += 1
+        elif isinstance(document, dict) and document.get("format") == TERMINAL_FORMAT:
+            validate_terminal_document(document)
+            attempt_count += 1
+        elif isinstance(document, dict) and document.get("format") == SAMPLES_FORMAT:
+            validate_samples_document(document)
+            sample_paths.add(path)  # sample files do not count as attempts
+        else:
+            raise ContractError(f"unknown result artifact {path.name}")
+    if sample_paths != referenced_samples:  # every sample must be referenced and every reference listed
+        raise ContractError("sample artifacts are missing, orphaned, or outside the validated set")
+    if attempt_count == 0:
+        raise ContractError("result set contains no native attempts")
+    return attempt_count
+
+
+def validate_delivery(
+    paths: list[str], source_path: str, *, disposition: str | None = None
+) -> int:
+    """Reconcile a shard or matrix disposition with its complete native attempt set; return the attempt count."""
+    source_file = Path(source_path).resolve()
+    source = strict_load(source_file)
+    if isinstance(source, dict) and source.get("format") == "collectivex.matrix.v1":
+        if disposition is None:
+            raise ContractError("matrix delivery validation requires a disposition")
+        wrappers = [
+            item for item in source.get("requested_cases", [])
+            if isinstance(item, dict) and item.get("disposition") == disposition
+        ]
+        expected = {
+            item["case"]["case_id"]: (item["sku"], item["case"])
+            for item in wrappers
+        }
+        expected_count = len(wrappers)
+        require_one_allocation = disposition == "unsupported"  # other dispositions may span allocations
+    elif isinstance(source, dict) and isinstance(source.get("cases"), list):
+        expected = {
+            case["case_id"]: (source.get("sku"), case)
+            for case in source["cases"]
+        }
+        expected_count = len(source["cases"])
+        require_one_allocation = True
+    else:
+        raise ContractError("delivery source is not a matrix or shard control")
+    if not expected or len(expected) != expected_count:  # duplicate case IDs collapse in the dict
+        raise ContractError("delivery source has empty or duplicate case coverage")
+
+    validate_attempt_paths(paths)  # validates every artifact, including sample pairing
+    attempts = []
+    for raw_path in paths:
+        document = strict_load(raw_path)
+        if isinstance(document, dict) and document.get("format") in {RAW_FORMAT, TERMINAL_FORMAT}:
+            attempts.append(load_attempt(raw_path))
+    by_case: dict[str, list[dict[str, Any]]] = {}
+    attempt_ids = set()
+    allocation_ids = set()
+    source_sha256 = hashlib.sha256(source_file.read_bytes()).hexdigest()  # digest of the control file's bytes
+    for document in attempts:
+        ids = document["identity"]
+        case_id = ids["case_id"]
+        if case_id not in expected or ids["attempt_id"] in attempt_ids:
+            raise ContractError("delivery contains an extra case or duplicate attempt")
+        attempt_ids.add(ids["attempt_id"])
+        allocation_ids.add(ids["allocation_id"])
+        sku, scheduled = expected[case_id]
+        scheduled_case = {key: value for key, value in scheduled.items() if key != "case_id"}
+        if ids["case_factors"] != {
+            "case": scheduled_case,
+            "profile": scheduled_case_profile(scheduled_case, "delivery case"),
+            "sku": sku,
+        }:
+            raise ContractError("delivery attempt differs from its scheduled case")
+        factors = ids["allocation_factors"]
+        expected_environment = {  # empty variables count as unset, as the emitter records them as None
+            "artifact": os.environ.get("COLLECTIVEX_ARTIFACT_NAME") or None,
+            "execution_id": os.environ.get("COLLECTIVEX_EXECUTION_ID") or None,
+            "job": os.environ.get("GITHUB_JOB") or None,
+            "repo": os.environ.get("GITHUB_REPOSITORY") or None,
+            "run_attempt": os.environ.get("GITHUB_RUN_ATTEMPT") or None,
+            "run_id": os.environ.get("GITHUB_RUN_ID") or None,
+            "source_sha": os.environ.get("COLLECTIVEX_SOURCE_SHA") or os.environ.get("GITHUB_SHA") or None,
+        }
+        expected_runner = (
+            "capability-resolver"
+            if document["format"] == TERMINAL_FORMAT
+            and document["provenance"]["source"] == "matrix-capability-resolver"
+            else sku
+        )
+        if any(
+            value is not None and factors[field] != value  # unset variables are not enforced
+            for field, value in expected_environment.items()
+        ) or factors["runner"] != expected_runner:
+            raise ContractError("delivery allocation factors differ from the workflow")
+        if document["format"] == TERMINAL_FORMAT:
+            control = document["provenance"]["control_sha256"]
+            if control != source_sha256:
+                raise ContractError("terminal outcome does not reference its exact control document")
+        by_case.setdefault(case_id, []).append(document)
+    if set(by_case) != set(expected):
+        raise ContractError("delivery case coverage is incomplete")
+    for case_id, documents in by_case.items():
+        ordinals = sorted(document["identity"]["attempt_ordinal"] for document in documents)
+        if ordinals != list(range(1, len(ordinals) + 1)):  # ordinals must be exactly 1..n per case
+            raise ContractError(f"delivery attempt ordinals are not contiguous for {case_id}")
+    if require_one_allocation and len(allocation_ids) != 1:
+        raise ContractError("one shard must use exactly one allocation identity")
+    return len(attempts)
+
+
+def main() -> int:  # CLI entry point; returns the process exit status (0 = success, 1 = failure)
+    parser = argparse.ArgumentParser(description="CollectiveX native attempt contracts")
+    subparsers = parser.add_subparsers(dest="command", required=True)
+    probe = subparsers.add_parser("probe")
+    probe.add_argument("path")
+    probe.add_argument("--status", choices=("success", "invalid"))
+    emit = subparsers.add_parser("emit-terminal")
+    emit.add_argument("--out", required=True)
+    emit.add_argument("--backend", required=True)
+    emit.add_argument("--phase", required=True, choices=("decode", "prefill"))
+    emit.add_argument("--return-code", required=True, type=int)
+    emit.add_argument("--failure-mode")
+    demote = subparsers.add_parser("demote")
+    demote.add_argument("path")
+    demote.add_argument("--return-code", required=True, type=int)
+    validate_many = subparsers.add_parser("validate-many")
+    validate_many.add_argument("paths", nargs="+")
+    quarantine = subparsers.add_parser("quarantine-invalid")
+    quarantine.add_argument("path")
+    delivery = subparsers.add_parser("validate-delivery")
+    delivery.add_argument("--source", required=True)
+    delivery.add_argument("--disposition")
+    delivery.add_argument("paths", nargs="+")
+    args = parser.parse_args()
+    try:
+        if args.command == "probe":
+            document = load_attempt(args.path)
+            if args.status is None:
+                return 0  # without --status, a document that validates is enough
+            if document.get("format") != RAW_FORMAT:
+                return 1  # a --status probe only succeeds for raw attempts
+            outcome = document["outcome"]
+            validity = outcome.get("validity")
+            return int(  # 0 only when execution completed and the status equals --status
+                not (
+                    isinstance(validity, dict)
+                    and validity.get("execution_status") == "complete"
+                    and outcome.get("status") == args.status
+                )
+            )
+        if args.command == "emit-terminal":
+            document = make_terminal_from_environment(
+                backend=args.backend,
+                phase=args.phase,
+                return_code=args.return_code,
+                failure_mode=args.failure_mode,
+            )
+            _write_document(args.out, document)
+            print(f"preserved terminal outcome ({document['outcome']['failure_mode']})")
+            return 0
+        if args.command == "validate-many":
+            print(f"validated {validate_attempt_paths(args.paths)} native attempts")
+            return 0
+        if args.command == "quarantine-invalid":
+            quarantine_invalid_attempt(args.path)  # exit status is 0 whether or not anything was moved
+            return 0
+        if args.command == "validate-delivery":
+            print(
+                f"validated {validate_delivery(args.paths, args.source, disposition=args.disposition)} "
+                "delivery attempts"
+            )
+            return 0
+        demote_raw_attempt(args.path, args.return_code)  # the only remaining subcommand is "demote"
+        return 0
+    except (ContractError, identity.IdentityError, OSError, ValueError) as exc:
+        print(f"terminal contract error: {exc}", file=sys.stderr)  # reported on stderr, exit status 1
+        return 1
+
+
+if __name__ == "__main__":  # run as a script: exit with main()'s return value
+    raise SystemExit(main())
diff --git a/experimental/CollectiveX/docs/methodology.md b/experimental/CollectiveX/docs/methodology.md
new file mode 100644
index 000000000..396a705b6
--- /dev/null
+++ b/experimental/CollectiveX/docs/methodology.md
@@ -0,0 +1,311 @@
+# CollectiveX EP Pre-V1 Baseline
+
+<div align="center">
+
+**English** | [中文](./methodology_zh.md)
+
+</div>
+
+This document describes the implemented BF16 baseline. It is not yet the V1 qualification contract.
+Before any V1-tagged run, this English document must match the implemented precision, measurement,
+publication, and frontend contracts; counts and digests remain unfrozen. Chinese documentation
+synchronization is explicitly deferred for the V1 implementation phase.
+
+## Product Boundary
+
+CollectiveX is a communication microbenchmark for:
+
+- comparing EP libraries on one chip/topology;
+- comparing EP latency and logical payload bandwidth across systems under the same workload; and
+- exposing unsupported, failed, invalid, and unstable evidence without contaminating decisions.
+
+It does not predict serving throughput without a separate correlation study.
+
+## Implemented Matrix
+
+The implemented workload is `deepseek-v3-v1`: hidden 7168, top-k 8, 256 routed experts, BF16 dispatch
+and combine, packed placement, and backend-tuned resources. Each case explicitly selects normal
+`layout-and-dispatch-v1` or low-latency `expert-packed-weighted-combine-v1` semantics.
+
+- `ep-core-v1`: uniform routing; decode T=1..128 powers of two; prefill T=256/512.
+- `ep-routing-v1`: Zipf with EPLB off/on; decode T=128; prefill T=512.
+- `ep-low-latency-v1`: DeepEP V1/UCCL native low-latency APIs; uniform decode T=1..128 powers of
+  two; the capability contract rejects every other backend instead of fabricating a low-latency path.
+- Implemented baseline surface: 608 requested cases / 1,600 token points, of which
+  364 runnable cases / 940 points run in 58 executable workflow shards/allocation cells and
+  244 cases / 660 points are unsupported.
+
+| Systems | EP8 | EP16 |
+|---|---|---|
+| H100/H200/B200/B300 | 1x8 NVLink, scale-up | 2x8 NVLink + RDMA, scale-out |
+| MI325X/MI355X | 1x8 XGMI, scale-up | 2x8 XGMI + RDMA, scale-out |
+| GB200/GB300 | 2x4 MNNVL, scale-up | 4x4 MNNVL, scale-up |
+
+Physical host count does not define scope. Both GB cells remain inside one 72-GPU MNNVL scale-up
+domain.
+
+Unsupported combinations are terminal outcomes, not silently skipped coverage. DeepEP V2 is the
+`ElasticBuffer` introduced by PR #605, pinned with upstream PR #630's minimal pure-scale-up fix.
+Scale-up cases request NCCL Device API LSA and fail closed unless the realized LSA team covers the
+full EP world. x86 EP16 scale-out uses the hybrid path with GIN and requires two logical scale-out
+domains represented by two physical RDMA ranks, with eight scale-up ranks per domain. GB EP16
+remains MNNVL scale-up and uses LSA. NVIDIA capabilities declared in source remain unvalidated until
+GPU outcomes pass the native oracle and publisher gates. DeepEP V2 on the current H100 runner pool is a
+declared unsupported combination in v1 because NCCL 2.30.4 reports no Device API symmetric-memory
+support for its EP8 communicator; that pool can return only after all-rank CUDA P2P/LSA support is
+restored. This baseline omits `[cl]`, `[rv]`, quantization, alternate activation/routing profiles,
+uneven allocation, placement permutations, model envelopes, and scaling.
+FlashInfer is excluded from v1 after repeatable intermittent execution failures; those failures are
+not converted into planned-unsupported coverage.
+In normal mode, MoRI EP8 uses AsyncLL on MI325X and IntraNode on MI355X. EP16 uses pinned InterNodeV1 over
+2x8 XGMI + RDMA with 96 blocks, 64 RDMA blocks, 8 warps, one QP per PE, and external input. MoRI's
+AsyncLL transport is not the genuine low-latency suite contract and is never labeled as such.
+
+## Workload Identity
+
+One canonical workload is generated over the global token batch and sliced by source rank. Expert
+indices and gate weights are serialized. Activations use a versioned integer counter formula whose
+BF16 values are exact across runtimes; its full identity is bound into the manifest. The manifest
+also binds shape/EP coordinates and oracle version. SHA-256 covers canonical bytes and parameters;
+library RNG regeneration is not proof of identity.
+
+Routing traffic distinguishes:
+
+- token-expert assignments, which determine expert compute load; and
+- rank-deduplicated token payload copies, which determine EP activation traffic.
+
+Adapters may not generate routing or reinterpret one quantity as the other.
+
+## Measurement
+
+Normal mode uses `layout-and-dispatch-v1`: dispatch timing includes layout plus communication, and
+combine returns activation payload through an unweighted rank-sum path. Low-latency mode uses
+`expert-packed-weighted-combine-v1`: native DeepEP V1/UCCL APIs dispatch token-expert assignments and
+perform gate-weighted combine. Expert-output staging is outside isolated combine timing and inside
+measured paired roundtrip. Each component declares availability, origin, start/end states, stage
+scope, and sample count. A paired-only API reports null isolated components. `isolated_sum` is
+derived and never used for throughput or recommendations. Mode is series identity, and normal and
+low-latency evidence cannot share a ranking cohort.
+
+Every measured component uses `fixed-512-v1`:
+
+- 64 trials x 8 timed iterations = 512 observations;
+- 32 synchronized full dispatch-stage-combine warmups before each available measured component at
+  every trial/point;
+- roundtrip first, then isolated dispatch and combine, with a fixed per-phase conditioning ladder; and
+- per-iteration maximum latency across ranks before nearest-rank p50/p90/p95/p99.
+
+Measured roundtrip p99 is the headline latency. Retries remain separate attempts; a later success
+does not erase earlier failures. Decode and prefill identify the serving regime represented by one
+MoE-layer collective; they do not change the timed primitive at an otherwise identical shape.
+
+The NCCL/RCCL reference is an end-to-end Python adapter, not a bare fabric primitive. Its dispatch
+boundary includes layout, count exchange, a device-to-host split synchronization, fresh receive
+allocation, and four payload/metadata all-to-all calls; activation-only combine adds one all-to-all plus
+scatter/reduction. Its p99 therefore measures the complete reference-adapter boundary and can be
+host/scheduler-sensitive. It is useful for portable system controls but must not be labeled fabric,
+link, bus, or single-collective latency.
+
+The versioned conditioning and EPLB planner contracts (reference trace, redundant count, and
+placement/remap version) are part of scheduled and evidence identity.
+
+Logical payload bandwidth is:
+
+`logical_payload_bytes / measured_latency_seconds`
+
+Normal-mode payload bytes use rank-deduplicated token-rank activations; low-latency bytes use
+token-expert assignments. Both add required scale bytes at the named boundary and exclude expert
+metadata, padding, and backend buffer capacity. Algorithm bandwidth, bus bandwidth, wire utilization,
+and physical-link utilization are not published without a defined primitive model or transport
+counters. Logical bandwidth must never be labeled physical bandwidth. Published payload and token
+rates are named `rate_at_latency_percentile`: bytes or tokens divided by the matching latency
+percentile. They are lower-tail service rates at p99 latency, not p99 percentiles of an inverted rate
+distribution.
+
+## Correctness
+
+An implementation-independent oracle uses an expert-specific deterministic transform so wrong
+expert routing cannot pass an identity roundtrip. For every rank and point it verifies:
+
+1. destination rank/expert, source token, multiplicity, gate weight, and receive counts;
+2. dispatched payload and metadata before timing;
+3. combined output before timing;
+4. unchanged semantic inputs through all timed samples; and
+5. dispatched payload/metadata and combined output again after timing.
+
+Normal-mode adapters use activation-only, unweighted rank-sum combine. The oracle builds each rank's
+gate-weighted expert aggregate before combine, independently derives `sum(gate * expert(token))`,
+and checks the dispatch metadata and transformed output. Low-latency adapters separately verify the
+expert-packed source/expert assignment, native gate weights, and gate-weighted combined output. Both
+contracts check every element with recorded `rtol=0.05` and `atol=0.02`. Any failed rank or point
+makes the case ineligible.
+Pre/post dispatch evidence is hashed in canonical source-token order. Native receive slots may be
+assigned nondeterministically, so physical receive order is not treated as a correctness property.
+
+## Native Result
+
+One raw case document uses `format: "collectivex.ep.v1"`, rejects unknown fields, and contains:
+
+- `case`: stable case ID, suite, required tier, and coordinate;
+- `workload`: canonical identity and logical MoE shape;
+- `measurement`: sampling, component states, timing, and byte accounting;
+- `implementation`: instantiated class/API, pinned source, loaded libraries, and resources;
+- `topology`: requested and realized SKU, devices, placement, scale-up domain, and transport;
+- `provenance`: source SHA, image/squash hashes, allocation, run, and attempt;
+- `rows`: point latency, byte accounting, token rate, correctness, load, fanout, and anomaly evidence; and
+- `outcome`: `success`, `failed`, `invalid`, `diagnostic`, or `unsupported`, with reasons.
+
+Raw result documents and exact samples pass through transient GitHub delivery artifacts before the
+publisher archives them in the private bundle; they never enter the public tree. Private environment
+details remain in local mode-0600 logs and ignored operator notes; they are never archived or
+published. Every expected case has one terminal selected outcome while every attempt remains retained.
+
+## Identity And Comparisons
+
+Canonical JSON produces three full SHA-256 IDs:
+
+- `series_id`: all locked factors except token coordinate and repeat allocation;
+- `point_id`: `series_id` plus token coordinate; and
+- `evidence_id`: `point_id` plus allocation/run/attempt/sample checksum.
+
+Locked factors include workload bytes, measurement and sampling contract, resources, realized
+topology, implementation/build, loaded libraries, image/squash, runtime, and source SHA.
+Deferred code generation is captured before measurement and recaptured afterward. DeepEP V2 uses a
+fixed NVCC random seed and binds final cache keys plus generated-source and executable-SASS hashes;
+raw CUBIN bytes remain private diagnostics. Hybrid binds its realized auto-tuned config and complete
+kernel-key set while retaining rank-local shared-object hashes as private diagnostics. Locally built
+extension hashes are diagnostic; their pinned source trees, build recipe, runtime, and dependencies
+remain series-bound.
+The series identity includes the case ID, which binds the complete scheduled token ladder and the
+frozen percentile, rank-reduction, conditioning, warmup, and correctness semantics.
+
+A controlled comparison declares one contrast:
+
+- `library`: backend implementation and its tuned resource profile may differ; the realized system,
+  workload, EP, resource policy, source, and measurement remain matched;
+- `chip`: a controlled platform contrast. The full realized system/topology and tuned resource
+  profile may differ while workload, EP, placement class, resource policy, backend lineage, source,
+  and measurement remain matched. It is not a silicon-only comparison;
+- `system`: all hardware/backend differences stay visible while workload, EP, and measurement match;
+- `routing`: routing distribution/EPLB differs while the static implementation build/generator,
+  system, model shape, resource profile, and measurement remain matched. Uniform and Zipf without
+  EPLB reuse the same generated implementation; EPLB's physical-expert/JIT configuration remains an
+  explicit treatment difference.
+
+Any undeclared mismatch rejects the overlay. Chip/system results describe measured systems, not
+silicon alone.
+
+## Evidence Policy
+
+Capability declarations say what may be attempted; artifacts determine evidence status. Promotion
+requires exact expected coverage with no missing, extra, duplicate, malformed, or heterogeneous
+case. Public coverage preserves each matrix disposition; promotion requires every runnable case to
+succeed and every planned-unsupported case to remain unsupported in every selected run. Only the
+pinned canonical full-v1 matrix, with a decision-grade library, chip, system, and routing cohort,
+may advance `dev-latest`; partial matrices remain diagnostic. The full-matrix digest intentionally
+pins the exact workflow shard grouping as well as the requested cases, so changing `--max-cases`
+or the SKU round-robin scheduling order produces diagnostic-only runs even when case coverage is
+unchanged. Superseded retries,
+planned-unsupported outcomes, and unstable comparison cohorts may render diagnostically but cannot
+rank or recommend; every successful required series in a promoted dataset remains decision-grade.
+Any failed, invalid, or diagnostic retry of a runnable case blocks promotion even if a later retry
+succeeds. Routing cohorts are comparable-experimental sensitivities and never produce configuration
+recommendations; official library/platform/system cohorts own actionable recommendations.
+
+A point becomes decision-grade only after three independent workflow runs, each with its own
+allocation ID, pass correctness, identity, provenance, tail gates, p50/p99 repeat-stability
+thresholds, and stable ordering. The publisher, not the frontend, computes eligibility, controlled
+cohorts, sensitivity pairs, and recommendations.
+
+## Execution Isolation
+
+Every non-MNNVL scale-out case uses operator-pinned socket and RDMA selectors. The launcher rejects
+missing or partial profiles, then probes every allocated node for the configured interface, active
+HCA port, and configured GID before backend initialization. It never substitutes a default route,
+inherited runner environment, or transport fallback. Scale-up and MNNVL cases clear the profile;
+scale-out NCCL/RCCL forces `NCCL_NET=IB` and exact HCA matching. Selector values remain in encrypted
+config and mode-0600 private logs.
+
+Repository staging uses a pre-existing, runner-owned, group/world non-writable shared base outside
+the checkout and workflow workspace. The parent process resolves the exact execution child before
+copying, claims it with a runner-owned marker, and verifies that all allocated nodes can read and
+write the same bytes. Cleanup waits for confirmed allocation teardown and removes only that child,
+including a safely identified partial claim. The same-run V2/Hybrid source archive is fully validated
+under fixed member and expanded-size bounds, and only the selected pinned root is extracted; a
+symlink is accepted only when it is a relative leaf pointing to a regular member inside the same
+backend root, followed by exact Git tree/submodule validation.
+
+## Artifact Validation And JIT Delivery
+
+There is no self-hosted service, Vercel storage, GCP, Neon, managed database, or managed object
+store. The publication workflow uses runner-local temporary storage only as a disposable validation
+and promotion workspace:
+
+```text
+$COLLECTIVEX_STORE_ROOT/
+  private/incoming/          # write-once downloaded GHA attempts
+  private/bundles/<sha256>/  # immutable source archives, native results/samples, matrix, checksums
+  private/quarantine/        # rejected attempts plus machine-readable reasons
+  public/datasets/<sha256>/  # immutable sanitized frontend datasets
+  public/channels/           # small atomic pointers: latest-attempt, dev-latest
+  locks/
+```
+
+Private and public trees use separate permissions. JSON manifests and checksums are authoritative;
+a rebuildable catalog is only an index. Raw sweep artifacts are transient publisher input; only the
+sanitized promoted NDJSON is retained as a frontend publication artifact.
+
+Container tags are checked against pinned registry digests. Enroot imports use a fixed
+`SOURCE_DATE_EPOCH` and versioned cache generation; every mounted squash is freshly hashed into
+series identity. Image-provided DeepEP is also checked against exact per-architecture wheel and
+installed-file fingerprints, so a stale cache cannot inherit the pinned source identity.
+Source-built DeepEP V2 uses a separate mode-0700 cluster-local cache mounted only as `/cx-cache`.
+Its content key binds a versioned build recipe, verified image digest, CPU/GPU architecture,
+upstream source trees, and pinned build dependencies. The cache is never an artifact or publisher
+input; per-execution source/results stages remain isolated and disposable, and marker plus runtime
+probes fail closed before reuse. The runner UID is inside the trusted cluster boundary: this cache
+guards against stale or accidental mutation, not hostile same-UID jobs. Only an unpublished partial
+build may be reset automatically; a published cache that fails integrity or runtime checks is left
+intact and rejected so a concurrent allocation cannot lose files it is using.
+
+Publication is fail-closed:
+
+1. acquire an exclusive filesystem lock and stage on the destination filesystem;
+2. archive source bytes before parsing;
+3. require the exact matrix-declared artifact set and reject every unconsumed archive member;
+4. validate strict schemas, privacy, checksums, identities, timing, and exact matrix outcomes;
+5. write checksums and `COMPLETE`, fsync, then atomically rename the private bundle;
+6. build and validate the sanitized content-addressed dataset, fsync, then atomically rename it;
+7. atomically replace `dev-latest.json` only when every promotion gate passes.
+
+Rejected attempts may update workspace `latest-attempt` but never `dev-latest`. The workspace is
+destroyed with the publication runner and is never attached to the frontend. No artifact is emitted
+unless all three selected bundles advance `dev-latest`.
+
+`publisher.py ingest` accepts the exact matrix plus one `--artifact` directory or ZIP per GitHub
+artifact. `promote` accepts explicit immutable bundle IDs. Default `verify` requires
+`latest-attempt`; it also verifies `dev-latest` when present, while an explicit
+`--channel dev-latest` requires it. The workflow copies only the verified sanitized dataset to a
+one-record `collectivex_public_v1_<sha256>.ndjson` artifact. Raw artifacts and private workspace
+content are never bundled into the application.
+
+Sweeps default to `release_tag=unversioned`. The main-registered `collectivex-sweep.yml` owns
+`sweep`, `publish-v1`, and `refresh-v1`, so its branch revision remains dispatchable. V1 emits a
+marker bound to the run ID, first attempt, qualification index, source SHA, and locked matrix digest.
+Publication accepts exactly three unique successful run IDs from one source SHA with qualification
+indices 1, 2, and 3, downloads their immutable artifacts, and passes the same provenance assertions
+to `publisher.py ingest`. Refresh requires an exact source run and dataset digest and reuploads the
+same validated sanitized bytes. Partial, filtered, untagged, cross-source, rerun, failed, expired,
+or digest-mismatched inputs fail closed.
+
+Using a server-side GitHub read token, the frontend discovers the latest successful version-scoped
+publication run and downloads the publication artifact just in time. It requires exactly one root
+NDJSON entry, validates UTF-8, schema, promotion status, and filename/body SHA-256, then exposes a
+short-lived versioned channel pointer and immutable versioned dataset URL. The benchmark-version
+selector currently exposes V1; later versions require separate release and publication identities.
+The frontend never invents missing values, selects retries, or recomputes decision eligibility.
+
+## Legacy Data
+
+Numeric schemas 3-5 are outside the v1 publisher and frontend reader. They remain historical
+diagnostic evidence and cannot seed `dev-latest` or drive v1 decisions.
diff --git a/experimental/CollectiveX/docs/methodology_zh.md b/experimental/CollectiveX/docs/methodology_zh.md
new file mode 100644
index 000000000..7f6dcb67a
--- /dev/null
+++ b/experimental/CollectiveX/docs/methodology_zh.md
@@ -0,0 +1,297 @@
+# CollectiveX EP v1 契约
+
+<div align="center">
+
+[English](./methodology.md) | **中文**
+
+</div>
+
+本文档定义新的 CollectiveX 结果。历史运行笔记是 evidence，不是 contract。
+
+## 产品边界
+
+CollectiveX 是通信 microbenchmark，用于：
+
+- 在同一 chip/topology 上比较 EP libraries；
+- 在相同 workload 下比较不同系统的 EP latency 和 logical payload bandwidth；
+- 展示 unsupported、failed、invalid 和 unstable evidence，同时避免污染决策。
+
+若没有单独的 correlation study，它不能预测 serving throughput。
+
+## 矩阵
+
+提升后的 workload 为 `deepseek-v3-v1`：hidden 7168、top-k 8、256 routed experts、BF16
+dispatch 和 combine、packed placement，以及 backend-tuned resources。每个 case 都显式选择
+normal `layout-and-dispatch-v1` 或 low-latency `expert-packed-weighted-combine-v1` 语义。
+
+- `ep-core-v1`：uniform routing；decode T=1..128 的 2 次幂；prefill T=256/512。
+- `ep-routing-v1`：Zipf，EPLB off/on；decode T=128；prefill T=512。
+- `ep-low-latency-v1`：使用 DeepEP V1/UCCL 原生 low-latency API；uniform decode T=1..128 的
+  2 次幂；capability contract 会拒绝其他后端，不会伪造 low-latency path。
+- 规范矩阵范围：请求 608 个 cases / 1,600 个 token points；364 个可运行 cases / 940 个
+  points，分布在 58 个可执行 workflow shards/allocation cells；244 个 unsupported cases / 660 个
+  points。
+
+| 系统 | EP8 | EP16 |
+|---|---|---|
+| H100/H200/B200/B300 | 1x8 NVLink，scale-up | 2x8 NVLink + RDMA，scale-out |
+| MI325X/MI355X | 1x8 XGMI，scale-up | 2x8 XGMI + RDMA，scale-out |
+| GB200/GB300 | 2x4 MNNVL，scale-up | 4x4 MNNVL，scale-up |
+
+物理主机数量不能定义通信范围。两个 GB 配置都位于同一个 72-GPU MNNVL scale-up domain 内。
+
+Unsupported combinations 是 terminal outcomes，不会被静默跳过。DeepEP V2 指 PR #605
+引入的 `ElasticBuffer`，并固定使用 upstream PR #630 的最小纯 scale-up 修复。V2 的 scale-up
+cases 请求 NCCL Device API LSA；若实际建立的 LSA team 未覆盖整个 EP world，则直接失败。x86
+EP16 scale-out 使用启用 GIN 的 hybrid path，并要求两个逻辑 scale-out domains（由两个物理 RDMA
+ranks 表示）、每个 domain 八个 scale-up ranks。GB EP16 仍是 MNNVL scale-up，因此使用 LSA。
+Source 中声明的 NVIDIA capabilities 在 GPU outcomes 通过 native oracle 和 publisher gates 前仍为
+unvalidated。当前 runner pool 上的 H100 V2 在 v1 中被声明为 unsupported，因为 NCCL 2.30.4
+报告其 EP8 communicator 不具备 Device API symmetric-memory 支持；只有该 pool 恢复全 rank
+CUDA P2P/LSA 支持后才能重新加入。已移除的轴包括 `[cl]`、`[rv]`、quantization、alternate
+activation/routing profiles、uneven allocation、placement
+permutations、model envelopes 和 scaling。
+FlashInfer 因可重复出现的间歇性执行失败而排除在 v1 外；这些失败不会转为 planned-unsupported
+coverage。
+MoRI EP8 在 normal mode 下使用 MI325X AsyncLL 或 MI355X IntraNode。EP16 固定使用 2x8 XGMI +
+RDMA 上的 InterNodeV1，配置为 96 blocks、64 RDMA blocks、8 warps、每个 PE 一个 QP，以及
+external input。MoRI 的 AsyncLL transport 不属于真正的 low-latency suite contract，也绝不会
+以该模式标注。
+
+## Workload 身份
+
+一个 canonical workload 在 global token batch 上生成，再按 source rank 切分。Expert indices
+和 gate weights 会序列化。Activations 使用带版本的整数计数器公式，其 BF16 值在不同 runtime
+中精确一致；完整身份绑定到 manifest。Manifest 还绑定 shape/EP coordinates 和 oracle version。
+SHA-256 覆盖 canonical bytes 和 parameters；重新生成 library RNG 不能证明身份一致。
+
+Routing traffic 区分：
+
+- token-expert assignments，决定 expert compute load；
+- rank-deduplicated token payload copies，决定 EP activation traffic。
+
+Adapters 不得生成 routing，也不得将两种量相互解释。
+
+## 测量
+
+Normal mode 使用 `layout-and-dispatch-v1`：dispatch timing 包括 layout 和 communication，combine
+通过 unweighted rank-sum path 返回 activation payload。Low-latency mode 使用
+`expert-packed-weighted-combine-v1`：DeepEP V1/UCCL 原生 API dispatch token-expert assignments，
+并执行 gate-weighted combine。Expert-output staging 不计入 isolated combine timing，但计入被测
+paired roundtrip。每个 component 声明 availability、origin、start/end states、stage scope 和 sample
+count。仅有 paired API 时，isolated components 报 null。`isolated_sum` 为派生值，不用于
+throughput 或 recommendations。Mode 属于 series identity；normal 和 low-latency evidence 不能
+共用排名 cohort。
+
+每个被测 component 均使用 `fixed-512-v1`：
+
+- 64 trials x 8 timed iterations = 512 observations；
+- 每个 trial/point 的每个可用被测 component 前，执行 32 次同步完整
+  dispatch-stage-combine warmups；
+- 先测 roundtrip，再测 isolated dispatch 和 combine，并使用固定的 per-phase conditioning ladder；
+- 每次 iteration 先取跨 rank 最大 latency，再以 nearest-rank 计算 p50/p90/p95/p99。
+
+被测 roundtrip p99 是 headline latency。Retries 保持为独立 attempts；后续成功不会抹除早期失败。
+Decode 和 prefill 表示一个 MoE-layer collective 所代表的 serving regime；在其他 shape 相同时，
+它们不会改变 timed primitive。
+
+NCCL/RCCL reference 是 end-to-end Python adapter，而不是 bare fabric primitive。其 dispatch
+boundary 包含 layout、count exchange、device-to-host split synchronization、fresh receive
+allocation，以及四次 payload/metadata all-to-all；activation-only combine 还包含一次 all-to-all 和
+scatter/reduction。因此其 p99 测量完整 reference-adapter boundary，可能对 host/scheduler 敏感。
+它可作为 portable system control，但不得标记为 fabric、link、bus 或 single-collective latency。
+
+带版本的 conditioning 和 EPLB planner contracts（reference trace、redundant count 和
+placement/remap version）属于 scheduled 和 evidence identity。
+
+Logical payload bandwidth 为：
+
+`logical_payload_bytes / measured_latency_seconds`
+
+Normal-mode payload bytes 使用按 rank 去重的 token-rank activations；low-latency bytes 使用
+token-expert assignments。两种模式都在命名边界上加入必需 scale bytes，并排除 expert metadata、
+padding 和 backend buffer capacity。若没有定义 primitive model 或 transport counters，不发布
+algorithm bandwidth、bus bandwidth、wire utilization 或 physical-link utilization。Logical
+bandwidth 绝不能标为 physical bandwidth。已发布 payload 和 token rates 命名为
+`rate_at_latency_percentile`：bytes 或 tokens 除以对应 latency percentile。它们是 p99 latency
+下的 lower-tail service rates，不是 inverted rate distribution 的 p99 percentiles。
+
+## 正确性
+
+与实现无关的 oracle 使用 expert-specific deterministic transform，使错误 expert routing 无法
+通过 identity roundtrip。它对每个 rank 和 point 验证：
+
+1. destination rank/expert、source token、multiplicity、gate weight 和 receive counts；
+2. timing 前的 dispatched payload 和 metadata；
+3. timing 前的 combined output；
+4. 所有 timed samples 期间 semantic inputs 不变；
+5. timing 后再次验证 dispatched payload/metadata 和 combined output。
+
+Normal-mode adapters 使用 activation-only、unweighted rank-sum combine。Oracle 在 combine 前
+构造每个 rank 的 gate-weighted expert aggregate，独立计算 `sum(gate * expert(token))`，并检查
+dispatch metadata 和 transformed output。Low-latency adapters 单独验证 expert-packed
+source/expert assignment、原生 gate weights 和 gate-weighted combined output。两个契约都使用
+已记录的 `rtol=0.05` 和 `atol=0.02` 检查每个 element。任一 rank 或 point 失败都会使 case
+不合格。Pre/post dispatch evidence 按
+canonical source-token order 计算 hash。Native receive slots 可能非确定性分配，因此 physical
+receive order 不作为 correctness property。
+
+## Native 结果
+
+单个 raw case document 使用 `format: "collectivex.ep.v1"`，拒绝未知 fields，并包含：
+
+- `case`：稳定 case ID、suite、required tier 和 coordinate；
+- `workload`：canonical identity 和 logical MoE shape；
+- `measurement`：sampling、component states、timing 和 byte accounting；
+- `implementation`：实例化 class/API、固定 source、loaded libraries 和 resources；
+- `topology`：requested 和 realized SKU、devices、placement、scale-up domain 和 transport；
+- `provenance`：source SHA、image/squash hashes、allocation、run 和 attempt；
+- `rows`：point latency、byte accounting、token rate、correctness、load、fanout 和 anomaly evidence；
+- `outcome`：`success`、`failed`、`invalid`、`diagnostic` 或 `unsupported`，以及 reasons。
+
+Raw result documents 和 exact samples 会先经过临时 GitHub delivery artifacts，再由 publisher
+归档到 private bundle；它们不会进入 public tree。Private environment details 只保留在本地
+mode-0600 logs 和忽略的 operator notes 中；不会归档或发布。每个 expected case 有一个 terminal
+selected outcome，同时保留每次 attempt。
+
+## 身份与比较
+
+Canonical JSON 生成三个完整 SHA-256 IDs：
+
+- `series_id`：除 token coordinate 和 repeat allocation 外的所有 locked factors；
+- `point_id`：`series_id` 加 token coordinate；
+- `evidence_id`：`point_id` 加 allocation/run/attempt/sample checksum。
+
+Locked factors 包括 workload bytes、measurement 和 sampling contract、resources、realized
+topology、implementation/build、loaded libraries、image/squash、runtime 和 source SHA。
+Deferred code generation 会在 measurement 前捕获，并在之后再次捕获。DeepEP V2 使用固定的
+NVCC random seed，并绑定最终 cache keys、generated-source hashes 与 executable-SASS hashes；
+raw CUBIN bytes 仅保留为 private diagnostics。Hybrid 绑定实际自动调优配置与完整 kernel-key
+set，同时将各 rank 的 shared-object hashes 仅保留为 private diagnostics。本地构建的 extension
+hashes 属于 diagnostic；其固定 source trees、build recipe、runtime 与 dependencies 仍绑定到
+series。
+Series identity 包含 case ID；case ID 绑定完整 scheduled token ladder，以及固定的 percentile、
+rank-reduction、conditioning、warmup 和 correctness semantics。
+
+Controlled comparison 只声明一个 contrast：
+
+- `library`：backend implementation 及其 tuned resource profile 可以不同；realized system、
+  workload、EP、resource policy、source 和 measurement 必须匹配；
+- `chip`：受控 platform contrast。完整 realized system/topology 和 tuned resource profile 可以不同，
+  但 workload、EP、placement class、resource policy、backend lineage、source 和 measurement 必须
+  匹配。它不是 silicon-only comparison；
+- `system`：保留所有 hardware/backend 差异，同时匹配 workload、EP 和 measurement；
+- `routing`：routing distribution/EPLB 可以不同，但 static implementation build/generator、system、
+  model shape、resource profile 和 measurement 必须匹配。未启用 EPLB 的 Uniform 和 Zipf 复用
+  同一 generated implementation；EPLB 的 physical-expert/JIT configuration 是显式 treatment
+  difference。
+
+任何未声明的 mismatch 都会拒绝 overlay。Chip/system results 描述 measured systems，而非仅描述
+silicon。
+
+## Evidence 策略
+
+Capability declarations 说明可以尝试什么；artifacts 决定 evidence status。Promotion 要求完整的
+expected coverage，不能有 missing、extra、duplicate、malformed 或 heterogeneous case。Public
+coverage 保留每个 matrix disposition；promotion 要求每个 runnable case 在所有 selected runs 中
+成功，且每个 planned-unsupported case 始终为 unsupported。只有固定 canonical full-v1 matrix，
+且具有 decision-grade library、chip、system 和 routing cohort，才能推进 `dev-latest`；partial
+matrices 仍为 diagnostic。Full-matrix digest 有意绑定精确 workflow shard grouping 和 requested
+cases，因此即使 case coverage 不变，修改 `--max-cases` 或 SKU round-robin scheduling order 也只
+会产生 diagnostic-only runs。Superseded retries、planned-unsupported outcomes 和 unstable
+comparison cohorts 可以用于诊断展示，但不能排名或推荐；promoted dataset 中每个成功的 required
+series 都必须保持 decision-grade。Runnable case 的任何 failed、invalid 或 diagnostic retry 都会
+阻止 promotion，即使后续 retry 成功。Routing cohorts 是 comparable-experimental sensitivities，
+不会产生 configuration recommendations；official library/platform/system cohorts 才能产生可执行
+recommendations。
+
+一个 point 只有在三个独立 workflow runs 和 allocation IDs 均通过 correctness、identity、
+provenance、tail gates、p50/p99 repeat-stability thresholds 和 stable ordering 后才成为
+decision-grade。Eligibility、controlled cohorts、sensitivity pairs 和 recommendations 由
+publisher 而非 frontend 计算。
+
+## 执行隔离
+
+每个非 MNNVL scale-out case 都使用 operator 固定的 socket 与 RDMA selectors。Launcher 会拒绝
+缺失或不完整的 profile，并在 backend 初始化前逐个 allocation 节点检查已配置 interface、active
+HCA port 与指定 GID。它不会改用 default route、继承的 runner environment 或 transport
+fallback。Scale-up 和 MNNVL case 会清除该 profile；scale-out NCCL/RCCL 强制设置
+`NCCL_NET=IB` 并精确匹配 HCA。Selector values 只保留在加密配置和 mode-0600 private logs 中。
+
+Repository staging 使用 checkout 与 workflow workspace 外预创建的 shared base；该 base 由
+runner owner 持有，group/world 均不可写。父进程在复制前解析精确 execution child，以
+runner-owned marker 声明所有权，并验证所有 allocation 节点读写的是同一份 bytes。Cleanup 会
+等待 allocation teardown 得到确认，并只删除该 child，包括可安全识别的未完成 claim。同一 run
+的 V2/Hybrid source archive 会在固定 member 数和解压大小上限内完整验证，并且只提取所选 fixed
+root；仅当相对 leaf symlink 指向同一 backend root 内的 regular member 时才允许创建，之后还要
+通过精确 Git tree/submodule 校验。
+
+## 产物验证与即时交付
+
+不使用 self-hosted service、Vercel storage、GCP、Neon、managed database 或 managed object
+store。Publication workflow 仅将 runner 本地临时存储用作可丢弃的 validation 与 promotion
+工作区：
+
+```text
+$COLLECTIVEX_STORE_ROOT/
+  private/incoming/          # write-once downloaded GHA attempts
+  private/bundles/<sha256>/  # immutable source archives, native results/samples, matrix, checksums
+  private/quarantine/        # rejected attempts plus machine-readable reasons
+  public/datasets/<sha256>/  # immutable sanitized frontend datasets
+  public/channels/           # small atomic pointers: latest-attempt, dev-latest
+  locks/
+```
+
+Private 和 public trees 使用不同 permissions。JSON manifests 和 checksums 是权威记录；可重建
+catalog 仅为 index。Raw sweep artifacts 只是 publisher 的临时输入；只有清理并完成 promotion
+的 NDJSON 会保留为前端 publication artifact。
+
+Container tags 会与固定 registry digests 核对。Enroot imports 使用固定
+`SOURCE_DATE_EPOCH` 和 versioned cache generation；每个 mounted squash 都重新计算 hash 并纳入
+series identity。Image-provided DeepEP 也按精确 per-architecture wheel 和 installed-file
+fingerprints 检查，因此 stale cache 不能继承固定 source identity。
+Source-built DeepEP V2 使用独立的 mode-0700 cluster-local cache，并且只以 `/cx-cache` 挂载。
+其 content key 绑定版本化 build recipe、verified image digest、CPU/GPU architecture、
+upstream source trees 和固定 build dependencies。该 cache 既不是 artifact，也不是 publisher
+input；每次执行的 source/results stage 仍然隔离且可丢弃，并在复用前以 marker 和 runtime probe
+fail closed。Runner UID 属于受信任的 cluster boundary：该 cache 用于防止 stale 或意外修改，
+不防御恶意的同 UID job。只有从未发布的 partial build 才能自动重置；已发布 cache 一旦未通过
+integrity 或 runtime 检查，将保持原样并被拒绝，避免并发 allocation 正在使用的文件被删除。
+
+Publication 采用 fail-closed：
+
+1. 获取 exclusive filesystem lock，并在 destination filesystem 上 stage；
+2. 解析前归档 source bytes；
+3. 要求精确 matrix-declared artifact set，并拒绝每个未消费 archive member；
+4. 验证 strict schemas、privacy、checksums、identities、timing 和精确 matrix outcomes；
+5. 写入 checksums 和 `COMPLETE`，fsync，然后原子 rename private bundle；
+6. 构建并验证 sanitized content-addressed dataset，fsync，然后原子 rename；
+7. 仅在全部 promotion gates 通过后原子替换 `dev-latest.json`。
+
+Rejected attempts 可以更新工作区中的 `latest-attempt`，但不能更新 `dev-latest`。工作区会随
+publication runner 销毁，且绝不连接到前端。只有三个选定 bundles 全部推进 `dev-latest` 后才会
+生成 artifact。
+
+`publisher.py ingest` 接受精确 matrix，并为每个 GitHub artifact 接受一个 `--artifact` directory
+或 ZIP。`promote` 接受显式 immutable bundle IDs。默认 `verify` 要求 `latest-attempt`；若存在
+`dev-latest` 也会验证，而显式 `--channel dev-latest` 则要求其存在。Workflow 只会将通过验证并
+清理后的 dataset 复制到单记录 `collectivex_public_v1_<sha256>.ndjson` artifact。Raw artifacts 和
+private workspace 内容绝不打包进应用。
+
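+Sweeps 默认使用 `release_tag=unversioned`。注册在 main 上的 `collectivex-sweep.yml` 负责 `sweep`、`publish-v1` 和 `refresh-v1`，因此其
+branch revision 仍可 dispatch。选择 `v1` 时必须匹配固定的完整 matrix digest，并生成绑定 run ID、首次 attempt、qualification index、
+source SHA 与 matrix SHA-256 的 marker。Publication 只接受三个唯一、成功、来自同一 source SHA 且 qualification index 分别为 1、2、3 的
+`CollectiveX Sweep` run IDs；它会重新校验 metadata 与精确 markers，下载 immutable artifacts，并将相同 provenance assertions 传给 `publisher.py ingest`。
+Refresh 要求精确的 source run 与 dataset digest，并重新上传同一份已验证的 sanitized bytes。Partial、filtered、untagged、跨 source、rerun、失败、过期或 digest 不匹配的输入都会 fail closed。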
+
+前端使用 server-side GitHub read token，即时发现最新成功且按版本隔离的 publication run，并
+下载 publication artifact。它要求 ZIP 根目录只有一个 NDJSON entry，校验 UTF-8、schema、
+promotion 状态及 filename/body SHA-256，随后提供短期缓存的带版本 channel pointer 和 immutable
+带版本 dataset URL。Benchmark-version selector 当前只显示 V1；后续版本必须使用独立的 release
+与 publication identity。前端不会虚构 missing values、选择 retries，或重新计算 decision
+eligibility。
+
+## 历史数据
+
+Numeric schemas 3-5 不在 v1 publisher 和 frontend reader 范围内。它们仍是 historical
+diagnostic evidence，不能作为 `dev-latest` 初始数据或驱动 v1 decisions。
diff --git a/experimental/CollectiveX/identity.py b/experimental/CollectiveX/identity.py
new file mode 100644
index 000000000..3b263cc9c
--- /dev/null
+++ b/experimental/CollectiveX/identity.py
@@ -0,0 +1,404 @@
+#!/usr/bin/env python3
+"""Canonical, cross-runtime identities for CollectiveX v1."""
+from __future__ import annotations
+
+import hashlib
+import json
+import re
+from copy import deepcopy
+from typing import Any
+
+IDENTITY_VERSION = 1
+MAX_SAFE_INTEGER = (1 << 53) - 1  # 2**53 - 1; _validate rejects ints of larger magnitude
+PREFIXES = {  # typed-identifier prefix for each identity kind; digest() looks kinds up here
+    "case": "cxcase-v1-",
+    "workload": "cxwork-v1-",
+    "series": "cxseries-v1-",
+    "point": "cxpoint-v1-",
+    "evidence": "cxevidence-v1-",
+    "allocation": "cxallocation-v1-",
+    "attempt": "cxattempt-v1-",
+}
+V1_NORMAL_CASE_PROFILE = {  # profile registered under mode "normal" in V1_CASE_PROFILES
+    "activation_generator": "collectivex-activation-counter-v4",
+    "activation_profile": "canonical-counter-source-v4",
+    "combine_dtype": "bf16",
+    "combine_quant_mode": "none",
+    "combine_semantics": "activation-only",
+    "component_order_contract": "qualification-hash-rotated-components-v1",
+    "conditioning_contract": "fixed-phase-ramp-8-roundtrips-v1",
+    "contract": "layout-and-dispatch-v1",
+    "correctness_scope": "dispatch-metadata-and-transformed-combine",
+    "dtype": "bf16",
+    "eplb_planner": "greedy-rank-major-v1",
+    "eplb_redundant_experts": 32,
+    "eplb_reference_tokens_per_rank": 2048,
+    "mode": "normal",
+    "oracle_contract": "expert-specific-transform-v1",
+    "oracle_tolerances": "rtol=0.05,atol=0.02",
+    "payload_unit": "token-rank",
+    "placement": "packed",
+    "percentile_method": "nearest-rank",
+    "rank_reduction": "cross-rank-max-per-iteration",
+    "resource_mode": "fixed-profile",
+    "routing_generator": "collectivex-routing-counter-v3",
+    "sampling_contract": "fixed-512-v1",
+    "seed": 67,
+    "source_identity_contract": "bounded-sign-bit-source-v1",
+}
+
+V1_LOW_LATENCY_CASE_PROFILE = {  # copy of the normal profile with the keys below overridden
+    **V1_NORMAL_CASE_PROFILE,
+    "component_order_contract": "qualification-hash-rotated-components-v1",  # same as normal
+    "combine_semantics": "gate-weighted",
+    "contract": "expert-packed-weighted-combine-v1",
+    "correctness_scope": "expert-assignment-and-weighted-combine",
+    "mode": "low-latency",
+    "oracle_contract": "expert-assignment-transform-v1",
+    "payload_unit": "token-expert",
+}
+
+# Compatibility alias for normal-mode callers. New scheduling and validation
+# must select a profile from the explicit case mode.
+V1_CASE_PROFILE = V1_NORMAL_CASE_PROFILE
+V1_CASE_PROFILES = {  # mode name -> profile; case_profile() reads this table
+    "normal": V1_NORMAL_CASE_PROFILE,
+    "low-latency": V1_LOW_LATENCY_CASE_PROFILE,
+}
+
+V1_CONTROL_PRECISION_PROFILE = "d-bf16.c-bf16"  # BF16 dispatch and combine; listed for both modes
+V1_NORMAL_PRECISION_PROFILE_IDS = (  # V1_PRECISION_PROFILES keys whose "modes" is ["normal"]
+    "d-fp8-e4m3fn-b128-f32-prequantized.c-bf16",
+    "d-fp8-e4m3fnuz-b128-f32-prequantized.c-bf16",
+    "d-bf16.c-fp8-e4m3fn-direct-cast-noscale",
+    "d-fp8-e4m3fn-b128-f32-prequantized.c-fp8-e4m3fn-direct-cast-noscale",
+    "d-bf16.c-fp8-e4m3fnuz-direct-cast-noscale",
+    "d-fp8-e4m3fnuz-b128-f32-prequantized.c-fp8-e4m3fnuz-direct-cast-noscale",
+)
+V1_LOW_LATENCY_PRECISION_PROFILE_IDS = (  # keys whose "modes" is ["low-latency"]
+    "d-fp8-e4m3fn-b128-f32-fused.c-bf16",
+    "d-bf16.c-logfmt10-dynamic64",
+    "d-fp8-e4m3fn-b128-f32-fused.c-logfmt10-dynamic64",
+)
+
+
+def _communication_axis(
+    *,
+    api_input_dtype: str,
+    api_output_dtype: str,
+    communication_format: str,
+    scale_dtype: str | None,
+    scale_layout: str,
+    scale_group_size: int | None,
+    padding_contract: str,
+    alignment_contract: str,
+    quantization_origin: str,
+    conversion_boundary: str,
+) -> dict[str, Any]:
+    """Bundle one communication axis's keyword-only settings into a plain dict."""
+    return dict(
+        api_input_dtype=api_input_dtype, api_output_dtype=api_output_dtype,
+        communication_format=communication_format,
+        scale_dtype=scale_dtype,
+        scale_layout=scale_layout,
+        scale_group_size=scale_group_size,
+        padding_contract=padding_contract,
+        alignment_contract=alignment_contract,
+        quantization_origin=quantization_origin,
+        conversion_boundary=conversion_boundary,
+    )
+
+
+_BF16_AXIS = _communication_axis(  # BF16 API input, format, and output; no scale or padding
+    api_input_dtype="bf16",
+    api_output_dtype="bf16",
+    communication_format="bf16",
+    scale_dtype=None,
+    scale_layout="none",
+    scale_group_size=None,
+    padding_contract="none",
+    alignment_contract="native-bf16-vector-alignment",
+    quantization_origin="none",
+    conversion_boundary="none",
+)
+_FP8_E4M3FN_PREQUANTIZED_DISPATCH = _communication_axis(  # caller-prequantized FP8 + f32 scales
+    api_input_dtype="fp8-e4m3fn-with-f32-scale",
+    api_output_dtype="fp8-e4m3fn-with-f32-scale",
+    communication_format="fp8-e4m3fn",
+    scale_dtype="f32",
+    scale_layout="per-token-hidden-block",
+    scale_group_size=128,
+    padding_contract="right-zero-pad-hidden-to-128",
+    alignment_contract="hidden-block-128",
+    quantization_origin="caller-prequantized",
+    conversion_boundary="before-dispatch-timing",
+)
+_FP8_E4M3FNUZ_PREQUANTIZED_DISPATCH = _communication_axis(  # e4m3fnuz form of the e4m3fn axis
+    api_input_dtype="fp8-e4m3fnuz-with-f32-scale",
+    api_output_dtype="fp8-e4m3fnuz-with-f32-scale",
+    communication_format="fp8-e4m3fnuz",
+    scale_dtype="f32",
+    scale_layout="per-token-hidden-block",
+    scale_group_size=128,
+    padding_contract="right-zero-pad-hidden-to-128",
+    alignment_contract="hidden-block-128",
+    quantization_origin="caller-prequantized",
+    conversion_boundary="before-dispatch-timing",
+)
+_FP8_E4M3FN_FUSED_DISPATCH = _communication_axis(  # BF16 input; backend-fused FP8 conversion
+    api_input_dtype="bf16",
+    api_output_dtype="fp8-e4m3fn-with-f32-scale",
+    communication_format="fp8-e4m3fn",
+    scale_dtype="f32",
+    scale_layout="per-token-hidden-block",
+    scale_group_size=128,
+    padding_contract="right-zero-pad-hidden-to-128",
+    alignment_contract="hidden-block-128",
+    quantization_origin="backend-fused",
+    conversion_boundary="inside-dispatch-timing",
+)
+_LOGFMT10_DYNAMIC64_COMBINE = _communication_axis(  # BF16 API; logfmt10 format in 64-value groups
+    api_input_dtype="bf16",
+    api_output_dtype="bf16",
+    communication_format="logfmt10",
+    scale_dtype="implicit-logfmt10",
+    scale_layout="dynamic-per-64-values",
+    scale_group_size=64,
+    padding_contract="right-zero-pad-values-to-64",
+    alignment_contract="value-block-64",
+    quantization_origin="backend-internal",
+    conversion_boundary="inside-combine-timing",
+)
+_FP8_E4M3FN_DIRECT_CAST_COMBINE = _communication_axis(  # BF16 API; FP8 format with no scale
+    api_input_dtype="bf16",
+    api_output_dtype="bf16",
+    communication_format="fp8-e4m3fn",
+    scale_dtype=None,
+    scale_layout="none",
+    scale_group_size=None,
+    padding_contract="none",
+    alignment_contract="native-fp8-vector-alignment",
+    quantization_origin="backend-internal-direct-cast",
+    conversion_boundary="inside-combine-timing",
+)
+_FP8_E4M3FNUZ_DIRECT_CAST_COMBINE = _communication_axis(  # e4m3fnuz form of the direct cast
+    api_input_dtype="bf16",
+    api_output_dtype="bf16",
+    communication_format="fp8-e4m3fnuz",
+    scale_dtype=None,
+    scale_layout="none",
+    scale_group_size=None,
+    padding_contract="none",
+    alignment_contract="native-fp8-vector-alignment",
+    quantization_origin="backend-internal-direct-cast",
+    conversion_boundary="inside-combine-timing",
+)
+
+V1_PRECISION_PROFILES: dict[str, dict[str, Any]] = {  # profile ID -> modes + dispatch/combine axes
+    V1_CONTROL_PRECISION_PROFILE: {  # the only entry listed for both modes
+        "modes": ["normal", "low-latency"],
+        "dispatch": _BF16_AXIS,
+        "combine": _BF16_AXIS,
+    },
+    "d-fp8-e4m3fn-b128-f32-prequantized.c-bf16": {
+        "modes": ["normal"],
+        "dispatch": _FP8_E4M3FN_PREQUANTIZED_DISPATCH,
+        "combine": _BF16_AXIS,
+    },
+    "d-fp8-e4m3fnuz-b128-f32-prequantized.c-bf16": {
+        "modes": ["normal"],
+        "dispatch": _FP8_E4M3FNUZ_PREQUANTIZED_DISPATCH,
+        "combine": _BF16_AXIS,
+    },
+    "d-fp8-e4m3fn-b128-f32-fused.c-bf16": {
+        "modes": ["low-latency"],
+        "dispatch": _FP8_E4M3FN_FUSED_DISPATCH,
+        "combine": _BF16_AXIS,
+    },
+    "d-bf16.c-logfmt10-dynamic64": {
+        "modes": ["low-latency"],
+        "dispatch": _BF16_AXIS,
+        "combine": _LOGFMT10_DYNAMIC64_COMBINE,
+    },
+    "d-fp8-e4m3fn-b128-f32-fused.c-logfmt10-dynamic64": {
+        "modes": ["low-latency"],
+        "dispatch": _FP8_E4M3FN_FUSED_DISPATCH,
+        "combine": _LOGFMT10_DYNAMIC64_COMBINE,
+    },
+    "d-bf16.c-fp8-e4m3fn-direct-cast-noscale": {
+        "modes": ["normal"],
+        "dispatch": _BF16_AXIS,
+        "combine": _FP8_E4M3FN_DIRECT_CAST_COMBINE,
+    },
+    "d-fp8-e4m3fn-b128-f32-prequantized.c-fp8-e4m3fn-direct-cast-noscale": {
+        "modes": ["normal"],
+        "dispatch": _FP8_E4M3FN_PREQUANTIZED_DISPATCH,
+        "combine": _FP8_E4M3FN_DIRECT_CAST_COMBINE,
+    },
+    "d-bf16.c-fp8-e4m3fnuz-direct-cast-noscale": {
+        "modes": ["normal"],
+        "dispatch": _BF16_AXIS,
+        "combine": _FP8_E4M3FNUZ_DIRECT_CAST_COMBINE,
+    },
+    "d-fp8-e4m3fnuz-b128-f32-prequantized.c-fp8-e4m3fnuz-direct-cast-noscale": {
+        "modes": ["normal"],
+        "dispatch": _FP8_E4M3FNUZ_PREQUANTIZED_DISPATCH,
+        "combine": _FP8_E4M3FNUZ_DIRECT_CAST_COMBINE,
+    },
+}
+
+
+def case_profile(mode: str) -> dict[str, Any]:
+    """Return a caller-owned copy of one mode's profile; IdentityError if mode is unknown."""
+    try:  # shallow copy is enough: every profile value is an immutable str or int
+        return dict(V1_CASE_PROFILES[mode])
+    except KeyError as exc:
+        raise IdentityError(f"unknown CollectiveX case mode {mode!r}") from exc
+
+
+def precision_profile(name: str) -> dict[str, Any]:
+    """Look up a precision profile by ID; return a detached copy tagged with that ID."""
+    try:
+        detached = deepcopy(V1_PRECISION_PROFILES[name])
+    except KeyError as missing:
+        raise IdentityError(f"unknown CollectiveX precision profile {name!r}") from missing
+    return {"profile_id": name, **detached}
+
+
+def profile_for_case(case: dict[str, Any]) -> dict[str, Any]:
+    """Build a case's identity profile from its mode and optional precision_profile."""
+    mode = case.get("mode")
+    if not isinstance(mode, str):
+        raise IdentityError("scheduled case mode is missing")
+    mode_profile = case_profile(mode)
+    precision_name = case.get("precision_profile")
+    if precision_name is None:
+        return mode_profile
+    if not isinstance(precision_name, str):
+        raise IdentityError("scheduled case precision_profile must be a string")
+    communication = precision_profile(precision_name)
+    if mode in communication["modes"]:
+        return {**mode_profile, "communication_precision": communication}
+    raise IdentityError(
+        f"precision profile {precision_name!r} is not valid in mode {mode!r}"
+    )
+
+
+class IdentityError(ValueError):
+    """An identity payload cannot be represented consistently across runtimes."""
+
+
+def _validate(value: Any, path: str = "$") -> None:
+    """Accept only None, bool, printable-ASCII str, bounded int, list, str-keyed dict."""
+    def non_printable(text: str) -> bool:
+        return any(not 0x20 <= ord(symbol) <= 0x7E for symbol in text)
+    if value is None or isinstance(value, bool):
+        return
+    if isinstance(value, str):
+        if non_printable(value):
+            raise IdentityError(f"{path}: string must contain printable ASCII only")
+    elif type(value) is int:
+        if abs(value) > MAX_SAFE_INTEGER:
+            raise IdentityError(f"{path}: integer exceeds the cross-runtime safe range")
+    elif isinstance(value, list):
+        for position, element in enumerate(value):
+            _validate(element, f"{path}[{position}]")
+    elif isinstance(value, dict):
+        for name, element in value.items():
+            if not isinstance(name, str):
+                raise IdentityError(f"{path}: object key is not a string")
+            if non_printable(name):
+                raise IdentityError(f"{path}: object key must contain printable ASCII only")
+            _validate(element, f"{path}.{name}")
+    else:
+        raise IdentityError(f"{path}: unsupported identity value {type(value).__name__}")
+
+
+def canonical_bytes(value: Any) -> bytes:
+    """Validate the value, then emit compact key-sorted JSON encoded as UTF-8."""
+    _validate(value)
+    encoder_options = {
+        "ensure_ascii": False,
+        "allow_nan": False,
+        "sort_keys": True,
+        "separators": (",", ":"),
+    }
+    return json.dumps(value, **encoder_options).encode("utf-8")
+
+
+def digest(kind: str, value: Any) -> str:
+    """Prefix the SHA-256 hex digest of the canonical kind/value/version envelope."""
+    try:
+        tag = PREFIXES[kind]
+    except KeyError as err:
+        raise IdentityError(f"unknown identity kind {kind!r}") from err
+    envelope = {"kind": kind, "value": value, "version": IDENTITY_VERSION}
+    return tag + hashlib.sha256(canonical_bytes(envelope)).hexdigest()
+
+
+def is_typed_id(value: Any, kind: str) -> bool:
+    """Check that value is the kind's prefix plus exactly 64 lowercase hex digits."""
+    prefix = PREFIXES.get(kind)
+    if not isinstance(value, str) or not prefix:
+        return False
+    pattern = re.escape(prefix) + r"[0-9a-f]{64}"
+    return re.fullmatch(pattern, value) is not None
+
+
+def case_id(*, sku: str, profile: dict[str, Any], case: dict[str, Any]) -> str:
+    return digest("case", {"case": case, "profile": profile, "sku": sku})
+
+
+def workload_id(value: dict[str, Any]) -> str:
+    return digest("workload", value)
+
+
+def series_id(value: dict[str, Any]) -> str:
+    return digest("series", value)
+
+
+def point_id(*, series: str, tokens_per_rank: int) -> str:
+    return digest("point", {"series_id": series, "tokens_per_rank": tokens_per_rank})
+
+
+def allocation_id(value: dict[str, Any]) -> str:
+    return digest("allocation", value)
+
+
+def attempt_id(*, allocation: str, case: str, ordinal: int) -> str:
+    return digest(
+        "attempt", {"allocation_id": allocation, "case_id": case, "ordinal": ordinal}
+    )
+
+
+def evidence_id(
+    *, point: str, allocation: str, attempt: str, sample_sha256: str
+) -> str:
+    """Derive the typed evidence identifier from its four component fields."""
+    # Keyword order here is cosmetic: canonical_bytes sorts keys before hashing.
+    payload = dict(
+        allocation_id=allocation,
+        attempt_id=attempt,
+        point_id=point,
+        sample_sha256=sample_sha256,
+    )
+    return digest("evidence", payload)
+
+
+IDENTITY_TEST_VECTOR = {
+    "payload": {"backend": "deepep", "ep": 8, "shape": [7168, 8, 256]},
+    "series_id": "cxseries-v1-a79bf758488e3edd50f5531f3af825f371bf42aae7c4097e461fd2a32615af81",
+}
+
+
+def verify_test_vector() -> None:
+    """Raise IdentityError unless the pinned payload hashes to the pinned series id."""
+    expected = IDENTITY_TEST_VECTOR["series_id"]
+    observed = series_id(IDENTITY_TEST_VECTOR["payload"])
+    if observed != expected:
+        raise IdentityError(f"identity implementation differs: {observed} != {expected}")
+
+
+if __name__ == "__main__":
+    verify_test_vector()
+    print(IDENTITY_TEST_VECTOR["series_id"])
diff --git a/experimental/CollectiveX/launchers/launch_gb-nv.sh b/experimental/CollectiveX/launchers/launch_gb-nv.sh
new file mode 100644
index 000000000..21aae4c13
--- /dev/null
+++ b/experimental/CollectiveX/launchers/launch_gb-nv.sh
@@ -0,0 +1,110 @@
+#!/usr/bin/env bash
+# CollectiveX shared GB200/GB300 NVL72 (aarch64) launcher.
+# shellcheck disable=SC2016,SC2034
+#
+# EP8/EP16 use one Slurm task per GPU across two or four trays in the same
+# MNNVL scale-up domain.
+set -euo pipefail
+HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+CX_DIR="$(cd "$HERE/.." && pwd)"; REPO_ROOT="$(cd "$CX_DIR/../.." && pwd)"
+# shellcheck source=../runtime/common.sh
+source "$HERE/../runtime/common.sh"
+
+PRODUCT="${CX_SHARD_SKU:-${CX_GB_PRODUCT:-${CX_PUBLIC_RUNNER:-}}}"
+case "$PRODUCT" in
+  gb200|gb300) ;;
+  *) cx_die "set CX_SHARD_SKU or CX_PUBLIC_RUNNER to gb200 or gb300" ;;
+esac
+RUNNER="$PRODUCT"
+export CX_RUNNER="$RUNNER" CX_BENCH="${CX_BENCH:-deepep}"
+export CX_IMAGE_PLATFORM=linux/arm64
+JOB_ID=""
+cx_install_launcher_fail_safe
+cx_set_failure_stage setup
+cx_load_operator_config
+cx_lock_canonical_gha_env "$RUNNER"
+NODES="${CX_NODES:-2}"; GPN="${CX_GPUS_PER_NODE:-4}"
+SCALE_UP_DOMAIN="${CX_SCALE_UP_DOMAIN:-72}"
+EXPECTED_WORLD=$((NODES * GPN))
+NGPUS="${CX_NGPUS:-$EXPECTED_WORLD}"
+if [ "$PRODUCT" = gb200 ]; then default_time=30; else default_time=90; fi
+TIME_MIN="${CX_TIME:-$default_time}"
+[ "$NODES" = 2 ] || [ "$NODES" = 4 ] \
+  || cx_die "$PRODUCT v1 supports two or four four-GPU trays"
+[ "$GPN" = 4 ] || cx_die "$PRODUCT requires four GPUs per tray"
+[ "$SCALE_UP_DOMAIN" = 72 ] || cx_die "$PRODUCT requires the NVL72 scale-up domain"
+[ "$NGPUS" = "$EXPECTED_WORLD" ] \
+  || cx_die "$PRODUCT world size must equal nodes x GPUs per tray"
+cx_apply_timing_profile
+IMAGE="${CX_IMAGE:-$(cx_default_image "$PRODUCT")}"
+TS="$(date -u +%Y-%m-%dT%H-%M-%SZ)"
+export CX_RUNNER="$RUNNER" CX_TS="$TS" CX_TOPO="${PRODUCT}-nvl72-mnnvl"
+export CX_SCOPE=scale-up CX_TRANSPORT=mnnvl CX_SCALE_UP_TRANSPORT=mnnvl
+export CX_NODES="$NODES" CX_GPUS_PER_NODE="$GPN" CX_SCALE_UP_DOMAIN="$SCALE_UP_DOMAIN"
+export CX_NGPUS="$NGPUS"
+unset CX_SCALE_OUT_TRANSPORT
+case "$CX_BENCH" in
+  deepep|deepep-v2|deepep-hybrid|nccl-ep) ;;
+  *) cx_die "unsupported $PRODUCT EP backend: $CX_BENCH" ;;
+esac
+cx_validate_shard_control "$CX_DIR"
+cx_require_vars CX_PARTITION CX_ACCOUNT CX_SQUASH_DIR CX_STAGE_DIR
+[ "$PRODUCT" != gb300 ] || cx_require_vars CX_ENROOT_CACHE_PATH
+PARTITION="$CX_PARTITION"; ACCOUNT="$CX_ACCOUNT"; SQUASH_DIR="$CX_SQUASH_DIR"
+[ -z "${CX_ENROOT_CACHE_PATH:-}" ] || export ENROOT_CACHE_PATH="$CX_ENROOT_CACHE_PATH"
+export NCCL_CUMEM_ENABLE=1 NCCL_MNNVL_ENABLE=1 MC_FORCE_MNNVL=1
+cx_apply_network_profile "$NODES" "$CX_TRANSPORT"
+
+cx_log "$PRODUCT nodes=$NODES x ${GPN}gpu world=$NGPUS bench=$CX_BENCH"
+[ "${CX_DRYRUN:-0}" = 1 ] && { cx_log "DRYRUN"; exit 0; }
+cx_set_failure_stage registry-verification
+cx_verify_registry_image "$IMAGE"
+cx_set_failure_stage repository-stage
+MOUNT_SRC="$(cx_stage_path "$REPO_ROOT" "$CX_STAGE_DIR")"
+cx_stage_repo "$REPO_ROOT" "$MOUNT_SRC"
+cx_prepare_runtime_marker "$MOUNT_SRC"
+CONTAINER_MOUNTS="$MOUNT_SRC:/ix"
+if [ "$CX_BENCH" = deepep-v2 ] || [ "$CX_BENCH" = deepep-hybrid ]; then
+  cx_set_failure_stage backend-setup
+  cx_prepare_backend_source "$MOUNT_SRC" "$CX_BENCH" \
+    || cx_die "cannot stage the pinned backend source"
+  export CX_BACKEND_SOURCE_ROOT=/ix/experimental/CollectiveX/.cx_sources
+fi
+if [ "$CX_BENCH" = deepep-v2 ]; then
+  cx_prepare_backend_cache "$CX_SQUASH_DIR" \
+    || cx_die "cannot prepare the isolated backend cache"
+  CONTAINER_MOUNTS="$CONTAINER_MOUNTS,$CX_PREPARED_BACKEND_CACHE:/cx-cache"
+  export CX_BACKEND_CACHE_ROOT=/cx-cache
+fi
+
+cx_set_failure_stage scheduler-allocation
+command -v salloc >/dev/null || cx_die "salloc not found"
+cx_salloc_jobid --partition="$PARTITION" --account="$ACCOUNT" --nodes="$NODES" \
+  --gres=gpu:"$GPN" --ntasks-per-node="$GPN" --exclusive --mem=0 --cpus-per-task=35 \
+  --time="$TIME_MIN"
+[ -n "$JOB_ID" ] || cx_die "no JOB_ID from salloc"
+cx_set_failure_stage container-import
+SQUASH_FILE="$(cx_ensure_squash_on_job "$JOB_ID" "$SQUASH_DIR" "$IMAGE")"
+cx_set_failure_stage container-hash
+cx_export_squash_identity "$SQUASH_FILE"
+cx_preflight_allocation "$JOB_ID" "$NODES" "$MOUNT_SRC" "$SQUASH_FILE" \
+  "${CX_SHARD_FILE:-}"
+
+# Keep the loader policy here because it is platform/container specific and
+# security tests evaluate this literal independently.
+SOURCE_BACKEND_ENV='case "${SLURM_NODEID:-}" in ""|*[!0-9]*) exit 66;; esac; env_file="/ix/experimental/CollectiveX/.cx_backend/env/node-${SLURM_NODEID}.sh"; env_root="${env_file%/*}"; [ -d "$env_root" ] && [ ! -L "$env_root" ] || exit 66; case "$(stat -c "%a" "$env_root")" in 700|[1-7]700) ;; *) exit 66;; esac; [ -f "$env_file" ] && [ -r "$env_file" ] && [ ! -L "$env_file" ] && [ "$(stat -c "%u:%a" "$env_file")" = "$(stat -c "%u" "$env_root"):600" ] || exit 66; . "$env_file" || exit 66'
+BACKEND_PROBE="$SOURCE_BACKEND_ENV"'; case "$CX_BENCH" in deepep) python3 -c "from deep_ep import Buffer";; deepep-v2) python3 -c "import deep_ep; assert hasattr(deep_ep, '\''ElasticBuffer'\'')";; deepep-hybrid) python3 -c "import deep_ep; assert hasattr(deep_ep, '\''HybridEPBuffer'\'')";; nccl-ep) python3 -c "import torch";; esac'
+WRAP="${SOURCE_BACKEND_ENV}"$'\n'"$(cx_slurm_rank_wrapper)"
+CX_DISTRIBUTED_CONTAINER_ARGS=(--container-writable --container-remap-root)
+[ "$CX_BENCH" != deepep ] || export CX_ALLOW_MNNVL=1
+run_rc=0
+cx_set_failure_stage container-launch
+cx_run_distributed_shard || run_rc=$?  # record the status; `|| ...` keeps errexit from aborting here
+
+cx_adopt_runtime_stage "$MOUNT_SRC"
+collect_rc=0
+cx_collect_results "$MOUNT_SRC" "$REPO_ROOT" || collect_rc=$?  # collection still runs after a failed shard
+[ "$run_rc" != 0 ] || [ "$collect_rc" = 0 ] || cx_set_failure_stage artifact-collection  # only if run passed and collection failed
+final_rc="$run_rc"  # a run failure takes precedence over a collection failure
+[ "$final_rc" != 0 ] || final_rc="$collect_rc"
+exit "$final_rc"
diff --git a/experimental/CollectiveX/launchers/launch_mi-amds.sh b/experimental/CollectiveX/launchers/launch_mi-amds.sh
new file mode 100644
index 000000000..f66f820f5
--- /dev/null
+++ b/experimental/CollectiveX/launchers/launch_mi-amds.sh
@@ -0,0 +1,160 @@
+#!/usr/bin/env bash
+# CollectiveX shared MI325X/MI355X AMD Slurm launcher (one or two nodes).
+# shellcheck disable=SC2016,SC2034
+set -euo pipefail
+
+HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+CX_DIR="$(cd "$HERE/.." && pwd)"
+REPO_ROOT="$(cd "$CX_DIR/../.." && pwd)"
+# shellcheck source=../runtime/common.sh
+source "$HERE/../runtime/common.sh"
+
+RUNNER="${CX_SHARD_SKU:-${CX_PUBLIC_RUNNER:-}}"
+case "$RUNNER" in
+  mi325x) CPUS_PER_TASK=256; DEVICE_MOUNTS=",/dev/kfd:/dev/kfd,/dev/dri:/dev/dri" ;;
+  mi355x) CPUS_PER_TASK=128; DEVICE_MOUNTS="" ;;
+  *) cx_die "set CX_SHARD_SKU or CX_PUBLIC_RUNNER to mi325x or mi355x" ;;
+esac
+export CX_RUNNER="$RUNNER" CX_BENCH="${CX_BENCH:-mori}"
+export CX_IMAGE_PLATFORM=linux/amd64
+JOB_ID=""
+cx_install_launcher_fail_safe
+cx_set_failure_stage setup
+cx_load_operator_config
+cx_lock_canonical_gha_env "$RUNNER"
+
+NODES="${CX_NODES:-1}"; GPN="${CX_GPUS_PER_NODE:-8}"
+SCALE_UP_DOMAIN="${CX_SCALE_UP_DOMAIN:-8}"
+EXPECTED_WORLD=$((NODES * GPN))
+NGPUS="${CX_NGPUS:-$EXPECTED_WORLD}"
+TIME_MIN="${CX_TIME:-60}"
+EXCLUDE_NODES="${CX_EXCLUDE_NODES:-}"
+NODELIST="${CX_NODELIST:-}"
+MOUNT_DIR=/ix
+TS="$(date -u +%Y-%m-%dT%H-%M-%SZ)"
+[ "$NODES" = 1 ] || [ "$NODES" = 2 ] \
+  || cx_die "$RUNNER supports one or two nodes"
+[ "$GPN" = 8 ] || cx_die "$RUNNER requires eight GPUs per node"
+[ "$SCALE_UP_DOMAIN" = 8 ] || cx_die "$RUNNER requires an eight-GPU scale-up domain"
+[ "$NGPUS" = "$EXPECTED_WORLD" ] \
+  || cx_die "$RUNNER world size must equal nodes x GPUs per node"
+case "$CX_BENCH" in
+  mori|nccl-ep) ;;
+  *) cx_die "unsupported AMD EP backend: $CX_BENCH" ;;
+esac
+
+if [ "$RUNNER" = mi325x ]; then
+  export MORI_DISABLE_AUTO_XGMI="${MORI_DISABLE_AUTO_XGMI:-0}"
+  export MORI_ENABLE_SDMA="${MORI_ENABLE_SDMA:-1}"
+  export MORI_APP_LOG_LEVEL="${MORI_APP_LOG_LEVEL:-info}"
+  export MORI_SHMEM_LOG_LEVEL="${MORI_SHMEM_LOG_LEVEL:-info}"
+  export MORI_IO_LOG_LEVEL="${MORI_IO_LOG_LEVEL:-info}"
+  [ "$CX_BENCH" != mori ] \
+    || export CX_IMAGE="${CX_IMAGE:-$CX_IMAGE_AMD_MORI_MI325}"
+fi
+if [ "$CX_BENCH" = mori ]; then
+  if [ "$NODES" -gt 1 ]; then
+    export CX_MORI_KERNEL_TYPE=internode-v1
+  elif [ "$RUNNER" = mi325x ]; then
+    export CX_MORI_KERNEL_TYPE="${CX_MORI_KERNEL_TYPE:-asyncll}"
+  else
+    export CX_MORI_KERNEL_TYPE="${CX_MORI_KERNEL_TYPE:-intranode}"
+  fi
+fi
+IMAGE="${CX_IMAGE:-$(cx_default_image "$RUNNER")}"
+export CX_RUNNER="$RUNNER" CX_NGPUS="$NGPUS" CX_NODES="$NODES"
+export CX_GPUS_PER_NODE="$GPN" CX_SCALE_UP_DOMAIN="$SCALE_UP_DOMAIN" CX_TS="$TS"
+export CX_SCALE_UP_TRANSPORT=xgmi
+if [ "$NODES" -gt 1 ]; then
+  export CX_SCOPE=scale-out CX_SCALE_OUT_TRANSPORT=rdma
+  export CX_TRANSPORT=xgmi-rdma CX_TOPO="${RUNNER}-xgmi-rdma"
+else
+  export CX_SCOPE=scale-up CX_TRANSPORT=xgmi CX_TOPO="${RUNNER}-xgmi"
+  unset CX_SCALE_OUT_TRANSPORT
+fi
+export CX_RUN_TIMEOUT="${CX_RUN_TIMEOUT:-1800}"
+cx_apply_network_profile "$NODES" "$CX_TRANSPORT"
+cx_validate_shard_control "$CX_DIR"
+cx_require_vars CX_PARTITION CX_SQUASH_DIR CX_STAGE_DIR
+PARTITION="$CX_PARTITION"; SQUASH_DIR="$CX_SQUASH_DIR"
+
+cx_log "runner=$RUNNER nodes=$NODES x ${GPN}gpu world=$NGPUS bench=$CX_BENCH"
+cx_set_failure_stage repository-stage
+MOUNT_SRC="$(cx_stage_path "$REPO_ROOT" "$CX_STAGE_DIR")"
+cx_stage_repo "$REPO_ROOT" "$MOUNT_SRC"
+cx_prepare_runtime_marker "$MOUNT_SRC"
+[ "${CX_DRYRUN:-0}" != 1 ] || { cx_log "CX_DRYRUN=1 - not allocating"; exit 0; }
+cx_set_failure_stage registry-verification
+cx_verify_registry_image "$IMAGE"
+cx_set_failure_stage scheduler-allocation
+command -v salloc >/dev/null || cx_die "salloc not found on this runner"
+
+allocation=(--partition="$PARTITION" --nodes="$NODES" --gres=gpu:"$GPN" --exclusive
+  --time="$TIME_MIN")
+if [ "$NODES" = 1 ]; then  # one node: CPUS_PER_TASK CPUs per task, no --ntasks-per-node
+  allocation+=(--cpus-per-task="$CPUS_PER_TASK")
+else  # multi-node: GPN tasks per node, CPUS_PER_TASK split across them (integer division)
+  allocation+=(--ntasks-per-node="$GPN" --cpus-per-task="$((CPUS_PER_TASK / GPN))")
+fi
+if [ -n "$NODELIST" ]; then  # an explicit node pin takes precedence over exclusions
+  cx_log "using configured node pin"
+  allocation+=(--nodelist="$NODELIST")
+elif [ -n "$EXCLUDE_NODES" ]; then
+  allocation+=(--exclude="$EXCLUDE_NODES")
+fi
+cx_salloc_jobid "${allocation[@]}"
+[ -n "$JOB_ID" ] || cx_die "could not resolve allocated JOB_ID from salloc"  # JOB_ID was initialised empty; cx_salloc_jobid is expected to fill it
+cx_set_failure_stage setup
+cx_validate_network_profile_on_job "$JOB_ID" "$NODES" "$CX_TRANSPORT"
+
+cx_set_failure_stage container-import
+SQUASH_FILE="$(cx_ensure_squash_on_job \
+  "$JOB_ID" "$SQUASH_DIR" "$IMAGE" "${CX_LOCK_DIR:-}")"
+cx_set_failure_stage container-hash
+import_log="$(cx_private_log_path image-hash)"
+if ! COLLECTIVEX_SQUASH_SHA256="$(
+  srun --jobid="$JOB_ID" --nodes=1 --ntasks=1 --chdir=/tmp \
+    --export="$(cx_host_exports)" sha256sum "$SQUASH_FILE" \
+    2>>"$import_log" | awk 'NR==1 {print $1}'
+)"; then
+  cx_fail_stage container-hash "$import_log"
+fi
+[[ "$COLLECTIVEX_SQUASH_SHA256" =~ ^[0-9a-f]{64}$ ]] \
+  || cx_fail_stage container-hash "$import_log"
+export COLLECTIVEX_SQUASH_SHA256
+cx_preflight_allocation "$JOB_ID" "$NODES" "$MOUNT_SRC" "$SQUASH_FILE" \
+  "${CX_SHARD_FILE:-}"
+CONTAINER_MOUNTS="$MOUNT_SRC:$MOUNT_DIR$DEVICE_MOUNTS"
+
+if [ "$NODES" = 1 ]; then
+  run_rc=0
+  cx_set_failure_stage container-launch
+  runtime_log="$(cx_private_log_path runtime)"
+  srun --jobid="$JOB_ID" --chdir=/tmp --container-image="$SQUASH_FILE" \
+    --container-mounts="$CONTAINER_MOUNTS" --container-writable --container-remap-root \
+    --no-container-mount-home --container-workdir="$MOUNT_DIR/experimental/CollectiveX" \
+    --no-container-entrypoint --export="$(cx_container_exports)" \
+    bash "$MOUNT_DIR/experimental/CollectiveX/runtime/run_in_container.sh" \
+    >"$runtime_log" 2>&1 || run_rc=$?
+else
+  SOURCE_BACKEND_ENV='case "${SLURM_NODEID:-}" in ""|*[!0-9]*) exit 66;; esac; env_file="/ix/experimental/CollectiveX/.cx_backend/env/node-${SLURM_NODEID}.sh"; env_root="${env_file%/*}"; [ -d "$env_root" ] && [ ! -L "$env_root" ] || exit 66; case "$(stat -c "%a" "$env_root")" in 700|[1-7]700) ;; *) exit 66;; esac; [ -f "$env_file" ] && [ -r "$env_file" ] && [ ! -L "$env_file" ] && [ "$(stat -c "%u:%a" "$env_file")" = "$(stat -c "%u" "$env_root"):600" ] || exit 66; . "$env_file" || exit 66'
+  BACKEND_PROBE="$SOURCE_BACKEND_ENV"'; case "$CX_BENCH" in mori) python3 -c "import mori";; nccl-ep) python3 -c "import torch";; esac'
+  WRAP="${SOURCE_BACKEND_ENV}"$'\n'"$(cx_slurm_rank_wrapper)"
+  CX_DISTRIBUTED_CONTAINER_ARGS=(--container-writable --container-remap-root)
+  run_rc=0
+  cx_set_failure_stage container-launch
+  cx_run_distributed_shard || run_rc=$?
+fi
+
+cx_adopt_runtime_stage "$MOUNT_SRC"
+if [ "$NODES" = 1 ] && [ "$run_rc" != 0 ]; then
+  cx_fail_stage "$CX_FAILSAFE_MODE" "$runtime_log" || true
+fi
+collect_rc=0
+cx_collect_results "$MOUNT_SRC" "$REPO_ROOT" || collect_rc=$?
+[ "$run_rc" != 0 ] || [ "$collect_rc" = 0 ] || cx_set_failure_stage artifact-collection
+final_rc="$run_rc"
+[ "$final_rc" != 0 ] || final_rc="$collect_rc"
+rm -f "$MOUNT_SRC"/experimental/CollectiveX/gpucore.* 2>/dev/null || true
+cx_log "done - result artifacts collected"
+exit "$final_rc"
diff --git a/experimental/CollectiveX/launchers/launch_single-slurm.sh b/experimental/CollectiveX/launchers/launch_single-slurm.sh
new file mode 100644
index 000000000..eade8fb75
--- /dev/null
+++ b/experimental/CollectiveX/launchers/launch_single-slurm.sh
@@ -0,0 +1,154 @@
+#!/usr/bin/env bash
+# CollectiveX shared standard NVIDIA Slurm launcher (one or two nodes).
+# shellcheck disable=SC2016,SC2034
+set -euo pipefail
+
+HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+CX_DIR="$(cd "$HERE/.." && pwd)"
+REPO_ROOT="$(cd "$CX_DIR/../.." && pwd)"
+# shellcheck source=../runtime/common.sh
+source "$HERE/../runtime/common.sh"
+
+RUNNER="${CX_SHARD_SKU:-${CX_PUBLIC_RUNNER:-}}"
+ALLOC_EXTRA=(); SRUN_EXTRA=(); LOCAL_IMPORT=0
+case "$RUNNER" in
+  h100-dgxc) PRODUCT=h100; TOPO=h100-nvlink-island; DEFAULT_TIME=45; REQUIRE_ACCOUNT=1 ;;
+  h200-dgxc)
+    PRODUCT=h200; TOPO=h200-nvlink-island; DEFAULT_TIME=45; REQUIRE_ACCOUNT=0
+    SRUN_EXTRA=(--container-remap-root)
+    ;;
+  b200-dgxc)
+    PRODUCT=b200; TOPO=b200-nvlink-island; DEFAULT_TIME=30; REQUIRE_ACCOUNT=1
+    ALLOC_EXTRA=(--mem=0)
+    ;;
+  b300)
+    PRODUCT=b300; TOPO=b300-nvlink-island; DEFAULT_TIME=45; REQUIRE_ACCOUNT=1
+    # Do not restore ALLOC_EXTRA=(-N 1 --mem=0); it blocks two-node B300 jobs.
+    ALLOC_EXTRA=(--mem=0)
+    SRUN_EXTRA=(--mpi=none --container-remap-root)
+    LOCAL_IMPORT=1
+    ;;
+  *) cx_die "set CX_SHARD_SKU or CX_PUBLIC_RUNNER to a registered NVIDIA SKU" ;;
+esac
+export CX_RUNNER="$RUNNER" CX_BENCH="${CX_BENCH:-deepep}"
+export CX_IMAGE_PLATFORM=linux/amd64
+JOB_ID=""
+cx_install_launcher_fail_safe
+cx_set_failure_stage setup
+cx_load_operator_config
+cx_lock_canonical_gha_env "$RUNNER"
+
+NODES="${CX_NODES:-1}"; GPN="${CX_GPUS_PER_NODE:-8}"
+SCALE_UP_DOMAIN="${CX_SCALE_UP_DOMAIN:-8}"
+EXPECTED_WORLD=$((NODES * GPN))
+NGPUS="${CX_NGPUS:-$EXPECTED_WORLD}"
+TIME_MIN="${CX_TIME:-$DEFAULT_TIME}"
+IMAGE="${CX_IMAGE:-$(cx_default_image "$PRODUCT")}"
+TS="$(date -u +%Y-%m-%dT%H-%M-%SZ)"
+[ "$NODES" = 1 ] || [ "$NODES" = 2 ] \
+  || cx_die "$RUNNER supports one or two nodes"
+[ "$GPN" = 8 ] || cx_die "$RUNNER requires eight GPUs per node"
+[ "$SCALE_UP_DOMAIN" = 8 ] || cx_die "$RUNNER requires an eight-GPU scale-up domain"
+[ "$NGPUS" = "$EXPECTED_WORLD" ] \
+  || cx_die "$RUNNER world size must equal nodes x GPUs per node"
+case "$CX_BENCH" in
+  deepep|deepep-v2|deepep-hybrid|uccl|nccl-ep) ;;
+  *) cx_die "unsupported $RUNNER EP backend: $CX_BENCH" ;;
+esac
+
+export CX_RUNNER="$RUNNER" CX_NGPUS="$NGPUS" CX_NODES="$NODES"
+export CX_GPUS_PER_NODE="$GPN" CX_SCALE_UP_DOMAIN="$SCALE_UP_DOMAIN"
+export CX_TS="$TS" CX_SCALE_UP_TRANSPORT=nvlink
+if [ "$NODES" -gt 1 ]; then
+  export CX_SCOPE=scale-out CX_SCALE_OUT_TRANSPORT=rdma
+  export CX_TRANSPORT=nvlink-rdma CX_TOPO="${PRODUCT}-nvlink-rdma"
+else
+  export CX_SCOPE=scale-up CX_TRANSPORT=nvlink CX_TOPO="$TOPO"
+  unset CX_SCALE_OUT_TRANSPORT
+fi
+export CX_NCCL_HOME="${CX_NCCL_HOME:-/usr}" NCCL_CUMEM_ENABLE=1
+cx_apply_network_profile "$NODES" "$CX_TRANSPORT"
+cx_validate_shard_control "$CX_DIR"
+cx_require_vars CX_PARTITION CX_SQUASH_DIR
+[ "$REQUIRE_ACCOUNT" = 0 ] || cx_require_vars CX_ACCOUNT
+[ "$RUNNER" != b300 ] || cx_require_vars CX_STAGE_DIR
+
+cx_log "runner=$RUNNER nodes=$NODES x ${GPN}gpu world=$NGPUS bench=$CX_BENCH"
+[ "${CX_DRYRUN:-0}" != 1 ] || { cx_log "CX_DRYRUN=1 - not allocating"; exit 0; }
+cx_set_failure_stage registry-verification
+cx_verify_registry_image "$IMAGE"
+SQUASH_FILE=""
+cx_set_failure_stage repository-stage
+MOUNT_SRC="$(cx_stage_path "$REPO_ROOT" "${CX_STAGE_DIR:-}")"
+cx_stage_repo "$REPO_ROOT" "$MOUNT_SRC"
+cx_prepare_runtime_marker "$MOUNT_SRC"
+CONTAINER_MOUNTS="$MOUNT_SRC:/ix"
+if [ "$CX_BENCH" = deepep-v2 ] || [ "$CX_BENCH" = deepep-hybrid ]; then
+  cx_set_failure_stage backend-setup
+  cx_prepare_backend_source "$MOUNT_SRC" "$CX_BENCH" \
+    || cx_die "cannot stage the pinned backend source"
+  export CX_BACKEND_SOURCE_ROOT=/ix/experimental/CollectiveX/.cx_sources
+fi
+if [ "$CX_BENCH" = deepep-v2 ]; then
+  cx_prepare_backend_cache "$CX_SQUASH_DIR" \
+    || cx_die "cannot prepare the isolated backend cache"
+  CONTAINER_MOUNTS="$CONTAINER_MOUNTS,$CX_PREPARED_BACKEND_CACHE:/cx-cache"
+  export CX_BACKEND_CACHE_ROOT=/cx-cache
+fi
+
+cx_set_failure_stage scheduler-allocation
+command -v salloc >/dev/null || cx_die "salloc not found on this runner"
+# ALLOC_EXTRA is empty for some SKUs. Under `set -u`, bash older than 4.4 treats
+# "${ALLOC_EXTRA[@]}" on an empty array as an unbound variable, so expand it
+# through the ${array[@]+...} guard, which yields no words when it is empty.
+allocation=(--partition="$CX_PARTITION" --nodes="$NODES" --gres=gpu:"$GPN" --exclusive
+  --time="$TIME_MIN" ${ALLOC_EXTRA[@]+"${ALLOC_EXTRA[@]}"})
+[ "$NODES" = 1 ] || allocation+=(--ntasks-per-node="$GPN")
+[ -z "${CX_ACCOUNT:-}" ] || allocation+=(--account="$CX_ACCOUNT")
+[ -z "${CX_EXCLUDE_NODES:-}" ] || allocation+=(--exclude="$CX_EXCLUDE_NODES")
+cx_salloc_jobid "${allocation[@]}"
+[ -n "$JOB_ID" ] || cx_die "could not resolve allocated JOB_ID from salloc"
+cx_set_failure_stage setup
+cx_validate_network_profile_on_job "$JOB_ID" "$NODES" "$CX_TRANSPORT"
+cx_set_failure_stage container-import
+if [ "$LOCAL_IMPORT" = 1 ]; then
+  SQUASH_FILE="$(CX_ENROOT_LOCAL_IMPORT=1 cx_ensure_squash "$CX_SQUASH_DIR" "$IMAGE")"
+else
+  SQUASH_FILE="$(cx_ensure_squash_on_job "$JOB_ID" "$CX_SQUASH_DIR" "$IMAGE")"
+fi
+cx_set_failure_stage container-hash
+cx_export_squash_identity "$SQUASH_FILE"
+cx_preflight_allocation "$JOB_ID" "$NODES" "$MOUNT_SRC" "$SQUASH_FILE" \
+  "${CX_SHARD_FILE:-}"
+
+# Empty SRUN_EXTRA is expanded via ${array[@]+...}: bash < 4.4 with set -u rejects "${array[@]}".
+run_rc=0
+if [ "$NODES" = 1 ]; then
+  cx_set_failure_stage container-launch
+  runtime_log="$(cx_private_log_path runtime)"
+  srun --jobid="$JOB_ID" --container-image="$SQUASH_FILE" \
+    --container-mounts="$CONTAINER_MOUNTS" --no-container-mount-home \
+    --container-workdir=/ix/experimental/CollectiveX --no-container-entrypoint \
+    ${SRUN_EXTRA[@]+"${SRUN_EXTRA[@]}"} --export="$(cx_container_exports)" \
+    bash /ix/experimental/CollectiveX/runtime/run_in_container.sh \
+    >"$runtime_log" 2>&1 || run_rc=$?
+else
+  SOURCE_BACKEND_ENV='case "${SLURM_NODEID:-}" in ""|*[!0-9]*) exit 66;; esac; env_file="/ix/experimental/CollectiveX/.cx_backend/env/node-${SLURM_NODEID}.sh"; env_root="${env_file%/*}"; [ -d "$env_root" ] && [ ! -L "$env_root" ] || exit 66; case "$(stat -c "%a" "$env_root")" in 700|[1-7]700) ;; *) exit 66;; esac; [ -f "$env_file" ] && [ -r "$env_file" ] && [ ! -L "$env_file" ] && [ "$(stat -c "%u:%a" "$env_file")" = "$(stat -c "%u" "$env_root"):600" ] || exit 66; . "$env_file" || exit 66'
+  BACKEND_PROBE="$SOURCE_BACKEND_ENV"'; case "$CX_BENCH" in deepep) python3 -c "from deep_ep import Buffer";; deepep-v2) python3 -c "import deep_ep; assert hasattr(deep_ep, '\''ElasticBuffer'\'')";; deepep-hybrid) python3 -c "import deep_ep; assert hasattr(deep_ep, '\''HybridEPBuffer'\'')";; uccl) python3 -c "import torch; from uccl_deepep import Buffer";; nccl-ep) python3 -c "import torch";; esac'
+  WRAP="${SOURCE_BACKEND_ENV}"$'\n'"$(cx_slurm_rank_wrapper)"
+  CX_DISTRIBUTED_CONTAINER_ARGS=(--container-writable ${SRUN_EXTRA[@]+"${SRUN_EXTRA[@]}"})
+  cx_set_failure_stage container-launch
+  cx_run_distributed_shard || run_rc=$?
+fi
+
+cx_adopt_runtime_stage "$MOUNT_SRC"
+if [ "$NODES" = 1 ] && [ "$run_rc" != 0 ]; then
+  cx_fail_stage "$CX_FAILSAFE_MODE" "$runtime_log" || true
+fi
+collect_rc=0
+cx_collect_results "$MOUNT_SRC" "$REPO_ROOT" || collect_rc=$?
+[ "$run_rc" != 0 ] || [ "$collect_rc" = 0 ] || cx_set_failure_stage artifact-collection
+final_rc="$run_rc"
+[ "$final_rc" != 0 ] || final_rc="$collect_rc"
+cx_log "done - result artifacts collected"
+exit "$final_rc"
diff --git a/experimental/CollectiveX/publisher.py b/experimental/CollectiveX/publisher.py
new file mode 100644
index 000000000..e64f52c12
--- /dev/null
+++ b/experimental/CollectiveX/publisher.py
@@ -0,0 +1,4260 @@
+#!/usr/bin/env python3
+"""Fail-closed filesystem publisher for CollectiveX EP v1 artifacts."""
+from __future__ import annotations
+
+import argparse
+import contextlib
+import datetime as dt
+import fcntl
+from functools import lru_cache
+import hashlib
+import json
+import math
+import os
+from pathlib import Path, PurePosixPath
+import re
+import shutil
+import stat
+import statistics
+import sys
+import tempfile
+from typing import Any, Iterator, Sequence
+import zipfile
+
+import jsonschema
+import numpy as np
+
+HERE = Path(__file__).resolve().parent
+sys.path.insert(0, str(HERE))
+
+import artifact_safety  # noqa: E402
+import capability  # noqa: E402
+import contracts  # noqa: E402
+import identity  # noqa: E402
+import sweep_matrix  # noqa: E402
+
+FORMAT_BUNDLE = "collectivex.private.bundle.v1"
+FORMAT_PUBLIC = "collectivex.public.v1"
+FORMAT_CHANNEL = "collectivex.channel.v1"
+POLICY = "collectivex-decision-grade-v1"
+PUBLISHER_POLICY = "collectivex-publisher-v1"
+OUTCOMES = ("success", "unsupported", "failed", "invalid", "diagnostic")
+REQUIRED_ALLOCATIONS = 3
+REQUIRED_COHORT_KINDS = ("library", "chip", "system", "routing")
+PRECISION_COHORT_KINDS = (
+    "dispatch-precision", "combine-precision", "precision-pair",
+)
+REQUIRED_PROMOTION_COHORT_COUNTS = {"library": 76, "system": 12, "routing": 116}
+CANONICAL_FULL_V1_MATRIX_SHA256 = (
+    "f1ca85f9689922b90edd5767b9ff2a902f6b896f32f68b2ca086dde3fd2157d0"
+)
+CANONICAL_FULL_V1_CASE_CATALOG_SHA256 = (
+    "8e262178f770b0cdde12b7ec71604afd87251fa55685d4594f29717153ad6bbd"
+)
+P50_STABILITY_LIMIT = 1.10
+P99_STABILITY_LIMIT = 1.25
+TRIAL_DRIFT_RATIO_LIMIT = 1.10
+TRIAL_OUTLIER_FRACTION_LIMIT = 0.05
+TRIAL_OUTLIER_MAD_MULTIPLIER = 6.0
+BOOTSTRAP_RESAMPLES = 10_000
+BOOTSTRAP_CONFIDENCE = 0.95
+BOOTSTRAP_EQUIVALENCE_BAND = 0.05
+BOOTSTRAP_POLICY = "hierarchical-run-trial-p99-ratio-v1"
+BOOTSTRAP_CHUNK_SIZE = 250
+MAX_ARCHIVE_MEMBERS = 20_000
+MAX_ARCHIVE_MEMBER_BYTES = 2 * 1024**3
+MAX_ARCHIVE_TOTAL_BYTES = 16 * 1024**3
+MAX_PUBLIC_DATASET_BYTES = 32 * 1024**2
+HEX64 = re.compile(r"[0-9a-f]{64}")
+SAFE_ID = re.compile(r"[a-z0-9][a-z0-9_.-]{0,127}")
+REASON = re.compile(r"[a-z0-9][a-z0-9.-]{0,95}")
+ARTIFACT_NAME = re.compile(
+    r"cx(?:unsupported|shard-[a-z0-9][a-z0-9_.-]{0,127})-[1-9][0-9]*-[1-9][0-9]*"
+)
+COVERAGE_TOPOLOGY_FIELDS = (
+    "ep_size", "nodes", "gpus_per_node", "scale_up_domain", "scope",
+    "scale_up_transport", "scale_out_transport", "transport", "topology_class",
+)
+CHANNEL_PATH = re.compile(r"datasets/([0-9a-f]{64})/dataset\.json")
+SCHEMA_DIR = HERE / "schemas"
+_SCHEMAS: dict[str, jsonschema.protocols.Validator] = {}
+_BOOTSTRAP_CACHE: dict[tuple[Any, ...], dict[str, Any]] = {}
+
+
+class PublisherError(ValueError):
+    """Input or stored state violates the publication contract."""
+
+
+strict_load = contracts.strict_load
+_canonical = contracts.canonical_json_bytes
+
+
+def _sha_bytes(data: bytes) -> str:
+    return hashlib.sha256(data).hexdigest()
+
+
+def _sha_file(path: Path) -> str:
+    hasher = hashlib.sha256()
+    with path.open("rb") as stream:
+        while block := stream.read(1024 * 1024):  # hash the file 1 MiB at a time
+            hasher.update(block)
+    return hasher.hexdigest()
+
+
+def _latest_timestamp(values: Sequence[str]) -> str:
+    """Pick the chronologically latest evidence timestamp; never consults the clock."""
+    if not values:
+        raise PublisherError("cannot derive a timestamp without evidence")
+
+    def sort_key(raw: str) -> tuple[dt.datetime, str]:
+        try:
+            moment = dt.datetime.fromisoformat(raw.replace("Z", "+00:00"))
+        except ValueError as exc:
+            raise PublisherError("evidence timestamp is not ISO-8601") from exc
+        if moment.tzinfo is None:
+            raise PublisherError("evidence timestamp must include a timezone")
+        return moment.astimezone(dt.timezone.utc), raw
+
+    return max(values, key=sort_key)
+
+
+def _schema(name: str, value: Any) -> None:
+    """Validate value against the named schema file, compiling it on first use."""
+    if _SCHEMAS.get(name) is None:
+        document = strict_load(SCHEMA_DIR / name)
+        jsonschema.Draft202012Validator.check_schema(document)
+        _SCHEMAS[name] = jsonschema.Draft202012Validator(
+            document, format_checker=jsonschema.FormatChecker()
+        )
+    failures = list(_SCHEMAS[name].iter_errors(value))
+    if not failures:
+        return
+    first = min(failures, key=lambda failure: list(failure.absolute_path))
+    location = ".".join(map(str, first.absolute_path)) or "$"
+    raise PublisherError(f"{name}:{location}: {first.message}")
+def _exact(obj: Any, fields: set[str], path: str) -> dict[str, Any]:
+    """Require obj to be a dict whose key set equals fields, then return it."""
+    if not isinstance(obj, dict):
+        raise PublisherError(f"{path} must be an object")
+    present = set(obj)
+    if present == fields:
+        return obj
+    absent = sorted(fields - present)
+    surplus = sorted(present - fields)
+    raise PublisherError(f"{path} fields differ: missing={absent}, extra={surplus}")
+def _array(value: Any, path: str, *, nonempty: bool = False) -> list[Any]:
+    """Return value if it is a list (and has items when nonempty is requested)."""
+    if isinstance(value, list) and (value or not nonempty):
+        return value
+    raise PublisherError(f"{path} must be {'a nonempty' if nonempty else 'an'} array")
+
+
+def _integer(value: Any, path: str, *, minimum: int = 0) -> int:
+    if type(value) is int and not value < minimum:  # exact type test, so bool is rejected
+        return value
+    raise PublisherError(f"{path} must be an integer >= {minimum}")
+
+
+def _unique(values: Sequence[Any], path: str) -> None:
+    encoded = [_canonical(item) for item in values]  # compare items by canonical form
+    if len(set(encoded)) < len(encoded):
+        raise PublisherError(f"{path} contains duplicates")
+
+def _eligibility(value: dict[str, Any], path: str) -> dict[str, Any]:
+    """Check that decision_grade and reasons are consistent with the promotion gates."""
+    allocations = value["allocation_ids"]
+    p50_ratio, p99_ratio = value["p50_max_min_ratio"], value["p99_max_min_ratio"]
+    flag_names = ("complete", "correct", "measured_roundtrip_p99",
+                  "stable_p50", "stable_p99", "stable_ordering")
+    checks = [len(allocations) >= REQUIRED_ALLOCATIONS]
+    checks += [value[name] for name in flag_names]
+    checks.append(p50_ratio is not None and p50_ratio <= P50_STABILITY_LIMIT)
+    checks.append(p99_ratio is not None and p99_ratio <= P99_STABILITY_LIMIT)
+    graded = value["decision_grade"]
+    if graded != (all(checks) and not value["reasons"]):
+        raise PublisherError(f"{path}.decision_grade does not match promotion gates")
+    if graded == bool(value["reasons"]):
+        raise PublisherError(f"{path}.reasons does not match decision status")
+    return value
+
+
+def validate_channel(doc: Any, *, expected_channel: str | None = None) -> dict[str, Any]:
+    _schema("channel-v1.schema.json", doc)
+    if expected_channel and doc["channel"] != expected_channel:
+        raise PublisherError("channel name does not match its file")
+    dataset = doc["dataset"]
+    parsed = CHANNEL_PATH.fullmatch(dataset["path"]) if isinstance(dataset["path"], str) else None
+    if parsed is not None and parsed.group(1) == dataset["sha256"]:  # digest embedded in the path
+        return doc
+    raise PublisherError("channel dataset path and sha256 do not agree")
+
+
+def _metric_value(series: dict[str, Any], metric: dict[str, Any]) -> tuple[str, float, str]:
+    """Resolve a decision metric against a series' roundtrip component.
+
+    Picks the first point whose ``tokens_per_rank`` matches the metric and
+    returns ``(point_id, value, unit)``. The unit is ``"us"`` for the
+    ``latency_us`` measure and ``"GB/s"`` for any other measure.
+
+    Raises ``PublisherError`` when no point matches, when the series phase
+    differs from the metric phase, or when the requested rate entry is ``None``.
+    """
+    selected = None
+    for candidate in series["points"]:
+        if candidate["tokens_per_rank"] == metric["tokens_per_rank"]:
+            selected = candidate
+            break
+    if selected is None or series["phase"] != metric["phase"]:
+        raise PublisherError("decision metric references an unavailable point")
+    roundtrip = selected["components"]["roundtrip"]
+    if metric["measure"] != "latency_us":
+        rates = roundtrip[metric["measure"]]
+        if rates is None:
+            raise PublisherError("data-rate decision has no byte accounting contract")
+        reading, unit = rates[metric["statistic"]], "GB/s"
+    else:
+        reading, unit = roundtrip["latency_us"][metric["statistic"]], "us"
+    return selected["point_id"], reading, unit
+
+
+def _validate_metric(metric: dict[str, Any]) -> None:
+    """Require the objective that matches the metric's measure.
+
+    ``latency_us`` must be minimized (``"min"``); every other measure must be
+    maximized (``"max"``). Raises ``PublisherError`` on a mismatch.
+    """
+    measure = metric["measure"]
+    required = "min" if measure == "latency_us" else "max"
+    if metric["objective"] == required:
+        return
+    raise PublisherError(f"{measure} objective must be {required}")
+
+
+def _metric_label(measure: str, statistic: str) -> str:
+    """Return the human-readable label for a measure/statistic pair.
+
+    ``latency_us`` yields ``"<statistic> latency"``. The activation data-rate
+    measure and any other measure yield ``"<rate name> at <statistic> latency"``,
+    with every non-activation measure labelled as the total logical data rate.
+    """
+    if measure == "latency_us":
+        return f"{statistic} latency"
+    if measure == "activation_data_rate_gbps_at_latency_percentile":
+        rate_name = "activation data rate"
+    else:
+        rate_name = "total logical data rate"
+    return f"{rate_name} at {statistic} latency"
+
+
+def _routing_build_control(build: dict[str, Any]) -> dict[str, Any]:
+    """Copy the four build fields used as the routing cohort's build control.
+
+    The result keeps exactly ``routing_control_sha256``, ``image_digest``,
+    ``source_sha`` and ``squash_sha256``, in that order; a missing field
+    raises ``KeyError``.
+    """
+    control: dict[str, Any] = {}
+    for name in ("routing_control_sha256", "image_digest", "source_sha", "squash_sha256"):
+        control[name] = build[name]
+    return control
+
+
+def _routing_implementation_mismatch(members: Sequence[dict[str, Any]]) -> bool:
+    """Report whether the members without EPLB disagree on implementation contract.
+
+    Only members whose ``workload["eplb"]`` is falsy are considered; the result
+    is true when they carry more than one distinct
+    ``build["implementation_contract_sha256"]``.
+    """
+    distinct_contracts = set()
+    for member in members:
+        if member["workload"]["eplb"]:
+            continue
+        distinct_contracts.add(member["build"]["implementation_contract_sha256"])
+    return len(distinct_contracts) > 1
+
+
+def _public_case_factors(series: dict[str, Any]) -> dict[str, Any]:
+    """Rebuild the case/profile/sku factor mapping from a public series record.
+
+    The result has three keys: ``case`` (the flat factor dict assembled
+    below), ``profile`` (``identity.profile_for_case`` applied to that dict)
+    and ``sku`` (taken from the series' system block).
+
+    ``validate_public_dataset`` passes this mapping to
+    ``identity.digest("case", ...)`` and compares the result with the series'
+    single case ID, so presumably any change to the keys or values below
+    alters that digest.
+    """
+    workload = series["workload"]
+    system = series["system"]
+    measurement = series["measurement"]
+    ep_size = system["ep_size"]
+    case = {
+        "backend": series["backend"]["id"],
+        # Always True in this projection.
+        "canonical": True,
+        "eplb": workload["eplb"],
+        "ep": ep_size,
+        "experts": workload["experts"],
+        "gpus_per_node": system["gpus_per_node"],
+        "hidden": workload["hidden"],
+        # Space-separated tokens_per_rank values, in the series' point order.
+        "ladder": " ".join(str(point["tokens_per_rank"]) for point in series["points"]),
+        "mode": series["mode"],
+        "nodes": system["nodes"],
+        "phase": series["phase"],
+        "required_publication": series["publication_tier"],
+        "routing": workload["routing"],
+        "samples_per_point": measurement["samples_per_component"],
+        "scale_out_transport": system["scale_out_transport"],
+        "scale_up_domain": system["scale_up_domain"],
+        "scale_up_transport": system["scale_up_transport"],
+        "scope": system["scope"],
+        "suite": series["suite"],
+        # Encoded as "<iters>:<trials>:<warmups>".
+        "timing": (
+            f"{measurement['iters']}:{measurement['trials']}:"
+            f"{measurement['warmups']}"
+        ),
+        "topk": workload["top_k"],
+        "topology_class": system["topology_class"],
+        "transport": system["transport"],
+        "warmup_semantics": sweep_matrix.ep_harness.WARMUP_SEMANTICS,
+        "workload": series["model"],
+    }
+    # The control precision profile stays implicit: the key is only present
+    # for series that use a different profile.
+    if workload["precision_profile"] != identity.V1_CONTROL_PRECISION_PROFILE:
+        case["precision_profile"] = workload["precision_profile"]
+    return {
+        "case": case,
+        "profile": identity.profile_for_case(case),
+        "sku": system["sku"],
+    }
+
+
+def _coverage_topology(case: dict[str, Any]) -> dict[str, Any]:
+    """Build the public topology mapping for a case or system record.
+
+    ``ep_size`` comes from ``case["ep_size"]``, falling back to ``case["ep"]``
+    (``None`` when neither key exists). Every other name listed in
+    ``COVERAGE_TOPOLOGY_FIELDS`` is copied verbatim and must be present.
+    """
+    topology = {"ep_size": case.get("ep_size", case.get("ep"))}
+    for name in COVERAGE_TOPOLOGY_FIELDS:
+        if name == "ep_size":
+            continue
+        topology[name] = case[name]
+    return topology
+
+
+def _coverage_coordinates(case: dict[str, Any]) -> dict[str, Any]:
+    """Return the sku/backend/mode/phase/topology coordinates of a coverage case.
+
+    The four scalar fields are copied from ``case``; ``topology`` is produced
+    by ``_coverage_topology`` from the same record.
+    """
+    coordinates = {name: case[name] for name in ("sku", "backend", "mode", "phase")}
+    coordinates["topology"] = _coverage_topology(case)
+    return coordinates
+
+
+@lru_cache(maxsize=1)
+def _canonical_coverage_cases() -> dict[str, dict[str, Any]]:
+    """Index the requested cases of the resolved sweep matrix by case ID.
+
+    Each entry starts with the item's ``sku``, is overlaid with the item's
+    ``case`` fields, and then receives the item's ``disposition`` and
+    ``reason``. The result is memoized by ``lru_cache``, so every caller gets
+    the same dict object.
+    """
+    matrix = sweep_matrix.resolve_matrix(suites="all", max_cases=128, backends="all")
+    catalog: dict[str, dict[str, Any]] = {}
+    for entry in matrix["requested_cases"]:
+        case = entry["case"]
+        case_id = case["case_id"]
+        record = {"sku": entry["sku"]}
+        record.update(case)
+        record["disposition"] = entry["disposition"]
+        record["reason"] = entry["reason"]
+        catalog[case_id] = record
+    return catalog
+
+
+def _public_series_config(series: dict[str, Any]) -> dict[str, Any]:
+    """Project the parts of a series that make up its public configuration.
+
+    Keeps the backend ``generation`` and ``version``, the whole ``resource``
+    block, and only the ``label`` of the system.
+    """
+    backend = series["backend"]
+    config: dict[str, Any] = {
+        "backend": {name: backend[name] for name in ("generation", "version")},
+    }
+    config["resource"] = series["resource"]
+    config["system"] = {"label": series["system"]["label"]}
+    return config
+
+
+def _public_cohort_factors(kind: str, item: dict[str, Any]) -> tuple[Any, Any]:
+    """Split a series record into a pair of factors for one cohort kind.
+
+    The first element collects the fields held fixed for ``kind``; the second
+    is the value that differs between cohort members (the precision branches
+    name these ``control`` and ``variant``). How callers group or hash the
+    pair is not visible here.
+
+    Args:
+        kind: ``"library"``, ``"chip"``, ``"system"``, ``"routing"``, or any
+            member of ``PRECISION_COHORT_KINDS``.
+        item: a series record providing ``workload``, ``build``, ``system``,
+            ``backend``, ``resource``, ``measurement``, ``model``, ``mode``
+            and ``phase``.
+
+    Raises:
+        PublisherError: when ``kind`` is none of the kinds above.
+    """
+    workload = item["workload"]
+    build = item["build"]
+    shape = {
+        key: workload[key]
+        for key in (
+            "hidden", "top_k", "experts", "precision_profile", "dispatch_precision",
+            "combine_precision", "activation_profile",
+        )
+    }
+    # Fields shared by the first element of every non-precision kind.
+    common = {
+        "model": item["model"], "mode": item["mode"], "phase": item["phase"],
+        "shape": shape, "measurement": item["measurement"],
+        "ep_size": item["system"]["ep_size"],
+    }
+    # library: system and workload fixed, backend ID varies.
+    if kind == "library":
+        return (
+            {**common, "system": item["system"], "workload": workload,
+             "resource_mode": item["resource"]["mode"], "source": build["source_sha"]},
+            item["backend"]["id"],
+        )
+    # chip: backend and workload fixed, the whole system block varies.
+    if kind == "chip":
+        return (
+            {**common, "backend": item["backend"], "workload": workload,
+             "resource_mode": item["resource"]["mode"], "source": build["source_sha"]},
+            item["system"],
+        )
+    # system: workload fixed; the [sku, backend ID, resource profile] triple varies.
+    if kind == "system":
+        return {**common, "workload": workload, "source": build["source_sha"]}, [
+            item["system"]["sku"], item["backend"]["id"], item["resource"]["profile"]
+        ]
+    # routing: backend/system/resource/build control fixed; routing, EPLB flag
+    # and implementation contract hash vary.
+    if kind == "routing":
+        return (
+            {**common, "backend": item["backend"], "system": item["system"],
+             "resource": item["resource"], "build": _routing_build_control(build)},
+            [workload["routing"], workload["eplb"],
+             build["implementation_contract_sha256"]],
+        )
+    if kind in PRECISION_COHORT_KINDS:
+        # Unlike ``shape`` above, this excludes every precision field so that
+        # precision can act as the varying factor.
+        static_shape = {
+            key: workload[key]
+            for key in ("hidden", "top_k", "experts", "activation_profile")
+        }
+        control = {
+            "backend": item["backend"],
+            "build": {
+                key: build[key]
+                for key in (
+                    "image_digest", "runtime_fingerprint_sha256", "source_sha",
+                    "squash_sha256",
+                )
+            },
+            "measurement": item["measurement"],
+            "mode": item["mode"],
+            "model": item["model"],
+            "phase": item["phase"],
+            "resource": item["resource"],
+            "shape": static_shape,
+            "system": item["system"],
+            "workload": {
+                "eplb": workload["eplb"],
+                "routing": workload["routing"],
+            },
+        }
+        if kind == "dispatch-precision":
+            # Combine precision is pinned; dispatch precision varies.
+            control["combine_precision"] = workload["combine_precision"]
+            variant = workload["dispatch_precision"]
+        elif kind == "combine-precision":
+            # Dispatch precision is pinned; combine precision varies.
+            control["dispatch_precision"] = workload["dispatch_precision"]
+            variant = workload["combine_precision"]
+        else:
+            # Any remaining precision kind: the resource block moves from the
+            # fixed side to the varying side, next to all precision fields.
+            control.pop("resource")
+            variant = {
+                "combine_precision": workload["combine_precision"],
+                "dispatch_precision": workload["dispatch_precision"],
+                "precision_profile": workload["precision_profile"],
+                "resource": item["resource"],
+            }
+        return control, variant
+    raise PublisherError(f"unknown cohort kind {kind}")
+
+
+def _case_disposition_catalog_sha256(coverage: Sequence[dict[str, Any]]) -> str:
+    """Hash the (case_id, disposition) catalog of a coverage list.
+
+    Entries are ordered by ``case_id``, reduced to those two fields, passed
+    through ``_canonical`` and then through ``_sha_bytes``.
+    """
+    ordered = sorted(coverage, key=lambda entry: entry["case_id"])
+    catalog = []
+    for entry in ordered:
+        catalog.append(
+            {"case_id": entry["case_id"], "disposition": entry["disposition"]}
+        )
+    return _sha_bytes(_canonical(catalog))
+
+
+def validate_public_dataset(doc: Any) -> dict[str, Any]:
+    _schema("public-dataset-v1.schema.json", doc)
+    if len(_canonical(doc)) + 1 > MAX_PUBLIC_DATASET_BYTES:
+        raise PublisherError("public dataset exceeds the serving size limit")
+    try:
+        artifact_safety.assert_publication_safe([doc])
+    except artifact_safety.ArtifactSafetyError as exc:
+        raise PublisherError(str(exc)) from exc
+    if doc["source_bundle_ids"] != sorted(doc["source_bundle_ids"]):
+        raise PublisherError("source bundle IDs are not canonical")
+    for field, key in (
+        ("coverage", "case_id"), ("attempts", "attempt_id"),
+        ("series", "series_id"), ("cohorts", "cohort_id"),
+        ("rankings", "ranking_id"), ("recommendations", "recommendation_id"),
+        ("sensitivities", "sensitivity_id"),
+    ):
+        if doc[field] != sorted(doc[field], key=lambda item: item[key]):
+            raise PublisherError(f"{field} are not in canonical identity order")
+    promotion = doc["promotion"]
+    quarantined = promotion["status"] == "quarantined"
+    if quarantined != (promotion["reason"] is not None) or quarantined != (
+        promotion["matrix_id"] is None
+    ):
+        raise PublisherError("promotion reason/matrix identity differs from status")
+    attempts = {item["attempt_id"]: item for item in doc["attempts"]}
+    if len(attempts) != len(doc["attempts"]):
+        raise PublisherError("dataset has duplicate attempt IDs")
+    evidence = [
+        value["evidence_id"] for item in doc["attempts"] for value in item["evidence"]
+    ]
+    _unique(evidence, "dataset attempt evidence")
+    series = {item["series_id"]: item for item in doc["series"]}
+    if len(series) != len(doc["series"]):
+        raise PublisherError("dataset has duplicate series IDs")
+    allocation_ids = set(promotion["allocation_ids"])
+    case_ids = {item["case_id"] for item in doc["coverage"]}
+    if len(case_ids) != len(doc["coverage"]):
+        raise PublisherError("dataset has duplicate case coverage")
+    coverage_by_case = {item["case_id"]: item for item in doc["coverage"]}
+    series_case_ids = {
+        case_id for item in doc["series"] for case_id in item["case_ids"]
+    }
+    canonical_cases = _canonical_coverage_cases()
+    for item in doc["coverage"]:
+        topology = item["topology"]
+        registered = capability.topology_for(item["sku"], topology["ep_size"])
+        if (
+            item["sku"] not in capability.PLATFORMS
+            or item["backend"] not in capability.BACKENDS
+            or registered is None
+            or any(
+                topology[field] != registered[field]
+                for field in COVERAGE_TOPOLOGY_FIELDS if field != "ep_size"
+            )
+        ):
+            raise PublisherError("coverage topology differs from the capability registry")
+        canonical = canonical_cases.get(item["case_id"])
+        if canonical is not None:
+            precision_profile = canonical.get(
+                "precision_profile", identity.V1_CONTROL_PRECISION_PROFILE
+            )
+            precision = identity.precision_profile(precision_profile)
+            expected_projection = {
+                "sku": canonical["sku"],
+                "suite": canonical["suite"],
+                "workload": canonical["workload"],
+                "publication_tier": canonical["required_publication"],
+                "backend": canonical["backend"],
+                "mode": canonical["mode"],
+                "phase": canonical["phase"],
+                "routing": canonical["routing"],
+                "eplb": canonical["eplb"],
+                "precision_profile": precision_profile,
+                "dispatch_precision": precision["dispatch"],
+                "combine_precision": precision["combine"],
+                "topology": _coverage_topology(canonical),
+                "disposition": canonical["disposition"],
+            }
+            if any(item[field] != value for field, value in expected_projection.items()):
+                raise PublisherError("coverage dimensions differ from its case identity")
+            expected_tokens = [int(value) for value in canonical["ladder"].split()]
+            if [point["tokens_per_rank"] for point in item["points"]] != expected_tokens:
+                raise PublisherError("coverage points differ from the requested token ladder")
+        if canonical is None and item["case_id"] not in series_case_ids:
+            raise PublisherError("coverage case identity is outside the v1 catalog")
+        for point in item["points"]:
+            if point["global_tokens"] != point["tokens_per_rank"] * topology["ep_size"]:
+                raise PublisherError("coverage point global token count differs")
+            if (point["terminal_status"] == "measured") != (point["reason"] is None):
+                raise PublisherError("coverage point terminal reason differs from status")
+    for item in doc["attempts"]:
+        if item["case_id"] not in case_ids or item["allocation_id"] not in allocation_ids:
+            raise PublisherError("attempt references undeclared coverage or allocation")
+        if item["series_id"] is not None and item["series_id"] not in series:
+            raise PublisherError("attempt references unknown series")
+        if (item["outcome"] == "success") != (item["reason"] is None):
+            raise PublisherError("attempt reason must be null exactly for success")
+        if item["outcome"] == "success" and item["failure_mode"] is not None:
+            raise PublisherError("successful attempt cannot have a failure mode")
+        if (item["outcome"] == "success" and item["selected"]) != (
+            item["series_id"] is not None
+        ):
+            raise PublisherError("attempt series must be present exactly for selected success")
+    if {item["allocation_id"] for item in doc["attempts"]} != allocation_ids:
+        raise PublisherError("promotion allocation catalog differs from attempts")
+    attempt_groups: dict[tuple[str, str], list[dict[str, Any]]] = {}
+    for item in doc["attempts"]:
+        attempt_groups.setdefault((item["case_id"], item["allocation_id"]), []).append(item)
+    for (case_id, allocation_id), group in attempt_groups.items():
+        ordinals = sorted(item["attempt_index"] for item in group)
+        if ordinals != list(range(1, len(group) + 1)):
+            raise PublisherError("public retries must retain contiguous attempt indexes")
+        if any(
+            item["attempt_id"] != identity.attempt_id(
+                allocation=allocation_id, case=case_id, ordinal=item["attempt_index"]
+            )
+            for item in group
+        ):
+            raise PublisherError("public retry identity differs from its case/allocation/index")
+        selected = [item for item in group if item["selected"]]
+        if len(selected) != 1 or selected[0]["attempt_index"] != ordinals[-1]:
+            raise PublisherError("publisher must select the latest retry per case/allocation")
+    selected_by_series: dict[str, list[dict[str, Any]]] = {}
+    for item in doc["attempts"]:
+        if item["selected"] and item["outcome"] == "success":
+            selected_by_series.setdefault(item["series_id"], []).append(item)
+    terminal = 0
+    for item in doc["coverage"]:
+        listed = set(item["attempt_ids"])
+        selected = item["selected_attempt_id"]
+        expected_attempts = {
+            attempt_id for attempt_id, attempt in attempts.items()
+            if attempt["case_id"] == item["case_id"]
+        }
+        if listed != expected_attempts:
+            raise PublisherError("coverage references attempts from another case")
+        if selected is not None:
+            terminal += 1
+            if (selected not in listed or not attempts[selected]["selected"]
+                    or any(attempts[selected][field] != item[field]
+                           for field in ("outcome", "failure_mode", "reason"))):
+                raise PublisherError("coverage selected outcome differs")
+            selected_candidates = [attempts[value] for value in listed if attempts[value]["selected"]]
+            latest = max(
+                selected_candidates,
+                key=lambda attempt: (
+                    int(attempt["run_id"]), attempt["run_attempt"],
+                    attempt["attempt_index"], attempt["attempt_id"]
+                ),
+            )
+            if selected != latest["attempt_id"]:
+                raise PublisherError("coverage does not select the latest canonical allocation")
+            expected_status = (
+                "measured" if attempts[selected]["outcome"] == "success"
+                else attempts[selected]["outcome"]
+            )
+            if any(point["terminal_status"] != expected_status for point in item["points"]):
+                raise PublisherError("coverage point status differs from selected attempt")
+            if expected_status == "measured":
+                selected_series = series.get(attempts[selected]["series_id"])
+                if selected_series is None:
+                    raise PublisherError("measured coverage points lack a public series")
+                public_points = {
+                    point["tokens_per_rank"]: point for point in selected_series["points"]
+                }
+                if any(
+                    point["series_id"] != selected_series["series_id"]
+                    or point["point_id"]
+                    != public_points.get(point["tokens_per_rank"], {}).get("point_id")
+                    for point in item["points"]
+                ):
+                    raise PublisherError("coverage point identities differ from series")
+    measured_cases = sum(
+        all(point["terminal_status"] == "measured" for point in item["points"])
+        for item in doc["coverage"]
+    )
+    unsupported_cases = sum(
+        all(point["terminal_status"] == "unsupported" for point in item["points"])
+        for item in doc["coverage"]
+    )
+    requested_points = sum(len(item["points"]) for item in doc["coverage"])
+    measured_points = sum(
+        point["terminal_status"] == "measured"
+        for item in doc["coverage"] for point in item["points"]
+    )
+    unsupported_points = sum(
+        point["terminal_status"] == "unsupported"
+        for item in doc["coverage"] for point in item["points"]
+    )
+    expected_counts = {
+        "requested_cases": len(doc["coverage"]),
+        "terminal_cases": terminal,
+        "measured_cases": measured_cases,
+        "unsupported_cases": unsupported_cases,
+        "requested_points": requested_points,
+        "terminal_points": requested_points,
+        "measured_points": measured_points,
+        "unsupported_points": unsupported_points,
+    }
+    if any(promotion[field] != value for field, value in expected_counts.items()):
+        raise PublisherError("promotion coverage counts differ")
+    selected_evidence: dict[tuple[str, str], set[str]] = {}
+    for attempt in doc["attempts"]:
+        if attempt["selected"] and attempt["series_id"] is not None:
+            for value in attempt["evidence"]:
+                selected_evidence.setdefault(
+                    (attempt["series_id"], value["point_id"]), set()
+                ).add(value["evidence_id"])
+    for item in doc["series"]:
+        eligibility = _eligibility(item["eligibility"], f"series {item['series_id']}")
+        workload = item["workload"]
+        model, hidden, top_k, experts = sweep_matrix.V1_WORKLOAD
+        suite_contract = sweep_matrix.V1_SUITE_CONTRACTS.get(item["suite"])
+        coordinate = (
+            item["mode"], item["phase"], workload["routing"], workload["eplb"]
+        )
+        profile_case = {"mode": item["mode"]}
+        if workload["precision_profile"] != identity.V1_CONTROL_PRECISION_PROFILE:
+            profile_case["precision_profile"] = workload["precision_profile"]
+        profile = identity.profile_for_case(profile_case)
+        communication_precision = identity.precision_profile(workload["precision_profile"])
+        if (
+            item["model"] != model
+            or (workload["hidden"], workload["top_k"], workload["experts"])
+            != (hidden, top_k, experts)
+            or suite_contract is None
+            or coordinate not in suite_contract["coordinates"]
+            or (
+                suite_contract.get("backends") is not None
+                and item["backend"]["id"] not in suite_contract["backends"]
+            )
+            or item["publication_tier"] != suite_contract["publication"]
+            or item["measurement"]["contract"] != profile["contract"]
+            or item["measurement"]["component_order_contract"]
+            != profile["component_order_contract"]
+            or item["measurement"]["combine_semantics"] != profile["combine_semantics"]
+            or item["measurement"]["payload_unit"] != profile["payload_unit"]
+            or workload["dispatch_precision"] != communication_precision["dispatch"]
+            or workload["combine_precision"] != communication_precision["combine"]
+            or item["measurement"]["qualification_indices"]
+            != sorted(item["measurement"]["qualification_indices"])
+            or len(set(item["measurement"]["qualification_indices"]))
+            != len(item["measurement"]["qualification_indices"])
+        ):
+            raise PublisherError("series differs from the frozen v1 workload/suite profile")
+        backend_id = item["backend"]["id"]
+        expected_role = "reference" if backend_id == "nccl-ep" else "library"
+        if (
+            backend_id not in capability.BACKENDS
+            or item["backend"]["label"] != BACKEND_LABELS[backend_id]
+            or item["backend"]["role"] != expected_role
+            or item["backend"]["version"] is None
+        ):
+            raise PublisherError("series backend projection differs from v1")
+        sku = item["system"]["sku"]
+        platform = capability.PLATFORMS.get(sku)
+        ep_size = item["system"]["ep_size"]
+        registered_topology = capability.topology_for(sku, ep_size)
+        if platform is None or registered_topology is None:
+            raise PublisherError("series system projection differs from v1")
+        disposition, _ = capability.resolve_disposition(
+            sku, backend_id, ep=ep_size, nodes=item["system"]["nodes"],
+            routing=workload["routing"], eplb=workload["eplb"],
+            mode=item["mode"],
+            precision_profile=(
+                workload["precision_profile"]
+                if workload["precision_profile"] != identity.V1_CONTROL_PRECISION_PROFILE
+                else None
+            ),
+        )
+        if (
+            disposition != "supported"
+            or item["system"]["vendor"] != platform["vendor"]
+            or any(
+                item["system"][field] != registered_topology[field]
+                for field in (
+                    "nodes", "gpus_per_node", "scale_up_domain", "scope",
+                    "scale_up_transport", "scale_out_transport", "transport",
+                    "topology_class",
+                )
+            )
+            or item["system"]["world_size"] != ep_size
+            or platform["product"] not in set(
+                re.findall(r"[a-z]+\d+[a-z]*", item["system"]["label"].lower())
+            )
+        ):
+            raise PublisherError("series system projection differs from v1")
+        if contracts.public_series_config_sha256(_public_series_config(item)) != item[
+            "build"
+        ]["public_config_sha256"]:
+            raise PublisherError("public series configuration differs from its commitment")
+        covered = [coverage_by_case.get(case_id) for case_id in item["case_ids"]]
+        if not covered or any(
+            case is None
+            or {
+                "sku": case["sku"], "backend": case["backend"],
+                "mode": case["mode"], "phase": case["phase"],
+                "topology": case["topology"],
+            }
+            != {
+                "sku": sku, "backend": backend_id,
+                "mode": item["mode"], "phase": item["phase"],
+                "topology": _coverage_topology(item["system"]),
+            }
+            for case in covered
+        ):
+            raise PublisherError("series projection differs from its case coverage")
+        if (
+            item["eplb"]["enabled"] != item["workload"]["eplb"]
+            or item["eplb"]["logical_experts"] != item["workload"]["experts"]
+        ):
+            raise PublisherError("series EPLB descriptor differs from its workload")
+        eplb = item["eplb"]
+        expected_physical = eplb["logical_experts"] + eplb["redundant_experts"]
+        nullable_eplb = (
+            "planner", "mapping_sha256", "reference_tokens_per_rank", "max_replicas",
+            "imbalance_before", "imbalance_after", "calibration_workload_id",
+            "calibration_trace_sha256", "calibration_window", "calibration_token_offset",
+        )
+        if eplb["enabled"]:
+            if (
+                item["workload"]["routing"] != "zipf"
+                or any(eplb[field] is None for field in nullable_eplb)
+                or eplb["planner"] != "greedy-rank-major-v1"
+                or eplb["reference_tokens_per_rank"] != 2048
+                or eplb["redundant_experts"] != 32
+                or eplb["redundant_experts"] % ep_size != 0
+                or eplb["physical_experts"] != expected_physical
+                or eplb["logical_experts"] % ep_size != 0
+                or eplb["physical_experts"] % ep_size != 0
+                or not 1 <= eplb["replicated_experts"] <= min(
+                    eplb["logical_experts"], eplb["redundant_experts"]
+                )
+                or not 2 <= eplb["max_replicas"] <= 1 + eplb["redundant_experts"]
+                or not 1 <= eplb["imbalance_after"] <= eplb["imbalance_before"] <= ep_size
+            ):
+                raise PublisherError("enabled EPLB descriptor is incomplete")
+            expected_plan, calibration = contracts._expected_eplb_calibration(
+                workload["routing"], workload["hidden"], workload["top_k"],
+                eplb["logical_experts"], eplb["physical_experts"], ep_size,
+                identity.V1_CASE_PROFILE["seed"],
+                identity.V1_CASE_PROFILE["eplb_reference_tokens_per_rank"],
+            )
+            expected_eplb = {
+                **calibration,
+                "enabled": True,
+                "planner": identity.V1_CASE_PROFILE["eplb_planner"],
+                "mapping_sha256": contracts.eplb_contract.mapping_hash(expected_plan),
+                "logical_experts": eplb["logical_experts"],
+                "physical_experts": eplb["physical_experts"],
+                "redundant_experts": identity.V1_CASE_PROFILE["eplb_redundant_experts"],
+                "reference_tokens_per_rank": identity.V1_CASE_PROFILE[
+                    "eplb_reference_tokens_per_rank"
+                ],
+                "replicated_experts": expected_plan["replicated_experts"],
+                "max_replicas": expected_plan["max_replicas"],
+                "imbalance_before": expected_plan["imbalance_before"],
+                "imbalance_after": expected_plan["imbalance_after"],
+            }
+            if eplb != expected_eplb:
+                raise PublisherError("enabled EPLB descriptor differs from deterministic plan")
+        elif (
+            any(eplb[field] is not None for field in nullable_eplb)
+            or eplb["physical_experts"] != expected_physical
+            or eplb["redundant_experts"] != 0
+            or eplb["replicated_experts"] != 0
+        ):
+            raise PublisherError("disabled EPLB descriptor claims a plan")
+        if item["backend"]["id"] == "nccl-ep":
+            expected_generation = (
+                "nccl" if item["system"]["vendor"] == "nvidia" else "rccl"
+            )
+            if item["backend"]["generation"] != expected_generation:
+                raise PublisherError("NCCL/RCCL reference generation differs from system vendor")
+        if (item["status"] == "decision-grade") != eligibility["decision_grade"]:
+            raise PublisherError("series status differs from eligibility")
+        if (
+            set(eligibility["allocation_ids"]) != set(item["allocation_ids"])
+            or eligibility["correct"] != all(
+                point["correctness"]["semantic_pass"]
+                and point["correctness"]["precision"]["passed"]
+                for point in item["points"]
+            )
+        ):
+            raise PublisherError("series eligibility differs from its evidence")
+        selected_attempts = selected_by_series.get(item["series_id"], [])
+        if (
+            set(item["case_ids"]) != {attempt["case_id"] for attempt in selected_attempts}
+            or set(item["allocation_ids"])
+            != {attempt["allocation_id"] for attempt in selected_attempts}
+            or item["measurement"]["qualification_indices"]
+            != sorted({attempt["qualification_index"] for attempt in selected_attempts})
+        ):
+            raise PublisherError("series case/allocation catalog differs from selected attempts")
+        if item["eligibility"]["decision_grade"] and len(
+            {attempt["run_id"] for attempt in selected_attempts}
+        ) < REQUIRED_ALLOCATIONS:
+            raise PublisherError("decision-grade series lacks independent workflow runs")
+        tokens = [point["tokens_per_rank"] for point in item["points"]]
+        if tokens != sorted(set(tokens)):
+            raise PublisherError("series points are not in unique ascending token order")
+        if len(item["case_ids"]) != 1:
+            raise PublisherError("public series must represent exactly one v1 case")
+        case_id = item["case_ids"][0]
+        if identity.digest("case", _public_case_factors(item)) != case_id:
+            raise PublisherError("public series projection differs from its case identity")
+        build = item["build"]
+        expected_series_id = identity.series_id({
+            "backend": backend_id,
+            "case_id": case_id,
+            "image_digest": build["image_digest"],
+            "implementation_contract_sha256": build[
+                "implementation_contract_sha256"
+            ],
+            "public_config_sha256": build["public_config_sha256"],
+            "routing_control_sha256": build["routing_control_sha256"],
+            "runtime_fingerprint_sha256": build["runtime_fingerprint_sha256"],
+            "source_sha": build["source_sha"],
+            "squash_sha256": build["squash_sha256"],
+            "workload_id": workload["workload_id"],
+        })
+        if item["series_id"] != expected_series_id:
+            raise PublisherError("public series identity differs from its committed factors")
+        for point in item["points"]:
+            if point["point_id"] != identity.point_id(series=item["series_id"], tokens_per_rank=point["tokens_per_rank"]):
+                raise PublisherError("point identity differs")
+            if point["global_tokens"] != point["tokens_per_rank"] * item["system"]["ep_size"]:
+                raise PublisherError("global_tokens must use EP size")
+            routing = point["routing"]
+            max_fanout = min(item["workload"]["top_k"], item["system"]["ep_size"])
+            if (
+                routing["routed_copies"] < point["global_tokens"]
+                or routing["routed_copies"] > point["global_tokens"] * max_fanout
+                or routing["recv_tokens_max"] > routing["routed_copies"]
+                or routing["recv_tokens_max"] * item["system"]["ep_size"]
+                < routing["routed_copies"]
+                or not math.isclose(
+                    routing["fanout_mean"],
+                    routing["routed_copies"] / point["global_tokens"],
+                    rel_tol=1e-12,
+                )
+                or routing["hotspot_ratio"] < 1
+                or routing["empty_expert_count"] >= eplb["physical_experts"]
+                or routing["empty_rank_count"] >= item["system"]["ep_size"]
+            ):
+                raise PublisherError("point routing/load facts are internally inconsistent")
+            expected_evidence = selected_evidence.get(
+                (item["series_id"], point["point_id"]), set()
+            )
+            if set(point["evidence_ids"]) != expected_evidence:
+                raise PublisherError("point evidence differs from selected series attempts")
+            point_correctness = point["correctness"]
+            if (
+                point_correctness["precision"]["profile_id"]
+                != workload["precision_profile"]
+                or (
+                    point_correctness["semantic_pass"]
+                    and not point_correctness["precision"]["passed"]
+                )
+                or point["stability"]["qualification_indices"]
+                != item["measurement"]["qualification_indices"]
+            ):
+                raise PublisherError("point correctness/stability differs from series evidence")
+            diagnostics = point["trial_diagnostics"]
+            diagnostic_reasons = set(diagnostics["reasons"])
+            component_reasons: set[str] = set()
+            for name, summary in diagnostics["components"].items():
+                if summary is None:
+                    if point["components"][name] is not None:
+                        raise PublisherError("trial diagnostics omit a measured component")
+                    continue
+                if point["components"][name] is None:
+                    raise PublisherError("trial diagnostics describe an unavailable component")
+                if summary["drift_flagged"] != (
+                    summary["first_last_median_ratio"] > TRIAL_DRIFT_RATIO_LIMIT
+                ) or summary["outlier_flagged"] != (
+                    summary["robust_outlier_fraction"] > TRIAL_OUTLIER_FRACTION_LIMIT
+                ):
+                    raise PublisherError("trial diagnostic flags differ from their thresholds")
+                if summary["drift_flagged"]:
+                    component_reasons.add("trial-drift")
+                if summary["outlier_flagged"]:
+                    component_reasons.add("trial-outliers")
+            if (
+                diagnostic_reasons != component_reasons
+                or diagnostics["flagged"] != bool(diagnostic_reasons)
+                or not diagnostic_reasons.issubset(point["anomalies"])
+            ):
+                raise PublisherError("trial diagnostic summary is inconsistent")
+            components = point["components"]
+            if (components["dispatch"] is None) != (components["combine"] is None):
+                raise PublisherError("dispatch/combine availability differs")
+            for name, component in components.items():
+                if component is None:
+                    continue
+                expected_origin = "derived" if name == "isolated_sum" else "measured"
+                expected_samples = None if name == "isolated_sum" else 512
+                if component["origin"] != expected_origin or component["sample_count"] != expected_samples:
+                    raise PublisherError(f"{name} origin or sample count differs")
+                rate_fields = (
+                    "activation_data_rate_gbps_at_latency_percentile",
+                    "total_logical_data_rate_gbps_at_latency_percentile",
+                )
+                if name == "isolated_sum" and any(component[field] is not None for field in rate_fields):
+                    raise PublisherError("isolated_sum cannot publish a derived data rate")
+                if name != "isolated_sum" and any(component[field] is None for field in rate_fields):
+                    raise PublisherError(f"{name} measured data rates are missing")
+                latency = component["latency_us"]
+                if list(latency.values()) != sorted(latency.values()):
+                    raise PublisherError("latency percentiles are not ordered")
+                byte_provenance = component["byte_provenance"]
+                if byte_provenance["total_logical_bytes"] != (
+                    byte_provenance["activation_data_bytes"] + byte_provenance["scale_bytes"]
+                ):
+                    raise PublisherError("component byte accounting does not reconcile")
+                for field, byte_field in (
+                    ("activation_data_rate_gbps_at_latency_percentile", "activation_data_bytes"),
+                    ("total_logical_data_rate_gbps_at_latency_percentile", "total_logical_bytes"),
+                ):
+                    if component[field] is not None:
+                        for statistic, rate in component[field].items():
+                            expected = byte_provenance[byte_field] / (latency[statistic] * 1000.0)
+                            if not math.isclose(rate, expected, rel_tol=1e-9, abs_tol=1e-12):
+                                raise PublisherError("component GB/s formula differs")
+            if components["roundtrip"] is None or components["roundtrip"]["origin"] != "measured":
+                raise PublisherError("roundtrip must be measured")
+            for statistic, throughput in point["roundtrip_token_rate_at_latency_percentile"].items():
+                expected = point["global_tokens"] / (
+                    components["roundtrip"]["latency_us"][statistic] * 1e-6
+                )
+                if not math.isclose(throughput, expected, rel_tol=1e-9):
+                    raise PublisherError("roundtrip token throughput formula differs")
+            if components["dispatch"] is not None:
+                derived = components["isolated_sum"]
+                if derived is None or any(not math.isclose(
+                    derived["latency_us"][statistic],
+                    components["dispatch"]["latency_us"][statistic]
+                    + (
+                        components["stage"]["latency_us"][statistic]
+                        if components["stage"] is not None else 0.0
+                    )
+                    + components["combine"]["latency_us"][statistic], rel_tol=1e-12
+                ) for statistic in ("p50", "p90", "p95", "p99")):
+                    raise PublisherError("isolated_sum is not the component percentile sum")
+            elif components["isolated_sum"] is not None:
+                raise PublisherError("isolated_sum requires measured dispatch/combine components")
+        if any(point["trial_diagnostics"]["flagged"] for point in item["points"]) != (
+            "unresolved-trial-diagnostic" in item["eligibility"]["reasons"]
+        ):
+            raise PublisherError("series trial diagnostic eligibility is inconsistent")
+    cohorts = {item["cohort_id"]: item for item in doc["cohorts"]}
+    if len(cohorts) != len(doc["cohorts"]):
+        raise PublisherError("dataset has duplicate cohort IDs")
+    for item in doc["cohorts"]:
+        if not set(item["series_ids"]).issubset(series):
+            raise PublisherError("cohort references unknown series")
+        members = [series[series_id] for series_id in item["series_ids"]]
+        expected_tier = (
+            "comparable-experimental"
+            if any(member["publication_tier"] == "comparable-experimental" for member in members)
+            else "official"
+        )
+        if item["publication_tier"] != expected_tier:
+            raise PublisherError("cohort publication tier differs from its members")
+        if f"/ {members[0]['mode']} /" not in item["label"]:
+            raise PublisherError("cohort label omits its controlled mode")
+        roles = {member["backend"]["role"] for member in members}
+        if item["kind"] == "library" and roles != {"library"}:
+            raise PublisherError("library cohort contains non-library evidence")
+        if item["kind"] == "system" and roles != {"reference"}:
+            raise PublisherError("system cohort is not a portable reference comparison")
+        if item["kind"] in {"chip", "routing", *PRECISION_COHORT_KINDS} and len(
+            {_canonical(member["backend"]) for member in members}
+        ) != 1:
+            raise PublisherError(f"{item['kind']} cohort mixes backend implementations")
+        public_factors = [_public_cohort_factors(item["kind"], member) for member in members]
+        if len({_canonical(value[0]) for value in public_factors}) != 1:
+            raise PublisherError(f"{item['kind']} cohort does not control its public factors")
+        if len({_canonical(value[1]) for value in public_factors}) < 2:
+            raise PublisherError(f"{item['kind']} cohort does not vary its declared contrast")
+        if item["kind"] == "routing":
+            if item["publication_tier"] != "comparable-experimental":
+                raise PublisherError("routing cohort must be experimental")
+            has_baseline = sum(
+                member["workload"]["routing"] == "uniform"
+                and not member["workload"]["eplb"]
+                for member in members
+            ) == 1
+            missing_reason = "missing-uniform-baseline" in item["eligibility"]["reasons"]
+            if has_baseline == missing_reason:
+                raise PublisherError("routing baseline and eligibility reason disagree")
+            mismatch = _routing_implementation_mismatch(members)
+            mismatch_reason = "implementation-config-mismatch" in item["eligibility"]["reasons"]
+            if mismatch != mismatch_reason:
+                raise PublisherError("routing implementation control and eligibility disagree")
+        if item["kind"] in PRECISION_COHORT_KINDS:
+            if item["publication_tier"] != "comparable-experimental":
+                raise PublisherError("precision cohorts must be experimental")
+            if item["kind"] in {"dispatch-precision", "combine-precision"}:
+                axis = (
+                    "dispatch"
+                    if item["kind"] == "dispatch-precision"
+                    else "combine"
+                )
+                field = f"{axis}_precision"
+                bf16 = identity.precision_profile(
+                    identity.V1_CONTROL_PRECISION_PROFILE
+                )[axis]
+                has_baseline = sum(
+                    _canonical(member["workload"][field]) == _canonical(bf16)
+                    for member in members
+                ) == 1
+                missing_reason = (
+                    "missing-bf16-precision-baseline"
+                    in item["eligibility"]["reasons"]
+                )
+                if has_baseline == missing_reason:
+                    raise PublisherError(
+                        "precision baseline and eligibility reason disagree"
+                    )
+        expected_id = _derived_id("cxcohort-v1-", {
+            "kind": item["kind"], "series_ids": item["series_ids"],
+            "controlled_factors": item["controlled_factors"],
+            "varying_factors": item["varying_factors"],
+        })
+        if item["cohort_id"] != expected_id:
+            raise PublisherError("cohort ID differs from its public factors")
+        expected_factors = {
+            "library": (
+                ["system", "workload", "mode", "phase", "measurement", "resource.mode", "source"],
+                ["backend", "resource"],
+            ),
+            "chip": (
+                ["backend", "source", "workload", "mode", "phase", "measurement", "resource.mode"],
+                ["system", "resource"],
+            ),
+            "system": (
+                ["workload", "mode", "phase", "measurement", "source"],
+                ["system", "backend", "resource"],
+            ),
+            "routing": (
+                ["backend", "implementation-static-build", "system", "model-shape", "mode", "phase", "measurement", "resource"],
+                ["workload.routing", "workload.eplb", "implementation-config"],
+            ),
+            "dispatch-precision": (
+                [
+                    "backend", "implementation-static-build", "system", "model-shape",
+                    "mode", "phase", "workload.routing", "workload.eplb",
+                    "measurement", "resource", "combine-precision",
+                ],
+                ["dispatch-precision"],
+            ),
+            "combine-precision": (
+                [
+                    "backend", "implementation-static-build", "system", "model-shape",
+                    "mode", "phase", "workload.routing", "workload.eplb",
+                    "measurement", "resource", "dispatch-precision",
+                ],
+                ["combine-precision"],
+            ),
+            "precision-pair": (
+                [
+                    "backend", "implementation-static-build", "system", "model-shape",
+                    "mode", "phase", "workload.routing", "workload.eplb",
+                    "measurement",
+                ],
+                [
+                    "dispatch-precision", "combine-precision", "precision-profile",
+                    "resource",
+                ],
+            ),
+        }[item["kind"]]
+        member_allocations = {
+            allocation for series_id in item["series_ids"]
+            for allocation in series[series_id]["allocation_ids"]
+        }
+        if (
+            (item["controlled_factors"], item["varying_factors"]) != expected_factors
+            or set(item["eligibility"]["allocation_ids"]) != member_allocations
+        ):
+            raise PublisherError("cohort factors or allocations differ from its members")
+        _eligibility(item["eligibility"], f"cohort {item['cohort_id']}")
+    expected_ranking_keys: set[tuple[str, str, str, int]] = set()
+    for cohort in doc["cohorts"]:
+        if not cohort["eligibility"]["decision_grade"]:
+            continue
+        members = [series[series_id] for series_id in cohort["series_ids"]]
+        tokens = set.intersection(*(
+            {point["tokens_per_rank"] for point in member["points"]}
+            for member in members
+        ))
+        expected_ranking_keys.update(
+            (cohort["cohort_id"], measure, statistic, token)
+            for token in tokens
+            for measure in (
+                "latency_us", "activation_data_rate_gbps_at_latency_percentile",
+                "total_logical_data_rate_gbps_at_latency_percentile",
+            )
+            for statistic in ("p50", "p99")
+        )
+    ranking_top: dict[
+        tuple[str, str, str, int], dict[str, Any] | None
+    ] = {}
+    ranking_ids: set[str] = set()
+    for ranking in doc["rankings"]:
+        cohort = cohorts.get(ranking["cohort_id"])
+        if (
+            cohort is None
+            or not cohort["eligibility"]["decision_grade"]
+            or ranking["eligibility"] != cohort["eligibility"]
+            or ranking["publication_tier"] != cohort["publication_tier"]
+        ):
+            raise PublisherError("ranking references an ineligible cohort")
+        entries = ranking["entries"]
+        _validate_metric(ranking["metric"])
+        if cohort["kind"] == "library" and any(
+            series[series_id]["backend"]["role"] == "reference"
+            for series_id in cohort["series_ids"]
+        ):
+            raise PublisherError("reference evidence cannot drive a library ranking")
+        if {entry["series_id"] for entry in entries} != set(cohort["series_ids"]):
+            raise PublisherError("ranking does not cover its cohort")
+        for entry in entries:
+            point_id, value, unit = _metric_value(series[entry["series_id"]], ranking["metric"])
+            if entry["point_id"] != point_id or entry["unit"] != unit or not math.isclose(entry["value"], value, rel_tol=1e-12):
+                raise PublisherError("ranking entry differs from series data")
+        reverse = ranking["metric"]["objective"] == "max"
+        expected = sorted(entries, key=lambda entry: (entry["value"], entry["series_id"]), reverse=reverse)
+        metric = ranking["metric"]
+        ranks = [entry["rank"] for entry in entries]
+        if metric["measure"] == "latency_us" and metric["statistic"] == "p99":
+            tied_first = sum(rank == 1 for rank in ranks)
+            expected_ranks = [1] * tied_first + list(
+                range(tied_first + 1, len(entries) + 1)
+            )
+        else:
+            expected_ranks = list(range(1, len(entries) + 1))
+        if entries != expected or not ranks or ranks != expected_ranks:
+            raise PublisherError("ranking order differs")
+        expected_id = _derived_id("cxranking-v1-", {
+            "cohort_id": ranking["cohort_id"], "metric": metric,
+        })
+        if ranking["ranking_id"] != expected_id or expected_id in ranking_ids:
+            raise PublisherError("ranking ID is duplicate or differs")
+        ranking_ids.add(expected_id)
+        ranking_top[(ranking["cohort_id"], metric["measure"], metric["statistic"], metric["tokens_per_rank"])] = (
+            entries[0] if ranks.count(1) == 1 else None
+        )
+    if set(ranking_top) != expected_ranking_keys:
+        raise PublisherError("rankings do not cover every eligible cohort metric")
+    objective = {
+        "min-p50-latency": ("latency_us", "p50"), "min-p99-latency": ("latency_us", "p99"),
+        "max-activation-data-rate-at-p50-latency": (
+            "activation_data_rate_gbps_at_latency_percentile", "p50"
+        ),
+        "max-activation-data-rate-at-p99-latency": (
+            "activation_data_rate_gbps_at_latency_percentile", "p99"
+        ),
+        "max-total-logical-data-rate-at-p50-latency": (
+            "total_logical_data_rate_gbps_at_latency_percentile", "p50"
+        ),
+        "max-total-logical-data-rate-at-p99-latency": (
+            "total_logical_data_rate_gbps_at_latency_percentile", "p99"
+        ),
+    }
+    recommendation_ids: set[str] = set()
+    for item in doc["recommendations"]:
+        if item["objective"] != "min-p99-latency":
+            raise PublisherError("recommendation is not a unique p99 latency winner")
+        measure, statistic = objective[item["objective"]]
+        candidates = [top for key, top in ranking_top.items()
+                      if key[:3] == (item["cohort_id"], measure, statistic)
+                      and top is not None and top["point_id"] == item["point_id"]]
+        if len(candidates) != 1 or any(item[field] != candidates[0][field] for field in ("series_id", "point_id", "value", "unit")):
+            raise PublisherError("recommendation is not a ranking winner")
+        matching_ranking = next(
+            ranking for ranking in doc["rankings"]
+            if ranking["cohort_id"] == item["cohort_id"]
+            and ranking["metric"]["measure"] == measure
+            and ranking["metric"]["statistic"] == statistic
+            and ranking["entries"][0]["point_id"] == item["point_id"]
+        )
+        expected_id = _derived_id("cxrecommendation-v1-", {
+            "objective": item["objective"], "ranking_id": matching_ranking["ranking_id"],
+        })
+        cohort = cohorts[item["cohort_id"]]
+        if (item["recommendation_id"] != expected_id or expected_id in recommendation_ids
+                or cohort["publication_tier"] != "official"
+                or item["publication_tier"] != "official"
+                or item["eligibility"] != cohort["eligibility"]):
+            raise PublisherError("recommendation ID/eligibility differs")
+        recommendation_ids.add(expected_id)
+    expected_recommendations = sum(
+        cohorts[ranking["cohort_id"]]["publication_tier"] == "official"
+        and ranking["metric"]["measure"] == "latency_us"
+        and ranking["metric"]["statistic"] == "p99"
+        and sum(entry["rank"] == 1 for entry in ranking["entries"]) == 1
+        for ranking in doc["rankings"]
+    )
+    if len(doc["recommendations"]) != expected_recommendations:
+        raise PublisherError("recommendations do not cover every actionable ranking")
+    sensitivity_ids: set[str] = set()
+    sensitivity_keys: set[tuple[str, str, str, str, str, int]] = set()
+    for item in doc["sensitivities"]:
+        cohort = cohorts.get(item["cohort_id"])
+        if (
+            cohort is None
+            or cohort["kind"] not in {
+                "routing", "dispatch-precision", "combine-precision",
+            }
+            or not cohort["eligibility"]["decision_grade"]
+            or item["publication_tier"] != cohort["publication_tier"]
+            or item["eligibility"] != cohort["eligibility"]
+        ):
+            raise PublisherError("sensitivity references an ineligible contrast cohort")
+        if (
+            item["baseline_series_id"] == item["candidate_series_id"]
+            or not {item["baseline_series_id"], item["candidate_series_id"]}.issubset(cohort["series_ids"])
+        ):
+            raise PublisherError("sensitivity series differ from its cohort")
+        _validate_metric(item["metric"])
+        baseline_series = series[item["baseline_series_id"]]
+        if cohort["kind"] == "routing":
+            if (
+                baseline_series["workload"]["routing"] != "uniform"
+                or baseline_series["workload"]["eplb"]
+            ):
+                raise PublisherError("sensitivity baseline is not uniform without EPLB")
+        else:
+            axis = (
+                "dispatch"
+                if cohort["kind"] == "dispatch-precision"
+                else "combine"
+            )
+            field = f"{axis}_precision"
+            bf16 = identity.precision_profile(
+                identity.V1_CONTROL_PRECISION_PROFILE
+            )[axis]
+            if _canonical(baseline_series["workload"][field]) != _canonical(bf16):
+                raise PublisherError("precision sensitivity baseline is not BF16")
+        _, baseline, _ = _metric_value(series[item["baseline_series_id"]], item["metric"])
+        _, candidate, _ = _metric_value(series[item["candidate_series_id"]], item["metric"])
+        if not math.isclose(item["signed_change_ratio"], (candidate - baseline) / baseline, rel_tol=1e-12):
+            raise PublisherError("sensitivity ratio differs")
+        expected_id = _derived_id("cxsensitivity-v1-", {
+            "baseline": item["baseline_series_id"],
+            "candidate": item["candidate_series_id"],
+            "cohort": item["cohort_id"], "metric": item["metric"],
+        })
+        if item["sensitivity_id"] != expected_id or expected_id in sensitivity_ids:
+            raise PublisherError("sensitivity ID is duplicate or differs")
+        sensitivity_ids.add(expected_id)
+        sensitivity_keys.add((
+            item["cohort_id"], item["baseline_series_id"], item["candidate_series_id"],
+            item["metric"]["measure"], item["metric"]["statistic"],
+            item["metric"]["tokens_per_rank"],
+        ))
+    expected_sensitivity_keys: set[tuple[str, str, str, str, str, int]] = set()
+    for cohort in doc["cohorts"]:
+        if (
+            cohort["kind"] not in {
+                "routing", "dispatch-precision", "combine-precision",
+            }
+            or not cohort["eligibility"]["decision_grade"]
+        ):
+            continue
+        members = [series[series_id] for series_id in cohort["series_ids"]]
+        if cohort["kind"] == "routing":
+            baseline = next((
+                member for member in members
+                if member["workload"]["routing"] == "uniform"
+                and not member["workload"]["eplb"]
+            ), None)
+        else:
+            axis = (
+                "dispatch"
+                if cohort["kind"] == "dispatch-precision"
+                else "combine"
+            )
+            field = f"{axis}_precision"
+            bf16 = identity.precision_profile(
+                identity.V1_CONTROL_PRECISION_PROFILE
+            )[axis]
+            baseline = next((
+                member for member in members
+                if _canonical(member["workload"][field]) == _canonical(bf16)
+            ), None)
+        if baseline is None:
+            continue
+        tokens = set.intersection(*(
+            {point["tokens_per_rank"] for point in member["points"]}
+            for member in members
+        ))
+        expected_sensitivity_keys.update(
+            (cohort["cohort_id"], baseline["series_id"], candidate["series_id"],
+             measure, statistic, token)
+            for candidate in members if candidate is not baseline
+            for token in tokens
+            for measure in (
+                "latency_us", "activation_data_rate_gbps_at_latency_percentile",
+                "total_logical_data_rate_gbps_at_latency_percentile",
+            )
+            for statistic in ("p50", "p99")
+        )
+    if sensitivity_keys != expected_sensitivity_keys:
+        raise PublisherError("sensitivities do not cover every declared contrast metric")
+    observed_qualification_indices = sorted({
+        item["qualification_index"] for item in doc["attempts"] if item["selected"]
+    })
+    if promotion["qualification_indices"] != observed_qualification_indices:
+        raise PublisherError("promotion qualification index catalog differs from attempts")
+    if promotion["status"] == "promoted":
+        run_ids = {item["run_id"] for item in doc["attempts"] if item["selected"]}
+        repeated_cases = all(
+            {
+                attempts[attempt_id]["qualification_index"]
+                for attempt_id in coverage["attempt_ids"]
+                if attempts[attempt_id]["selected"]
+            } == {1, 2, 3}
+            for coverage in doc["coverage"]
+        )
+        if promotion["matrix_id"] != CANONICAL_FULL_V1_MATRIX_SHA256:
+            raise PublisherError("promotion requires the canonical full-v1 matrix")
+        if (
+            _case_disposition_catalog_sha256(doc["coverage"])
+            != CANONICAL_FULL_V1_CASE_CATALOG_SHA256
+        ):
+            raise PublisherError("promotion requires the canonical case/disposition catalog")
+        if (
+            terminal != len(doc["coverage"])
+            or promotion["qualification_indices"] != [1, 2, 3]
+            or promotion["measured_cases"] + promotion["unsupported_cases"]
+            != promotion["requested_cases"]
+            or promotion["measured_points"] + promotion["unsupported_points"]
+            != promotion["requested_points"]
+            or promotion["terminal_points"] != promotion["requested_points"]
+            or len(doc["source_bundle_ids"]) != REQUIRED_ALLOCATIONS
+            or len(run_ids) != REQUIRED_ALLOCATIONS
+            or not repeated_cases
+        ):
+            raise PublisherError("promoted dataset lacks complete coverage")
+        expected_outcomes = {
+            item["case_id"]: (
+                "success" if item["disposition"] == "runnable" else "unsupported"
+            )
+            for item in doc["coverage"]
+        }
+        if any(
+            item["selected"]
+            and item["outcome"] != expected_outcomes[item["case_id"]]
+            for item in doc["attempts"]
+        ):
+            raise PublisherError("promoted outcomes differ from requested dispositions")
+        runnable_cases = {
+            item["case_id"] for item in doc["coverage"]
+            if item["disposition"] == "runnable"
+        }
+        if any(
+            item["case_id"] in runnable_cases and item["outcome"] != "success"
+            for item in doc["attempts"]
+        ):
+            raise PublisherError(
+                "promotion rejects runnable cases with failed, invalid, or diagnostic retries"
+            )
+        _require_promotion_series(doc["series"])
+        _require_promotion_cohorts(doc["cohorts"], doc["series"])
+        if not doc["rankings"]:
+            raise PublisherError("promoted dataset lacks eligible rankings")
+    if promotion["status"] == "quarantined" and any((
+        doc["source_bundle_ids"], promotion["allocation_ids"], doc["coverage"],
+        doc["attempts"], doc["series"], doc["cohorts"], doc["rankings"],
+        doc["recommendations"], doc["sensitivities"],
+    )):
+        raise PublisherError("quarantined dataset exposes unvalidated evidence")
+    return doc
+
+
+def _file_record(value: Any, path: str) -> dict[str, Any]:
+    """Validate one file record and return it.
+
+    The record is first passed through ``_exact`` with the key set
+    ``{"path", "sha256", "bytes"}``. ``path`` here is the location label that
+    prefixes every error message, not the file path stored in the record.
+
+    Raises:
+        PublisherError: if the record's ``path`` is not a string, is an
+            absolute POSIX path, or contains a ``..`` component; or if
+            ``sha256`` is not a string fully matched by ``HEX64``.
+
+    ``bytes`` is delegated to ``_integer`` with ``minimum=1`` (presumably
+    rejecting non-integers and empty files -- confirm against ``_integer``).
+    """
+    record = _exact(value, {"path", "sha256", "bytes"}, path)
+    location = record["path"]
+    if not isinstance(location, str):
+        raise PublisherError(f"{path}.path is unsafe")
+    # Record paths are always interpreted with POSIX rules, whatever the host.
+    relative = PurePosixPath(location)
+    if relative.is_absolute() or ".." in relative.parts:
+        raise PublisherError(f"{path}.path is unsafe")
+    digest = record["sha256"]
+    if not isinstance(digest, str) or HEX64.fullmatch(digest) is None:
+        raise PublisherError(f"{path}.sha256 is invalid")
+    _integer(record["bytes"], f"{path}.bytes", minimum=1)
+    return record
+
+def validate_bundle_manifest(doc: Any) -> dict[str, Any]:
+    """Check a private bundle manifest for internal consistency and return it.
+
+    After schema validation against ``private-bundle-v1.schema.json`` the
+    following invariants are enforced, in this order:
+
+    * attempt IDs are unique;
+    * each case is selected at most once;
+    * every selection names a retained attempt that is marked ``selected`` and
+      agrees with the selection on ``case_id`` and ``outcome``;
+    * ``terminal_cases`` and ``outcome_counts`` equal the values recomputed
+      from the selections;
+    * ``complete`` is true exactly when ``expected_cases`` equals the number
+      of selections;
+    * within one allocation, all non-empty runtime fingerprints are identical.
+
+    Raises:
+        PublisherError: on the first violated invariant.
+    """
+    _schema("private-bundle-v1.schema.json", doc)
+    retained = doc["attempts"]
+    by_id = {entry["attempt_id"]: entry for entry in retained}
+    if len(by_id) != len(retained):
+        raise PublisherError("bundle has duplicate attempt IDs")
+    coverage = doc["coverage"]
+    selections = coverage["selections"]
+    selected_cases = {entry["case_id"] for entry in selections}
+    if len(selected_cases) != len(selections):
+        raise PublisherError("bundle has duplicate selected cases")
+    # Tally outcomes from the selections; every known outcome starts at zero so
+    # the comparison below also catches missing zero entries.
+    tally = dict.fromkeys(OUTCOMES, 0)
+    for selection in selections:
+        chosen = by_id.get(selection["selected_attempt_id"])
+        consistent = (
+            chosen is not None
+            and chosen["selected"]
+            and chosen["case_id"] == selection["case_id"]
+            and chosen["outcome"] == selection["outcome"]
+        )
+        if not consistent:
+            raise PublisherError("bundle selection differs from retained attempt")
+        tally[selection["outcome"]] += 1
+    selected_total = len(selections)
+    if coverage["terminal_cases"] != selected_total or coverage["outcome_counts"] != tally:
+        raise PublisherError("bundle terminal counts differ")
+    if coverage["complete"] != (coverage["expected_cases"] == selected_total):
+        raise PublisherError("bundle completeness differs from coverage")
+    # Group the distinct non-empty runtime fingerprints by allocation.
+    runtimes: dict[str, set[str]] = {}
+    for entry in retained:
+        fingerprint = entry["runtime_fingerprint_sha256"]
+        if not fingerprint:
+            continue
+        runtimes.setdefault(entry["allocation_id"], set()).add(fingerprint)
+    for distinct in runtimes.values():
+        if len(distinct) != 1:
+            raise PublisherError("bundle runtime is heterogeneous within an allocation")
+    return doc
+
+
+def _fsync_dir(path: Path) -> None:
+    descriptor = os.open(path, os.O_RDONLY | getattr(os, "O_DIRECTORY", 0))
+    try:
+        os.fsync(descriptor)
+    finally:
+        os.close(descriptor)
+
+
+def _write_bytes(path: Path, data: bytes, *, mode: int) -> None:
+    descriptor = os.open(
+        path,
+        os.O_WRONLY | os.O_CREAT | os.O_EXCL | getattr(os, "O_NOFOLLOW", 0),
+        mode,
+    )
+    try:
+        os.fchmod(descriptor, mode)
+        with os.fdopen(descriptor, "wb", closefd=False) as handle:
+            handle.write(data)
+            handle.flush()
+            os.fsync(handle.fileno())
+    finally:
+        os.close(descriptor)
+
+
+def _write_all(descriptor: int, data: bytes) -> None:
+    view = memoryview(data)
+    while view:
+        view = view[os.write(descriptor, view):]
+
+
+def _write_json(path: Path, value: Any, *, mode: int) -> bytes:
+    data = _canonical(value) + b"\n"
+    _write_bytes(path, data, mode=mode)
+    return data
+
+
+def _file_metadata(path: Path, relative_to: Path) -> dict[str, Any]:
+    return {
+        "path": path.relative_to(relative_to).as_posix(),
+        "sha256": _sha_file(path),
+        "bytes": path.stat().st_size,
+    }
+
+
+def _tree_files(root: Path) -> list[Path]:
+    return sorted(
+        path for path in root.rglob("*")
+        if path.is_file() and not path.is_symlink() and path.name != "COMPLETE"
+    )
+
+
+def _verify_regular_file(path: Path, expected_mode: int) -> None:
+    _reject_symlinked_path(path.parent)
+    try:
+        metadata = os.lstat(path)
+    except FileNotFoundError as exc:
+        raise PublisherError(f"required file is missing: {path.name}") from exc
+    if (
+        not stat.S_ISREG(metadata.st_mode)
+        or metadata.st_uid != os.getuid()
+        or stat.S_IMODE(metadata.st_mode) != expected_mode
+    ):
+        raise PublisherError(
+            f"file is not an owned regular {expected_mode:o} object: {path.name}"
+        )
+
+
+def _verify_frozen_tree(root: Path, *, private: bool) -> None:
+    _reject_symlinked_path(root)
+    directory_mode = 0o500 if private else 0o555
+    file_mode = 0o400 if private else 0o444
+    try:
+        root_metadata = os.lstat(root)
+    except OSError as exc:
+        raise PublisherError(f"cannot inspect immutable object: {root.name}") from exc
+    if not stat.S_ISDIR(root_metadata.st_mode):
+        raise PublisherError(f"immutable object is not a real directory: {root.name}")
+    try:
+        entries = [root, *root.rglob("*")]
+    except OSError as exc:
+        raise PublisherError(f"cannot inspect immutable object: {root.name}") from exc
+    for path in entries:
+        metadata = os.lstat(path)
+        if metadata.st_uid != os.getuid():
+            raise PublisherError(f"immutable object has the wrong owner: {path.name}")
+        if stat.S_ISDIR(metadata.st_mode):
+            expected = directory_mode
+        elif stat.S_ISREG(metadata.st_mode):
+            expected = file_mode
+        else:
+            raise PublisherError(f"immutable object contains a linked or special entry: {path.name}")
+        if stat.S_IMODE(metadata.st_mode) != expected:
+            raise PublisherError(
+                f"immutable object mode differs for {path.name}: expected {expected:o}"
+            )
+
+
+def _freeze_tree(root: Path, *, private: bool) -> None:
+    files: list[Path] = []
+    directories = [root]
+    for path in root.rglob("*"):
+        metadata = os.lstat(path)
+        if stat.S_ISDIR(metadata.st_mode):
+            directories.append(path)
+        elif stat.S_ISREG(metadata.st_mode):
+            files.append(path)
+        else:
+            raise PublisherError(f"immutable object contains a linked or special entry: {path.name}")
+    for path in files:
+        os.chmod(path, 0o400 if private else 0o444)
+    for path in sorted(directories, key=lambda item: len(item.parts), reverse=True):
+        os.chmod(path, 0o500 if private else 0o555)
+        _fsync_dir(path)
+    _verify_frozen_tree(root, private=private)
+
+
+def _reject_symlinked_path(path: Path) -> None:
+    current = Path(path.anchor)
+    for part in path.parts[1:]:
+        current /= part
+        try:
+            metadata = os.lstat(current)
+        except FileNotFoundError:
+            break
+        if stat.S_ISLNK(metadata.st_mode):
+            raise PublisherError("COLLECTIVEX_STORE_ROOT must not traverse a symlinked parent")
+        if not stat.S_ISDIR(metadata.st_mode):
+            raise PublisherError(f"store path component is not a directory: {current}")
+
+
+class Store:
+    """Atomic private/public directory operations on one operator filesystem."""
+
+    # Prepare the store under ``root``: reject symlinked parents, create the root,
+    # require it to be owned by this user and not group/world writable, force mode
+    # 750, then create the private/public/locks subdirectories with fixed modes.
+    # Raises PublisherError when any of those checks fails.
+    def __init__(self, root: str | os.PathLike[str]):
+        # abspath does not resolve symlinks, so comparing with resolve() below
+        # detects a symlink anywhere in the path.
+        candidate = Path(os.path.abspath(os.path.expanduser(root)))
+        _reject_symlinked_path(candidate)
+        candidate.mkdir(parents=True, exist_ok=True, mode=0o750)
+        resolved = candidate.resolve()
+        if candidate != resolved:
+            raise PublisherError(
+                "COLLECTIVEX_STORE_ROOT must not traverse a symlinked parent"
+            )
+        root_metadata = candidate.stat()
+        if root_metadata.st_uid != os.getuid() or stat.S_IMODE(root_metadata.st_mode) & 0o022:
+            raise PublisherError(
+                "COLLECTIVEX_STORE_ROOT must be owned by this user and not group/world writable"
+            )
+        os.chmod(candidate, 0o750)
+        if stat.S_IMODE(candidate.stat().st_mode) != 0o750:
+            raise PublisherError("COLLECTIVEX_STORE_ROOT mode must be 750")
+        self.root = resolved
+        raw = self.root
+        self.private = raw / "private"
+        self.incoming = self.private / "incoming"
+        self.bundles = self.private / "bundles"
+        self.quarantine = self.private / "quarantine"
+        self.public = raw / "public"
+        self.datasets = self.public / "datasets"
+        self.channels = self.public / "channels"
+        self.locks = raw / "locks"
+        # Private-side directories and locks are 700; public-side directories are 755.
+        for path, mode in (
+            (self.private, 0o700), (self.incoming, 0o700), (self.bundles, 0o700),
+            (self.quarantine, 0o700), (self.public, 0o755), (self.datasets, 0o755),
+            (self.channels, 0o755), (self.locks, 0o700),
+        ):
+            path.mkdir(parents=True, exist_ok=True, mode=mode)
+            if path.is_symlink() or not path.is_dir():
+                raise PublisherError(f"store path is not a real directory: {path}")
+            # mkdir's mode is subject to the umask and is ignored for existing
+            # directories, so set it explicitly.
+            os.chmod(path, mode)
+
+    # Context manager that holds an exclusive flock on locks/publisher.lock for the
+    # duration of the with-body. The lock file must be a regular file owned by this
+    # user with mode 600, otherwise PublisherError is raised.
+    @contextlib.contextmanager
+    def locked(self) -> Iterator[None]:
+        lock_path = self.locks / "publisher.lock"
+        descriptor = os.open(
+            lock_path,
+            os.O_RDWR | os.O_CREAT | getattr(os, "O_NOFOLLOW", 0),
+            0o600,
+        )
+        try:
+            os.fchmod(descriptor, 0o600)
+            metadata = os.fstat(descriptor)
+            if (
+                not stat.S_ISREG(metadata.st_mode)
+                or metadata.st_uid != os.getuid()
+                or stat.S_IMODE(metadata.st_mode) != 0o600
+            ):
+                raise PublisherError("publisher lock is not an owned regular 600 file")
+            fcntl.flock(descriptor, fcntl.LOCK_EX)
+            yield
+        finally:
+            # Runs on every exit path, including when the checks above raised
+            # before the lock was taken.
+            fcntl.flock(descriptor, fcntl.LOCK_UN)
+            os.close(descriptor)
+
+    # Context manager that yields a fresh ".staging-*" directory created under
+    # ``parent`` (mode 700 when private, else 755) and removes whatever is left
+    # of it on exit.
+    @contextlib.contextmanager
+    def staging(self, parent: Path, *, private: bool) -> Iterator[Path]:
+        stage = Path(tempfile.mkdtemp(prefix=".staging-", dir=parent))
+        os.chmod(stage, 0o700 if private else 0o755)
+        try:
+            yield stage
+        finally:
+            # The stage may have been frozen read-only (see install), so restore
+            # owner write permission before deleting. After a successful install
+            # the stage has been renamed away and nothing is left to chmod.
+            if stage.exists():
+                for path in stage.rglob("*"):
+                    metadata = os.lstat(path)
+                    if stat.S_ISDIR(metadata.st_mode):
+                        os.chmod(path, 0o700)
+                    elif stat.S_ISREG(metadata.st_mode):
+                        os.chmod(path, 0o600)
+                os.chmod(stage, 0o700)
+            shutil.rmtree(stage, ignore_errors=True)
+
+    # Write the COMPLETE marker (``value`` plus a newline) into ``stage`` and
+    # fsync the stage directory.
+    @staticmethod
+    def complete(stage: Path, value: str, *, private: bool) -> None:
+        _write_bytes(stage / "COMPLETE", (value + "\n").encode(), mode=0o600 if private else 0o644)
+        _fsync_dir(stage)
+
+    # Install a finished stage at ``destination``. If the destination already
+    # exists it is verified (frozen tree, COMPLETE marker equal to its directory
+    # name) and kept, and the stage is not moved. Otherwise the stage is frozen,
+    # renamed into place, the parent directory is fsynced, and the result is
+    # verified again.
+    @staticmethod
+    def install(stage: Path, destination: Path, *, private: bool) -> None:
+        if destination.is_symlink():
+            raise PublisherError(f"immutable destination is a symlink: {destination.name}")
+        if destination.exists():
+            _verify_frozen_tree(destination, private=private)
+            marker = destination / "COMPLETE"
+            if not marker.is_file() or marker.read_text().strip() != destination.name:
+                raise PublisherError(f"immutable destination is incomplete: {destination.name}")
+            return
+        _freeze_tree(stage, private=private)
+        os.rename(stage, destination)
+        _fsync_dir(destination.parent)
+        _verify_frozen_tree(destination, private=private)
+
+    # Validate ``dataset``, store its canonical bytes as datasets/<sha256>/dataset.json,
+    # and re-check the stored copy. Returns (digest, payload size in bytes).
+    # Raises PublisherError if the payload exceeds MAX_PUBLIC_DATASET_BYTES or the
+    # stored object does not match.
+    def install_dataset(self, dataset: dict[str, Any]) -> tuple[str, int]:
+        validate_public_dataset(dataset)
+        payload = _canonical(dataset) + b"\n"
+        if len(payload) > MAX_PUBLIC_DATASET_BYTES:
+            raise PublisherError("public dataset exceeds the serving size limit")
+        # The directory name is the digest of the exact stored bytes.
+        digest = _sha_bytes(payload)
+        destination = self.datasets / digest
+        with self.staging(self.datasets, private=False) as stage:
+            _write_bytes(stage / "dataset.json", payload, mode=0o644)
+            self.complete(stage, digest, private=False)
+            self.install(stage, destination, private=False)
+        # Re-read from the destination: install() may have kept a pre-existing object.
+        stored = destination / "dataset.json"
+        marker = destination / "COMPLETE"
+        if (not marker.is_file() or marker.read_text().strip() != digest
+                or _sha_file(stored) != digest or stored.stat().st_size != len(payload)):
+            raise PublisherError("stored dataset checksum differs after installation")
+        return digest, len(payload)
+
+    # Point channels/<channel>.json at the stored dataset ``digest``. The dataset
+    # must be a complete frozen object whose checksum, size and generated_at match
+    # the arguments; "dev-latest" additionally requires promotion status
+    # "promoted". The pointer is written to a temporary file and moved into place
+    # with os.replace.
+    def update_channel(self, channel: str, digest: str, size: int, generated_at: str) -> None:
+        if size > MAX_PUBLIC_DATASET_BYTES:
+            raise PublisherError("channel dataset exceeds the serving size limit")
+        _verify_frozen_tree(self.datasets / digest, private=False)
+        marker = self.datasets / digest / "COMPLETE"
+        if not marker.is_file() or marker.read_text().strip() != digest:
+            raise PublisherError("cannot advance a channel to an incomplete dataset")
+        dataset_path = self.datasets / digest / "dataset.json"
+        dataset = validate_public_dataset(strict_load(dataset_path))
+        if (
+            _sha_file(dataset_path) != digest
+            or dataset_path.stat().st_size != size
+            or dataset["generated_at"] != generated_at
+        ):
+            raise PublisherError("channel metadata differs from its stored dataset")
+        if channel == "dev-latest" and dataset["promotion"]["status"] != "promoted":
+            raise PublisherError("dev-latest may only reference a promoted dataset")
+        pointer = {
+            "format": FORMAT_CHANNEL,
+            "channel": channel,
+            "dataset": {
+                "path": f"datasets/{digest}/dataset.json",
+                "sha256": digest,
+                "bytes": size,
+            },
+            "generated_at": generated_at,
+        }
+        validate_channel(pointer, expected_channel=channel)
+        destination = self.channels / f"{channel}.json"
+        temporary = self.channels / f".{channel}.tmp-{os.getpid()}"
+        try:
+            data = _canonical(pointer) + b"\n"
+            _write_bytes(temporary, data, mode=0o644)
+            os.replace(temporary, destination)
+            _fsync_dir(self.channels)
+        finally:
+            # No-op after a successful replace; removes the temporary file otherwise.
+            temporary.unlink(missing_ok=True)
+
+    # Load channels/<channel>.json and check that it references a complete, frozen
+    # dataset whose size, checksum and generated_at match the pointer. Returns the
+    # validated pointer document; raises PublisherError on any mismatch.
+    def verify_channel(self, channel: str) -> dict[str, Any]:
+        channel_path = self.channels / f"{channel}.json"
+        _verify_regular_file(channel_path, 0o644)
+        pointer = validate_channel(strict_load(channel_path), expected_channel=channel)
+        # The pointer's dataset path is joined onto the public root.
+        target = self.public / pointer["dataset"]["path"]
+        _verify_frozen_tree(target.parent, private=False)
+        if target.stat().st_size != pointer["dataset"]["bytes"] or _sha_file(target) != pointer["dataset"]["sha256"]:
+            raise PublisherError(f"channel {channel} dataset checksum differs")
+        marker = target.parent / "COMPLETE"
+        if not marker.is_file() or marker.read_text().strip() != pointer["dataset"]["sha256"]:
+            raise PublisherError(f"channel {channel} dataset is incomplete")
+        dataset = validate_public_dataset(strict_load(target))
+        if pointer["generated_at"] != dataset["generated_at"]:
+            raise PublisherError(f"channel {channel} metadata differs from its dataset")
+        if channel == "dev-latest" and dataset["promotion"]["status"] != "promoted":
+            raise PublisherError("dev-latest points to a non-promoted dataset")
+        return pointer
+
+
+def _copy_source(source: Path, destination: Path) -> None:
+    if source.is_symlink() or not source.is_file() or not stat.S_ISREG(source.stat().st_mode):
+        raise PublisherError(f"source must be a regular non-symlink file: {source}")
+    descriptor = os.open(source, os.O_RDONLY | getattr(os, "O_NOFOLLOW", 0))
+    try:
+        output = os.open(destination, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o600)
+        try:
+            while True:
+                chunk = os.read(descriptor, 1024 * 1024)
+                if not chunk:
+                    break
+                _write_all(output, chunk)
+            os.fsync(output)
+        finally:
+            os.close(output)
+    finally:
+        os.close(descriptor)
+
+
+# Pack the regular files under ``source`` into a new uncompressed ZIP at
+# ``destination``. Symlinks and non-regular entries are rejected, members are
+# added in sorted path order with a fixed 1980-01-01 timestamp and mode 600,
+# and the count and size limits (MAX_ARCHIVE_MEMBERS, MAX_ARCHIVE_MEMBER_BYTES,
+# MAX_ARCHIVE_TOTAL_BYTES) are enforced. Raises PublisherError on any violation.
+def _archive_download_directory(source: Path, destination: Path) -> None:
+    if source.is_symlink() or not source.is_dir():
+        raise PublisherError(f"artifact directory is invalid: {source}")
+    files: list[Path] = []
+    for path in source.rglob("*"):
+        if path.is_symlink():
+            raise PublisherError("artifact directory contains a symlink")
+        if path.is_dir():
+            continue
+        if not path.is_file():
+            raise PublisherError("artifact directory contains a non-regular entry")
+        files.append(path)
+    files.sort()
+    if not files or len(files) > MAX_ARCHIVE_MEMBERS:
+        raise PublisherError("artifact directory has an invalid file count")
+    total = 0
+    # Mode "x" fails if the destination already exists.
+    with zipfile.ZipFile(destination, "x", compression=zipfile.ZIP_STORED) as archive:
+        for path in files:
+            descriptor = os.open(path, os.O_RDONLY | getattr(os, "O_NOFOLLOW", 0))
+            with os.fdopen(descriptor, "rb") as handle:
+                # Re-check the type on the open descriptor rather than the path.
+                metadata = os.fstat(handle.fileno())
+                if not stat.S_ISREG(metadata.st_mode):
+                    raise PublisherError("artifact directory member changed type")
+                size = metadata.st_size
+                total += size
+                if size > MAX_ARCHIVE_MEMBER_BYTES or total > MAX_ARCHIVE_TOTAL_BYTES:
+                    raise PublisherError("artifact directory exceeds size limits")
+                relative = path.relative_to(source).as_posix()
+                _safe_member(relative)
+                info = zipfile.ZipInfo(relative, date_time=(1980, 1, 1, 0, 0, 0))
+                info.compress_type = zipfile.ZIP_STORED
+                info.external_attr = (stat.S_IFREG | 0o600) << 16
+                with archive.open(info, "w") as output:
+                    written = 0
+                    for chunk in iter(lambda: handle.read(1024 * 1024), b""):
+                        output.write(chunk)
+                        written += len(chunk)
+                    # The byte count must equal the size seen by fstat above.
+                    if written != size:
+                        raise PublisherError("artifact directory member changed size")
+    # Reopen the finished archive to fsync its contents.
+    descriptor = os.open(destination, os.O_RDONLY)
+    try:
+        os.fsync(descriptor)
+    finally:
+        os.close(descriptor)
+
+
+def _artifact_name(source: Path) -> str:
+    name = source.name if source.is_dir() else source.name.removesuffix(".zip")
+    if (
+        not source.is_dir() and source.suffix != ".zip"
+        or ARTIFACT_NAME.fullmatch(name) is None
+    ):
+        raise PublisherError(f"artifact source has an invalid GHA name: {source.name}")
+    return name
+
+
+# Returns (ingest_id, installed directory, list of per-source records).
+# Raises PublisherError when no artifacts are given, artifact names are invalid
+# or duplicated, a source exceeds the size limit, or the installed object does
+# not match what was just archived.
+def archive_incoming(
+    store: Store,
+    matrix: Path,
+    artifacts: Sequence[Path],
+    run: dict[str, Any],
+) -> tuple[str, Path, list[dict[str, Any]]]:
+    """Copy exact delivery bytes into immutable incoming before any JSON/ZIP parse."""
+    if not artifacts:
+        raise PublisherError("at least one GitHub artifact archive is required")
+    with store.staging(store.incoming, private=True) as stage:
+        sources = stage / "sources"
+        sources.mkdir(mode=0o700)
+        copied: list[dict[str, Any]] = []
+        # Sort by artifact name so the stored file numbering does not depend on
+        # the order in which the artifacts were passed.
+        named_artifacts = sorted(
+            ((_artifact_name(path), path) for path in artifacts), key=lambda item: item[0]
+        )
+        artifact_names = [name for name, _ in named_artifacts]
+        if len(artifact_names) != len(set(artifact_names)):
+            raise PublisherError("artifact delivery contains duplicate GHA names")
+        # Each input is (stored file name, source path, kind, artifact name or None).
+        inputs = [("matrix.json", matrix, "matrix", None)] + [
+            (f"artifact-{index:04d}.zip", path, "artifact", artifact_name)
+            for index, (artifact_name, path) in enumerate(named_artifacts)
+        ]
+        for name, source, kind, artifact_name in inputs:
+            destination = sources / name
+            if source.is_dir():
+                # A directory source is packed into a ZIP at the destination.
+                _archive_download_directory(source, destination)
+            else:
+                # The size limit applies to artifact files only, not to the matrix.
+                if source != matrix and source.stat().st_size > MAX_ARCHIVE_TOTAL_BYTES:
+                    raise PublisherError("artifact archive exceeds the size limit")
+                _copy_source(source, destination)
+            copied.append({
+                **_file_metadata(destination, stage),
+                "kind": kind,
+                "artifact_name": artifact_name,
+            })
+        # The ingest ID is the digest of the run metadata plus the source records.
+        ingest_id = _sha_bytes(_canonical({"run": run, "sources": copied}))
+        incoming_manifest = {
+            "format": "collectivex.incoming.v1",
+            "schema_version": 1,
+            "ingest_id": ingest_id,
+            "run": run,
+            "sources": copied,
+        }
+        _write_json(stage / "incoming.json", incoming_manifest, mode=0o600)
+        store.complete(stage, ingest_id, private=True)
+        destination = store.incoming / ingest_id
+        store.install(stage, destination, private=True)
+    # install() keeps an already existing object, so compare what is on disk
+    # with the manifest built above.
+    installed = store.incoming / ingest_id
+    if strict_load(installed / "incoming.json") != incoming_manifest:
+        raise PublisherError("existing incoming object differs from archived delivery")
+    for record in copied:
+        _resolve_bundle_file(installed, record)
+    return ingest_id, installed, copied
+
+
+def _safe_member(name: str) -> PurePosixPath:
+    if "\\" in name or "\0" in name:
+        raise PublisherError("archive member has an unsafe separator")
+    path = PurePosixPath(name)
+    if path.is_absolute() or not path.parts or any(part in {"", ".", ".."} for part in path.parts):
+        raise PublisherError("archive member path escapes its artifact")
+    return path
+
+
+# Returns the list of extracted file paths (directory members are created but
+# not listed). Raises PublisherError for an unreadable ZIP, a bad member count,
+# unsafe or duplicate member paths, non-regular or encrypted members, and
+# members or totals above the configured size limits.
+def extract_archive(archive: Path, destination: Path) -> list[Path]:
+    """Extract a bounded regular-file ZIP without trusting member paths or links."""
+    try:
+        handle = zipfile.ZipFile(archive)
+    except (OSError, zipfile.BadZipFile) as exc:
+        raise PublisherError("artifact is not a valid ZIP archive") from exc
+    extracted: list[Path] = []
+    seen: set[str] = set()
+    total = 0
+    with handle:
+        members = handle.infolist()
+        if not members or len(members) > MAX_ARCHIVE_MEMBERS:
+            raise PublisherError("artifact has an invalid member count")
+        for member in members:
+            # Strip trailing slashes so "dir/" and "dir" map to the same key.
+            path = _safe_member(member.filename.rstrip("/"))
+            key = path.as_posix()
+            if key in seen:
+                raise PublisherError("artifact contains duplicate member paths")
+            seen.add(key)
+            # The high 16 bits of external_attr are read as a Unix st_mode;
+            # a zero value is accepted.
+            mode = member.external_attr >> 16
+            if stat.S_ISLNK(mode) or (mode and not (stat.S_ISREG(mode) or stat.S_ISDIR(mode))):
+                raise PublisherError("artifact contains a non-regular member")
+            # Bit 0 of the general-purpose flags marks an encrypted member.
+            if member.flag_bits & 0x1:
+                raise PublisherError("encrypted artifact members are not accepted")
+            if member.file_size > MAX_ARCHIVE_MEMBER_BYTES:
+                raise PublisherError("artifact member exceeds the size limit")
+            total += member.file_size
+            if total > MAX_ARCHIVE_TOTAL_BYTES:
+                raise PublisherError("artifact exceeds the expanded size limit")
+            target = destination.joinpath(*path.parts)
+            if member.is_dir():
+                target.mkdir(parents=True, exist_ok=True, mode=0o700)
+                continue
+            target.parent.mkdir(parents=True, exist_ok=True, mode=0o700)
+            # O_EXCL: the target must not exist yet.
+            output = os.open(target, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o600)
+            try:
+                with handle.open(member, "r") as source:
+                    written = 0
+                    while True:
+                        chunk = source.read(1024 * 1024)
+                        if not chunk:
+                            break
+                        _write_all(output, chunk)
+                        written += len(chunk)
+                    # The bytes written must match the size declared in the header.
+                    if written != member.file_size:
+                        raise PublisherError("artifact member size changed during extraction")
+                os.fsync(output)
+            finally:
+                os.close(output)
+            extracted.append(target)
+    return extracted
+
+
+def validate_matrix(document: Any) -> list[dict[str, Any]]:
+    try:
+        artifact_safety.assert_publication_safe([document])
+        matrix = sweep_matrix.validate_matrix_document(document)
+    except (SystemExit, ValueError, artifact_safety.ArtifactSafetyError) as exc:
+        raise PublisherError(f"requested matrix is invalid: {exc}") from exc
+    return [
+        {
+            "sku": item["sku"],
+            **item["case"],
+            "_disposition": item["disposition"],
+            "_reason": item["reason"],
+        }
+        for item in matrix["requested_cases"]
+    ]
+
+
+def _expected_deliveries(
+    matrix: dict[str, Any], cases: Sequence[dict[str, Any]], run: dict[str, Any]
+) -> dict[str, tuple[str, str, str]]:
+    shard_by_case: dict[str, str] = {}
+    for shard in matrix["include"]:
+        for case_id in shard["case_ids"]:
+            if case_id in shard_by_case:
+                raise PublisherError("requested case appears in two runnable shards")
+            shard_by_case[case_id] = shard["id"]
+    suffix = f"{run['run_id']}-{run['run_attempt']}"
+    deliveries: dict[str, tuple[str, str, str]] = {}
+    for case in cases:
+        case_id = case["case_id"]
+        if case["_disposition"] == "unsupported":
+            deliveries[case_id] = (
+                f"cxunsupported-{suffix}", "setup",
+                f"{run['run_id']}_{run['run_attempt']}_unsupported",
+            )
+            continue
+        shard_id = shard_by_case.get(case_id)
+        if shard_id is None:
+            raise PublisherError("runnable case has no matrix shard")
+        deliveries[case_id] = (
+            f"cxshard-{shard_id}-{suffix}", "sweep",
+            f"{run['run_id']}_{run['run_attempt']}_{shard_id}",
+        )
+    return deliveries
+
+
+def _document_git_run(document: dict[str, Any]) -> dict[str, Any] | None:
+    provenance = document.get("provenance")
+    if not isinstance(provenance, dict):
+        return None
+    value = provenance.get("git_run", provenance)
+    return value if isinstance(value, dict) else None
+
+
+def _run_matches(document: dict[str, Any], run: dict[str, Any]) -> bool:
+    git_run = _document_git_run(document)
+    if git_run is None:
+        return False
+    return (
+        str(git_run.get("run_id")) == run["run_id"]
+        and str(git_run.get("run_attempt")) == str(run["run_attempt"])
+        and git_run.get("qualification_index") == run["qualification_index"]
+        and git_run.get("source_sha") == run["source_sha"]
+        and (git_run.get("repo") or git_run.get("repository")) == run["repository"]
+    )
+
+
+def _case_matches(document: dict[str, Any], expected: dict[str, Any]) -> bool:
+    scheduled = {
+        key: value for key, value in expected.items()
+        if key not in {"sku", "case_id"} and not key.startswith("_")
+    }
+    return document.get("identity", {}).get("case_factors") == {
+        "case": scheduled,
+        "profile": identity.profile_for_case(scheduled),
+        "sku": expected["sku"],
+    }
+
+
+def _outcome(document: dict[str, Any]) -> tuple[str, str | None]:
+    status = document["outcome"]["status"]
+    if status == "success":
+        return status, None
+    native = document["outcome"].get("reason")
+    reason = native if isinstance(native, str) and REASON.fullmatch(native) else {
+        "unsupported": "unsupported-capability", "failed": "execution-failed",
+        "invalid": "validation-failed", "diagnostic": "diagnostic-evidence",
+    }.get(status)
+    if reason is None:
+        raise PublisherError(f"unsupported native outcome {status!r}")
+    return status, reason
+
+
+def _attempt_record(
+    document: dict[str, Any], path: Path, root: Path, *, selected: bool
+) -> dict[str, Any]:
+    normalized = contracts.normalize_attempt(document)
+    runtime = normalized["runtime_fingerprint"]
+    runtime_sha = _sha_bytes(_canonical(runtime)) if runtime is not None else None
+    sample_record = None
+    evidence_ids: list[str] = []
+    series_ids: list[str] = []
+    if document["format"] == contracts.RAW_FORMAT:
+        sample_path = path.with_name(document["sample_artifact"]["path"])
+        sample_record = _file_metadata(sample_path, root)
+        evidence_ids = [row["evidence_id"] for row in document["measurement"]["rows"]]
+        series_ids = [document["identity"]["series_id"]]
+        declared = document["identity"]["series_factors"]["runtime_fingerprint_sha256"]
+        if runtime_sha != declared:
+            raise PublisherError("runtime fingerprint checksum differs from series identity")
+    status, reason = _outcome(document)
+    return {
+        "attempt_id": normalized["attempt_id"],
+        "allocation_id": normalized["allocation_id"],
+        "case_id": normalized["case_id"],
+        "outcome": status,
+        "reason": reason,
+        "selected": selected,
+        "document": _file_metadata(path, root),
+        "samples": sample_record,
+        "runtime_fingerprint_sha256": runtime_sha,
+        "series_ids": series_ids,
+        "evidence_ids": evidence_ids,
+    }
+
+
+def _validate_delivery_binding(
+    document: dict[str, Any], path: Path, raw_root: Path,
+    artifact_by_root: dict[str, str], expected_by_id: dict[str, dict[str, Any]],
+    expected_deliveries: dict[str, tuple[str, str, str]], run: dict[str, Any],
+) -> str:
+    case_id = document["identity"]["case_id"]
+    if case_id not in expected_by_id:
+        raise PublisherError("artifact contains an extra case outcome")
+    expected = expected_by_id[case_id]
+    if not _case_matches(document, expected):
+        raise PublisherError("attempt case coordinates differ from the requested matrix")
+    unsupported = document["outcome"]["status"] == "unsupported"
+    if (expected["_disposition"] == "unsupported") != unsupported:
+        raise PublisherError("terminal outcome differs from requested capability disposition")
+    if unsupported and document["outcome"]["reason"] != expected["_reason"]:
+        raise PublisherError("unsupported outcome reason differs from requested matrix")
+    if not _run_matches(document, run):
+        raise PublisherError("attempt provenance differs from publisher run metadata")
+    relative = path.relative_to(raw_root)
+    if len(relative.parts) < 2:
+        raise PublisherError("attempt document is outside a delivered artifact")
+    delivered_name = artifact_by_root.get(relative.parts[0])
+    expected_name, expected_job, expected_execution = expected_deliveries[case_id]
+    git_run = _document_git_run(document)
+    allocation = document["identity"]["allocation_factors"]
+    if (
+        git_run is None
+        or delivered_name != expected_name
+        or git_run["artifact"] != delivered_name
+        or git_run["job"] != expected_job
+        or allocation["execution_id"] != expected_execution
+    ):
+        raise PublisherError("attempt provenance differs from its delivered GHA shard")
+    return case_id
+
+
+def _parse_extracted(root: Path) -> tuple[list[tuple[Path, dict[str, Any]]], set[Path]]:
+    attempts: list[tuple[Path, dict[str, Any]]] = []
+    consumed_samples: set[Path] = set()
+    json_paths = sorted(path for path in root.rglob("*.json") if path.is_file())
+    for path in json_paths:
+        if path in consumed_samples:
+            continue
+        try:
+            document = contracts.strict_load(path)
+            artifact_safety.assert_publication_safe([document])
+            format_name = document.get("format") if isinstance(document, dict) else None
+            if format_name == contracts.SAMPLES_FORMAT:
+                _schema("samples-v1.schema.json", document)
+                # It must be claimed by a raw document; orphan checking happens after the scan.
+                continue
+            if format_name == contracts.RAW_FORMAT:
+                _schema("raw-case-v1.schema.json", document)
+                sample_path = path.with_name(document["sample_artifact"]["path"])
+                sample_document = contracts.strict_load(sample_path)
+                artifact_safety.assert_publication_safe([sample_document])
+                _schema("samples-v1.schema.json", sample_document)
+                validated = contracts.load_raw_attempt(path)
+                consumed_samples.add(sample_path)
+            elif format_name == contracts.TERMINAL_FORMAT:
+                _schema("terminal-outcome-v1.schema.json", document)
+                validated = contracts.validate_terminal_document(document)
+            else:
+                raise PublisherError(f"artifact contains unknown JSON document {path.name}")
+        except (
+            contracts.ContractError, artifact_safety.ArtifactSafetyError,
+            jsonschema.ValidationError, OSError,
+        ) as exc:
+            raise PublisherError(f"native contract rejected {path.name}: {exc}") from exc
+        attempts.append((path, validated))
+    orphan_samples = [
+        path for path in json_paths
+        if isinstance((doc := contracts.strict_load(path)), dict)
+        and doc.get("format") == contracts.SAMPLES_FORMAT
+        and path not in consumed_samples
+    ]
+    if orphan_samples:
+        raise PublisherError("artifact contains an orphan samples document")
+    if not attempts:
+        raise PublisherError("artifact contains zero native attempt documents")
+    return attempts, consumed_samples
+
+
+def build_bundle(
+    store: Store,
+    incoming_id: str,
+    incoming_path: Path,
+    run: dict[str, Any],
+) -> tuple[str, dict[str, Any], list[dict[str, Any]]]:
+    """Validate one exact workflow delivery and install its immutable private bundle.
+
+    Args:
+        store: Supplies the ``bundles`` root and the ``staging``, ``complete`` and
+            ``install`` operations used to publish the staged directory.
+        incoming_id: Expected ingest identity. It must equal the manifest's
+            ``ingest_id`` and the digest of the canonical ``{"run", "sources"}`` pair.
+        incoming_path: Directory that holds ``incoming.json``; source records are
+            resolved against it.
+        run: Run descriptor that the manifest's ``run`` field must equal.
+
+    Returns:
+        ``(bundle_id, bundle, selected_documents)``: the digest of the canonical
+        manifest bytes, the manifest itself, and the highest-ordinal attempt document
+        of every expected case, in sorted ``case_id`` order.
+
+    Raises:
+        PublisherError: If an identity, source-catalog, coverage, retry-ordering or
+            runtime-homogeneity check fails, or if the bundle loaded back from the
+            store carries a manifest different from the one built here.
+    """
+    incoming_manifest = strict_load(incoming_path / "incoming.json")
+    # _exact receives the permitted key set; presumably it rejects missing or extra keys.
+    _exact(
+        incoming_manifest,
+        {"format", "schema_version", "ingest_id", "run", "sources"},
+        "incoming",
+    )
+    artifact_safety.assert_publication_safe([incoming_manifest])
+    # The ingest id is checked twice: as the declared field and as the recomputed
+    # digest over the canonical run + sources pair.
+    if (
+        incoming_manifest["format"] != "collectivex.incoming.v1"
+        or incoming_manifest["schema_version"] != 1
+        or incoming_manifest["ingest_id"] != incoming_id
+        or incoming_manifest["run"] != run
+        or _sha_bytes(_canonical({"run": run, "sources": incoming_manifest["sources"]}))
+        != incoming_id
+    ):
+        raise PublisherError("incoming manifest identity differs from archived delivery")
+    incoming_sources = _array(incoming_manifest["sources"], "incoming.sources", nonempty=True)
+    for index, record in enumerate(incoming_sources):
+        _exact(
+            record,
+            {"path", "sha256", "bytes", "kind", "artifact_name"},
+            f"incoming.sources[{index}]",
+        )
+        _resolve_bundle_file(incoming_path, record)
+    # NOTE(review): a record whose kind is neither "matrix" nor "artifact" lands in
+    # neither list and is not rejected by the catalog check below; confirm an earlier
+    # validator (or _resolve_bundle_file) already refuses unknown kinds.
+    matrix_records = [record for record in incoming_sources if record["kind"] == "matrix"]
+    artifact_records = [record for record in incoming_sources if record["kind"] == "artifact"]
+    # Exactly one unnamed matrix record, at least one artifact record, and artifact
+    # names that each match ARTIFACT_NAME and are pairwise distinct.
+    if (
+        len(matrix_records) != 1
+        or matrix_records[0]["artifact_name"] is not None
+        or not artifact_records
+        or any(ARTIFACT_NAME.fullmatch(record["artifact_name"] or "") is None
+               for record in artifact_records)
+        or len({record["artifact_name"] for record in artifact_records}) != len(artifact_records)
+    ):
+        raise PublisherError("incoming source catalog is invalid")
+    matrix_source = _resolve_bundle_file(incoming_path, matrix_records[0])
+    matrix_document = strict_load(matrix_source)
+    expected_cases = validate_matrix(matrix_document)
+    expected_by_id = {case["case_id"]: case for case in expected_cases}
+    expected_deliveries = _expected_deliveries(matrix_document, expected_cases, run)
+    # The delivered artifact names must equal the first element of every expected
+    # delivery: no missing and no surplus archives.
+    if {record["artifact_name"] for record in artifact_records} != {
+        delivery[0] for delivery in expected_deliveries.values()
+    }:
+        raise PublisherError("incoming artifact archive set differs from requested matrix shards")
+    with store.staging(store.bundles, private=True) as stage:
+        source_copy = stage / "source"
+        raw_root = stage / "raw"
+        source_copy.mkdir(mode=0o700)
+        raw_root.mkdir(mode=0o700)
+        matrix_path = stage / "matrix.json"
+        _copy_source(matrix_source, matrix_path)
+        source_records: list[dict[str, Any]] = []
+        # Maps the extraction directory name (artifact-NNNN) back to the artifact name.
+        artifact_by_root: dict[str, str] = {}
+        for index, source_record in enumerate(artifact_records):
+            archive = _resolve_bundle_file(incoming_path, source_record)
+            copied = source_copy / f"artifact-{index:04d}.zip"
+            _copy_source(archive, copied)
+            source_records.append({
+                **_file_metadata(copied, stage),
+                "artifact_name": source_record["artifact_name"],
+            })
+            artifact_root = raw_root / f"artifact-{index:04d}"
+            artifact_root.mkdir(mode=0o700)
+            artifact_by_root[artifact_root.name] = source_record["artifact_name"]
+            # Extraction reads the staged copy, not the original incoming archive.
+            extract_archive(copied, artifact_root)
+        parsed, consumed_samples = _parse_extracted(raw_root)
+        created_at = _latest_timestamp(
+            [document["generated_at"] for _, document in parsed]
+        )
+        # Every regular, non-symlink file under raw/ must be either a parsed attempt
+        # document or a samples document consumed by one.
+        consumed_files = {path for path, _ in parsed} | consumed_samples
+        extracted_files = {
+            path for path in raw_root.rglob("*")
+            if path.is_file() and not path.is_symlink()
+        }
+        if consumed_files != extracted_files:
+            raise PublisherError("artifact contains an unconsumed non-native member")
+        by_case: dict[str, list[tuple[Path, dict[str, Any]]]] = {}
+        for path, document in parsed:
+            case_id = _validate_delivery_binding(
+                document, path, raw_root, artifact_by_root, expected_by_id,
+                expected_deliveries, run,
+            )
+            by_case.setdefault(case_id, []).append((path, document))
+        missing = set(expected_by_id) - set(by_case)
+        if missing:
+            raise PublisherError(f"artifact is missing {len(missing)} requested case outcomes")
+        attempt_records: list[dict[str, Any]] = []
+        selections: list[dict[str, Any]] = []
+        selected_documents: list[dict[str, Any]] = []
+        runtime_hashes: set[str] = set()
+        outcome_counts = {name: 0 for name in OUTCOMES}
+        for case_id in sorted(expected_by_id):
+            case_attempts = by_case[case_id]
+            ordinals = [document["identity"]["attempt_ordinal"] for _, document in case_attempts]
+            allocations_for_case = {
+                document["identity"]["allocation_id"] for _, document in case_attempts
+            }
+            # All attempts of a case share one allocation and are numbered 1..n without gaps.
+            if len(allocations_for_case) != 1 or sorted(ordinals) != list(
+                range(1, len(ordinals) + 1)
+            ):
+                raise PublisherError(
+                    "case retries must retain contiguous ordinals in one allocation"
+                )
+            # The attempt with the highest ordinal is the one selected for the case.
+            _, selected_document = max(
+                case_attempts, key=lambda item: item[1]["identity"]["attempt_ordinal"]
+            )
+            selected_id = selected_document["identity"]["attempt_id"]
+            selected_documents.append(selected_document)
+            selected_status, _ = _outcome(selected_document)
+            selections.append({
+                "case_id": case_id,
+                "selected_attempt_id": selected_id,
+                "outcome": selected_status,
+            })
+            outcome_counts[selected_status] += 1
+            # Attempt records are emitted in ascending ordinal order within each case.
+            for path, document in sorted(
+                case_attempts, key=lambda item: item[1]["identity"]["attempt_ordinal"]
+            ):
+                normalized = contracts.normalize_attempt(document)
+                if document["format"] == contracts.RAW_FORMAT:
+                    sample_path = path.with_name(document["sample_artifact"]["path"])
+                    if sample_path not in consumed_samples:
+                        raise PublisherError("validated raw attempt lost its samples document")
+                record = _attempt_record(
+                    document, path, stage,
+                    selected=normalized["attempt_id"] == selected_id,
+                )
+                if record["runtime_fingerprint_sha256"]:
+                    runtime_hashes.add(record["runtime_fingerprint_sha256"])
+                attempt_records.append(record)
+        # Every extracted byte is covered; the bundle manifest anchors this checksum catalog.
+        # The catalog is computed before checksums.json and bundle.json are written,
+        # so it does not list either of those two files.
+        payload_records = [_file_metadata(path, stage) for path in _tree_files(stage)]
+        checksum_document = {
+            "format": "collectivex.checksums.v1",
+            "files": payload_records,
+        }
+        checksum_path = stage / "checksums.json"
+        _write_json(checksum_path, checksum_document, mode=0o600)
+        bundle = {
+            "format": FORMAT_BUNDLE,
+            "schema_version": 1,
+            "created_at": created_at,
+            "ingest_id": incoming_id,
+            "run": run,
+            "matrix": _file_metadata(matrix_path, stage),
+            "sources": source_records,
+            "attempts": attempt_records,
+            "coverage": {
+                "expected_cases": len(expected_cases),
+                "terminal_cases": len(selections),
+                "complete": len(selections) == len(expected_cases),
+                "outcome_counts": outcome_counts,
+                "selections": selections,
+            },
+            "runtime_fingerprints": sorted(runtime_hashes),
+            "checksums": _file_metadata(checksum_path, stage),
+            "validation": {
+                "policy": PUBLISHER_POLICY,
+                "passed": True,
+                "checks": [
+                    "archive-safety", "checksums", "exact-coverage", "identity",
+                    "native-schema", "privacy", "runtime-homogeneity", "terminal-outcomes",
+                ],
+            },
+        }
+        validate_bundle_manifest(bundle)
+        # Runtime homogeneity is scoped to a realized allocation, not across unlike SKUs.
+        # This check runs after the manifest is assembled (which already lists
+        # "runtime-homogeneity" as passed) but before bundle.json is written or installed.
+        by_allocation: dict[str, set[str]] = {}
+        for attempt in attempt_records:
+            fingerprint = attempt["runtime_fingerprint_sha256"]
+            if fingerprint:
+                by_allocation.setdefault(attempt["allocation_id"], set()).add(fingerprint)
+        if any(len(values) != 1 for values in by_allocation.values()):
+            raise PublisherError("runtime fingerprint is heterogeneous within an allocation")
+        # The bundle id is the digest of exactly the bytes written to bundle.json
+        # (canonical form plus a trailing newline).
+        bundle_bytes = _canonical(bundle) + b"\n"
+        bundle_id = _sha_bytes(bundle_bytes)
+        _write_bytes(stage / "bundle.json", bundle_bytes, mode=0o600)
+        store.complete(stage, bundle_id, private=True)
+        store.install(stage, store.bundles / bundle_id, private=True)
+    # Read the bundle back from the store and compare manifests; the error text
+    # suggests this guards against a pre-existing bundle under the same id.
+    installed = load_bundle(store, bundle_id)
+    if installed["manifest"] != bundle:
+        raise PublisherError("existing bundle differs from validated manifest")
+    return bundle_id, bundle, selected_documents
+
+
+def _slug(value: Any, fallback: str = "unknown") -> str:
+    text = re.sub(r"[^a-z0-9_.-]+", "-", str(value or "").lower()).strip("-.")
+    return text[:128] if text and SAFE_ID.fullmatch(text[:128]) else fallback
+
+
+def _derived_id(prefix: str, value: Any) -> str:
+    return f"{prefix}{_sha_bytes(_canonical(value))}"
+
+
+def _git_run(document: dict[str, Any]) -> dict[str, Any]:
+    return _document_git_run(document) or {}
+
+
+def _public_attempt(document: dict[str, Any], *, selected: bool = False) -> dict[str, Any]:
+    normalized = contracts.normalize_attempt(document)
+    run = _git_run(document)
+    evidence = (
+        [{"evidence_id": row["evidence_id"], "point_id": row["point_id"]}
+         for row in document["measurement"]["rows"]]
+        if document["format"] == contracts.RAW_FORMAT else []
+    )
+    status, reason = _outcome(document)
+    failure_mode = document["outcome"].get("failure_mode")
+    if not isinstance(failure_mode, str) or REASON.fullmatch(failure_mode) is None:
+        failure_mode = None if status == "success" else reason
+    series_id = normalized["series_id"] if status == "success" and selected else None
+    return {
+        "attempt_id": normalized["attempt_id"],
+        "evidence": evidence,
+        "case_id": normalized["case_id"],
+        "allocation_id": normalized["allocation_id"],
+        "run_id": str(run["run_id"]),
+        "run_attempt": int(run["run_attempt"]),
+        "qualification_index": int(run["qualification_index"]),
+        "attempt_index": document["identity"]["attempt_ordinal"],
+        "selected": selected,
+        "outcome": status,
+        "failure_mode": failure_mode,
+        "reason": reason,
+        "series_id": series_id,
+        "completed_at": document["generated_at"],
+    }
+
+
+def _ratio(values: Sequence[float]) -> float | None:
+    return max(values) / min(values) if len(values) >= REQUIRED_ALLOCATIONS and min(values) > 0 else None
+
+
+def _private_trial_components(sample_document: dict[str, Any]) -> dict[int, dict[str, Any]]:
+    """Copy validated trial blocks into publisher-private memory without fixing component names."""
+    points: dict[int, dict[str, Any]] = {}
+    for point in sample_document["points"]:
+        token = point["tokens_per_rank"]
+        components: dict[str, Any] = {}
+        for name, component in point["components"].items():
+            availability = component["availability"]
+            if availability in {"unavailable", "not-applicable"}:
+                components[name] = None
+                continue
+            if availability != "measured":
+                raise PublisherError(f"private sample component {name} has invalid availability")
+            trials = component["trials"]
+            if (
+                not isinstance(trials, list)
+                or len(trials) != 64
+                or any(not isinstance(trial, list) or len(trial) != 8 for trial in trials)
+            ):
+                raise PublisherError(f"private sample component {name} is not 64x8")
+            copied = tuple(
+                tuple(float(sample) for sample in trial)
+                for trial in trials
+            )
+            if any(
+                not math.isfinite(sample) or sample < 0
+                for trial in copied for sample in trial
+            ):
+                raise PublisherError(f"private sample component {name} is not finite")
+            components[name] = copied
+        points[token] = components
+    return points
+
+
+def _trial_diagnostics(
+    trial_blocks: dict[str, dict[int, dict[str, Any]]], token: int,
+) -> dict[str, Any]:
+    components: dict[str, Any] = {}
+    reasons: set[str] = set()
+    for name in ("dispatch", "stage", "combine", "roundtrip"):
+        values = [trial_blocks[run_id][token][name] for run_id in sorted(trial_blocks)]
+        if all(value is None for value in values):
+            components[name] = None
+            continue
+        if any(value is None for value in values):
+            raise PublisherError(f"{name} trial availability differs across qualification runs")
+        array = np.asarray(values, dtype=np.float64)
+        if array.shape != (REQUIRED_ALLOCATIONS, 64, 8) or not np.isfinite(array).all():
+            raise PublisherError(f"{name} trial diagnostics require three finite 64x8 runs")
+        medians = np.median(array, axis=2)
+        first = np.median(medians[:, :8], axis=1)
+        last = np.median(medians[:, -8:], axis=1)
+        if np.any(first <= 0) or np.any(last <= 0):
+            raise PublisherError(f"{name} trial diagnostics require positive latency")
+        drift_ratio = float(np.max(np.maximum(first / last, last / first)))
+        center = float(np.median(medians))
+        mad = float(np.median(np.abs(medians - center)))
+        if mad == 0:
+            outliers = np.abs(medians - center) > 0
+        else:
+            outliers = np.abs(medians - center) > (
+                TRIAL_OUTLIER_MAD_MULTIPLIER * 1.4826 * mad
+            )
+        outlier_fraction = float(np.count_nonzero(outliers) / medians.size)
+        drift_flagged = drift_ratio > TRIAL_DRIFT_RATIO_LIMIT
+        outlier_flagged = outlier_fraction > TRIAL_OUTLIER_FRACTION_LIMIT
+        if drift_flagged:
+            reasons.add("trial-drift")
+        if outlier_flagged:
+            reasons.add("trial-outliers")
+        components[name] = {
+            "drift_flagged": drift_flagged,
+            "first_last_median_ratio": drift_ratio,
+            "outlier_flagged": outlier_flagged,
+            "robust_outlier_fraction": outlier_fraction,
+            "trial_count": int(medians.size),
+        }
+    return {
+        "flagged": bool(reasons),
+        "reasons": sorted(reasons),
+        "components": components,
+    }
+
+
+def _nearest_rank_p99(blocks: Sequence[Sequence[float]]) -> float:
+    samples = sorted(float(sample) for block in blocks for sample in block)
+    if len(samples) != 512 or samples[0] < 0 or not all(map(math.isfinite, samples)):
+        raise PublisherError("p99 bootstrap input must contain 512 finite samples")
+    return samples[math.ceil(0.99 * len(samples)) - 1]
+
+
+def _roundtrip_trial_array(
+    internal: dict[str, Any], token: int
+) -> tuple[tuple[str, ...], np.ndarray]:
+    trial_blocks = internal.get("trial_blocks")
+    if not isinstance(trial_blocks, dict):
+        raise PublisherError("series is missing private trial blocks")
+    run_ids = tuple(sorted(trial_blocks, key=lambda value: (int(value), value)))
+    if len(run_ids) != REQUIRED_ALLOCATIONS:
+        raise PublisherError("p99 bootstrap requires exactly three run blocks")
+    values = []
+    for run_id in run_ids:
+        point = trial_blocks[run_id].get(token)
+        blocks = point.get("roundtrip") if isinstance(point, dict) else None
+        if blocks is None:
+            raise PublisherError("p99 bootstrap requires measured roundtrip blocks")
+        if len(blocks) != 64 or any(len(block) != 8 for block in blocks):
+            raise PublisherError("p99 bootstrap roundtrip blocks must be 64x8")
+        values.append(blocks)
+    array = np.asarray(values, dtype=np.float64)
+    if array.shape != (REQUIRED_ALLOCATIONS, 64, 8):
+        raise PublisherError("p99 bootstrap trial array shape differs")
+    if not np.isfinite(array).all() or np.any(array <= 0):
+        raise PublisherError("p99 bootstrap latencies must be finite and positive")
+    return run_ids, array
+
+
+def _bootstrap_seed(
+    dataset_binding: str, baseline_series_id: str, candidate_series_id: str, token: int
+) -> tuple[str, int]:
+    payload = _canonical({
+        "policy": BOOTSTRAP_POLICY,
+        "resamples": BOOTSTRAP_RESAMPLES,
+        "confidence": BOOTSTRAP_CONFIDENCE,
+        "equivalence_band": BOOTSTRAP_EQUIVALENCE_BAND,
+        "dataset_binding": dataset_binding,
+        "baseline_series_id": baseline_series_id,
+        "candidate_series_id": candidate_series_id,
+        "tokens_per_rank": token,
+    })
+    digest = hashlib.sha256(payload).digest()
+    return digest.hex(), int.from_bytes(digest[:16], "big")
+
+
+def _hierarchical_p99_ratio(
+    baseline_series_id: str,
+    candidate_series_id: str,
+    token: int,
+    internals: dict[str, dict[str, Any]],
+    dataset_binding: str,
+) -> dict[str, Any]:
+    """Bootstrap candidate/baseline p99 across runs, then 64 trial blocks."""
+    baseline_runs, baseline = _roundtrip_trial_array(
+        internals[baseline_series_id], token
+    )
+    candidate_runs, candidate = _roundtrip_trial_array(
+        internals[candidate_series_id], token
+    )
+    if baseline_runs != candidate_runs:
+        raise PublisherError("p99 bootstrap run blocks are not aligned")
+    seed_sha256, seed = _bootstrap_seed(
+        dataset_binding, baseline_series_id, candidate_series_id, token
+    )
+    cache_key = (
+        seed_sha256,
+        _sha_bytes(baseline.tobytes()),
+        _sha_bytes(candidate.tobytes()),
+    )
+    cached = _BOOTSTRAP_CACHE.get(cache_key)
+    if cached is not None:
+        return dict(cached)
+
+    baseline_run_p99 = np.asarray(
+        [_nearest_rank_p99(run) for run in baseline], dtype=np.float64
+    )
+    candidate_run_p99 = np.asarray(
+        [_nearest_rank_p99(run) for run in candidate], dtype=np.float64
+    )
+    run_ratios = candidate_run_p99 / baseline_run_p99
+    point_ratio = float(np.median(candidate_run_p99) / np.median(baseline_run_p99))
+
+    rng = np.random.Generator(np.random.PCG64(seed))
+    ratios = np.empty(BOOTSTRAP_RESAMPLES, dtype=np.float64)
+    p99_index = math.ceil(0.99 * 512) - 1
+    for start in range(0, BOOTSTRAP_RESAMPLES, BOOTSTRAP_CHUNK_SIZE):
+        size = min(BOOTSTRAP_CHUNK_SIZE, BOOTSTRAP_RESAMPLES - start)
+        sampled_runs = rng.integers(0, REQUIRED_ALLOCATIONS, size=(size, 3))
+        sampled_blocks = rng.integers(0, 64, size=(size, 3, 64))
+        run_index = sampled_runs[:, :, None]
+        baseline_sample = baseline[run_index, sampled_blocks].reshape(size, 3, 512)
+        candidate_sample = candidate[run_index, sampled_blocks].reshape(size, 3, 512)
+        baseline_p99 = np.partition(baseline_sample, p99_index, axis=2)[:, :, p99_index]
+        candidate_p99 = np.partition(candidate_sample, p99_index, axis=2)[:, :, p99_index]
+        ratios[start:start + size] = (
+            np.median(candidate_p99, axis=1) / np.median(baseline_p99, axis=1)
+        )
+    ratios.sort()
+    tail = (1.0 - BOOTSTRAP_CONFIDENCE) / 2.0
+    lower_index = max(0, math.ceil(tail * BOOTSTRAP_RESAMPLES) - 1)
+    upper_index = min(
+        BOOTSTRAP_RESAMPLES - 1,
+        math.ceil((1.0 - tail) * BOOTSTRAP_RESAMPLES) - 1,
+    )
+    ci = [float(ratios[lower_index]), float(ratios[upper_index])]
+    threshold = 1.0 + BOOTSTRAP_EQUIVALENCE_BAND
+    baseline_wins = ci[0] > threshold and bool(np.all(run_ratios > threshold))
+    result = {
+        "policy": BOOTSTRAP_POLICY,
+        "resamples": BOOTSTRAP_RESAMPLES,
+        "confidence": BOOTSTRAP_CONFIDENCE,
+        "equivalence_band": BOOTSTRAP_EQUIVALENCE_BAND,
+        "seed_sha256": seed_sha256,
+        "point_ratio": point_ratio,
+        "ci95": ci,
+        "run_ratios": [float(value) for value in run_ratios],
+        "all_runs_agree": bool(np.all(run_ratios > threshold)),
+        "baseline_wins": baseline_wins,
+        "tie": not baseline_wins,
+    }
+    _BOOTSTRAP_CACHE[cache_key] = result
+    return dict(result)
+
+
+def _bootstrap_inputs_ready(
+    members: Sequence[dict[str, Any]],
+    internals: dict[str, dict[str, Any]],
+    tokens: Sequence[int],
+) -> bool:
+    try:
+        expected_runs: tuple[str, ...] | None = None
+        for member in members:
+            for token in tokens:
+                run_ids, _ = _roundtrip_trial_array(internals[member["series_id"]], token)
+                if expected_runs is None:
+                    expected_runs = run_ids
+                elif run_ids != expected_runs:
+                    return False
+        return expected_runs is not None
+    except (KeyError, PublisherError, TypeError, ValueError):
+        return False
+
+
+def _eligibility_record(
+    allocations: Sequence[str],
+    *,
+    complete: bool,
+    correct: bool,
+    measured: bool,
+    stable_ordering: bool,
+    p50_ratio: float | None,
+    p99_ratio: float | None,
+    extra_reasons: Sequence[str] = (),
+) -> dict[str, Any]:
+    ids = sorted(set(allocations))
+    stable_p50 = p50_ratio is not None and p50_ratio <= P50_STABILITY_LIMIT
+    stable_p99 = p99_ratio is not None and p99_ratio <= P99_STABILITY_LIMIT
+    reasons = list(extra_reasons)
+    for condition, reason in (
+        (len(ids) >= REQUIRED_ALLOCATIONS, "insufficient-allocations"),
+        (complete, "incomplete-repeat-coverage"),
+        (correct, "correctness-failed"),
+        (measured, "missing-measured-roundtrip-p99"),
+        (stable_p50, "unstable-p50"),
+        (stable_p99, "unstable-p99"),
+        (stable_ordering, "unstable-ordering"),
+    ):
+        if not condition:
+            reasons.append(reason)
+    reasons = sorted(set(reasons))
+    decision = not reasons
+    return {
+        "decision_grade": decision,
+        "allocation_ids": ids,
+        "complete": complete,
+        "correct": correct,
+        "measured_roundtrip_p99": measured,
+        "stable_p50": stable_p50,
+        "stable_p99": stable_p99,
+        "stable_ordering": stable_ordering,
+        "p50_max_min_ratio": p50_ratio,
+        "p99_max_min_ratio": p99_ratio,
+        "reasons": reasons,
+    }
+
+
+def _aggregate_percentiles(values: Sequence[dict[str, Any]]) -> dict[str, float]:
+    return {
+        name: float(statistics.median(float(value[name]) for value in values))
+        for name in ("p50", "p90", "p95", "p99")
+    }
+
+
+def _aggregate_component(
+    rows: Sequence[dict[str, Any]], name: str
+) -> dict[str, Any] | None:
+    components = [row["components"][name] for row in rows]
+    if all(component["availability"] == "unavailable" for component in components):
+        return None
+    if any(component["availability"] == "unavailable" for component in components):
+        raise PublisherError("component availability differs across repeat allocations")
+    latency = _aggregate_percentiles([component["percentiles_us"] for component in components])
+    if name == "isolated_sum":
+        byte_provenance = {
+            "accounting_contract": "activation-data-plus-scales-v1",
+            "activation_data_bytes": 0,
+            "scale_bytes": 0,
+            "total_logical_bytes": 0,
+        }
+        return {
+            "origin": "derived",
+            "latency_us": latency,
+            "byte_provenance": byte_provenance,
+            "activation_data_rate_gbps_at_latency_percentile": None,
+            "total_logical_data_rate_gbps_at_latency_percentile": None,
+            "sample_count": None,
+        }
+    byte_provenance = _exact_repeat_value(
+        [row["byte_provenance"][name] for row in rows],
+        f"{name} byte accounting",
+    )
+    activation_rates = {
+        statistic: byte_provenance["activation_data_bytes"] / (latency[statistic] * 1000.0)
+        for statistic in latency
+    }
+    total_rates = {
+        statistic: byte_provenance["total_logical_bytes"] / (latency[statistic] * 1000.0)
+        for statistic in latency
+    }
+    return {
+        "origin": "measured",
+        "latency_us": latency,
+        "byte_provenance": byte_provenance,
+        "activation_data_rate_gbps_at_latency_percentile": activation_rates,
+        "total_logical_data_rate_gbps_at_latency_percentile": total_rates,
+        "sample_count": 512,
+    }
+
+
+def _exact_repeat_value(values: Sequence[Any], label: str) -> Any:
+    if not values or len({_canonical(value) for value in values}) != 1:
+        raise PublisherError(f"{label} differs across repeat allocations")
+    return values[0]
+
+
+def _eplb_descriptor(document: dict[str, Any]) -> dict[str, Any]:
+    value = document["case"]["eplb"]
+    return {
+        "enabled": value["enabled"],
+        "calibration_workload_id": value["calibration_workload_id"],
+        "calibration_trace_sha256": value["calibration_trace_sha256"],
+        "calibration_window": value["calibration_window"],
+        "calibration_token_offset": value["calibration_token_offset"],
+        "planner": value["planner"],
+        "mapping_sha256": value["mapping_hash"],
+        "logical_experts": value["num_logical_experts"],
+        "physical_experts": value["num_physical_experts"],
+        "redundant_experts": value["num_redundant"],
+        "reference_tokens_per_rank": value["reference_tokens_per_rank"],
+        "replicated_experts": value["replicated_experts"],
+        "max_replicas": value["max_replicas"],
+        "imbalance_before": value["imbalance_before"],
+        "imbalance_after": value["imbalance_after"],
+    }
+
+
+def _routing_facts(row: dict[str, Any]) -> dict[str, Any]:
+    routing = row["routing"]
+    return {
+        "fanout_mean": routing["fanout_mean"],
+        "recv_tokens_max": row["receive"]["max"],
+        "expert_load_cv": routing["expert_load_cv"],
+        "payload_rank_cv": routing["payload_rank_cv"],
+        "hotspot_ratio": routing["hotspot_ratio"],
+        "empty_expert_count": routing["empty_expert_count"],
+        "empty_rank_count": routing["empty_rank_count"],
+        "routed_copies": routing["routed_copies"],
+    }
+
+
+def _aggregate_precision_evidence(rows: Sequence[dict[str, Any]]) -> dict[str, Any]:
+    values = [row["correctness"]["precision"] for row in rows]
+    profile_ids = {value["profile_id"] for value in values}
+    if len(profile_ids) != 1:
+        raise PublisherError("precision evidence profile differs across qualification runs")
+    result: dict[str, Any] = {"profile_id": profile_ids.pop()}
+    for direction in ("dispatch", "combine"):
+        axes = [value[direction] for value in values]
+        finite = [axis["scales_finite"] for axis in axes]
+        positive = [axis["scales_positive"] for axis in axes]
+        result[direction] = {
+            "encoded_payload_valid": all(axis["encoded_payload_valid"] for axis in axes),
+            "scales_finite": None if all(value is None for value in finite) else all(
+                value is True for value in finite
+            ),
+            "scales_positive": None if all(value is None for value in positive) else all(
+                value is True for value in positive
+            ),
+            "dequantized_semantics": all(axis["dequantized_semantics"] for axis in axes),
+            "saturation_count": max(axis["saturation_count"] for axis in axes),
+            "saturation_rate": max(axis["saturation_rate"] for axis in axes),
+            "max_abs_error": max(axis["max_abs_error"] for axis in axes),
+            "max_rel_error": max(axis["max_rel_error"] for axis in axes),
+            "passed": all(axis["passed"] for axis in axes),
+        }
+    result["passed"] = result["dispatch"]["passed"] and result["combine"]["passed"]
+    return result
+
+
+def _series_extra_reasons(documents: Sequence[dict[str, Any]]) -> list[str]:
+    reasons: set[str] = set()
+    for document in documents:
+        validity = document["outcome"]["validity"]
+        rows = document["measurement"]["rows"]
+        if validity.get("provenance_complete") is not True:
+            reasons.add("incomplete-provenance")
+        if validity.get("workload_source") != "canonical-serialized":
+            reasons.add("noncanonical-workload")
+        if validity.get("anomaly_free") is not True or any(row["anomalies"] for row in rows):
+            reasons.add("unresolved-anomaly")
+        if validity.get("semantic_correctness") != "pass":
+            reasons.add("semantic-correctness-failed")
+        if validity.get("measurement_conformance") != "conformant" or validity.get("sampling_conformance") != "conformant":
+            reasons.add("measurement-nonconformant")
+        profile = identity.case_profile(document["case"]["mode"])
+        scopes = {row["correctness"].get("scope") for row in rows}
+        if scopes != {profile["correctness_scope"]}:
+            reasons.add("expert-oracle-incomplete")
+    return sorted(reasons)
+
+
+# Display labels keyed by backend identifier.
+# NOTE(review): the consumer of this table is not visible in this section.
+BACKEND_LABELS = {
+    "deepep": "DeepEP V1",
+    "deepep-v2": "DeepEP V2",
+    "deepep-hybrid": "DeepEP Hybrid",
+    "uccl": "UCCL",
+    "mori": "MoRI",
+    "nccl-ep": "NCCL/RCCL reference",
+}
+
+
+def _build_series(
+    series_id: str,
+    documents: Sequence[dict[str, Any]],
+    sample_documents: Sequence[dict[str, Any]],
+    expected_repeats: int,
+) -> tuple[dict[str, Any], dict[str, Any]]:
+    if not documents:
+        raise PublisherError("cannot aggregate an empty series")
+    first = documents[0]
+    if any(document["identity"]["series_id"] != series_id for document in documents):
+        raise PublisherError("series aggregation mixed identities")
+    if len(sample_documents) != len(documents):
+        raise PublisherError("series aggregation lost private sample documents")
+    allocations = [document["identity"]["allocation_id"] for document in documents]
+    if len(allocations) != len(set(allocations)):
+        raise PublisherError("series repeats reuse an allocation identity")
+    row_maps = [
+        {row["tokens_per_rank"]: row for row in document["measurement"]["rows"]}
+        for document in documents
+    ]
+    token_sets = {tuple(sorted(rows)) for rows in row_maps}
+    if len(token_sets) != 1:
+        raise PublisherError("series token coverage differs across allocations")
+    tokens = list(next(iter(token_sets)))
+    qualification_indices = sorted(
+        document["measurement"]["qualification_index"] for document in documents
+    )
+    p50_ratios = [
+        _ratio([rows[token]["components"]["roundtrip"]["percentiles_us"]["p50"] for rows in row_maps])
+        for token in tokens
+    ]
+    p99_ratios = [
+        _ratio([rows[token]["components"]["roundtrip"]["percentiles_us"]["p99"] for rows in row_maps])
+        for token in tokens
+    ]
+    p50_ratio = max((value for value in p50_ratios if value is not None), default=None)
+    p99_ratio = max((value for value in p99_ratios if value is not None), default=None)
+    correct = all(
+        row["correctness"]["passed"]
+        for document in documents for row in document["measurement"]["rows"]
+    )
+    measured = all(
+        row["components"]["roundtrip"]["availability"] == "measured"
+        and row["components"]["roundtrip"]["percentiles_us"].get("p99") is not None
+        for document in documents for row in document["measurement"]["rows"]
+    )
+    extra_reasons = _series_extra_reasons(documents)
+    case = first["case"]
+    shape = case["shape"]
+    topology = first["topology"]
+    runtime = first["runtime_fingerprint"]
+    workload_id = first["workload"]["workload_id"]
+    if not identity.is_typed_id(workload_id, "workload"):
+        raise PublisherError("raw workload is not canonical")
+    backend_id = case["backend"]
+    resource_raw = first["implementation"]["resource_profile"]
+    public_config = contracts.public_series_config(
+        kernel_generation=first["implementation"]["kernel_generation"],
+        provenance=first["implementation"]["provenance"],
+        resource_profile=resource_raw,
+        resource_mode=case["resource_mode"],
+        device_product=topology["device_product"],
+    )
+    resource_profile = public_config["resource"]["profile"]
+    configured_units = public_config["resource"]["configured_units"]
+    units_kind = public_config["resource"]["comm_units_kind"]
+    resource_label = (
+        f"{configured_units} {str(units_kind).upper()}"
+        if configured_units is not None and units_kind
+        else resource_profile
+    )
+    eplb = _exact_repeat_value(
+        [_eplb_descriptor(document) for document in documents], "EPLB descriptor"
+    )
+    points: list[dict[str, Any]] = []
+    run_metrics: dict[str, dict[int, dict[str, float]]] = {}
+    trial_blocks: dict[str, dict[int, dict[str, Any]]] = {}
+    for document, sample_document, rows in zip(
+        documents, sample_documents, row_maps, strict=True
+    ):
+        if any(
+            sample_document[field] != document["identity"][field]
+            for field in ("allocation_id", "attempt_id", "case_id", "series_id")
+        ):
+            raise PublisherError("private samples differ from their selected raw attempt")
+        if sample_document["qualification_index"] != document["measurement"]["qualification_index"]:
+            raise PublisherError("private sample qualification index differs from raw attempt")
+        run_id = str(_git_run(document)["run_id"])
+        if run_id in run_metrics:
+            raise PublisherError("series has two allocations from one workflow run")
+        trial_blocks[run_id] = _private_trial_components(sample_document)
+        run_metrics[run_id] = {}
+        for token in tokens:
+            latency = rows[token]["components"]["roundtrip"]["percentiles_us"]
+            byte_provenance = rows[token]["byte_provenance"]["roundtrip"]
+            run_metrics[run_id][token] = {
+                "latency_us": {statistic: latency[statistic] for statistic in ("p50", "p99")},
+                "activation_data_rate_gbps_at_latency_percentile": {
+                    statistic: byte_provenance["activation_data_bytes"]
+                    / (latency[statistic] * 1000.0)
+                    for statistic in ("p50", "p99")
+                },
+                "total_logical_data_rate_gbps_at_latency_percentile": {
+                    statistic: byte_provenance["total_logical_bytes"]
+                    / (latency[statistic] * 1000.0)
+                    for statistic in ("p50", "p99")
+                },
+            }
+    for token in tokens:
+        rows = [row_map[token] for row_map in row_maps]
+        diagnostics = _trial_diagnostics(trial_blocks, token)
+        if diagnostics["flagged"]:
+            extra_reasons.append("unresolved-trial-diagnostic")
+        routing = _exact_repeat_value(
+            [_routing_facts(row) for row in rows], "routing/load facts"
+        )
+        components = {
+            name: _aggregate_component(rows, name)
+            for name in ("dispatch", "stage", "combine", "roundtrip")
+        }
+        if components["dispatch"] is None:
+            components["isolated_sum"] = None
+        else:
+            latency = {
+                statistic: components["dispatch"]["latency_us"][statistic]
+                + (
+                    components["stage"]["latency_us"][statistic]
+                    if components["stage"] is not None else 0.0
+                )
+                + components["combine"]["latency_us"][statistic]
+                for statistic in ("p50", "p90", "p95", "p99")
+            }
+            components["isolated_sum"] = {
+                "origin": "derived",
+                "latency_us": latency,
+                "byte_provenance": components["roundtrip"]["byte_provenance"],
+                "activation_data_rate_gbps_at_latency_percentile": None,
+                "total_logical_data_rate_gbps_at_latency_percentile": None,
+                "sample_count": None,
+            }
+        points.append({
+            "point_id": rows[0]["point_id"],
+            "tokens_per_rank": token,
+            "global_tokens": token * case["ep_size"],
+            "correctness": {
+                "semantic_pass": all(row["correctness"]["passed"] for row in rows),
+                "precision": _aggregate_precision_evidence(rows),
+            },
+            "anomalies": sorted({
+                anomaly["type"].replace("_", "-")
+                for row in rows for anomaly in row["anomalies"]
+            } | set(diagnostics["reasons"])),
+            "stability": {
+                "complete": qualification_indices == [1, 2, 3],
+                "qualification_indices": qualification_indices,
+                "p50_max_min_ratio": p50_ratios[tokens.index(token)]
+                if qualification_indices == [1, 2, 3] else None,
+                "p99_max_min_ratio": p99_ratios[tokens.index(token)]
+                if qualification_indices == [1, 2, 3] else None,
+                "stable_p50": bool(
+                    qualification_indices == [1, 2, 3]
+                    and p50_ratios[tokens.index(token)] is not None
+                    and p50_ratios[tokens.index(token)] <= P50_STABILITY_LIMIT
+                ),
+                "stable_p99": bool(
+                    qualification_indices == [1, 2, 3]
+                    and p99_ratios[tokens.index(token)] is not None
+                    and p99_ratios[tokens.index(token)] <= P99_STABILITY_LIMIT
+                ),
+            },
+            "trial_diagnostics": diagnostics,
+            "routing": routing,
+            "components": components,
+            "roundtrip_token_rate_at_latency_percentile": {
+                statistic: (token * case["ep_size"])
+                / (components["roundtrip"]["latency_us"][statistic] * 1e-6)
+                for statistic in ("p50", "p90", "p95", "p99")
+            },
+            "evidence_ids": [row["evidence_id"] for row in rows],
+        })
+    eligibility = _eligibility_record(
+        allocations,
+        complete=len(documents) == expected_repeats,
+        correct=correct,
+        measured=measured,
+        # Ordering is defined only across alternatives in a controlled cohort.
+        stable_ordering=True,
+        p50_ratio=p50_ratio,
+        p99_ratio=p99_ratio,
+        extra_reasons=sorted(set(extra_reasons)),
+    )
+    series = {
+        "series_id": series_id,
+        "label": (
+            f"{case['runner'].upper()} / {BACKEND_LABELS.get(backend_id, backend_id)} / "
+            f"EP{case['ep_size']} / {topology['nodes']} node"
+            f"{'s' if topology['nodes'] != 1 else ''} / {topology['scope']} / "
+            f"{case['mode']} / {case['phase']} / {shape['routing']}"
+            f"{' + EPLB' if case['eplb']['enabled'] else ''} / {resource_label}"
+        ),
+        "status": "decision-grade" if eligibility["decision_grade"] else "diagnostic",
+        "case_ids": sorted({document["identity"]["case_id"] for document in documents}),
+        "allocation_ids": sorted(allocations),
+        "model": _slug(case["workload_name"]),
+        "suite": _slug(case["suite"]),
+        "mode": case["mode"],
+        "phase": case["phase"],
+        "publication_tier": case["required_publication"],
+        "backend": {
+            "id": _slug(backend_id),
+            "label": BACKEND_LABELS.get(backend_id, backend_id),
+            "role": "reference" if backend_id == "nccl-ep" else "library",
+            **public_config["backend"],
+        },
+        "build": {
+            "implementation_contract_sha256": first["identity"]["series_factors"][
+                "implementation_contract_sha256"
+            ],
+            "public_config_sha256": first["identity"]["series_factors"][
+                "public_config_sha256"
+            ],
+            "routing_control_sha256": first["identity"]["series_factors"][
+                "routing_control_sha256"
+            ],
+            "runtime_fingerprint_sha256": first["identity"]["series_factors"][
+                "runtime_fingerprint_sha256"
+            ],
+            "image_digest": first["identity"]["series_factors"]["image_digest"],
+            "source_sha": first["identity"]["series_factors"]["source_sha"],
+            "squash_sha256": first["identity"]["series_factors"]["squash_sha256"],
+        },
+        "system": {
+            "sku": _slug(case["runner"]),
+            "label": public_config["system"]["label"],
+            "vendor": runtime["vendor"],
+            "topology_class": _slug(topology["topology_class"]),
+            "transport": _slug(topology["transport"]),
+            "scale_up_transport": _slug(topology["scale_up_transport"]),
+            "scale_out_transport": (
+                _slug(topology["scale_out_transport"])
+                if topology["scale_out_transport"] is not None
+                else None
+            ),
+            "scope": topology["scope"],
+            "nodes": topology["nodes"],
+            "gpus_per_node": topology["gpus_per_node"],
+            "scale_up_domain": topology["scale_up_domain"],
+            "world_size": topology["world_size"],
+            "ep_size": case["ep_size"],
+            "placement": topology["placement"],
+        },
+        "workload": {
+            "workload_id": workload_id,
+            "hidden": shape["hidden"],
+            "top_k": shape["topk"],
+            "experts": case["eplb"]["num_logical_experts"],
+            "routing": shape["routing"],
+            "eplb": case["eplb"]["enabled"],
+            "precision_profile": shape["precision_profile"],
+            "dispatch_precision": shape["dispatch_precision"],
+            "combine_precision": shape["combine_precision"],
+            "activation_profile": shape["activation_profile"],
+        },
+        "eplb": eplb,
+        "resource": public_config["resource"],
+        "measurement": {
+            "contract": first["measurement"]["contract"],
+            "component_order_contract": first["measurement"]["component_order_contract"],
+            "combine_semantics": identity.case_profile(case["mode"])["combine_semantics"],
+            "payload_unit": identity.case_profile(case["mode"])["payload_unit"],
+            "sampling_contract": first["measurement"]["sampling"]["contract"],
+            "iters": first["measurement"]["sampling"]["iterations_per_trial"],
+            "trials": first["measurement"]["sampling"]["trials"],
+            "warmups": first["measurement"]["sampling"]["warmup_iterations"],
+            "samples_per_component": first["measurement"]["sampling"]["samples_per_component"],
+            "qualification_indices": qualification_indices,
+            "headline_component": "roundtrip",
+            "headline_percentile": "p99",
+        },
+        "points": points,
+        "eligibility": eligibility,
+    }
+    internal = {
+        "documents": list(documents),
+        "run_metrics": run_metrics,
+        "trial_blocks": trial_blocks,
+        "series_factors": first["identity"]["series_factors"],
+    }
+    return series, internal
+
+
+def _resolve_bundle_file(root: Path, record: dict[str, Any]) -> Path:
+    """Map a manifest file record onto a verified path below ``root``.
+
+    ``record`` supplies ``path`` (POSIX-style, joined onto ``root``), ``bytes`` and
+    ``sha256``. The returned path is the joined location once every check has passed.
+
+    Raises:
+        PublisherError: if the joined path is not relative to ``root``, if it is not
+            already in resolved form, is a symlink or is not a regular file, or if its
+            size or ``_sha_file`` digest differs from the record.
+    """
+    segments = PurePosixPath(record["path"]).parts
+    candidate = root.joinpath(*segments)
+    try:
+        candidate.relative_to(root)
+    except ValueError as exc:
+        raise PublisherError("bundle record escapes its directory") from exc
+    # A path that changes under resolve() went through "..", a link, or a
+    # non-canonical root; all of those are rejected together with plain symlinks.
+    already_resolved = candidate.resolve() == candidate
+    if not already_resolved or candidate.is_symlink() or not candidate.is_file():
+        raise PublisherError("bundle record points to a missing or linked file")
+    # Compare the size first so the digest is only computed when the size matches.
+    size_matches = candidate.stat().st_size == record["bytes"]
+    if not size_matches or _sha_file(candidate) != record["sha256"]:
+        raise PublisherError("bundle file checksum differs from its manifest")
+    return candidate
+
+
+def load_bundle(store: Store, bundle_id: str) -> dict[str, Any]:
+    """Load the bundle stored under ``store.bundles / bundle_id`` and re-verify it.
+
+    The bundle manifest, checksum catalog, source catalog, matrix and every attempt
+    document are checked against each other before anything is returned.
+
+    Returns:
+        A dict with keys ``id``, ``root``, ``manifest``, ``cases``, ``documents``
+        (attempt_id -> validated document), ``sample_documents`` (attempt_id ->
+        validated samples document, raw attempts only) and ``selected``
+        (case_id -> document of the selected attempt).
+
+    Raises:
+        PublisherError: on any of the consistency failures checked below. Helper
+            validators called here may raise as well.
+    """
+    if HEX64.fullmatch(bundle_id) is None:
+        raise PublisherError("bundle ID must be a SHA-256 digest")
+    root = store.bundles / bundle_id
+    # The bundle directory must not be a symlink and must carry a COMPLETE marker file.
+    if root.is_symlink() or not (root / "COMPLETE").is_file():
+        raise PublisherError(f"bundle {bundle_id} is missing or incomplete")
+    _verify_frozen_tree(root, private=True)
+    # The marker's content (whitespace-stripped) has to equal the bundle ID itself.
+    if (root / "COMPLETE").read_text().strip() != bundle_id:
+        raise PublisherError("bundle COMPLETE marker differs")
+    manifest_path = root / "bundle.json"
+    # The bundle ID doubles as the expected _sha_file digest of bundle.json.
+    if _sha_file(manifest_path) != bundle_id:
+        raise PublisherError("bundle directory digest differs from bundle.json")
+    manifest = validate_bundle_manifest(strict_load(manifest_path))
+    checksum_path = _resolve_bundle_file(root, manifest["checksums"])
+    checksum_document = strict_load(checksum_path)
+    checksum_document = _exact(checksum_document, {"format", "files"}, "checksums")
+    if checksum_document["format"] != "collectivex.checksums.v1":
+        raise PublisherError("bundle checksum format is invalid")
+    records = [_file_record(value, f"checksums.files[{index}]")
+               for index, value in enumerate(_array(checksum_document["files"], "checksums.files"))]
+    _unique([record["path"] for record in records], "checksums.files[].path")
+    # Each cataloged file is resolved (and thereby size/digest checked) individually.
+    for record in records:
+        _resolve_bundle_file(root, record)
+    # The catalog must list exactly the files found under root, apart from the
+    # manifest and the checksum catalog itself.
+    expected_paths = {
+        path.relative_to(root).as_posix() for path in _tree_files(root)
+        if path.name not in {"bundle.json", "checksums.json"}
+    }
+    if {record["path"] for record in records} != expected_paths:
+        raise PublisherError("bundle checksum catalog does not cover its payload exactly")
+    # Maps the positional archive key ("artifact-0000", ...) to the source artifact name.
+    artifact_by_root: dict[str, str] = {}
+    for index, source in enumerate(manifest["sources"]):
+        _resolve_bundle_file(root, source)
+        archive_key = f"artifact-{index:04d}"
+        # Sources must sit at source/artifact-NNNN.zip in manifest order.
+        if source["path"] != f"source/{archive_key}.zip":
+            raise PublisherError("bundle source catalog order/path differs")
+        artifact_by_root[archive_key] = source["artifact_name"]
+    if len(set(artifact_by_root.values())) != len(artifact_by_root):
+        raise PublisherError("bundle source catalog repeats an artifact name")
+    matrix_path = _resolve_bundle_file(root, manifest["matrix"])
+    matrix_document = strict_load(matrix_path)
+    cases = validate_matrix(matrix_document)
+    expected_by_id = {case["case_id"]: case for case in cases}
+    expected_deliveries = _expected_deliveries(
+        matrix_document, cases, manifest["run"]
+    )
+    # The coverage selections must name exactly the case IDs of the matrix.
+    if {item["case_id"] for item in manifest["coverage"]["selections"]} != set(expected_by_id):
+        raise PublisherError("bundle selected coverage differs from requested matrix")
+    documents: dict[str, dict[str, Any]] = {}
+    sample_documents: dict[str, dict[str, Any]] = {}
+    runtime_fingerprints: set[str] = set()
+    for attempt in manifest["attempts"]:
+        document_path = _resolve_bundle_file(root, attempt["document"])
+        document = contracts.strict_load(document_path)
+        # The publication-safety assertion runs before any schema validation.
+        artifact_safety.assert_publication_safe([document])
+        if document.get("format") == contracts.RAW_FORMAT:
+            _schema("raw-case-v1.schema.json", document)
+            # The samples file is expected next to the raw document, under the name
+            # the raw document itself records.
+            sample_path = document_path.with_name(document["sample_artifact"]["path"])
+            if attempt["samples"] is None:
+                raise PublisherError("raw attempt is missing its sample manifest record")
+            manifest_sample_path = _resolve_bundle_file(root, attempt["samples"])
+            # Manifest record and raw document must agree on the samples file.
+            if manifest_sample_path != sample_path:
+                raise PublisherError("sample manifest record points to the wrong raw evidence")
+            sample_document = contracts.strict_load(sample_path)
+            artifact_safety.assert_publication_safe([sample_document])
+            _schema("samples-v1.schema.json", sample_document)
+            sample_document = contracts.validate_samples_document(sample_document)
+            # From here on the document is whatever contracts.load_raw_attempt returns
+            # for the same path, not the strictly loaded dict from above.
+            document = contracts.load_raw_attempt(document_path)
+            sample_documents[attempt["attempt_id"]] = sample_document
+        else:
+            # Every non-raw document is handled as a terminal outcome and must not
+            # carry a samples record.
+            if attempt["samples"] is not None:
+                raise PublisherError("terminal attempt unexpectedly names a sample artifact")
+            _schema("terminal-outcome-v1.schema.json", document)
+            document = contracts.validate_terminal_document(document)
+        _validate_delivery_binding(
+            document, document_path, root / "raw", artifact_by_root,
+            expected_by_id, expected_deliveries, manifest["run"],
+        )
+        # Rebuild the attempt record from the document and require it to equal the
+        # manifest's record; only the "selected" flag is taken from the manifest.
+        expected_record = _attempt_record(
+            document, document_path, root, selected=attempt["selected"]
+        )
+        if expected_record != attempt:
+            raise PublisherError("bundle attempt record differs from native document")
+        # Falsy fingerprints are left out of the catalog comparison.
+        if attempt["runtime_fingerprint_sha256"]:
+            runtime_fingerprints.add(attempt["runtime_fingerprint_sha256"])
+        documents[attempt["attempt_id"]] = document
+    # The manifest's fingerprint list must equal the sorted set seen across attempts.
+    if sorted(runtime_fingerprints) != manifest["runtime_fingerprints"]:
+        raise PublisherError("bundle runtime fingerprint catalog differs from attempts")
+    # NOTE(review): a selection naming an attempt ID absent from manifest["attempts"]
+    # surfaces here as KeyError unless validate_bundle_manifest rules it out - confirm.
+    selected = {
+        selection["case_id"]: documents[selection["selected_attempt_id"]]
+        for selection in manifest["coverage"]["selections"]
+    }
+    return {
+        "id": bundle_id,
+        "root": root,
+        "manifest": manifest,
+        "cases": cases,
+        "documents": documents,
+        "sample_documents": sample_documents,
+        "selected": selected,
+    }
+
+
+def _cohort_control(
+    kind: str, series: dict[str, Any], internal: dict[str, Any]
+) -> tuple[dict[str, Any], list[str], list[str], Any]:
+    """Describe how one series participates in a cohort of the given ``kind``.
+
+    Returns a 4-tuple ``(control, controlled, varying, variant)``: the mapping of
+    values held fixed for this kind, the names of the controlled factors, the names
+    of the varying factors, and this series' value for the varying side.
+    ``internal`` is accepted for interface compatibility and is not read here.
+
+    Raises:
+        PublisherError: if ``kind`` is not one of the handled cohort kinds.
+    """
+    build = series["build"]
+    source_sha = build["source_sha"]
+    workload = series["workload"]
+    shape_fields = (
+        "hidden", "top_k", "experts", "precision_profile", "dispatch_precision",
+        "combine_precision", "activation_profile",
+    )
+    shape = {field: workload[field] for field in shape_fields}
+    # Factors shared by every non-precision cohort kind.
+    common = {
+        "model": series["model"],
+        "mode": series["mode"],
+        "phase": series["phase"],
+        "shape": shape,
+        "measurement": series["measurement"],
+        "ep_size": series["system"]["ep_size"],
+    }
+    if kind == "library":
+        control = dict(
+            common,
+            system=series["system"],
+            workload=workload,
+            resource_mode=series["resource"]["mode"],
+            source=source_sha,
+        )
+        controlled = [
+            "system", "workload", "mode", "phase", "measurement", "resource.mode", "source",
+        ]
+        return control, controlled, ["backend", "resource"], series["backend"]["id"]
+    elif kind == "chip":
+        control = dict(
+            common,
+            backend=series["backend"],
+            source=source_sha,
+            workload=workload,
+            resource_mode=series["resource"]["mode"],
+        )
+        controlled = [
+            "backend", "source", "workload", "mode", "phase", "measurement", "resource.mode",
+        ]
+        return control, controlled, ["system", "resource"], series["system"]
+    elif kind == "system":
+        control = dict(common, workload=workload, source=source_sha)
+        variant = [
+            series["system"]["sku"],
+            series["backend"]["id"],
+            series["resource"]["profile"],
+        ]
+        controlled = ["workload", "mode", "phase", "measurement", "source"]
+        return control, controlled, ["system", "backend", "resource"], variant
+    elif kind == "routing":
+        control = dict(
+            common,
+            backend=series["backend"],
+            system=series["system"],
+            resource=series["resource"],
+            build=_routing_build_control(build),
+        )
+        variant = [
+            workload["routing"],
+            workload["eplb"],
+            build["implementation_contract_sha256"],
+        ]
+        controlled = [
+            "backend", "implementation-static-build", "system", "model-shape",
+            "mode", "phase", "measurement", "resource",
+        ]
+        varying = ["workload.routing", "workload.eplb", "implementation-config"]
+        return control, controlled, varying, variant
+    elif kind in PRECISION_COHORT_KINDS:
+        control, variant = _public_cohort_factors(kind, series)
+        # Every precision cohort controls this prefix; the single-axis kinds also
+        # pin the resource and the opposite precision axis.
+        precision_base = [
+            "backend", "implementation-static-build", "system", "model-shape",
+            "mode", "phase", "workload.routing", "workload.eplb", "measurement",
+        ]
+        if kind == "dispatch-precision":
+            controlled = [*precision_base, "resource", "combine-precision"]
+            varying = ["dispatch-precision"]
+        elif kind == "combine-precision":
+            controlled = [*precision_base, "resource", "dispatch-precision"]
+            varying = ["combine-precision"]
+        else:
+            controlled = precision_base
+            varying = [
+                "dispatch-precision", "combine-precision", "precision-profile", "resource",
+            ]
+        return control, controlled, varying, variant
+    raise PublisherError(f"unknown cohort kind {kind}")
+
+
+def _cohort_ordering(
+    members: Sequence[dict[str, Any]], internals: dict[str, dict[str, Any]], tokens: Sequence[int]
+) -> tuple[bool, int]:
+    """Check whether cohort members rank identically in every shared workflow run.
+
+    Only run IDs present in every member's ``run_metrics`` are considered. For each
+    such run, and for each (token, measure, statistic), members are sorted by metric
+    value with ``series_id`` as tie-breaker: ascending for ``latency_us``, reversed
+    for the two data-rate measures. The ordering is stable when every
+    (token, measure, statistic) yields a single ordering across all shared runs.
+
+    Returns:
+        ``(stable, aligned_runs)`` where ``aligned_runs`` is the number of shared run
+        IDs. ``stable`` is False when ``members`` is empty or fewer than
+        ``REQUIRED_ALLOCATIONS`` runs are shared.
+    """
+    if not members:
+        # set.intersection() needs at least one operand; an empty cohort has no
+        # ordering to confirm, so report it as unstable instead of raising TypeError.
+        return False, 0
+    run_ids = set.intersection(*(
+        set(internals[member["series_id"]]["run_metrics"]) for member in members
+    ))
+    aligned_runs = len(run_ids)
+    if aligned_runs < REQUIRED_ALLOCATIONS:
+        return False, aligned_runs
+    measures = (
+        "latency_us", "activation_data_rate_gbps_at_latency_percentile",
+        "total_logical_data_rate_gbps_at_latency_percentile",
+    )
+    statistics = ("p50", "p99")
+    # Distinct member orderings seen per (measure, statistic, token), collected in
+    # one pass instead of rescanning a flat list for every combination.
+    observed: dict[tuple[str, str, int], set[tuple[str, ...]]] = {}
+    for run_id in sorted(run_ids):
+        for token in tokens:
+            for measure in measures:
+                for statistic in statistics:
+                    ordered = tuple(
+                        member["series_id"]
+                        for member in sorted(
+                            members,
+                            key=lambda item: (
+                                internals[item["series_id"]]["run_metrics"][run_id][token][measure][statistic],
+                                item["series_id"],
+                            ),
+                            # Lower latency ranks first; higher data rate ranks first.
+                            reverse=measure != "latency_us",
+                        )
+                    )
+                    observed.setdefault((measure, statistic, token), set()).add(ordered)
+    # A combination with no recorded ordering counts as unstable, matching the
+    # "exactly one observed ordering" requirement.
+    stable = all(
+        len(observed.get((measure, statistic, token), ())) == 1
+        for token in tokens
+        for measure in measures
+        for statistic in statistics
+    )
+    return stable, aligned_runs
+
+
+def _p99_top_tie_ids(
+    members: Sequence[dict[str, Any]],
+    internals: dict[str, dict[str, Any]],
+    token: int,
+    dataset_binding: str,
+    cohort_id: str,
+) -> set[str]:
+    """Return the series IDs that share the top roundtrip-p99 latency rank.
+
+    Members are ordered by their roundtrip p99 ``latency_us`` value at ``token``
+    (ties broken by ``series_id``). The first member is the baseline and is compared
+    with every other member through ``_hierarchical_p99_ratio``. The tie group is the
+    prefix of that ordering ending at the last candidate whose comparison does not
+    report ``baseline_wins``.
+
+    Side effect: the baseline ID, all comparison results and the sorted tie IDs are
+    stored in ``internals[baseline]["decision_statistics"]`` under the key
+    ``"{cohort_id}:p99:{token}"``.
+    """
+    headline_metric = {
+        "operation": "roundtrip",
+        "statistic": "p99",
+        "measure": "latency_us",
+        "objective": "min",
+        "tokens_per_rank": token,
+        "phase": members[0]["phase"],
+    }
+
+    def rank_key(member: dict[str, Any]) -> tuple[Any, str]:
+        return _metric_value(member, headline_metric)[1], member["series_id"]
+
+    ranked = sorted(members, key=rank_key)
+    leader_id = ranked[0]["series_id"]
+    outcomes: dict[str, dict[str, Any]] = {}
+    # Position of the last candidate the leader failed to beat; 0 means no tie.
+    last_unbeaten = 0
+    for position in range(1, len(ranked)):
+        challenger_id = ranked[position]["series_id"]
+        verdict = _hierarchical_p99_ratio(
+            leader_id, challenger_id, token, internals, dataset_binding
+        )
+        outcomes[challenger_id] = verdict
+        if not verdict["baseline_wins"]:
+            last_unbeaten = position
+    tied = {member["series_id"] for member in ranked[:last_unbeaten + 1]}
+    statistics = internals[leader_id].setdefault("decision_statistics", {})
+    statistics[f"{cohort_id}:p99:{token}"] = {
+        "baseline_series_id": leader_id,
+        "comparisons": outcomes,
+        "tie_series_ids": sorted(tied),
+    }
+    return tied
+
+
+def build_decisions(
+    series: Sequence[dict[str, Any]],
+    internals: dict[str, dict[str, Any]],
+    *,
+    dataset_binding: str | None = None,
+) -> tuple[list[dict[str, Any]], list[dict[str, Any]], list[dict[str, Any]], list[dict[str, Any]]]:
+    if dataset_binding is None:
+        dataset_binding = _sha_bytes(_canonical({
+            "series_ids": sorted(item["series_id"] for item in series),
+        }))
+    cohorts: list[dict[str, Any]] = []
+    for kind in (*REQUIRED_COHORT_KINDS, *PRECISION_COHORT_KINDS):
+        groups: dict[bytes, list[tuple[dict[str, Any], Any, list[str], list[str]]]] = {}
+        for item in series:
+            if kind == "library" and item["backend"]["role"] != "library":
+                continue
+            if kind == "system" and item["backend"]["role"] != "reference":
+                continue
+            control, controlled, varying, variant = _cohort_control(kind, item, internals[item["series_id"]])
+            groups.setdefault(_canonical(control), []).append((item, variant, controlled, varying))
+        for entries in groups.values():
+            variants = {_canonical(entry[1]) for entry in entries}
+            if len(entries) < 2 or len(variants) < 2:
+                continue
+            members = sorted((entry[0] for entry in entries), key=lambda item: item["series_id"])
+            token_sets = [set(point["tokens_per_rank"] for point in member["points"]) for member in members]
+            tokens = sorted(set.intersection(*token_sets))
+            same_points = len({tuple(sorted(values)) for values in token_sets}) == 1
+            ordering, aligned_runs = _cohort_ordering(members, internals, tokens) if tokens else (False, 0)
+            allocations = sorted({value for member in members for value in member["allocation_ids"]})
+            p50_ratio = max(
+                (member["eligibility"]["p50_max_min_ratio"] for member in members
+                 if member["eligibility"]["p50_max_min_ratio"] is not None), default=None
+            )
+            p99_ratio = max(
+                (member["eligibility"]["p99_max_min_ratio"] for member in members
+                 if member["eligibility"]["p99_max_min_ratio"] is not None), default=None
+            )
+            extra = {
+                reason for member in members for reason in member["eligibility"]["reasons"]
+                if reason not in {"unstable-ordering"}
+            }
+            if aligned_runs < REQUIRED_ALLOCATIONS:
+                extra.add("incomplete-aligned-repeats")
+            if tokens and not _bootstrap_inputs_ready(members, internals, tokens):
+                extra.add("missing-trial-blocks")
+            if kind == "routing" and sum(
+                member["workload"]["routing"] == "uniform"
+                and not member["workload"]["eplb"]
+                for member in members
+            ) != 1:
+                extra.add("missing-uniform-baseline")
+            if kind == "routing" and {
+                (member["workload"]["routing"], member["workload"]["eplb"])
+                for member in members
+            } != {("uniform", False), ("zipf", False), ("zipf", True)}:
+                extra.add("incomplete-routing-anchors")
+            if kind == "routing" and _routing_implementation_mismatch(members):
+                extra.add("implementation-config-mismatch")
+            endpoint_contrast = kind == "routing" or kind in PRECISION_COHORT_KINDS
+            if not tokens or (not endpoint_contrast and not same_points):
+                extra.add("unmatched-token-coverage")
+            if kind in {"dispatch-precision", "combine-precision"}:
+                axis = "dispatch" if kind == "dispatch-precision" else "combine"
+                field = f"{axis}_precision"
+                bf16 = identity.precision_profile(
+                    identity.V1_CONTROL_PRECISION_PROFILE
+                )[axis]
+                if sum(
+                    _canonical(member["workload"][field]) == _canonical(bf16)
+                    for member in members
+                ) != 1:
+                    extra.add("missing-bf16-precision-baseline")
+            eligibility = _eligibility_record(
+                allocations,
+                complete=all(member["eligibility"]["complete"] for member in members)
+                and bool(tokens) and (endpoint_contrast or same_points),
+                correct=all(member["eligibility"]["correct"] for member in members),
+                measured=all(member["eligibility"]["measured_roundtrip_p99"] for member in members),
+                stable_ordering=ordering,
+                p50_ratio=p50_ratio,
+                p99_ratio=p99_ratio,
+                extra_reasons=sorted(extra),
+            )
+            member_ids = [member["series_id"] for member in members]
+            publication_tier = (
+                "comparable-experimental"
+                if any(member["publication_tier"] == "comparable-experimental" for member in members)
+                else "official"
+            )
+            controlled, varying = entries[0][2], entries[0][3]
+            cohort_id = _derived_id("cxcohort-v1-", {
+                "kind": kind, "series_ids": member_ids,
+                "controlled_factors": controlled, "varying_factors": varying,
+            })
+            kind_label = {
+                "chip": "Platform",
+                "dispatch-precision": "Dispatch precision",
+                "combine-precision": "Combine precision",
+                "precision-pair": "Precision profile",
+            }.get(kind, kind.title())
+            first = members[0]
+            routing_label = first["workload"]["routing"] + (
+                "+EPLB" if first["workload"]["eplb"] else ""
+            )
+            context = {
+                "library": (
+                    f"{first['system']['sku'].upper()} EP{first['system']['ep_size']} / "
+                    f"{first['mode']} / {first['phase']} / {routing_label}"
+                ),
+                "chip": (
+                    f"{first['backend']['label']} EP{first['system']['ep_size']} / "
+                    f"{first['mode']} / {first['phase']} / {routing_label}"
+                ),
+                "system": (
+                    f"Reference EP{first['system']['ep_size']} / {first['mode']} / "
+                    f"{first['phase']} / {routing_label}"
+                ),
+                "routing": (
+                    f"{first['system']['sku'].upper()} / {first['backend']['label']} / "
+                    f"EP{first['system']['ep_size']} / {first['mode']} / {first['phase']}"
+                ),
+                "dispatch-precision": (
+                    f"{first['system']['sku'].upper()} / {first['backend']['label']} / "
+                    f"EP{first['system']['ep_size']} / {first['mode']} / {first['phase']}"
+                ),
+                "combine-precision": (
+                    f"{first['system']['sku'].upper()} / {first['backend']['label']} / "
+                    f"EP{first['system']['ep_size']} / {first['mode']} / {first['phase']}"
+                ),
+                "precision-pair": (
+                    f"{first['system']['sku'].upper()} / {first['backend']['label']} / "
+                    f"EP{first['system']['ep_size']} / {first['mode']} / {first['phase']}"
+                ),
+            }[kind]
+            cohorts.append({
+                "cohort_id": cohort_id,
+                "kind": kind,
+                "label": f"{context} / {kind_label} contrast ({len(members)} series)",
+                "description": (
+                    "Publisher-controlled NCCL/RCCL system comparison"
+                    if kind == "system"
+                    else (
+                        "Descriptive configured-stack precision comparison; no isolated axis claim"
+                        if kind == "precision-pair"
+                        else f"Publisher-controlled {kind_label.lower()} comparison"
+                    )
+                ),
+                "series_ids": member_ids,
+                "controlled_factors": controlled,
+                "varying_factors": varying,
+                "publication_tier": publication_tier,
+                "eligibility": eligibility,
+            })
+    cohorts.sort(key=lambda item: item["cohort_id"])
+    series_by_id = {item["series_id"]: item for item in series}
+    rankings: list[dict[str, Any]] = []
+    recommendations: list[dict[str, Any]] = []
+    sensitivities: list[dict[str, Any]] = []
+    for cohort in cohorts:
+        if not cohort["eligibility"]["decision_grade"]:
+            continue
+        members = [series_by_id[series_id] for series_id in cohort["series_ids"]]
+        tokens = sorted(set.intersection(*(
+            {point["tokens_per_rank"] for point in member["points"]} for member in members
+        )))
+        for token in tokens:
+            p99_tie_ids = _p99_top_tie_ids(
+                members, internals, token, dataset_binding, cohort["cohort_id"]
+            )
+            for measure, objective, unit in (
+                ("latency_us", "min", "us"),
+                ("activation_data_rate_gbps_at_latency_percentile", "max", "GB/s"),
+                ("total_logical_data_rate_gbps_at_latency_percentile", "max", "GB/s"),
+            ):
+                for statistic in ("p50", "p99"):
+                    metric = {
+                        "operation": "roundtrip", "statistic": statistic,
+                        "measure": measure, "objective": objective,
+                        "tokens_per_rank": token, "phase": members[0]["phase"],
+                    }
+                    entries = []
+                    for member in members:
+                        point_id, value, observed_unit = _metric_value(member, metric)
+                        if observed_unit != unit:
+                            raise PublisherError("publisher metric unit differs")
+                        entries.append({
+                            "rank": 0, "series_id": member["series_id"], "point_id": point_id,
+                            "value": value, "unit": unit,
+                        })
+                    entries.sort(key=lambda item: (item["value"], item["series_id"]), reverse=objective == "max")
+                    for rank, entry in enumerate(entries, 1):
+                        entry["rank"] = (
+                            1
+                            if measure == "latency_us"
+                            and statistic == "p99"
+                            and entry["series_id"] in p99_tie_ids
+                            else rank
+                        )
+                    ranking_id = _derived_id("cxranking-v1-", {
+                        "cohort_id": cohort["cohort_id"], "metric": metric,
+                    })
+                    metric_label = _metric_label(measure, statistic)
+                    rankings.append({
+                        "ranking_id": ranking_id, "cohort_id": cohort["cohort_id"],
+                        "label": f"{cohort['kind'].title()} {metric_label} T={token}",
+                        "metric": metric, "entries": entries,
+                        "publication_tier": cohort["publication_tier"],
+                        "eligibility": cohort["eligibility"],
+                    })
+                    if (
+                        cohort["publication_tier"] != "official"
+                        or measure != "latency_us"
+                        or statistic != "p99"
+                        or sum(entry["rank"] == 1 for entry in entries) != 1
+                    ):
+                        continue
+                    objective_name = "min-p99-latency"
+                    top = entries[0]
+                    recommendation_id = _derived_id("cxrecommendation-v1-", {
+                        "objective": objective_name, "ranking_id": ranking_id,
+                    })
+                    recommendations.append({
+                        "recommendation_id": recommendation_id,
+                        "cohort_id": cohort["cohort_id"],
+                        "label": f"Best {metric_label} at T={token}",
+                        "objective": objective_name,
+                        "series_id": top["series_id"], "point_id": top["point_id"],
+                        "value": top["value"], "unit": top["unit"],
+                        "rationale": (
+                            "Unique p99 winner after deterministic hierarchical bootstrap "
+                            "and all-run agreement"
+                        ),
+                        "publication_tier": cohort["publication_tier"],
+                        "eligibility": cohort["eligibility"],
+                    })
+        if cohort["kind"] == "routing":
+            baseline = next(
+                (member for member in members
+                 if member["workload"]["routing"] == "uniform" and not member["workload"]["eplb"]),
+                None,
+            )
+            if baseline:
+                for candidate in members:
+                    if candidate is baseline:
+                        continue
+                    for token in tokens:
+                        for measure, objective in (
+                            ("latency_us", "min"),
+                            ("activation_data_rate_gbps_at_latency_percentile", "max"),
+                            ("total_logical_data_rate_gbps_at_latency_percentile", "max"),
+                        ):
+                            for statistic in ("p50", "p99"):
+                                metric = {
+                                    "operation": "roundtrip", "statistic": statistic,
+                                    "measure": measure, "objective": objective,
+                                    "tokens_per_rank": token, "phase": baseline["phase"],
+                                }
+                                _, base_value, _ = _metric_value(baseline, metric)
+                                _, candidate_value, _ = _metric_value(candidate, metric)
+                                sensitivity_id = _derived_id("cxsensitivity-v1-", {
+                                    "baseline": baseline["series_id"], "candidate": candidate["series_id"],
+                                    "cohort": cohort["cohort_id"], "metric": metric,
+                                })
+                                sensitivities.append({
+                                    "sensitivity_id": sensitivity_id,
+                                    "cohort_id": cohort["cohort_id"],
+                                    "label": (
+                                        f"Routing sensitivity: "
+                                        f"{_metric_label(measure, statistic)} T={token}"
+                                    ),
+                                    "baseline_series_id": baseline["series_id"],
+                                    "candidate_series_id": candidate["series_id"],
+                                    "metric": metric,
+                                    "signed_change_ratio": (candidate_value - base_value) / base_value,
+                                    "publication_tier": cohort["publication_tier"],
+                                    "eligibility": cohort["eligibility"],
+                                })
+        if cohort["kind"] in {"dispatch-precision", "combine-precision"}:
+            axis = (
+                "dispatch"
+                if cohort["kind"] == "dispatch-precision"
+                else "combine"
+            )
+            field = f"{axis}_precision"
+            bf16 = identity.precision_profile(
+                identity.V1_CONTROL_PRECISION_PROFILE
+            )[axis]
+            baseline = next(
+                member for member in members
+                if _canonical(member["workload"][field]) == _canonical(bf16)
+            )
+            for candidate in members:
+                if candidate is baseline:
+                    continue
+                for token in tokens:
+                    for measure, objective in (
+                        ("latency_us", "min"),
+                        ("activation_data_rate_gbps_at_latency_percentile", "max"),
+                        ("total_logical_data_rate_gbps_at_latency_percentile", "max"),
+                    ):
+                        for statistic in ("p50", "p99"):
+                            metric = {
+                                "operation": "roundtrip",
+                                "statistic": statistic,
+                                "measure": measure,
+                                "objective": objective,
+                                "tokens_per_rank": token,
+                                "phase": baseline["phase"],
+                            }
+                            _, base_value, _ = _metric_value(baseline, metric)
+                            _, candidate_value, _ = _metric_value(candidate, metric)
+                            sensitivity_id = _derived_id("cxsensitivity-v1-", {
+                                "baseline": baseline["series_id"],
+                                "candidate": candidate["series_id"],
+                                "cohort": cohort["cohort_id"],
+                                "metric": metric,
+                            })
+                            sensitivities.append({
+                                "sensitivity_id": sensitivity_id,
+                                "cohort_id": cohort["cohort_id"],
+                                "label": (
+                                    f"{axis.title()} precision sensitivity: "
+                                    f"{_metric_label(measure, statistic)} T={token}"
+                                ),
+                                "baseline_series_id": baseline["series_id"],
+                                "candidate_series_id": candidate["series_id"],
+                                "metric": metric,
+                                "signed_change_ratio": (
+                                    candidate_value - base_value
+                                ) / base_value,
+                                "publication_tier": cohort["publication_tier"],
+                                "eligibility": cohort["eligibility"],
+                            })
+    rankings.sort(key=lambda item: item["ranking_id"])
+    recommendations.sort(key=lambda item: item["recommendation_id"])
+    sensitivities.sort(key=lambda item: item["sensitivity_id"])
+    return cohorts, rankings, recommendations, sensitivities
+
+
+def _require_runnable_promotion_success(
+    bundles: Sequence[dict[str, Any]], cases: dict[str, dict[str, Any]]
+) -> None:
+    """Enforce the all-success promotion rule for runnable matrix cases.
+
+    Each case whose ``_disposition`` is ``"runnable"`` is checked in every
+    bundle. The bundle's selected document for the case must report a
+    ``success`` outcome, and the set of outcomes over all of the bundle's
+    documents recorded for that case must be exactly ``{"success"}``.
+
+    Raises:
+        PublisherError: at the first bundle/case pair violating either rule.
+    """
+    for bundle in bundles:
+        for case_id, case in cases.items():
+            if case["_disposition"] == "runnable":
+                selected_status, _ = _outcome(bundle["selected"][case_id])
+                if selected_status != "success":
+                    raise PublisherError(
+                        "promotion requires every runnable matrix case to succeed "
+                        "in every selected bundle"
+                    )
+                # Collect the outcome of every document kept for this case; a case
+                # with no matching documents yields an empty set and is rejected too.
+                observed_statuses = set()
+                for document in bundle["documents"].values():
+                    if document["identity"]["case_id"] == case_id:
+                        observed_statuses.add(_outcome(document)[0])
+                if observed_statuses != {"success"}:
+                    raise PublisherError(
+                        "promotion rejects runnable cases with failed, invalid, or diagnostic retries"
+                    )
+
+
+def _expected_chip_cohort_count(series: Sequence[dict[str, Any]]) -> int:
+    """Count the chip control groups that contain at least two distinct variants.
+
+    Each series item is split into its ``"chip"`` control and variant factors by
+    ``_public_cohort_factors``. Items are grouped by their canonicalised control
+    factors, and a group is counted when it holds two or more distinct
+    canonicalised variants.
+    """
+    variants_by_control: dict[bytes, set[bytes]] = {}
+    for item in series:
+        control, variant = _public_cohort_factors("chip", item)
+        control_key = _canonical(control)
+        if control_key not in variants_by_control:
+            variants_by_control[control_key] = set()
+        variants_by_control[control_key].add(_canonical(variant))
+    return sum(1 for variants in variants_by_control.values() if len(variants) >= 2)
+
+
+def _require_promotion_cohorts(
+    cohorts: Sequence[dict[str, Any]], series: Sequence[dict[str, Any]]
+) -> None:
+    """Check that the derived cohorts satisfy every promotion requirement.
+
+    The checks run in this order, and the first failure raises:
+
+    1. Every kind in ``REQUIRED_COHORT_KINDS`` has at least one decision-grade
+       cohort. ``PRECISION_COHORT_KINDS`` are required as well when any series
+       uses a precision profile other than the v1 control profile.
+    2. Each kind in ``REQUIRED_PROMOTION_COHORT_COUNTS`` has exactly the
+       configured number of cohorts, all decision-grade.
+    3. The number of chip cohorts equals ``_expected_chip_cohort_count(series)``
+       and all of them are decision-grade.
+    4. Every decision-grade routing cohort holds exactly the uniform, zipf, and
+       zipf+EPLB anchors, and its uniform and zipf (EPLB off) members share one
+       ``implementation_contract_sha256``.
+
+    Raises:
+        PublisherError: describing the first requirement that is not met.
+    """
+    decision_grade_kinds = set()
+    for cohort in cohorts:
+        if cohort["eligibility"]["decision_grade"]:
+            decision_grade_kinds.add(cohort["kind"])
+
+    needed_kinds = [*REQUIRED_COHORT_KINDS]
+    uses_non_control_precision = False
+    for item in series:
+        profile = item["workload"].get(
+            "precision_profile", identity.V1_CONTROL_PRECISION_PROFILE
+        )
+        if profile != identity.V1_CONTROL_PRECISION_PROFILE:
+            uses_non_control_precision = True
+            break
+    if uses_non_control_precision:
+        needed_kinds += PRECISION_COHORT_KINDS
+
+    absent = [kind for kind in needed_kinds if kind not in decision_grade_kinds]
+    if absent:
+        raise PublisherError(
+            "promotion lacks decision-grade cohort kinds: " + ", ".join(absent)
+        )
+
+    for kind, expected in REQUIRED_PROMOTION_COHORT_COUNTS.items():
+        of_kind = [cohort for cohort in cohorts if cohort["kind"] == kind]
+        # The count is tested first so the grade scan only runs on a full set.
+        if len(of_kind) != expected or not all(
+            cohort["eligibility"]["decision_grade"] for cohort in of_kind
+        ):
+            raise PublisherError(
+                f"promotion requires exactly {expected} decision-grade {kind} cohorts"
+            )
+
+    chip_cohorts = [cohort for cohort in cohorts if cohort["kind"] == "chip"]
+    expected_chips = _expected_chip_cohort_count(series)
+    if len(chip_cohorts) != expected_chips or not all(
+        cohort["eligibility"]["decision_grade"] for cohort in chip_cohorts
+    ):
+        raise PublisherError(
+            f"promotion requires all {expected_chips} derived chip cohorts to be decision-grade"
+        )
+
+    series_by_id = {item["series_id"]: item for item in series}
+    required_anchors = {("uniform", False), ("zipf", False), ("zipf", True)}
+    for cohort in cohorts:
+        if cohort["kind"] != "routing" or not cohort["eligibility"]["decision_grade"]:
+            continue
+        # Index the cohort's members by their (routing, eplb) anchor.
+        anchored = {}
+        for series_id in cohort["series_ids"]:
+            anchor = (
+                series_by_id[series_id]["workload"]["routing"],
+                series_by_id[series_id]["workload"]["eplb"],
+            )
+            anchored[anchor] = series_by_id[series_id]
+        if (
+            len(cohort["series_ids"]) != len(required_anchors)
+            or set(anchored) != required_anchors
+        ):
+            raise PublisherError(
+                "promotion routing cohorts require exact uniform, zipf, and zipf+EPLB anchors"
+            )
+        uniform_contract = anchored[("uniform", False)]["build"][
+            "implementation_contract_sha256"
+        ]
+        zipf_contract = anchored[("zipf", False)]["build"][
+            "implementation_contract_sha256"
+        ]
+        if uniform_contract != zipf_contract:
+            raise PublisherError(
+                "promotion routing cohorts require identical off-EPLB generated implementation"
+            )
+
+
+def _require_promotion_series(series: Sequence[dict[str, Any]]) -> None:
+    """Require a non-empty series list whose items are all ``decision-grade``.
+
+    Raises:
+        PublisherError: when ``series`` is empty or any item has another status.
+    """
+    if series and all(item["status"] == "decision-grade" for item in series):
+        return
+    raise PublisherError("promotion has unstable or incomplete required series")
+
+
+def build_dataset(
+    store: Store,
+    bundle_ids: Sequence[str],
+    *,
+    promote: bool,
+) -> dict[str, Any]:
+    """Assemble and validate one public dataset from explicitly named bundles.
+
+    The bundles are loaded from ``store``, ordered deterministically, and must
+    all share one matrix digest and one set of case IDs. From them the function
+    derives the public attempts, per-case coverage, per-series aggregates, and
+    the decision products (cohorts, rankings, recommendations, sensitivities),
+    then passes the assembled document through ``validate_public_dataset``.
+
+    Args:
+        store: Store the bundles are loaded from.
+        bundle_ids: Non-empty sequence of unique bundle IDs.
+        promote: When true, the promotion gates are enforced and the dataset's
+            promotion status is ``"promoted"``; otherwise it is ``"diagnostic"``.
+
+    Returns:
+        The assembled public dataset dictionary.
+
+    Raises:
+        PublisherError: for empty or duplicate bundle IDs, bundles that do not
+            share one matrix and case set, a selected raw document without its
+            sample document, or (with ``promote``) any unmet promotion rule.
+    """
+    if not bundle_ids or len(bundle_ids) != len(set(bundle_ids)):
+        raise PublisherError("dataset requires unique explicit bundle IDs")
+    loaded = [load_bundle(store, bundle_id) for bundle_id in bundle_ids]
+    # Deterministic bundle order: numeric run ID, then run attempt, then bundle ID.
+    loaded.sort(key=lambda bundle: (
+        int(bundle["manifest"]["run"]["run_id"]),
+        bundle["manifest"]["run"]["run_attempt"],
+        bundle["id"],
+    ))
+    # Every bundle must carry the same matrix digest and the same set of case IDs.
+    matrix_ids = {bundle["manifest"]["matrix"]["sha256"] for bundle in loaded}
+    case_sets = [{case["case_id"] for case in bundle["cases"]} for bundle in loaded]
+    if len(matrix_ids) != 1 or len({tuple(sorted(values)) for values in case_sets}) != 1:
+        raise PublisherError("dataset bundles do not share one exact requested matrix")
+    run_ids = [bundle["manifest"]["run"]["run_id"] for bundle in loaded]
+    qualification_indices = sorted(
+        bundle["manifest"]["run"]["qualification_index"] for bundle in loaded
+    )
+    # Promotion gate: exactly REQUIRED_ALLOCATIONS bundles from distinct runs,
+    # qualification indices 1, 2, 3, and every run on its first attempt.
+    if promote and (
+        len(loaded) != REQUIRED_ALLOCATIONS
+        or len(run_ids) != len(set(run_ids))
+        or qualification_indices != [1, 2, 3]
+        or any(bundle["manifest"]["run"]["run_attempt"] != 1 for bundle in loaded)
+    ):
+        raise PublisherError(
+            "promotion requires qualification indices 1, 2, and 3 from first-attempt runs"
+        )
+    if promote and matrix_ids != {CANONICAL_FULL_V1_MATRIX_SHA256}:
+        raise PublisherError("promotion requires the canonical full-v1 matrix")
+    # Case definitions are read from the first bundle in sorted order.
+    cases = {case["case_id"]: case for case in loaded[0]["cases"]}
+    if promote:
+        _require_runnable_promotion_success(loaded, cases)
+    all_documents = [
+        document for bundle in loaded for document in bundle["documents"].values()
+    ]
+    # Attempt IDs named by any bundle's coverage selections; used to flag the
+    # corresponding public attempts as selected.
+    selected_ids = {
+        selection["selected_attempt_id"]
+        for bundle in loaded for selection in bundle["manifest"]["coverage"]["selections"]
+    }
+    public_attempts = [
+        _public_attempt(
+            document, selected=document["identity"]["attempt_id"] in selected_ids
+        )
+        for document in all_documents
+    ]
+    _unique([attempt["attempt_id"] for attempt in public_attempts], "dataset attempts")
+    # For each case, the selected document of every bundle, in bundle order.
+    selected_by_case: dict[str, list[dict[str, Any]]] = {
+        case_id: [bundle["selected"][case_id] for bundle in loaded]
+        for case_id in sorted(cases)
+    }
+    samples_by_attempt = {
+        attempt_id: sample_document
+        for bundle in loaded
+        for attempt_id, sample_document in bundle["sample_documents"].items()
+    }
+    coverage: list[dict[str, Any]] = []
+    for case_id, case in sorted(cases.items()):
+        attempts = sorted(
+            (attempt for attempt in public_attempts if attempt["case_id"] == case_id),
+            key=lambda attempt: (
+                int(attempt["run_id"]), attempt["run_attempt"],
+                attempt["attempt_index"], attempt["attempt_id"],
+            ),
+        )
+        # The last bundle in sorted order supplies the document that represents
+        # this case in the coverage entry.
+        selected_document = selected_by_case[case_id][-1]
+        selected = _public_attempt(selected_document, selected=True)
+        precision_profile = case.get(
+            "precision_profile", identity.V1_CONTROL_PRECISION_PROFILE
+        )
+        precision = identity.precision_profile(precision_profile)
+        # Only a successful raw-format selection contributes resource details,
+        # measurement rows, and a series ID.
+        selected_raw = (
+            selected_document
+            if selected_document["format"] == contracts.RAW_FORMAT
+            and selected_document["outcome"]["status"] == "success"
+            else None
+        )
+        if selected_raw is not None:
+            backend_generation = selected_raw["implementation"]["kernel_generation"]
+            projected = contracts.public_series_config(
+                kernel_generation=backend_generation,
+                provenance=selected_raw["implementation"]["provenance"],
+                resource_profile=selected_raw["implementation"]["resource_profile"],
+                resource_mode=selected_raw["case"]["resource_mode"],
+                device_product=selected_raw["topology"]["device_product"],
+            )
+            resource = projected["resource"]
+            rows_by_token = {
+                row["tokens_per_rank"]: row for row in selected_raw["measurement"]["rows"]
+            }
+            series_id = selected_raw["identity"]["series_id"]
+        else:
+            # Placeholder values when no successful raw measurement backs the case.
+            backend_generation = None
+            resource = {
+                "mode": "fixed-profile",
+                "profile": None,
+                "comm_units_kind": None,
+                "configured_units": None,
+            }
+            rows_by_token = {}
+            series_id = None
+        point_status = (
+            "measured" if selected["outcome"] == "success" else selected["outcome"]
+        )
+        # Measured points carry no reason; unsupported points use the matrix
+        # case's reason; every other status uses the selected attempt's reason.
+        point_reason = (
+            None
+            if point_status == "measured"
+            else case["_reason"]
+            if point_status == "unsupported"
+            else selected["reason"]
+        )
+        token_ladder = [int(value) for value in case["ladder"].split()]
+        # One coverage point per ladder entry; all share the case-level status.
+        coverage_points = []
+        for token in token_ladder:
+            row = rows_by_token.get(token)
+            coverage_points.append({
+                "point_id": row["point_id"] if row is not None else None,
+                "series_id": series_id if row is not None else None,
+                "tokens_per_rank": token,
+                "global_tokens": token * case["ep"],
+                "terminal_status": point_status,
+                "reason": point_reason,
+            })
+        coverage.append({
+            "case_id": case_id,
+            "label": (
+                f"{case['sku'].upper()} / {case['backend']} / EP{case['ep']} / "
+                f"{case['mode']} / {case['phase']} / {case['routing']}"
+            ),
+            "required": True,
+            "sku": _slug(case["sku"]),
+            "suite": _slug(case["suite"]),
+            "workload": _slug(case["workload"]),
+            "publication_tier": case["required_publication"],
+            "backend": _slug(case["backend"]),
+            "backend_generation": backend_generation,
+            "mode": case["mode"],
+            "phase": case["phase"],
+            "routing": case["routing"],
+            "eplb": case["eplb"],
+            "precision_profile": precision_profile,
+            "dispatch_precision": precision["dispatch"],
+            "combine_precision": precision["combine"],
+            "resource": resource,
+            "topology": _coverage_topology(case),
+            "points": coverage_points,
+            "disposition": case["_disposition"],
+            "selected_attempt_id": selected["attempt_id"],
+            "outcome": selected["outcome"],
+            "failure_mode": selected["failure_mode"],
+            "reason": case["_reason"] if case["_disposition"] == "unsupported" else selected["reason"],
+            "attempt_ids": [attempt["attempt_id"] for attempt in attempts],
+        })
+    # Group the successful raw selections of all bundles by series ID.
+    by_series: dict[str, list[dict[str, Any]]] = {}
+    for case_documents in selected_by_case.values():
+        for document in case_documents:
+            if (
+                document["format"] == contracts.RAW_FORMAT
+                and document["outcome"]["status"] == "success"
+            ):
+                by_series.setdefault(document["identity"]["series_id"], []).append(document)
+    series: list[dict[str, Any]] = []
+    internals: dict[str, dict[str, Any]] = {}
+    for series_id, documents in sorted(by_series.items()):
+        # Every grouped document must have a sample document keyed by its attempt ID.
+        try:
+            sample_documents = [
+                samples_by_attempt[document["identity"]["attempt_id"]]
+                for document in documents
+            ]
+        except KeyError as exc:
+            raise PublisherError(
+                "selected raw evidence is missing its private sample document"
+            ) from exc
+        item, internal = _build_series(
+            series_id, documents, sample_documents, len(loaded)
+        )
+        series.append(item)
+        internals[series_id] = internal
+    # Digest over the matrix ID and the sorted source bundle IDs, handed to
+    # build_decisions as its dataset binding.
+    dataset_binding = _sha_bytes(_canonical({
+        "matrix_id": next(iter(matrix_ids)),
+        "source_bundle_ids": sorted(bundle_ids),
+    }))
+    cohorts, rankings, recommendations, sensitivities = build_decisions(
+        series, internals, dataset_binding=dataset_binding
+    )
+    allocation_ids = sorted({attempt["allocation_id"] for attempt in public_attempts})
+    # De-duplicate the qualification indices and coerce them to int for the record.
+    qualification_indices = sorted({int(value) for value in qualification_indices})
+    # A case counts as measured (or unsupported) only when all of its points do.
+    # NOTE(review): all() over an empty point list is True, so a case with an
+    # empty ladder would be counted in both totals — confirm ladders are never empty.
+    measured_cases = sum(
+        all(point["terminal_status"] == "measured" for point in item["points"])
+        for item in coverage
+    )
+    unsupported_cases = sum(
+        all(point["terminal_status"] == "unsupported" for point in item["points"])
+        for item in coverage
+    )
+    requested_points = sum(len(item["points"]) for item in coverage)
+    measured_points = sum(
+        point["terminal_status"] == "measured"
+        for item in coverage for point in item["points"]
+    )
+    unsupported_points = sum(
+        point["terminal_status"] == "unsupported"
+        for item in coverage for point in item["points"]
+    )
+    status = "promoted" if promote else "diagnostic"
+    dataset = {
+        "format": FORMAT_PUBLIC,
+        "schema_version": 1,
+        "generated_at": _latest_timestamp(
+            [bundle["manifest"]["created_at"] for bundle in loaded]
+        ),
+        "source_bundle_ids": sorted(bundle_ids),
+        "promotion": {
+            "status": status,
+            "reason": None,
+            "matrix_id": next(iter(matrix_ids)),
+            "allocation_ids": allocation_ids,
+            "required_allocations": REQUIRED_ALLOCATIONS,
+            "qualification_indices": qualification_indices,
+            "requested_cases": len(coverage),
+            "terminal_cases": len(coverage),
+            "measured_cases": measured_cases,
+            "unsupported_cases": unsupported_cases,
+            "requested_points": requested_points,
+            "terminal_points": requested_points,
+            "measured_points": measured_points,
+            "unsupported_points": unsupported_points,
+            "policy": POLICY,
+        },
+        "coverage": coverage,
+        "attempts": sorted(public_attempts, key=lambda attempt: attempt["attempt_id"]),
+        "series": series,
+        "cohorts": cohorts,
+        "rankings": rankings,
+        "recommendations": recommendations,
+        "sensitivities": sensitivities,
+    }
+    # The series and cohort promotion rules run after assembly and before
+    # validate_public_dataset.
+    if promote:
+        _require_promotion_series(series)
+        _require_promotion_cohorts(cohorts, series)
+    validate_public_dataset(dataset)
+    return dataset
+
+
+def quarantine_incoming(
+    store: Store, ingest_id: str, reason: str, generated_at: str
+) -> str:
+    """Record a quarantine manifest for an incoming object and return its digest.
+
+    Args:
+        store: Store whose quarantine area receives the manifest.
+        ingest_id: ID of the incoming object being quarantined.
+        reason: Reason code; it must fully match ``REASON``.
+        generated_at: Timestamp written to the manifest's ``created_at`` field.
+
+    Returns:
+        The digest of the canonicalised manifest, which is also the name of the
+        installed quarantine directory.
+
+    Raises:
+        PublisherError: when ``reason`` or ``"{reason}-{ingest_id}"`` does not
+            match ``REASON``, or when the ``quarantine.json`` found at the
+            target after installation does not hash to the computed digest.
+    """
+    if REASON.fullmatch(reason) is None:
+        raise PublisherError("quarantine reason must be a machine code")
+    # The combined form is only validated here; the manifest stores the bare reason.
+    public_reason = f"{reason}-{ingest_id}"
+    if REASON.fullmatch(public_reason) is None:
+        raise PublisherError("quarantine reason and incoming ID exceed the public reason contract")
+    manifest = {
+        "format": "collectivex.quarantine.v1",
+        "schema_version": 1,
+        "created_at": generated_at,
+        "incoming_id": ingest_id,
+        "reason": reason,
+    }
+    # The object is content-addressed by the digest of its canonical manifest.
+    digest = _sha_bytes(_canonical(manifest))
+    with store.staging(store.quarantine, private=True) as stage:
+        _write_json(stage / "quarantine.json", manifest, mode=0o600)
+        store.complete(stage, digest, private=True)
+        store.install(stage, store.quarantine / digest, private=True)
+    # Re-read the installed manifest and confirm it hashes to the same digest.
+    if _sha_bytes(_canonical(strict_load(store.quarantine / digest / "quarantine.json"))) != digest:
+        raise PublisherError("existing quarantine object differs")
+    return digest
+
+
+def _store_from_args(args: argparse.Namespace) -> Store:
+    """Open the store named by ``--store-root`` or ``COLLECTIVEX_STORE_ROOT``.
+
+    The command-line value wins when it is set and non-empty.
+
+    Raises:
+        PublisherError: when neither source gives a root, or the root is not
+            an absolute path.
+    """
+    root = args.store_root
+    if not root:
+        root = os.environ.get("COLLECTIVEX_STORE_ROOT")
+    if not root:
+        raise PublisherError("COLLECTIVEX_STORE_ROOT or --store-root is required")
+    if Path(root).is_absolute():
+        return Store(root)
+    raise PublisherError("COLLECTIVEX_STORE_ROOT must be an absolute path")
+
+
+def _run_metadata(args: argparse.Namespace) -> dict[str, Any]:
+    """Check the operator-supplied GHA run assertions and return them as a dict.
+
+    Nothing here touches the network: the publisher is deliberately offline, so
+    the values are only checked for shape. The caller must already have
+    confirmed the workflow identity and conclusion against GitHub; the
+    artifact-internal provenance is later required to match these values exactly.
+
+    Returns:
+        A dict with ``repository``, ``run_id``, ``run_attempt``,
+        ``qualification_index``, and ``source_sha``.
+
+    Raises:
+        PublisherError: naming the first argument that fails its shape check.
+    """
+    repository = args.repository
+    run_id = args.run_id
+    run_attempt = args.run_attempt
+    qualification_index = args.qualification_index
+    source_sha = args.source_sha
+    # All shape checks complete before any filesystem mutation happens.
+    if re.fullmatch(r"[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+", repository or "") is None:
+        raise PublisherError("--repository must be owner/name")
+    if re.fullmatch(r"[1-9][0-9]*", run_id or "") is None:
+        raise PublisherError("--run-id must be a positive decimal string")
+    # Exact type tests keep bool (an int subclass) out of the integer fields.
+    if type(run_attempt) is not int or run_attempt < 1:
+        raise PublisherError("--run-attempt must be positive")
+    if type(qualification_index) is not int or not 1 <= qualification_index <= 3:
+        raise PublisherError("--qualification-index must be 1, 2, or 3")
+    if re.fullmatch(r"[0-9a-f]{40}", source_sha or "") is None:
+        raise PublisherError("--source-sha must be a 40-character lowercase Git SHA")
+    return {
+        "repository": repository,
+        "run_id": run_id,
+        "run_attempt": run_attempt,
+        "qualification_index": qualification_index,
+        "source_sha": source_sha,
+    }
+
+
+def _ingest_inputs(
+    args: argparse.Namespace,
+) -> tuple[dict[str, Any], Path, list[Path]]:
+    """Validate the ``ingest`` arguments and resolve their paths.
+
+    Returns:
+        ``(run, matrix, artifacts)``: the validated run metadata, the absolute
+        matrix path, and the absolute artifact paths in command-line order.
+
+    Raises:
+        PublisherError: for invalid run metadata, a matrix that is a symlink or
+            not a regular file, no artifacts, duplicate artifact names, or an
+            artifact that is a symlink or neither a directory nor a file.
+    """
+    run = _run_metadata(args)
+
+    matrix = Path(args.matrix).absolute()
+    if matrix.is_symlink() or not matrix.is_file():
+        raise PublisherError("--matrix must be a regular non-symlink file")
+
+    artifacts = []
+    for value in args.artifact:
+        artifacts.append(Path(value).absolute())
+    if len(artifacts) == 0:
+        raise PublisherError("at least one --artifact is required")
+
+    # Names are derived for every artifact before duplicates are looked for.
+    names = [_artifact_name(path) for path in artifacts]
+    if len(set(names)) != len(names):
+        raise PublisherError("--artifact contains duplicate GHA names")
+
+    for path in artifacts:
+        if path.is_symlink() or (not path.is_dir() and not path.is_file()):
+            raise PublisherError("--artifact must be a regular ZIP or real directory")
+    return run, matrix, artifacts
+
+
+def _bundle_ids(values: Sequence[str], *, promote: bool) -> list[str]:
+    """Return ``values`` as a list after checking they are valid bundle IDs.
+
+    The list must be non-empty, free of duplicates, and every entry must fully
+    match ``HEX64``. With ``promote`` set, the list must also hold exactly
+    ``REQUIRED_ALLOCATIONS`` entries.
+
+    Raises:
+        PublisherError: when any of those conditions is not met.
+    """
+    bundle_ids = [*values]
+    malformed = "bundle IDs must be unique SHA-256 digests"
+    if not bundle_ids:
+        raise PublisherError(malformed)
+    if len(set(bundle_ids)) != len(bundle_ids):
+        raise PublisherError(malformed)
+    for value in bundle_ids:
+        if HEX64.fullmatch(value) is None:
+            raise PublisherError(malformed)
+    if promote and len(bundle_ids) != REQUIRED_ALLOCATIONS:
+        raise PublisherError("promotion requires exactly three explicit bundle IDs")
+    return bundle_ids
+
+
+def ingest_command(args: argparse.Namespace) -> dict[str, Any]:
+    """Archive one run's matrix and artifacts, then build a bundle from them.
+
+    Inputs are validated before the store is opened. Under the store lock the
+    delivery is archived as an incoming object and ``build_bundle`` is run on
+    it. If bundle building raises one of the handled validation errors, the
+    incoming object is quarantined and the failure is re-raised.
+
+    Returns:
+        A dict with ``status`` ``"accepted"``, the ``incoming_id``, and the
+        ``bundle_id``.
+
+    Raises:
+        PublisherError: for invalid inputs or store configuration, or after a
+            validation failure has been quarantined (the message names both the
+            incoming ID and the quarantine ID, and chains the original error).
+    """
+    run, matrix, artifacts = _ingest_inputs(args)
+    store = _store_from_args(args)
+    with store.locked():
+        # Archiving sits outside the try block, so its errors are not quarantined here.
+        ingest_id, incoming, _ = archive_incoming(
+            store, matrix, artifacts, run
+        )
+        try:
+            bundle_id, _, _ = build_bundle(store, ingest_id, incoming, run)
+            return {
+                "status": "accepted", "incoming_id": ingest_id,
+                "bundle_id": bundle_id,
+            }
+        except (
+            PublisherError, contracts.ContractError, artifact_safety.ArtifactSafetyError,
+            jsonschema.ValidationError,
+        ) as exc:
+            # Invalid delivery bytes provide no trusted timestamp. A fixed sentinel keeps
+            # repeated quarantine of the same immutable incoming object content-idempotent.
+            generated_at = "1970-01-01T00:00:00Z"
+            quarantine_id = quarantine_incoming(
+                store, ingest_id, "artifact-validation-failed", generated_at
+            )
+            raise PublisherError(
+                f"incoming {ingest_id} quarantined as {quarantine_id}: {exc}"
+            ) from exc
+
+
+def promote_command(args: argparse.Namespace) -> dict[str, Any]:
+    """Build a promoted dataset from the given bundles and publish it.
+
+    Under the store lock the dataset is built with promotion rules enforced,
+    installed, set as the target of the ``dev-latest`` channel, and the channel
+    is then verified.
+
+    Returns:
+        A dict with ``status`` ``"promoted"``, the bundle IDs, the installed
+        dataset digest, and the channel name.
+    """
+    requested = _bundle_ids(args.bundle, promote=True)
+    store = _store_from_args(args)
+    channel = "dev-latest"
+    with store.locked():
+        dataset = build_dataset(store, requested, promote=True)
+        digest, size = store.install_dataset(dataset)
+        store.update_channel(channel, digest, size, dataset["generated_at"])
+        store.verify_channel(channel)
+    outcome = {"status": "promoted", "bundle_ids": requested}
+    outcome["dataset_sha256"] = digest
+    outcome["channel"] = channel
+    return outcome
+
+
+def verify_command(args: argparse.Namespace) -> dict[str, Any]:
+    """Verify channel pointers and, optionally, a list of bundles.
+
+    When no channel is given, ``dev-latest`` is verified. Any other channel
+    name is rejected. Bundle IDs are validated only when some are supplied.
+
+    Returns:
+        A dict with ``status`` ``"verified"``, the per-channel results of
+        ``store.verify_channel``, and the IDs of the bundles that loaded.
+
+    Raises:
+        PublisherError: for malformed bundle IDs or an unknown channel.
+    """
+    bundle_ids = []
+    if args.bundle:
+        bundle_ids = _bundle_ids(args.bundle, promote=False)
+    channels = args.channel if args.channel else ["dev-latest"]
+    for channel in channels:
+        if channel != "dev-latest":
+            raise PublisherError("unknown channel")
+    store = _store_from_args(args)
+    with store.locked():
+        pointers = {}
+        for channel in channels:
+            pointers[channel] = store.verify_channel(channel)
+        bundles = []
+        for bundle_id in bundle_ids:
+            bundles.append(load_bundle(store, bundle_id)["id"])
+    return {"status": "verified", "channels": pointers, "bundle_ids": bundles}
+
+
+def _parser() -> argparse.ArgumentParser:
+    """Build the command-line parser with ``ingest``, ``promote``, and ``verify``.
+
+    ``--store-root`` is a top-level option. A subcommand is required and its
+    name is stored in ``command``.
+    """
+    root = argparse.ArgumentParser(description="CollectiveX isolated filesystem publisher")
+    root.add_argument("--store-root", help="defaults to COLLECTIVEX_STORE_ROOT")
+    commands = root.add_subparsers(dest="command", required=True)
+
+    # ingest: every option is required; --artifact may be repeated.
+    ingest = commands.add_parser("ingest", help="archive and validate one complete GHA run")
+    ingest.add_argument("--matrix", required=True)
+    ingest.add_argument("--artifact", action="append", required=True)
+    for flag in ("--repository", "--run-id"):
+        ingest.add_argument(flag, required=True)
+    for flag in ("--run-attempt", "--qualification-index"):
+        ingest.add_argument(flag, required=True, type=int)
+    ingest.add_argument("--source-sha", required=True)
+
+    # promote: one or more explicit --bundle values.
+    promote = commands.add_parser("promote", help="publish explicit independent bundles")
+    promote.add_argument("--bundle", action="append", required=True)
+
+    # verify: both options are optional and repeatable.
+    verify = commands.add_parser("verify", help="verify immutable targets and pointers")
+    verify.add_argument("--channel", action="append", choices=["dev-latest"])
+    verify.add_argument("--bundle", action="append", default=[])
+    return root
+
+
+def main() -> int:
+    """Run the selected subcommand and report its result as JSON.
+
+    On success the result is printed to stdout with sorted keys and 0 is
+    returned. A handled error is printed to stderr as
+    ``{"status": "error", "error": ...}`` and 2 is returned.
+    """
+    args = _parser().parse_args()
+    handlers = {
+        "ingest": ingest_command,
+        "promote": promote_command,
+        "verify": verify_command,
+    }
+    try:
+        handler = handlers.get(args.command)
+        if handler is None:
+            raise PublisherError(f"unknown command {args.command!r}")
+        result = handler(args)
+    except (
+        PublisherError, contracts.ContractError, artifact_safety.ArtifactSafetyError,
+        jsonschema.ValidationError, OSError,
+    ) as exc:
+        failure = {"status": "error", "error": str(exc)}
+        print(json.dumps(failure), file=sys.stderr)
+        return 2
+    print(json.dumps(result, sort_keys=True))
+    return 0
+
+
+# Script entry point: exit the process with the code returned by main().
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/experimental/CollectiveX/requirements.txt b/experimental/CollectiveX/requirements.txt
new file mode 100644
index 000000000..f68f97d83
--- /dev/null
+++ b/experimental/CollectiveX/requirements.txt
@@ -0,0 +1,8 @@
+# Host-side matrix generation. GPU libraries are supplied by benchmark images.
+PyYAML==6.0.2
+
+# Canonical workload serialization.
+numpy>=1.26,<3
+
+# Host-only strict artifact publisher schemas (never imported by GPU execution).
+jsonschema==4.25.1
diff --git a/experimental/CollectiveX/runtime/common.sh b/experimental/CollectiveX/runtime/common.sh
new file mode 100644
index 000000000..13d8bbf04
--- /dev/null
+++ b/experimental/CollectiveX/runtime/common.sh
@@ -0,0 +1,2435 @@
+# shellcheck shell=bash
+# CollectiveX — shared launcher helpers (sourced, not executed).
+#
+# Cluster-generic scaffolding only (Slurm/container/build/staging); no
+# model-serving. Logging goes to stderr so functions can `echo` a single
+# result on stdout.
+
+_CX_COMMON_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+CX_SQUASH_FORMAT_VERSION="repro-v1"
+CX_SQUASH_SOURCE_DATE_EPOCH=1
+CX_DEEPEP_V2_COMMIT="fa8a9b16898204afd347c663b89e65ef87dc6ce6" # pragma: allowlist secret
+CX_DEEPEP_V2_TREE="29809e75c5874e6609dac4804e7b651d5226959f" # pragma: allowlist secret
+CX_DEEPEP_V2_FMT_COMMIT="a4c7e17133ee9cb6a2f45545f6e974dd3c393efa" # pragma: allowlist secret
+CX_DEEPEP_HYBRID_COMMIT="e0a5b1d9848ab3e7b4a67842bf06f067bfac67f8" # pragma: allowlist secret
+CX_DEEPEP_HYBRID_TREE="d77aeab7f1bb52b615666fe178d26ced41fae08e" # pragma: allowlist secret
+CX_DEEPEP_HYBRID_NCCL_COMMIT="1e0c869c39bb33f1034cb9920bd2a8a8406f04a3" # pragma: allowlist secret
+unset COLLECTIVEX_OPERATOR_CONFIG_LOADED COLLECTIVEX_EPHEMERAL_CONFIG_PATH
+
+cx_log() { printf '[collectivex] %s\n' "$*" >&2; }
+cx_die() { printf '[collectivex] FATAL: %s\n' "$*" >&2; exit 1; }
+
+# Public failure telemetry is a closed vocabulary. Raw scheduler, container,
+# host, and filesystem diagnostics stay in the mode-0600 private logs.
+cx_set_failure_stage() {
+  local stage="$1"
+  case "$stage" in
+    setup|repository-stage|registry-verification|scheduler-allocation|container-import) ;;
+    container-hash|container-launch|backend-setup|execution|artifact-collection) ;;
+    *) cx_die "invalid launcher failure stage" ;;
+  esac
+  export CX_FAILSAFE_MODE="$stage"
+}
+
+cx_fail_stage() {
+  # Record a failure stage and log one coarse diagnostic label for it.
+  #   $1  stage name, validated and exported by cx_set_failure_stage
+  #   $2  optional path of a log file to classify
+  # The log is matched against the pattern families below in order and the
+  # first hit wins, so more specific families must stay ahead of broader ones.
+  # The label stays "unknown" when no log path is given or the file does not
+  # exist. Only the stage and the label are logged, never log content.
+  # Always returns 1.
+  local stage="$1" log_path="${2:-}" diagnostic="unknown"
+  cx_set_failure_stage "$stage"
+  if [ -n "$log_path" ] && [ -f "$log_path" ]; then
+    if grep -aEqi 'no space left|disk quota|quota exceeded' "$log_path"; then
+      diagnostic="storage-capacity"
+    elif grep -aEqi 'permission denied|operation not permitted|read-only file system|source mount (creation|ownership validation|permission inspection|permission normalization|permission validation) failed' "$log_path"; then
+      diagnostic="storage-permission"
+    elif grep -aEqi 'outside one realized LSA domain|lsa(Size| team| domain).*(mismatch|invalid|expected)|ranks.*not in (one|the same) nvlink.domain' "$log_path" \
+        || { [ "${CX_BENCH:-}" = deepep-v2 ] \
+          && grep -aEqi 'nccl[.]cu:(111|112)([^0-9]|$)' "$log_path"; }; then
+      # The nccl.cu line-number patterns are only consulted for deepep-v2.
+      diagnostic="accelerator-topology"
+    elif grep -aEqi 'cuda driver version is insufficient|call requires newer driver|cudaErrorCallRequiresNewerDriver|CUDA_ERROR_SYSTEM_DRIVER_MISMATCH|unsupported toolchain' "$log_path"; then
+      diagnostic="accelerator-driver"
+    elif grep -aEqi 'ncclDevCommCreate|ncclCommWindowRegister|ncclGetLsa(Device)?Pointer|Communicator does not support symmetric memory|Symmetric memory is not supported' "$log_path" \
+        || { [ "${CX_BENCH:-}" = deepep-v2 ] \
+          && grep -aEqi 'nccl[.]cu:(106|127|128|129|135)([^0-9]|$)' "$log_path"; }; then
+      diagnostic="nccl-device-api"
+    elif grep -aEqi 'NVCC (PTX )?compilation failed|cuobjdump failed|invalid device (kernel )?image|no kernel image is available' "$log_path"; then
+      diagnostic="jit-toolchain"
+    elif grep -aEqi 'cuda out of memory|CUDA_ERROR_OUT_OF_MEMORY|out of memory.*cuda' "$log_path"; then
+      diagnostic="accelerator-memory"
+    elif grep -aEqi 'does not match its pinned image contract|requires the exact pinned|version mismatch' "$log_path"; then
+      diagnostic="backend-version"
+    elif grep -aEqi 'nvshmem is unavailable|build-tool installation failed' "$log_path"; then
+      diagnostic="backend-dependency"
+    elif grep -aEqi 'revision fetch failed|submodule fetch failed|package installation failed|staged source is invalid|source (pin resolution|seed validation|seed copy|checkout creation|publication validation|existing source validation) failed' "$log_path"; then
+      diagnostic="backend-source"
+    elif grep -aEqi 'failed to mount|squashfs|enroot|pyxis|mount.*invalid argument|invalid argument.*mount' "$log_path"; then
+      diagnostic="container-runtime"
+    elif grep -aEqi 'backend preparation failed|build (failed|is incomplete)|cache (mount identity )?validation failed|import failed' "$log_path"; then
+      diagnostic="backend-build"
+    elif grep -aEqi 'command not found|not found on this runner|git lookup failed' "$log_path"; then
+      diagnostic="missing-runtime"
+    elif grep -aEqi 'too many requests|rate.?limit' "$log_path"; then
+      diagnostic="registry-rate-limit"
+    elif grep -aEqi 'timed out|operation timeout|wait timeout after|watchdog.*timeout|timeout: sending signal|connection reset|could not resolve|TLS|certificate' "$log_path"; then
+      diagnostic="network-or-timeout"
+    elif grep -aEqi 'salloc:|srun:.*(unable to create step|step creation|invalid partition|invalid account)|unable to create step|job allocation' "$log_path"; then
+      diagnostic="scheduler"
+    elif grep -aEqi 'SHARD done: [0-9]+/[0-9]+ case\(s\) failed|WARN: .* run failed rc=|completed with invalid semantic evidence' "$log_path"; then
+      diagnostic="benchmark-case-failure"
+    elif [ -s "$log_path" ]; then
+      # Non-empty log that matched none of the families above.
+      diagnostic="unclassified"
+    else
+      diagnostic="empty-log"
+    fi
+  fi
+  cx_log "ERROR: failure-class=$stage diagnostic=$diagnostic"
+  return 1
+}
+
+# Runner-local deployment settings are strict JSON kept outside the checkout.
+# Only the selected runner's allowlisted values are exported; the document is
+# never sourced or evaluated as shell.
+cx_load_operator_config() {
+  # Load the selected runner's settings from the operator JSON document and
+  # export them as CX_* variables.
+  #
+  # Inputs (environment):
+  #   COLLECTIVEX_OPERATOR_CONFIG_CONTENT   inline document; written to a
+  #                                         private file and removed after use
+  #   COLLECTIVEX_OPERATOR_CONFIG           path of an on-disk document; when
+  #                                         unset the XDG config location is used
+  #   COLLECTIVEX_OPERATOR_CONFIG_REQUIRED  1 = die when no inline content exists
+  #   COLLECTIVEX_OPERATOR_CONFIG_EPHEMERAL 1 = delete the on-disk document
+  #                                         after a successful load
+  #   COLLECTIVEX_CANONICAL_GHA             1 = a missing document is fatal and
+  #                                         audit_salt becomes mandatory
+  #   CX_RUNNER / CX_SHARD_SKU / CX_PUBLIC_RUNNER  first non-empty value selects
+  #                                         the runner entry
+  # Effects: clears previously exported deployment and rendezvous variables,
+  # then exports only the allowlisted fields of the selected runner (plus
+  # CX_AUDIT_SALT when present). The load is memoised per shell PID. Any
+  # validation failure terminates through cx_die with a generic message;
+  # the parser's stderr goes to a private log.
+  [ -n "${COLLECTIVEX_OPERATOR_CONFIG_LOADED:-}" ] \
+    && [ "$COLLECTIVEX_OPERATOR_CONFIG_LOADED" = "$$" ] && return 0
+  local config_path generated=0 parsed_path config_log key value
+  unset CX_PARTITION CX_ACCOUNT CX_SQUASH_DIR CX_STAGE_DIR CX_ENROOT_CACHE_PATH
+  unset ENROOT_CACHE_PATH
+  unset CX_EXCLUDE_NODES CX_NODELIST CX_LOCK_DIR CX_MASTER_PORT
+  unset CX_SOCKET_IFNAME CX_RDMA_DEVICES CX_IB_GID_INDEX CX_RDMA_SERVICE_LEVEL
+  unset CX_AUDIT_SALT
+  unset MASTER_ADDR MASTER_PORT RANK WORLD_SIZE LOCAL_RANK LOCAL_WORLD_SIZE
+  config_path="${COLLECTIVEX_OPERATOR_CONFIG:-${XDG_CONFIG_HOME:-${HOME}/.config}/inferencex/collectivex.json}"
+  if [ -n "${COLLECTIVEX_OPERATOR_CONFIG_CONTENT:-}" ]; then
+    umask 077
+    # Prefer the job root when it is a real directory owned by this user with
+    # mode 0700; otherwise fall back to a mktemp file under /tmp.
+    if [[ "${CX_JOB_ROOT:-}" =~ ^/tmp/inferencex-collectivex-[0-9]+-[0-9]+-[A-Za-z0-9._-]+$ ]] \
+        && [ -d "$CX_JOB_ROOT" ] && [ ! -L "$CX_JOB_ROOT" ] \
+        && [ "$(stat -c '%u:%a' "$CX_JOB_ROOT" 2>/dev/null)" = "$(id -u):700" ]; then
+      config_path="$CX_JOB_ROOT/operator-config.json"
+      # noclobber makes the create fail if the file already exists.
+      (set -C; : > "$config_path") 2>/dev/null \
+        || cx_die "cannot create ephemeral runner configuration"
+    else
+      config_path="$(mktemp /tmp/inferencex-collectivex-config.XXXXXX)" \
+        || cx_die "cannot create ephemeral runner configuration"
+    fi
+    COLLECTIVEX_EPHEMERAL_CONFIG_PATH="$config_path"
+    generated=1
+    if ! printf '%s' "$COLLECTIVEX_OPERATOR_CONFIG_CONTENT" > "$config_path"; then
+      unset COLLECTIVEX_OPERATOR_CONFIG_CONTENT
+      rm -f -- "$config_path"
+      unset COLLECTIVEX_EPHEMERAL_CONFIG_PATH
+      cx_die "cannot materialize runner configuration"
+    fi
+  elif [ "${COLLECTIVEX_OPERATOR_CONFIG_REQUIRED:-0}" = 1 ]; then
+    unset COLLECTIVEX_OPERATOR_CONFIG_CONTENT
+    cx_die "runner configuration is unavailable"
+  fi
+  # The inline document must not outlive this function in the environment.
+  unset COLLECTIVEX_OPERATOR_CONFIG_CONTENT COLLECTIVEX_OPERATOR_CONFIG_REQUIRED
+  if [ ! -e "$config_path" ]; then
+    [ "${COLLECTIVEX_CANONICAL_GHA:-0}" != 1 ] \
+      || cx_die "runner configuration is unavailable"
+    COLLECTIVEX_OPERATOR_CONFIG_LOADED="$$"
+    return 0
+  fi
+  umask 077
+  parsed_path="$(mktemp /tmp/inferencex-collectivex-parsed.XXXXXX)" || {
+    [ "$generated" = 0 ] || rm -f -- "$config_path"
+    cx_die "cannot parse runner configuration"
+  }
+  config_log="$(cx_private_log_path operator-config)"
+  # The parser writes NUL-separated KEY/VALUE pairs to $parsed_path and exits
+  # non-zero on any validation failure.
+  if ! python3 - "$config_path" "${CX_RUNNER:-${CX_SHARD_SKU:-${CX_PUBLIC_RUNNER:-}}}" \
+      "${COLLECTIVEX_CANONICAL_GHA:-0}" \
+      > "$parsed_path" 2> "$config_log" <<'PY'
+import json
+import os
+import posixpath
+import re
+import stat
+import sys
+
+RUNNERS = {
+    "h100-dgxc", "h200-dgxc", "b200-dgxc", "b300",
+    "gb200", "gb300", "mi325x", "mi355x",
+}
+FIELDS = {
+    "partition": "CX_PARTITION",
+    "account": "CX_ACCOUNT",
+    "squash_dir": "CX_SQUASH_DIR",
+    "stage_dir": "CX_STAGE_DIR",
+    "enroot_cache_path": "CX_ENROOT_CACHE_PATH",
+    "exclude_nodes": "CX_EXCLUDE_NODES",
+    "nodelist": "CX_NODELIST",
+    "lock_dir": "CX_LOCK_DIR",
+    "socket_ifname": "CX_SOCKET_IFNAME",
+    "rdma_devices": "CX_RDMA_DEVICES",
+    "ib_gid_index": "CX_IB_GID_INDEX",
+    "rdma_service_level": "CX_RDMA_SERVICE_LEVEL",
+}
+NETWORK_FIELDS = {
+    "socket_ifname", "rdma_devices", "ib_gid_index", "rdma_service_level",
+}
+REQUIRED = {
+    "h100-dgxc": {"partition", "account", "squash_dir", "stage_dir"},
+    "h200-dgxc": {"partition", "squash_dir", "stage_dir"},
+    "b200-dgxc": {"partition", "account", "squash_dir", "stage_dir"},
+    "b300": {
+        "partition", "account", "squash_dir", "stage_dir",
+    },
+    "gb200": {"partition", "account", "storage_roots"},
+    "gb300": {"partition", "account", "squash_dir", "stage_dir", "enroot_cache_path"},
+    "mi325x": {"partition", "squash_dir", "stage_dir"},
+    "mi355x": {"partition", "squash_dir", "stage_dir"},
+}
+ALLOWED = {
+    "h100-dgxc": REQUIRED["h100-dgxc"] | {"exclude_nodes", "stage_dir"} | NETWORK_FIELDS,
+    "h200-dgxc": REQUIRED["h200-dgxc"] | {"account", "exclude_nodes", "stage_dir"} | NETWORK_FIELDS,
+    "b200-dgxc": REQUIRED["b200-dgxc"] | {"exclude_nodes", "stage_dir"} | NETWORK_FIELDS,
+    "b300": REQUIRED["b300"] | {"exclude_nodes"} | NETWORK_FIELDS,
+    "gb200": REQUIRED["gb200"] | NETWORK_FIELDS,
+    "gb300": REQUIRED["gb300"] | NETWORK_FIELDS,
+    "mi325x": REQUIRED["mi325x"] | {"exclude_nodes", "nodelist", "stage_dir", "lock_dir"} | NETWORK_FIELDS,
+    "mi355x": REQUIRED["mi355x"] | {"exclude_nodes", "nodelist", "stage_dir", "lock_dir"} | NETWORK_FIELDS,
+}
+TOKEN = re.compile(r"^[A-Za-z0-9_.\[\],-]+$")
+PATH = re.compile(r"^/[A-Za-z0-9._/+\-]+$")
+IPV4 = re.compile(r"(?<!\d)(?:\d{1,3}\.){3}\d{1,3}(?!\d)")
+INTERFACES = re.compile(r"^[A-Za-z][A-Za-z0-9_.-]{0,31}(?:,[A-Za-z][A-Za-z0-9_.-]{0,31})*$")
+RDMA_DEVICES = re.compile(r"^[A-Za-z][A-Za-z0-9_.-]{0,31}(?::[1-9][0-9]*)?(?:,[A-Za-z][A-Za-z0-9_.-]{0,31}(?::[1-9][0-9]*)?)*$")
+AUDIT_SALT = re.compile(r"^[0-9a-f]{64}$")
+
+def pairs(items):
+    # object_pairs_hook that rejects duplicate keys instead of keeping the last.
+    result = {}
+    for key, value in items:
+        if key in result:
+            raise ValueError
+        result[key] = value
+    return result
+
+def valid_path(value):
+    # Absolute, already-normalised path of at most 1024 characters that
+    # contains nothing shaped like a dotted-quad address.
+    return (
+        isinstance(value, str) and len(value) <= 1024 and PATH.fullmatch(value)
+        and posixpath.normpath(value) == value and not IPV4.search(value)
+    )
+
+try:
+    path, runner, audit_required = sys.argv[1:]
+    if runner not in RUNNERS or audit_required not in {"0", "1"}:
+        raise ValueError
+    # The document must be a regular file owned by this user, mode 0600, and
+    # no larger than 64 KiB.
+    metadata = os.lstat(path)
+    if (
+        not stat.S_ISREG(metadata.st_mode) or metadata.st_uid != os.getuid()
+        or stat.S_IMODE(metadata.st_mode) != 0o600 or metadata.st_size > 65536
+    ):
+        raise ValueError
+    flags = os.O_RDONLY | getattr(os, "O_NOFOLLOW", 0)
+    descriptor = os.open(path, flags)
+    try:
+        # Confirm the opened file is the one that was inspected above.
+        opened = os.fstat(descriptor)
+        if (opened.st_dev, opened.st_ino) != (metadata.st_dev, metadata.st_ino):
+            raise ValueError
+        payload = b""
+        while len(payload) <= 65536:
+            chunk = os.read(descriptor, 65537 - len(payload))
+            if not chunk:
+                break
+            payload += chunk
+        # The loop reads one byte past the limit precisely so that a file
+        # which grew after the lstat size check is detected and rejected.
+        if len(payload) > 65536:
+            raise ValueError
+        document = json.loads(
+            payload.decode("utf-8"),
+            object_pairs_hook=pairs,
+            parse_constant=lambda _: (_ for _ in ()).throw(ValueError()),
+        )
+    finally:
+        os.close(descriptor)
+    if (
+        set(document) not in (
+            {"schema_version", "runners"},
+            {"schema_version", "audit_salt", "runners"},
+        )
+        or type(document["schema_version"]) is not int
+        or document["schema_version"] != 1
+    ):
+        raise ValueError
+    audit_salt = document.get("audit_salt")
+    if (
+        (audit_salt is not None and (
+            not isinstance(audit_salt, str) or not AUDIT_SALT.fullmatch(audit_salt)
+        ))
+        or (audit_required == "1" and audit_salt is None)
+    ):
+        raise ValueError
+    runners = document["runners"]
+    if (
+        not isinstance(runners, dict) or not runners or set(runners) - RUNNERS
+        or runner not in runners
+    ):
+        raise ValueError
+    selected = None
+    # Every runner entry is validated; required fields are only enforced for
+    # the selected runner.
+    for name, config in runners.items():
+        if (
+            not isinstance(config, dict)
+            or (name == runner and not REQUIRED[name].issubset(config))
+        ):
+            raise ValueError
+        if set(config) - ALLOWED[name]:
+            raise ValueError
+        for field, value in config.items():
+            if field == "storage_roots":
+                if (
+                    not isinstance(value, list) or not 1 <= len(value) <= 16
+                    or len(value) != len(set(value)) or not all(valid_path(item) for item in value)
+                ):
+                    raise ValueError
+            elif field == "socket_ifname":
+                if not isinstance(value, str) or not INTERFACES.fullmatch(value):
+                    raise ValueError
+            elif field == "rdma_devices":
+                if not isinstance(value, str) or not RDMA_DEVICES.fullmatch(value):
+                    raise ValueError
+            elif field == "ib_gid_index":
+                if type(value) is not int or not 0 <= value <= 255:
+                    raise ValueError
+            elif field == "rdma_service_level":
+                if type(value) is not int or not 0 <= value <= 15:
+                    raise ValueError
+            elif field.endswith(("_dir", "_path")):
+                if not valid_path(value):
+                    raise ValueError
+            elif (
+                not isinstance(value, str) or not value or len(value) > 512
+                or not TOKEN.fullmatch(value) or IPV4.search(value)
+            ):
+                raise ValueError
+        if name == runner:
+            selected = dict(config)
+    if selected is None:
+        raise ValueError
+    # storage_roots is resolved to the first root where both derived
+    # directories can be created and written; it is never exported itself.
+    roots = selected.pop("storage_roots", None)
+    if roots is not None:
+        for root in roots:
+            squash = posixpath.join(root, "collectivex", "containers")
+            stage = posixpath.join(root, "collectivex", "stage")
+            probes = []
+            try:
+                for directory in (squash, stage):
+                    os.makedirs(directory, mode=0o700, exist_ok=True)
+                    probe = posixpath.join(directory, f".write-probe-{os.getpid()}")
+                    fd = os.open(probe, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o600)
+                    os.close(fd)
+                    probes.append(probe)
+                selected.update(squash_dir=squash, stage_dir=stage)
+                break
+            except OSError:
+                pass
+            finally:
+                for probe in probes:
+                    try:
+                        os.unlink(probe)
+                    except OSError:
+                        pass
+        else:
+            raise ValueError
+    # Emit NUL-terminated KEY, VALUE records for the shell reader.
+    if audit_salt is not None:
+        sys.stdout.buffer.write(b"CX_AUDIT_SALT\0" + audit_salt.encode() + b"\0")
+    for field, value in selected.items():
+        key = FIELDS[field]
+        sys.stdout.buffer.write(
+            key.encode() + b"\0" + str(value).encode() + b"\0"
+        )
+except (KeyError, OSError, TypeError, UnicodeError, ValueError):
+    raise SystemExit(1)
+PY
+  then
+    rm -f -- "$parsed_path"
+    [ "$generated" = 0 ] || rm -f -- "$config_path"
+    unset COLLECTIVEX_EPHEMERAL_CONFIG_PATH
+    unset COLLECTIVEX_OPERATOR_CONFIG COLLECTIVEX_OPERATOR_CONFIG_EPHEMERAL
+    cx_die "runner-local configuration failed"
+  fi
+  # Assign and export each validated pair without evaluating it as shell.
+  while IFS= read -r -d '' key && IFS= read -r -d '' value; do
+    printf -v "$key" '%s' "$value"
+    export "${key?}"
+  done < "$parsed_path"
+  rm -f -- "$parsed_path"
+  if [ "$generated" = 1 ] || [ "${COLLECTIVEX_OPERATOR_CONFIG_EPHEMERAL:-0}" = 1 ]; then
+    rm -f -- "$config_path" || cx_die "cannot remove ephemeral runner configuration"
+  fi
+  unset COLLECTIVEX_EPHEMERAL_CONFIG_PATH
+  unset COLLECTIVEX_OPERATOR_CONFIG COLLECTIVEX_OPERATOR_CONFIG_EPHEMERAL
+  COLLECTIVEX_OPERATOR_CONFIG_LOADED="$$"
+}
+
+cx_private_log_path() {
+  # Create an empty private log file and print its path on stdout.
+  #   $1  label; the file is named <label>.log
+  # The file lives under /tmp/inferencex-collectivex-<uid>/<tag>/, where <tag>
+  # is COLLECTIVEX_EXECUTION_ID or manual_<shell pid>. The helper requires the
+  # root and tag directories to be owned by this user with mode 0700, removes
+  # sibling tag directories whose mtime is older than 24 hours, and creates
+  # the log exclusively with mode 0600, so asking for the same label twice
+  # under one tag fails. Any helper failure is fatal via cx_die.
+  local label="$1" tag="${COLLECTIVEX_EXECUTION_ID:-manual_$$}" path
+  path="$(python3 - "$tag" "$label" <<'PY' 2>/dev/null
+import os
+import re
+import shutil
+import stat
+import sys
+import time
+
+tag, label = sys.argv[1:]
+if not all(re.fullmatch(r"[A-Za-z0-9][A-Za-z0-9._-]*", value) for value in (tag, label)):
+    raise SystemExit(1)
+root = f"/tmp/inferencex-collectivex-{os.getuid()}"
+old_umask = os.umask(0o077)
+flags = os.O_RDONLY | os.O_DIRECTORY | getattr(os, "O_NOFOLLOW", 0)
+try:
+    try:
+        os.mkdir(root, 0o700)
+    except FileExistsError:
+        pass
+    root_fd = os.open(root, flags)
+    try:
+        metadata = os.fstat(root_fd)
+        if metadata.st_uid != os.getuid() or stat.S_IMODE(metadata.st_mode) != 0o700:
+            raise OSError("unsafe root")
+        cutoff = time.time() - 86400
+        for entry in os.scandir(root):
+            try:
+                if (
+                    entry.name != tag and entry.is_dir(follow_symlinks=False)
+                    and entry.stat(follow_symlinks=False).st_mtime < cutoff
+                ):
+                    shutil.rmtree(entry.path)
+            except OSError:
+                pass
+        try:
+            os.mkdir(tag, 0o700, dir_fd=root_fd)
+        except FileExistsError:
+            pass
+        directory_fd = os.open(tag, flags, dir_fd=root_fd)
+        try:
+            metadata = os.fstat(directory_fd)
+            if metadata.st_uid != os.getuid() or stat.S_IMODE(metadata.st_mode) != 0o700:
+                raise OSError("unsafe directory")
+            log_flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL | getattr(os, "O_NOFOLLOW", 0)
+            log_fd = os.open(f"{label}.log", log_flags, 0o600, dir_fd=directory_fd)
+            os.close(log_fd)
+        finally:
+            os.close(directory_fd)
+    finally:
+        os.close(root_fd)
+finally:
+    os.umask(old_umask)
+print(f"{root}/{tag}/{label}.log", end="")
+PY
+)" || cx_die "cannot create private runtime log"
+  printf '%s' "$path"
+}
+
+# Manual successes delete diagnostics immediately. Canonical workflow logs survive
+# until artifact upload succeeds; failed logs remain private for debugging, and a
+# later run prunes abandoned directories older than 24 hours.
+cx_cleanup_private_logs() {
+  # Remove this execution's private log directory after a successful run.
+  #   $1  exit status of the run; anything other than 0 keeps the logs
+  # The directory is /tmp/inferencex-collectivex-<uid>/<tag>, with <tag> taken
+  # from COLLECTIVEX_EXECUTION_ID or manual_<shell pid>. Removal happens only
+  # when the root is owned by this user with mode 0700 and the target is a real
+  # directory, not a symlink. Helper output is discarded and helper failures
+  # are ignored.
+  local rc="$1" tag="${COLLECTIVEX_EXECUTION_ID:-manual_$$}"
+  [ "$rc" = 0 ] || return 0
+  python3 - "$tag" <<'PY' >/dev/null 2>&1 || true
+import os
+import re
+import shutil
+import stat
+import sys
+
+tag = sys.argv[1]
+if not re.fullmatch(r"[A-Za-z0-9][A-Za-z0-9._-]*", tag):
+    raise SystemExit(1)
+root = f"/tmp/inferencex-collectivex-{os.getuid()}"
+flags = os.O_RDONLY | os.O_DIRECTORY | getattr(os, "O_NOFOLLOW", 0)
+root_fd = os.open(root, flags)
+try:
+    metadata = os.fstat(root_fd)
+    if metadata.st_uid != os.getuid() or stat.S_IMODE(metadata.st_mode) != 0o700:
+        raise SystemExit(1)
+finally:
+    os.close(root_fd)
+path = os.path.join(root, tag)
+if os.path.isdir(path) and not os.path.islink(path):
+    shutil.rmtree(path)
+PY
+}
+
+# Explicit Slurm export boundary. Operator config, runner credentials, HOME,
+# workspace paths, and unrelated service secrets never enter the container.
+cx_container_exports() {
+  # Print the comma-separated allowlist of variable names, with no trailing
+  # newline, so callers can splice the result into an --export argument.
+  local allowlist='COLLECTIVEX_SOURCE_SHA,COLLECTIVEX_ARTIFACT_NAME,COLLECTIVEX_EXECUTION_ID,COLLECTIVEX_CONTROL_SHA256,COLLECTIVEX_IMAGE,COLLECTIVEX_IMAGE_DIGEST,COLLECTIVEX_IMAGE_DIGEST_VERIFIED,COLLECTIVEX_SQUASH_SHA256,GITHUB_REF_NAME,GITHUB_REF,GITHUB_REPOSITORY,GITHUB_JOB,GITHUB_RUN_ID,GITHUB_RUN_ATTEMPT,GITHUB_SHA,CX_RUNNER,CX_BENCH,CX_NODES,CX_GPUS_PER_NODE,CX_SCALE_UP_DOMAIN,CX_SHARD_FILE,CX_SHARD_SKU,CX_PRECISION_PROBE,CX_NGPUS,CX_TS,CX_TOPO,CX_SCOPE,CX_TRANSPORT,CX_SCALE_UP_TRANSPORT,CX_SCALE_OUT_TRANSPORT,CX_MODE,CX_PHASE,CX_ROUTING,CX_EPLB,CX_CASE_ID,CX_SUITE,CX_WORKLOAD_NAME,CX_REQUIRED_PUBLICATION,CX_PRECISION_PROFILE,CX_QUALIFICATION_INDEX,CX_HIDDEN,CX_TOPK,CX_EXPERTS,CX_TOKENS_LADDER,CX_CANONICAL,CX_ITERS,CX_TRIALS,CX_WARMUP,CX_SAMPLES_PER_POINT,CX_WARMUP_SEMANTICS,CX_SEED,CX_RUN_TIMEOUT,CX_NCCL_HOME,CX_ALLOW_MNNVL,CX_ATTEMPT_ID,CX_RUNTIME_MARKER,CX_MORI_KERNEL_TYPE,CX_WORKLOAD_DIR,CX_BACKEND_CACHE_ROOT,CX_BACKEND_CACHE_SENTINEL_SHA256,CX_BACKEND_SOURCE_ROOT,CX_AUDIT_SALT,CX_SOCKET_IFNAME,CX_RDMA_DEVICES,CX_IB_GID_INDEX,CX_RDMA_SERVICE_LEVEL,MASTER_ADDR,MASTER_PORT,RANK,WORLD_SIZE,LOCAL_RANK,LOCAL_WORLD_SIZE,NCCL_NET,NCCL_SOCKET_IFNAME,GLOO_SOCKET_IFNAME,NCCL_IB_HCA,NCCL_IB_GID_INDEX,NCCL_IB_SL,NVSHMEM_HCA_LIST,NVSHMEM_IB_GID_INDEX,NVSHMEM_IB_SL,NVSHMEM_IB_ENABLE_IBGDA,NVSHMEM_IBGDA_NIC_HANDLER,EP_NIC_NAME,EP_OVERRIDE_RDMA_SL,UCCL_SOCKET_IFNAME,UCCL_IB_GID_INDEX,UCCL_IB_SL,MORI_RDMA_DEVICES,HYBRID_EP_MULTINODE,USE_NIXL,RDMA_CORE_HOME,DEEPEP_HYBRID_BUILD_MODE,NCCL_CUMEM_ENABLE,NCCL_MNNVL_ENABLE,MC_FORCE_MNNVL,MORI_DISABLE_AUTO_XGMI,MORI_ENABLE_SDMA,MORI_APP_LOG_LEVEL,MORI_SHMEM_LOG_LEVEL,MORI_IO_LOG_LEVEL'
+  printf '%s%s' "$allowlist" ',MORI_COMMIT'
+}
+
+# Host-side utility steps need only the basic login paths. They never receive
+# the complete Actions or runner environment.
+cx_host_exports() {
+  # Print the comma-separated host-side allowlist, with no trailing newline.
+  local allowlist='HOME,PATH,USER,XDG_CACHE_HOME,ENROOT_CACHE_PATH'
+  printf '%s' "$allowlist"
+}
+
+cx_prepare_runtime_marker() {
+  local mount_src="$1" tag="${COLLECTIVEX_EXECUTION_ID:-${CX_TS:-}}" marker
+  [[ "$tag" =~ ^[A-Za-z0-9][A-Za-z0-9._-]*$ ]] \
+    || cx_die "cannot create runtime stage marker"
+  marker=".shards/runtime-stage-${tag}.txt"
+  mkdir -p "$mount_src/experimental/CollectiveX/.shards" >/dev/null 2>&1 \
+    || cx_die "cannot create runtime stage marker"
+  rm -f -- "$mount_src/experimental/CollectiveX/$marker" >/dev/null 2>&1 \
+    || cx_die "cannot reset runtime stage marker"
+  export CX_RUNTIME_MARKER="$marker"
+}
+
+cx_write_runtime_stage() {
+  local stage="$1" marker="${CX_RUNTIME_MARKER:-}"
+  [ -n "$marker" ] || return 0
+  [[ "$marker" =~ ^\.shards/runtime-stage-[A-Za-z0-9][A-Za-z0-9._-]*\.txt$ ]] \
+    || return 1
+  case "$stage" in backend-setup|execution) ;; *) return 1 ;; esac
+  printf '%s\n' "$stage" > "$marker"
+}
+
+cx_adopt_runtime_stage() {
+  # Read back the stage written to the runtime marker, delete the marker, and
+  # adopt that stage as the failure stage when it is backend-setup or execution.
+  #   $1  staged source root
+  # A missing, malformed, or unreadable marker is silently ignored.
+  local mount_src="$1" relative="${CX_RUNTIME_MARKER:-}" recorded="" marker_file
+  [ -n "$relative" ] || return 0
+  [[ "$relative" =~ ^\.shards/runtime-stage-[A-Za-z0-9][A-Za-z0-9._-]*\.txt$ ]] \
+    || return 0
+  marker_file="$mount_src/experimental/CollectiveX/$relative"
+  [ -f "$marker_file" ] || return 0
+  IFS= read -r recorded < "$marker_file" || true
+  rm -f -- "$marker_file" >/dev/null 2>&1 || true
+  if [ "$recorded" = backend-setup ] || [ "$recorded" = execution ]; then
+    cx_set_failure_stage "$recorded"
+  fi
+}
+
+cx_require_vars() {
+  # Die with the list of names when any variable named in "$@" is unset or
+  # empty. Local names are kept as-is because the indirect expansion below
+  # resolves caller-supplied names in this scope.
+  local name
+  local -a missing=()
+  for name in "$@"; do
+    if [ -z "${!name:-}" ]; then
+      missing+=("$name")
+    fi
+  done
+  if [ "${#missing[@]}" -ne 0 ]; then
+    cx_die "missing runner-local configuration: ${missing[*]} (set them in COLLECTIVEX_OPERATOR_CONFIG)"
+  fi
+}
+
+cx_bool_enabled() {
+  local normalized
+  normalized="$(printf '%s' "$1" | tr '[:upper:]' '[:lower:]')"
+  case "$normalized" in
+    1|true|yes) return 0 ;;
+    *) return 1 ;;
+  esac
+}
+
+cx_require_record_safe() {
+  # Die when any argument contains a pipe, a newline, or a carriage return.
+  local field
+  for field in "$@"; do
+    if [[ "$field" == *'|'* || "$field" == *$'\n'* || "$field" == *$'\r'* ]]; then
+      cx_die "manual case field contains a record delimiter"
+    fi
+  done
+}
+
+cx_require_single_node() {
+  [ "${CX_NODES:-1}" = "1" ] || cx_die "$1 supports one-node EP only"
+}
+
+# Convert private, runner-local network selectors into the public library
+# variables needed inside the container. Values are interface/HCA identifiers,
+# never addresses; the rendezvous hostname is derived from the allocation.
+cx_apply_network_profile() {
+  # Derive the per-library network variables from the CX_* selectors.
+  #   $1  node count (positive integer, otherwise fatal)
+  #   $2  transport name
+  # Every derived variable is cleared first. Nothing is exported for one-node
+  # runs or for the mnnvl transport. Otherwise both CX_SOCKET_IFNAME and
+  # CX_RDMA_DEVICES are mandatory, while CX_IB_GID_INDEX and
+  # CX_RDMA_SERVICE_LEVEL are applied only when set. Invalid selectors are
+  # fatal.
+  local nodes="$1" transport="$2" selector rdma_name rdma_names="" ep_nic=""
+  local -a selectors
+  [[ "$nodes" =~ ^[1-9][0-9]*$ ]] || cx_die "invalid network placement"
+  unset NCCL_NET NCCL_SOCKET_IFNAME GLOO_SOCKET_IFNAME NCCL_IB_HCA
+  unset NCCL_IB_GID_INDEX NCCL_IB_SL
+  unset NVSHMEM_HCA_LIST NVSHMEM_IB_GID_INDEX NVSHMEM_IB_SL
+  unset NVSHMEM_IB_ENABLE_IBGDA NVSHMEM_IBGDA_NIC_HANDLER
+  unset EP_NIC_NAME EP_OVERRIDE_RDMA_SL
+  unset UCCL_SOCKET_IFNAME UCCL_IB_GID_INDEX UCCL_IB_SL MORI_RDMA_DEVICES
+  [ "$nodes" -gt 1 ] && [ "$transport" != mnnvl ] || return 0
+  [ -n "${CX_SOCKET_IFNAME:-}" ] && [ -n "${CX_RDMA_DEVICES:-}" ] \
+    || cx_die "multi-node execution requires private socket and RDMA selectors"
+  if [ -n "${CX_SOCKET_IFNAME:-}" ]; then
+    [[ "$CX_SOCKET_IFNAME" =~ ^[A-Za-z][A-Za-z0-9_.-]{0,31}(,[A-Za-z][A-Za-z0-9_.-]{0,31})*$ ]] \
+      || cx_die "invalid private socket interface selector"
+    export NCCL_SOCKET_IFNAME="$CX_SOCKET_IFNAME" GLOO_SOCKET_IFNAME="$CX_SOCKET_IFNAME"
+    export UCCL_SOCKET_IFNAME="$CX_SOCKET_IFNAME"
+  fi
+  if [ -n "${CX_RDMA_DEVICES:-}" ]; then
+    [[ "$CX_RDMA_DEVICES" =~ ^[A-Za-z][A-Za-z0-9_.-]{0,31}(:[1-9][0-9]*)?(,[A-Za-z][A-Za-z0-9_.-]{0,31}(:[1-9][0-9]*)?)*$ ]] \
+      || cx_die "invalid private RDMA device selector"
+    IFS=, read -r -a selectors <<< "$CX_RDMA_DEVICES"
+    # Strip any ":port" suffix: rdma_names collects bare device names and
+    # ep_nic keeps the first one.
+    for selector in "${selectors[@]}"; do
+      rdma_name="${selector%%:*}"
+      rdma_names="${rdma_names}${rdma_names:+,}${rdma_name}"
+      [ -n "$ep_nic" ] || ep_nic="$rdma_name"
+    done
+    # NOTE(review): the leading "=" is presumably NCCL's exact-match prefix
+    # for NCCL_IB_HCA; confirm against the NCCL environment documentation.
+    export NCCL_NET=IB NCCL_IB_HCA="=$CX_RDMA_DEVICES"
+    export NVSHMEM_HCA_LIST="$CX_RDMA_DEVICES"
+    export MORI_RDMA_DEVICES="$rdma_names" EP_NIC_NAME="$ep_nic"
+  fi
+  if [ -n "${CX_IB_GID_INDEX:-}" ]; then
+    [[ "$CX_IB_GID_INDEX" =~ ^[0-9]+$ ]] && [ "$CX_IB_GID_INDEX" -le 255 ] \
+      || cx_die "invalid private IB GID index"
+    export NCCL_IB_GID_INDEX="$CX_IB_GID_INDEX" NVSHMEM_IB_GID_INDEX="$CX_IB_GID_INDEX"
+    export UCCL_IB_GID_INDEX="$CX_IB_GID_INDEX"
+  fi
+  if [ -n "${CX_RDMA_SERVICE_LEVEL:-}" ]; then
+    [[ "$CX_RDMA_SERVICE_LEVEL" =~ ^[0-9]+$ ]] && [ "$CX_RDMA_SERVICE_LEVEL" -le 15 ] \
+      || cx_die "invalid private RDMA service level"
+    export NCCL_IB_SL="$CX_RDMA_SERVICE_LEVEL" NVSHMEM_IB_SL="$CX_RDMA_SERVICE_LEVEL"
+    export UCCL_IB_SL="$CX_RDMA_SERVICE_LEVEL"
+    export EP_OVERRIDE_RDMA_SL="$CX_RDMA_SERVICE_LEVEL"
+  fi
+  export NVSHMEM_IB_ENABLE_IBGDA=1 NVSHMEM_IBGDA_NIC_HANDLER=gpu
+}
+
+# Prove that the operator-pinned scale-out fabric exists on every allocated
+# node before image import or backend initialization. Selector values and node
+# diagnostics stay in the runner-private log.
+cx_validate_network_profile_on_job() {
+  # Check on every node of an allocation that the configured socket
+  # interfaces and RDMA ports exist and are usable.
+  #   $1  Slurm job id
+  #   $2  node count
+  #   $3  transport name
+  # Returns 0 without probing for one-node runs or the mnnvl transport, 1 for
+  # malformed arguments or missing selectors, and the srun status when the
+  # per-node probe fails (after classifying the private log with the "setup"
+  # stage). The probe output is written only to the private log.
+  local job_id="$1" nodes="$2" transport="$3" log rc=0
+  # Validate the node count before comparing it numerically. A non-integer
+  # value would make `[ ... -gt 1 ]` fail and be mistaken for "nothing to do".
+  [[ "$nodes" =~ ^[1-9][0-9]*$ ]] || return 1
+  [ "$nodes" -gt 1 ] && [ "$transport" != mnnvl ] || return 0
+  [[ "$job_id" =~ ^[1-9][0-9]*$ ]] || return 1
+  [ -n "${CX_SOCKET_IFNAME:-}" ] && [ -n "${CX_RDMA_DEVICES:-}" ] \
+    || return 1
+  log="$(cx_private_log_path network-profile)" || return 1
+  # One task per node; the probe script is fed to every task on stdin.
+  srun --jobid="$job_id" --nodes="$nodes" --ntasks="$nodes" --ntasks-per-node=1 \
+    --chdir=/tmp --input=all \
+    --export="$(cx_host_exports),CX_SOCKET_IFNAME,CX_RDMA_DEVICES,CX_IB_GID_INDEX" \
+    bash -s > "$log" 2>&1 <<'BASH' || rc=$?
+set -euo pipefail
+[[ "$CX_SOCKET_IFNAME" =~ ^[A-Za-z][A-Za-z0-9_.-]{0,31}(,[A-Za-z][A-Za-z0-9_.-]{0,31})*$ ]]
+[[ "$CX_RDMA_DEVICES" =~ ^[A-Za-z][A-Za-z0-9_.-]{0,31}(:[1-9][0-9]*)?(,[A-Za-z][A-Za-z0-9_.-]{0,31}(:[1-9][0-9]*)?)*$ ]]
+if [ -n "${CX_IB_GID_INDEX:-}" ]; then
+  [[ "$CX_IB_GID_INDEX" =~ ^[0-9]+$ ]] && [ "$CX_IB_GID_INDEX" -le 255 ]
+fi
+IFS=, read -r -a interfaces <<< "$CX_SOCKET_IFNAME"
+for interface in "${interfaces[@]}"; do
+  [ -d "/sys/class/net/$interface" ]
+  state="$(cat "/sys/class/net/$interface/operstate")"
+  [ "$state" = up ] || [ "$state" = unknown ]
+done
+check_port() {
+  local port_path="$1" state gid
+  [ -d "$port_path" ] || return 1
+  read -r state _ < "$port_path/state"
+  [ "$state" = 4: ] || return 1
+  if [ -n "${CX_IB_GID_INDEX:-}" ]; then
+    [ -r "$port_path/gids/$CX_IB_GID_INDEX" ] || return 1
+    gid="$(tr -d ':0[:space:]' < "$port_path/gids/$CX_IB_GID_INDEX")"
+    [ -n "$gid" ] || return 1
+  fi
+}
+IFS=, read -r -a devices <<< "$CX_RDMA_DEVICES"
+for selector in "${devices[@]}"; do
+  device="${selector%%:*}"
+  configured_port=""
+  [ "$selector" = "$device" ] || configured_port="${selector#*:}"
+  ports="/sys/class/infiniband/$device/ports"
+  [ -d "$ports" ]
+  if [ -n "$configured_port" ]; then
+    check_port "$ports/$configured_port"
+  else
+    active=0
+    for port_path in "$ports"/*; do
+      if check_port "$port_path"; then
+        active=1
+        break
+      fi
+    done
+    [ "$active" = 1 ]
+  fi
+done
+BASH
+  if [ "$rc" != 0 ]; then
+    cx_fail_stage setup "$log" || true
+    return "$rc"
+  fi
+}
+
+cx_resolve_slurm_rendezvous() {
+  local job_id="$1" nodes master_addr master_port
+  [[ "$job_id" =~ ^[1-9][0-9]*$ ]] || cx_die "invalid rendezvous allocation"
+  nodes="$(squeue -j "$job_id" -h -o %N 2>/dev/null)"
+  master_addr="$(scontrol show hostnames "$nodes" 2>/dev/null | head -n1)"
+  master_port="${CX_MASTER_PORT:-29551}"
+  [[ "$master_addr" =~ ^[A-Za-z0-9][A-Za-z0-9._-]*$ ]] \
+    || cx_die "could not resolve the allocated primary node"
+  [[ "$master_port" =~ ^[1-9][0-9]*$ ]] && [ "$master_port" -le 65535 ] \
+    || cx_die "invalid distributed rendezvous port"
+  export MASTER_ADDR="$master_addr" MASTER_PORT="$master_port"
+}
+
+# Printed into `bash -c` for one Slurm task per GPU. Every rank derives its
+# identity from Slurm rather than accepting caller-supplied rank values.
+cx_slurm_rank_wrapper() {
+  # Print (not run) the per-task wrapper script on stdout. The heredoc is
+  # quoted, so nothing in it is expanded here. The emitted script exits 67
+  # when the Slurm task variables are missing or non-numeric, when
+  # SLURM_NTASKS differs from CX_NGPUS, when SLURM_LOCALID is not below
+  # CX_GPUS_PER_NODE, or when CX_PRECISION_PROBE is neither 0, 1, nor empty.
+  # Otherwise it exports RANK, WORLD_SIZE, LOCAL_RANK, and LOCAL_WORLD_SIZE
+  # and execs the probe or the EP runner with the forwarded arguments.
+  cat <<'BASH'
+case "${SLURM_PROCID:-}:${SLURM_NTASKS:-}:${SLURM_LOCALID:-}:${SLURM_NODEID:-}" in
+  *[!0-9:]*|:*|*::*|*:) exit 67 ;;
+esac
+[ "$SLURM_NTASKS" = "$CX_NGPUS" ] || exit 67
+[ "$SLURM_LOCALID" -lt "$CX_GPUS_PER_NODE" ] || exit 67
+export RANK="$SLURM_PROCID" WORLD_SIZE="$SLURM_NTASKS"
+export LOCAL_RANK="$SLURM_LOCALID" LOCAL_WORLD_SIZE="$CX_GPUS_PER_NODE"
+case "${CX_PRECISION_PROBE:-0}" in
+  1) exec python3 tests/probe_precision.py "$@" ;;
+  0|'') exec python3 tests/run_ep.py "$@" ;;
+  *) exit 67 ;;
+esac
+BASH
+}
+
+# A set shard path is an execution contract, never a hint. Validate it before
+# staging/allocation and again in-container so a missing or stale control file
+# cannot silently fall back to a manual single-case run.
+cx_validate_shard_control() {
+  # Validate the control file named by CX_SHARD_FILE and export its SHA-256
+  # as COLLECTIVEX_CONTROL_SHA256.
+  #   $1  CollectiveX root, used for relative shard paths and validator scripts
+  # Returns 0 immediately when CX_SHARD_FILE is empty. Otherwise CX_SHARD_SKU,
+  # CX_BENCH, and a positive CX_NODES are required, and the file must exist,
+  # be non-empty, and pass the validator that matches CX_PRECISION_PROBE.
+  # Every failure is fatal.
+  local cx_root="$1" shard="${CX_SHARD_FILE:-}"
+  local resolved sku digest validator failure
+  if [ -z "$shard" ]; then
+    return 0
+  fi
+  sku="${CX_SHARD_SKU:-}"
+  if [ -z "$sku" ]; then
+    cx_die "CX_SHARD_SKU is required with CX_SHARD_FILE"
+  fi
+  if [ -z "${CX_BENCH:-}" ]; then
+    cx_die "CX_BENCH is required with CX_SHARD_FILE"
+  fi
+  if ! [[ "${CX_NODES:-}" =~ ^[1-9][0-9]*$ ]]; then
+    cx_die "positive CX_NODES is required with CX_SHARD_FILE"
+  fi
+  # Try the path as given first, then relative to the CollectiveX root.
+  resolved="$shard"
+  if [ ! -f "$resolved" ]; then
+    resolved="${cx_root%/}/$shard"
+  fi
+  if [ ! -f "$resolved" ]; then
+    cx_die "shard control does not exist"
+  fi
+  if [ ! -s "$resolved" ]; then
+    cx_die "shard control is empty"
+  fi
+  if [ "${CX_PRECISION_PROBE:-0}" = 1 ]; then
+    validator="tests/probe_precision.py"
+    failure="invalid precision probe control"
+  else
+    validator="sweep_matrix.py"
+    failure="invalid shard control"
+  fi
+  if ! python3 "${cx_root%/}/$validator" \
+      --validate-control "$resolved" --expect-sku "$sku" \
+      --expect-backend "$CX_BENCH" --expect-nodes "$CX_NODES" >/dev/null 2>&1; then
+    cx_die "$failure"
+  fi
+  digest="$(sha256sum "$resolved" | awk '{print $1}')"
+  if ! [[ "$digest" =~ ^[0-9a-f]{64}$ ]]; then
+    cx_die "cannot hash shard control"
+  fi
+  export COLLECTIVEX_CONTROL_SHA256="$digest"
+}
+
+cx_precision_probe_control_fields() {
+  local cx_root="$1" shard="${CX_SHARD_FILE:-}" path
+  [ "${CX_PRECISION_PROBE:-0}" = 1 ] || return 1
+  path="$shard"
+  [ -f "$path" ] || path="${cx_root%/}/$shard"
+  python3 - "$path" <<'PY'
+import json
+import pathlib
+import sys
+
+path = pathlib.Path(sys.argv[1])
+document = json.loads(path.read_text())
+target = document["target"]
+values = (
+    document["id"], target["backend"], target["sku"], target["ep"],
+    target["mode"], target["precision_profile"],
+)
+if any("|" in str(value) or "\n" in str(value) for value in values):
+    raise SystemExit("unsafe precision probe control field")
+print("|".join(map(str, values)))
+PY
+}
+
+cx_apply_timing_profile() {
+  [ -n "${CX_TIMING:-}" ] || return 0
+  local iters trials warmup extra
+  IFS=: read -r iters trials warmup extra <<< "$CX_TIMING"
+  [[ "$iters" =~ ^[1-9][0-9]*$ && "$trials" =~ ^[1-9][0-9]*$ \
+    && "$warmup" =~ ^[1-9][0-9]*$ && -z "$extra" ]] \
+    || cx_die "CX_TIMING must be positive iters:trials:warmup"
+  export CX_ITERS="$iters" CX_TRIALS="$trials" CX_WARMUP="$warmup"
+}
+
+# Use an opaque, execution-bound name so a missing grant message can be
+# reconciled without exposing runner or shard details in public logs.
+cx_scheduler_job_name() {
+  local execution_id="${COLLECTIVEX_EXECUTION_ID:-manual-$$}" digest
+  digest="$(printf '%s' "$execution_id" | sha256sum | awk '{print $1}')" \
+    || return 1
+  [[ "$digest" =~ ^[0-9a-f]{64}$ ]] || return 1
+  printf 'cx-%s' "${digest:0:24}"
+}
+
+# Look up the allocation by its CollectiveX-managed job name, at most three
+# times with a 1 s then 2 s pause between attempts.
+#   $1  job name; must look like `cx-<24 hex digits>`.
+# Status:
+#   0  exactly one job ID was listed; JOB_ID is set to it.
+#   2  three successful queries all came back empty.
+#   1  anything failed or was ambiguous: unusable user or job name, squeue
+#      error, unparseable output, more than one match, or a failed sleep.
+# Statuses 0 and 2 also set CX_ALLOCATION_UNCERTAIN=0. Callers inspect those
+# state variables rather than the status because all missing-ID paths still fail.
+cx_reconcile_salloc_jobid() {
+  local job_name="$1" scheduler_user listing row attempt
+  local -a found=()
+  scheduler_user="$(id -un 2>/dev/null)" || return 1
+  [[ "$scheduler_user" =~ ^[A-Za-z0-9_.-]+$ ]] || return 1
+  [[ "$job_name" =~ ^cx-[0-9a-f]{24}$ ]] || return 1
+  for attempt in 1 2 3; do
+    found=()
+    listing="$(
+      squeue -h --user="$scheduler_user" --name="$job_name" -o %A 2>/dev/null
+    )" || return 1
+    # Every non-blank line must be exactly one positive job ID.
+    while IFS= read -r row; do
+      if [[ "$row" =~ ^[[:space:]]*$ ]]; then
+        continue
+      fi
+      [[ "$row" =~ ^[[:space:]]*([1-9][0-9]*)[[:space:]]*$ ]] || return 1
+      found+=("${BASH_REMATCH[1]}")
+    done <<< "$listing"
+    case "${#found[@]}" in
+      1)
+        JOB_ID="${found[0]}"
+        CX_ALLOCATION_UNCERTAIN=0
+        return 0
+        ;;
+      0) ;;
+      *) return 1 ;;
+    esac
+    if [ "$attempt" -eq 3 ]; then
+      CX_ALLOCATION_UNCERTAIN=0
+      return 2
+    fi
+    sleep "$((1 << (attempt - 1)))" || return 1
+  done
+  return 1
+}
+
+# Allocate via salloc's stable grant message and assign JOB_ID in this shell.
+# Raw scheduler output remains in the bounded private execution log.
+#   $@  extra salloc arguments. Job-name options (--job-name, -J) are rejected
+#       because the name is derived by cx_scheduler_job_name.
+# Returns 0 only when salloc exited 0 and a job ID is known. Every other path
+# returns 1; once salloc has run, the stage is reported through cx_fail_stage
+# first.
+# CX_ALLOCATION_UNCERTAIN is set to 1 just before salloc runs and reset to 0
+# once an ID is parsed from the log (or by cx_reconcile_salloc_jobid).
+cx_salloc_jobid() {
+  local log job_id job_name argument salloc_rc=0
+  log="$(cx_private_log_path scheduler-allocation)"
+  for argument in "$@"; do
+    case "$argument" in
+      --job-name|--job-name=*|-J|-J*)
+        cx_log "ERROR: scheduler job names are managed by CollectiveX"
+        return 1
+        ;;
+    esac
+  done
+  job_name="$(cx_scheduler_job_name)" || return 1
+  CX_ALLOCATION_UNCERTAIN=1
+  # salloc has no portable --parsable option. Parse the stable grant message
+  # used by the production launchers, while also accepting a bare ID from
+  # site wrappers.
+  salloc "$@" --job-name="$job_name" --no-shell > "$log" 2>&1 || salloc_rc=$?
+  job_id="$(sed -nE \
+    -e 's/^([0-9]+)(;[^[:space:]]+)?$/\1/p' \
+    -e 's/.*Granted job allocation ([0-9]+).*/\1/p' \
+    "$log" | head -n1)"
+  if [ -n "$job_id" ]; then
+    [[ "$job_id" =~ ^[0-9]+$ ]] || return 1
+    JOB_ID="$job_id"
+    CX_ALLOCATION_UNCERTAIN=0
+  fi
+  # JOB_ID is expanded with a default from here on: it may never have been
+  # assigned, and a bare expansion would abort the shell under `set -u` before
+  # the failure could be reported.
+  if [ "$salloc_rc" = 0 ] && [ -n "${JOB_ID:-}" ]; then
+    return 0
+  fi
+  # Without an ID, ask the queue whether an allocation exists after all. The
+  # lookup is skipped for exit statuses of 128 and above (the shell's encoding
+  # of death by signal). Its result only updates JOB_ID and
+  # CX_ALLOCATION_UNCERTAIN; the allocation still fails.
+  if [ -z "${JOB_ID:-}" ] && [ "$salloc_rc" -lt 128 ]; then
+    cx_reconcile_salloc_jobid "$job_name" || true
+  fi
+  cx_fail_stage scheduler-allocation "$log"
+  return 1
+}
+
+cx_cancel_job() {
+  local job_id="$1" active delay
+  [[ "$job_id" =~ ^[0-9]+$ ]] || return 1
+  scancel "$job_id" >/dev/null 2>&1 || true
+  for delay in 1 2 4 8 16 32; do
+    if ! active="$(squeue -h -j "$job_id" -o %A 2>/dev/null)"; then
+      sleep "$delay"
+      continue
+    fi
+    [ -n "$active" ] || return 0
+    sleep "$delay"
+  done
+  cx_log "ERROR: scheduled allocation did not terminate during cleanup"
+  return 1
+}
+
+cx_write_cleanup_guard() {
+  local state="$1" root="${CX_JOB_ROOT:-}" safe unsafe
+  [[ "$root" =~ ^/tmp/inferencex-collectivex-[0-9]+-[0-9]+-[A-Za-z0-9._-]+$ ]] \
+    && [ -d "$root" ] && [ ! -L "$root" ] \
+    && [ "$(stat -c '%u:%a' "$root" 2>/dev/null)" = "$(id -u):700" ] || return 0
+  safe="$root/cleanup-safe"
+  unsafe="$root/cleanup-unsafe"
+  umask 077
+  case "$state" in
+    safe) : > "$safe" && rm -f -- "$unsafe" ;;
+    unsafe) rm -f -- "$safe" && : > "$unsafe" ;;
+    *) return 1 ;;
+  esac
+}
+
+# Single multi-arch container for ALL NVIDIA SKUs: tag `v0.5.11-cu130` is an OCI
+# image index covering linux/amd64 (B200) + linux/arm64 (GB200); enroot import
+# pulls the matching arch. (cu130 = CUDA 13, system nccl.h in /usr/include, torch 2.9.x.)
+# Import remains tag-based because Enroot cannot reliably import a digest-qualified
+# Docker Hub reference non-interactively. The registry digest is resolved and checked
+# immediately before import, then recorded as verified provenance.
+# Pinned digest for CX_IMAGE_MULTIARCH; cx_default_image_digest returns it for that tag.
+CX_IMAGE_MULTIARCH_DIGEST="sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975"
+# (v0.5.12-cu130 was rejected: its 62 layers overflow enroot's overlay-based
+# squash creation on these nodes — "failed to mount overlay ... Invalid argument".
+# v0.5.11-cu130 imports cleanly.)
+# Runtime setup verifies the image-bundled DeepEP build for the detected GPU target.
+# Default image for the b200/gb200/b300/gb300/h100/h200 runner prefixes (see cx_default_image).
+CX_IMAGE_MULTIARCH="lmsysorg/sglang:v0.5.11-cu130"
+
+# AMD (ROCm/CDNA): separate single-arch images bundle MoRI.
+# Each image tag is paired with its pinned registry digest and a MoRI commit.
+# CX_IMAGE_AMD_MORI is the default for mi355x* runners, CX_IMAGE_AMD_MORI_MI325
+# for mi325x* runners (see cx_default_image).
+CX_IMAGE_AMD_MORI="rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2"
+CX_IMAGE_AMD_MORI_DIGEST="sha256:24c3b30d64475937abbb6498e3b29528649adcb836dde7a468979f767809b0e8"
+CX_MORI_COMMIT_MI355="99bc0a3a6e7a70aacc6372cd9a4275ccfb4de567" # pragma: allowlist secret
+CX_IMAGE_AMD_MORI_MI325="rocm/sgl-dev:sglang-0.5.14-rocm720-mi35x-mori-0701"
+CX_IMAGE_AMD_MORI_MI325_DIGEST="sha256:ea42375343c2ef8f73b3bdb9e1b7b435556e3ca92aba5e3f74ada29ba217fabc"
+CX_MORI_COMMIT_MI325="bf99bdf18fc69887a346913ca01c315c2aa9bd4c" # pragma: allowlist secret
+cx_default_image() {
+  case "$1" in
+    mi325x*) echo "$CX_IMAGE_AMD_MORI_MI325" ;;
+    mi355x*) echo "$CX_IMAGE_AMD_MORI" ;;
+    b200*|gb200*|b300*|gb300*|h100*|h200*) echo "$CX_IMAGE_MULTIARCH" ;;
+    *) cx_die "no default image for runner prefix: $1" ;;
+  esac
+}
+
+# Resolve the content digest Docker Hub currently serves for an image tag.
+#   $1  image reference `repo[:tag]`, optionally prefixed with `docker.io/` or
+#       `registry-1.docker.io/`. A missing tag means `latest`; a repository
+#       without a namespace is looked up under `library/`.
+# Prints `sha256:<64 hex digits>` without a trailing newline.
+# Calls cx_die for digest-qualified (`@`) references, for any other registry,
+# for a malformed reference, and when the token or digest cannot be obtained.
+cx_resolve_registry_digest() {
+  local image="$1" repository reference token digest registry
+  if [[ "$image" == *@* ]]; then
+    cx_die "digest-qualified image overrides are unsupported; configure a tag and pinned digest"
+  fi
+  # The first path component names a registry only when it looks like a host:
+  # it contains a dot or a port, or is `localhost`.
+  registry="${image%%/*}"
+  if [[ "$image" == */* && ( "$registry" == *.* || "$registry" == *:* || "$registry" = localhost ) ]]; then
+    case "$registry" in
+      docker.io|registry-1.docker.io) image="${image#*/}" ;;
+      *) cx_die "only Docker Hub images are supported by the registry verifier" ;;
+    esac
+  fi
+  # Split on the last colon; when there is none, the whole string is the
+  # repository and the tag defaults to `latest`.
+  repository="${image%:*}"
+  reference="${image##*:}"
+  [ "$repository" != "$image" ] || { repository="$image"; reference=latest; }
+  [ -n "$repository" ] && [ -n "$reference" ] \
+    || cx_die "configured image reference is malformed"
+  [[ "$repository" == */* ]] || repository="library/$repository"
+  # Request a pull-scoped bearer token for this repository; the JSON response
+  # is parsed by python3, whose failure triggers the cx_die below.
+  token="$(curl -fsSLG --connect-timeout 10 --max-time 30 --retry 2 \
+    --retry-delay 1 --retry-all-errors 'https://auth.docker.io/token' \
+    --data-urlencode 'service=registry.docker.io' \
+    --data-urlencode "scope=repository:${repository}:pull" \
+    | python3 -c 'import json,sys; print(json.load(sys.stdin)["token"])')" \
+    || cx_die "cannot authenticate to the image registry"
+  # HEAD the manifest (index and single-manifest media types are accepted) and
+  # take the Docker-Content-Digest response header, matched case-insensitively
+  # after stripping carriage returns.
+  digest="$(curl -fsSI --connect-timeout 10 --max-time 30 --retry 2 \
+    --retry-delay 1 --retry-all-errors \
+    -H "Authorization: Bearer $token" \
+    -H 'Accept: application/vnd.oci.image.index.v1+json, application/vnd.oci.image.manifest.v1+json, application/vnd.docker.distribution.manifest.list.v2+json, application/vnd.docker.distribution.manifest.v2+json' \
+    "https://registry-1.docker.io/v2/${repository}/manifests/${reference}" \
+    | tr -d '\r' | awk 'tolower($1)=="docker-content-digest:" {print $2; exit}')" \
+    || cx_die "cannot resolve the configured image digest"
+  # The pipeline's status is awk's, so a missing header is caught here.
+  [[ "$digest" =~ ^sha256:[0-9a-f]{64}$ ]] \
+    || cx_die "registry returned an invalid image digest"
+  printf '%s' "$digest"
+}
+
+cx_verify_registry_image() {
+  local image="$1" expected actual
+  expected="${CX_IMAGE_DIGEST:-$(cx_default_image_digest "$image")}"
+  [[ "$expected" =~ ^sha256:[0-9a-f]{64}$ ]] \
+    || cx_die "a pinned digest is required for the configured image"
+  actual="$(cx_resolve_registry_digest "$image")"
+  [ "$actual" = "$expected" ] \
+    || cx_die "configured image tag no longer matches its pinned digest"
+  export COLLECTIVEX_IMAGE="$image" COLLECTIVEX_IMAGE_DIGEST="$actual"
+  export COLLECTIVEX_IMAGE_DIGEST_VERIFIED=1
+}
+
+cx_default_image_digest() {
+  case "$1" in
+    "$CX_IMAGE_MULTIARCH") printf '%s' "$CX_IMAGE_MULTIARCH_DIGEST" ;;
+    "$CX_IMAGE_AMD_MORI") printf '%s' "$CX_IMAGE_AMD_MORI_DIGEST" ;;
+    "$CX_IMAGE_AMD_MORI_MI325") printf '%s' "$CX_IMAGE_AMD_MORI_MI325_DIGEST" ;;
+  esac
+}
+
+# Canonical workflow runs must not inherit benchmark controls from a persistent
+# self-hosted runner service. Manual/SSH diagnostics retain their explicit
+# overrides by leaving COLLECTIVEX_CANONICAL_GHA unset.
+#
+# Print GITHUB_WORKSPACE (no trailing newline) when it is usable as a stage
+# root, otherwise exit with status 1 and print nothing. Usable means: an
+# absolute path that is already its own realpath (no symlink components), an
+# existing directory, owned by the current user, and not world-writable.
+# Group write is tolerated on purpose (see the note inside the script).
+cx_gha_workspace_stage_root() {
+  local workspace="${GITHUB_WORKSPACE:-}"
+  python3 - "$workspace" <<'PY'
+import os
+import stat
+import sys
+
+workspace = sys.argv[1]
+try:
+    if (
+        not os.path.isabs(workspace)
+        or os.path.realpath(workspace) != workspace
+        or not os.path.isdir(workspace)
+    ):
+        raise OSError
+    metadata = os.stat(workspace, follow_symlinks=False)
+    # GitHub runner workspaces are runner-owned but commonly writable by the
+    # trusted runner-service group. Keep the child mode 0700 and reject world write.
+    if metadata.st_uid != os.getuid() or stat.S_IMODE(metadata.st_mode) & stat.S_IWOTH:
+        raise OSError
+except OSError:
+    raise SystemExit(1)
+print(workspace, end="")
+PY
+}
+
+# Create a per-UID cache under validated cluster-local storage. Only the fixed
+# /cx-cache mount enters the container; the operator host path does not.
+#   $1  stage parent directory; must be an absolute path without CR/LF.
+# On success exports CX_PREPARED_BACKEND_CACHE (absolute cache path) and
+# CX_BACKEND_CACHE_SENTINEL_SHA256 (hex SHA-256 of the cache's sentinel file).
+# Both variables are unset on entry, so a failure (status 1) leaves neither set.
+#
+# The embedded script works relative to an open descriptor of the resolved
+# parent directory and:
+#   1. creates and removes a throw-away probe directory to learn which uid new
+#      entries receive on that storage, requiring the probe's mode to be 0700;
+#   2. tries the cache directory `.collectivex-backend-cache-v3-<uid>` and, if
+#      opening or checking that one raises OSError, falls back to `-v4-`;
+#      the directory must be owned by the probed uid with mode 0700;
+#   3. writes 32 random bytes to a temporary file and hard-links it to
+#      `.collectivex-mount-sentinel-v1`; an existing sentinel is kept as is,
+#      and the temporary file is always removed;
+#   4. reads the sentinel back, requiring a regular file owned by the probed
+#      uid, mode 0600, exactly 32 bytes;
+#   5. prints `<sentinel sha256> <cache path>`.
+cx_prepare_backend_cache() {
+  local stage_parent="$1" cache info sentinel_sha256
+  unset CX_PREPARED_BACKEND_CACHE CX_BACKEND_CACHE_SENTINEL_SHA256
+  info="$(python3 - "$stage_parent" <<'PY'
+import hashlib
+import os
+import secrets
+import stat
+import sys
+
+configured_parent = sys.argv[1]
+try:
+    if (
+        not os.path.isabs(configured_parent)
+        or "\n" in configured_parent
+        or "\r" in configured_parent
+    ):
+        raise OSError
+    parent = os.path.realpath(configured_parent)
+    if not os.path.isdir(parent):
+        raise OSError
+    flags = os.O_RDONLY | os.O_DIRECTORY | getattr(os, "O_NOFOLLOW", 0)
+    parent_fd = os.open(parent, flags)
+    try:
+        probe_name = f".collectivex-owner-probe-{os.getpid()}-{secrets.token_hex(8)}"
+        os.mkdir(probe_name, 0o700, dir_fd=parent_fd)
+        try:
+            probe_fd = os.open(probe_name, flags, dir_fd=parent_fd)
+            try:
+                probe = os.fstat(probe_fd)
+                if stat.S_IMODE(probe.st_mode) & 0o777 != 0o700:
+                    raise OSError
+                realized_owner = probe.st_uid
+            finally:
+                os.close(probe_fd)
+        finally:
+            os.rmdir(probe_name, dir_fd=parent_fd)
+        for generation in (3, 4):
+            name = f".collectivex-backend-cache-v{generation}-{os.getuid()}"
+            try:
+                os.mkdir(name, 0o700, dir_fd=parent_fd)
+            except FileExistsError:
+                pass
+            try:
+                cache_fd = os.open(name, flags, dir_fd=parent_fd)
+                try:
+                    metadata = os.fstat(cache_fd)
+                    if (
+                        metadata.st_uid != realized_owner
+                        or stat.S_IMODE(metadata.st_mode) & 0o777 != 0o700
+                    ):
+                        raise OSError
+                    sentinel_name = ".collectivex-mount-sentinel-v1"
+                    temporary_name = (
+                        f"{sentinel_name}.tmp.{os.getpid()}.{secrets.token_hex(8)}"
+                    )
+                    create_flags = (
+                        os.O_WRONLY | os.O_CREAT | os.O_EXCL
+                        | getattr(os, "O_NOFOLLOW", 0)
+                    )
+                    payload = secrets.token_bytes(32)
+                    temporary_fd = os.open(
+                        temporary_name, create_flags, 0o600, dir_fd=cache_fd
+                    )
+                    try:
+                        try:
+                            view = memoryview(payload)
+                            try:
+                                while view:
+                                    written = os.write(temporary_fd, view)
+                                    if written <= 0:
+                                        raise OSError
+                                    view = view[written:]
+                                os.fsync(temporary_fd)
+                            finally:
+                                view.release()
+                        finally:
+                            os.close(temporary_fd)
+                        try:
+                            os.link(
+                                temporary_name,
+                                sentinel_name,
+                                src_dir_fd=cache_fd,
+                                dst_dir_fd=cache_fd,
+                                follow_symlinks=False,
+                            )
+                        except FileExistsError:
+                            pass
+                    finally:
+                        try:
+                            os.unlink(temporary_name, dir_fd=cache_fd)
+                        except FileNotFoundError:
+                            pass
+                    sentinel_fd = os.open(
+                        sentinel_name,
+                        os.O_RDONLY | getattr(os, "O_NOFOLLOW", 0),
+                        dir_fd=cache_fd,
+                    )
+                    try:
+                        sentinel = os.fstat(sentinel_fd)
+                        payload = os.read(sentinel_fd, 33)
+                        if (
+                            not stat.S_ISREG(sentinel.st_mode)
+                            or sentinel.st_uid != realized_owner
+                            or stat.S_IMODE(sentinel.st_mode) & 0o777 != 0o600
+                            or sentinel.st_size != 32
+                            or len(payload) != 32
+                        ):
+                            raise OSError
+                        sentinel_sha256 = hashlib.sha256(payload).hexdigest()
+                    finally:
+                        os.close(sentinel_fd)
+                finally:
+                    os.close(cache_fd)
+            except OSError:
+                if generation == 3:
+                    continue
+                raise
+            break
+    finally:
+        os.close(parent_fd)
+except OSError:
+    raise SystemExit(1)
+print(sentinel_sha256, os.path.join(parent, name), end="")
+PY
+)" || return 1
+  # Split on the first space only: the digest is pure hex, while the cache path
+  # may itself contain spaces.
+  sentinel_sha256="${info%% *}"
+  cache="${info#* }"
+  # `cache` equal to `info` means there was no space, i.e. malformed output.
+  [ "$cache" != "$info" ] && [[ "$sentinel_sha256" =~ ^[0-9a-f]{64}$ ]] \
+    && [[ "$cache" = /* ]] || return 1
+  export CX_PREPARED_BACKEND_CACHE="$cache"
+  export CX_BACKEND_CACHE_SENTINEL_SHA256="$sentinel_sha256"
+}
+
+# Confirm that CX_BACKEND_CACHE_ROOT is the cache directory whose sentinel
+# digest was recorded in CX_BACKEND_CACHE_SENTINEL_SHA256.
+# Returns python3's status: 0 when every check passes, 1 otherwise. Checks:
+#   - the root is an absolute path equal to its own realpath, and the expected
+#     digest is 64 lowercase hex digits;
+#   - the root opens as a directory without following a symlink, mode 0700;
+#   - `.collectivex-mount-sentinel-v1` inside it is a regular file with the
+#     same owner as the root, mode 0600, exactly 32 bytes, whose SHA-256 equals
+#     the expected digest.
+# Takes no arguments; unset variables are passed as empty strings and fail
+# the checks.
+cx_verify_backend_cache_mount() {
+  python3 - "${CX_BACKEND_CACHE_ROOT:-}" \
+    "${CX_BACKEND_CACHE_SENTINEL_SHA256:-}" <<'PY'
+import hashlib
+import os
+import re
+import stat
+import sys
+
+root, expected = sys.argv[1:]
+try:
+    if (
+        not os.path.isabs(root)
+        or os.path.realpath(root) != root
+        or re.fullmatch(r"[0-9a-f]{64}", expected) is None
+    ):
+        raise OSError
+    flags = os.O_RDONLY | os.O_DIRECTORY | getattr(os, "O_NOFOLLOW", 0)
+    root_fd = os.open(root, flags)
+    try:
+        root_item = os.fstat(root_fd)
+        if (
+            not stat.S_ISDIR(root_item.st_mode)
+            or stat.S_IMODE(root_item.st_mode) & 0o777 != 0o700
+        ):
+            raise OSError
+        sentinel_fd = os.open(
+            ".collectivex-mount-sentinel-v1",
+            os.O_RDONLY | getattr(os, "O_NOFOLLOW", 0),
+            dir_fd=root_fd,
+        )
+        try:
+            sentinel = os.fstat(sentinel_fd)
+            payload = os.read(sentinel_fd, 33)
+            if (
+                not stat.S_ISREG(sentinel.st_mode)
+                or sentinel.st_uid != root_item.st_uid
+                or stat.S_IMODE(sentinel.st_mode) & 0o777 != 0o600
+                or sentinel.st_size != 32
+                or len(payload) != 32
+                or hashlib.sha256(payload).hexdigest() != expected
+            ):
+                raise OSError
+        finally:
+            os.close(sentinel_fd)
+    finally:
+        os.close(root_fd)
+except OSError:
+    raise SystemExit(1)
+PY
+}
+
+# Run git isolated from host configuration and credentials.
+#   $@  git arguments, passed through unchanged.
+# GIT_CONFIG_NOSYSTEM=1 skips the system-wide config file,
+# GIT_CONFIG_GLOBAL=/dev/null replaces the per-user config with an empty one,
+# GIT_TERMINAL_PROMPT=0 makes git fail instead of prompting on the terminal,
+# and the empty `credential.helper` value resets the helper list.
+cx_git() {
+  GIT_CONFIG_NOSYSTEM=1 GIT_CONFIG_GLOBAL=/dev/null GIT_TERMINAL_PROMPT=0 \
+    git -c credential.helper= "$@"
+}
+
+cx_git_in_tree() {
+  local directory="$1" canonical
+  shift
+  [[ "$directory" = /* ]] && [ -d "$directory" ] && [ ! -L "$directory" ] \
+    || return 1
+  [[ "$directory" != *'*'* && "$directory" != *$'\n'* && "$directory" != *$'\r'* ]] \
+    || return 1
+  canonical="$(cd -P -- "$directory" && pwd -P)" || return 1
+  cx_git -c "safe.directory=$canonical" -C "$canonical" "$@"
+}
+
+cx_fetch_revision() {
+  local repository="$1" revision="$2" destination="$3" attempt
+  for attempt in 1 2 3; do
+    rm -rf -- "$destination"
+    if cx_git init -q "$destination" \
+        && cx_git_in_tree "$destination" remote add origin "$repository" \
+        && cx_git_in_tree "$destination" fetch -q --no-tags --depth 1 origin "$revision" \
+        && cx_git_in_tree "$destination" -c advice.detachedHead=false \
+          checkout -q --detach FETCH_HEAD \
+        && [ "$(cx_git_in_tree "$destination" rev-parse HEAD)" = "$revision" ]; then
+      return 0
+    fi
+    [ "$attempt" = 3 ] || sleep $((attempt * 5))
+  done
+  return 1
+}
+
+cx_backend_source_pin() {
+  case "$1" in
+    deepep-v2)
+      printf '%s|%s|%s' \
+        "$CX_DEEPEP_V2_COMMIT" "$CX_DEEPEP_V2_TREE" "$CX_DEEPEP_V2_FMT_COMMIT"
+      ;;
+    deepep-hybrid)
+      printf '%s|%s||%s' "$CX_DEEPEP_HYBRID_COMMIT" "$CX_DEEPEP_HYBRID_TREE" \
+        "$CX_DEEPEP_HYBRID_NCCL_COMMIT"
+      ;;
+    *) return 1 ;;
+  esac
+}
+
+# Print where a backend's pinned source tree lives: `<root>/<backend>-<commit>`
+# (no trailing newline).
+#   $1  source root directory   $2  backend name
+# Returns 1 when the backend has no source pin.
+cx_backend_source_path() {
+  local root="$1" backend="$2" pin
+  pin="$(cx_backend_source_pin "$backend")" || return 1
+  # The commit is the first `|`-separated slot of the pin.
+  printf '%s/%s-%s' "$root" "$backend" "${pin%%|*}"
+}
+
+# Check that a directory is an untouched checkout of a backend's pinned source.
+#   $1  backend name   $2  directory to inspect
+# Returns 0 only when all of the following hold, 1 otherwise:
+#   - the backend has a pin and the directory exists and is not a symlink;
+#   - HEAD and HEAD^{tree} equal the pinned commit and tree;
+#   - `git status --porcelain` (untracked files and submodules included) is
+#     empty, and no ignored files exist;
+#   - each pinned submodule (third-party/fmt, third-party/nccl) is at its
+#     pinned commit; an empty pin slot skips that submodule.
+cx_backend_source_is_valid() {
+  local backend="$1" source="$2" pin revision tree fmt nccl listing
+  pin="$(cx_backend_source_pin "$backend")" || return 1
+  IFS='|' read -r revision tree fmt nccl <<< "$pin"
+  if [ ! -d "$source" ] || [ -L "$source" ]; then
+    return 1
+  fi
+  [ "$(cx_git_in_tree "$source" rev-parse HEAD 2>/dev/null)" = "$revision" ] \
+    || return 1
+  [ "$(cx_git_in_tree "$source" rev-parse 'HEAD^{tree}' 2>/dev/null)" = "$tree" ] \
+    || return 1
+  listing="$(cx_git_in_tree "$source" status --porcelain --untracked-files=all \
+    --ignore-submodules=none 2>/dev/null)" || return 1
+  [ -z "$listing" ] || return 1
+  listing="$(cx_git_in_tree "$source" ls-files --others --ignored --exclude-standard \
+    2>/dev/null)" || return 1
+  [ -z "$listing" ] || return 1
+  if [ -n "$fmt" ]; then
+    [ "$(cx_git_in_tree "$source/third-party/fmt" rev-parse HEAD 2>/dev/null)" = "$fmt" ] \
+      || return 1
+  fi
+  if [ -n "$nccl" ]; then
+    [ "$(cx_git_in_tree "$source/third-party/nccl" rev-parse HEAD 2>/dev/null)" = "$nccl" ] \
+      || return 1
+  fi
+  return 0
+}
+
+# Print one SHA-256 (hex, no trailing newline) covering two files selected by
+# glob pattern under a directory.
+#   $1  root directory; must be a real directory, not a symlink.
+#   $2, $3  glob patterns relative to the root; each must match exactly one
+#           entry, which must be a regular file and not a symlink.
+# For each file, in argument order, the combined digest absorbs the file name,
+# a NUL, the file size in decimal, a NUL, and the raw SHA-256 of the contents.
+# Exits with status 1 when any check fails or a file cannot be read.
+cx_extension_pair_sha256() {
+  python3 - "$1" "$2" "$3" <<'PY'
+import hashlib
+import os
+from pathlib import Path
+import stat
+import sys
+
+root = Path(sys.argv[1])
+digest = hashlib.sha256()
+try:
+    if root.is_symlink() or not root.is_dir():
+        raise OSError
+    for pattern in sys.argv[2:]:
+        matches = list(root.glob(pattern))
+        if len(matches) != 1 or matches[0].is_symlink():
+            raise OSError
+        path = matches[0]
+        descriptor = os.open(path, os.O_RDONLY | getattr(os, "O_NOFOLLOW", 0))
+        try:
+            metadata = os.fstat(descriptor)
+            if not stat.S_ISREG(metadata.st_mode):
+                raise OSError
+            file_digest = hashlib.sha256()
+            with os.fdopen(descriptor, "rb", closefd=False) as stream:
+                for chunk in iter(lambda: stream.read(1024 * 1024), b""):
+                    file_digest.update(chunk)
+            digest.update(path.name.encode("utf-8") + b"\0")
+            digest.update(str(metadata.st_size).encode("ascii") + b"\0")
+            digest.update(file_digest.digest())
+        finally:
+            os.close(descriptor)
+except (OSError, UnicodeError):
+    raise SystemExit(1)
+print(digest.hexdigest(), end="")
+PY
+}
+
+# Acquire source before compute allocation, preferring the verified same-run GHA seed.
+#   $1  source mount root; sources are kept under
+#       <mount>/experimental/CollectiveX/.cx_sources
+#   $2  backend name, resolved through cx_backend_source_pin
+# Returns 0 when a valid pinned source tree exists at the backend's path
+# afterwards, 1 otherwise. CX_BACKEND_SOURCE_STEP is updated before each phase
+# so the caller can report which phase failed.
+# Resolution order:
+#   1. an entry already at the target path is validated and its verdict returned;
+#   2. with CX_BACKEND_SOURCE_SEED_ROOT set, the seed is validated, copied to a
+#      temporary directory, re-validated and moved into place;
+#   3. with COLLECTIVEX_CANONICAL_GHA=1 and no seed, fail (no network fetch);
+#   4. otherwise fetch the pinned revision and its pinned submodules from
+#      GitHub, validate, and move into place.
+_cx_prepare_backend_source() {
+  local mount_src="$1" backend="$2" root source temporary revision tree fmt nccl pin
+  local root_mode stage_mode root_owner stage_owner
+  local seed_root="${CX_BACKEND_SOURCE_SEED_ROOT:-}" seed seed_mode
+  root="$mount_src/experimental/CollectiveX/.cx_sources"
+  CX_BACKEND_SOURCE_STEP="source mount creation"
+  # Create the source root only when nothing at all (not even a dangling
+  # symlink) occupies the name; mkdir without -p requires the parent to exist.
+  if [ ! -e "$root" ] && [ ! -L "$root" ]; then
+    mkdir -m 700 -- "$root" || return 1
+  fi
+  CX_BACKEND_SOURCE_STEP="source mount ownership validation"
+  [ -d "$mount_src" ] && [ ! -L "$mount_src" ] \
+    && [ -d "$root" ] && [ ! -L "$root" ] || return 1
+  # The source root must have the same owner as the mount, and the mount must
+  # be mode 700 (optionally with special bits in the leading digit).
+  stage_owner="$(stat -c '%u' "$mount_src" 2>/dev/null)" || return 1
+  root_owner="$(stat -c '%u' "$root" 2>/dev/null)" || return 1
+  [ "$root_owner" = "$stage_owner" ] || return 1
+  stage_mode="$(stat -c '%a' "$mount_src" 2>/dev/null)" || return 1
+  case "$stage_mode" in 700|[1-7]700) ;; *) return 1 ;; esac
+  # Shared stage parents may retain harmless special bits despite mkdir -m.
+  CX_BACKEND_SOURCE_STEP="source mount permission inspection"
+  root_mode="$(stat -c '%a' "$root" 2>/dev/null)" || return 1
+  case "$root_mode" in
+    700|[1-7]700) ;;
+    *)
+      # Any other mode is normalised once and then re-checked.
+      CX_BACKEND_SOURCE_STEP="source mount permission normalization"
+      chmod 700 "$root" || return 1
+      CX_BACKEND_SOURCE_STEP="source mount permission validation"
+      root_mode="$(stat -c '%a' "$root" 2>/dev/null)" || return 1
+      case "$root_mode" in 700|[1-7]700) ;; *) return 1 ;; esac
+      ;;
+  esac
+  CX_BACKEND_SOURCE_STEP="git lookup"
+  command -v git >/dev/null || return 1
+  CX_BACKEND_SOURCE_STEP="source pin resolution"
+  source="$(cx_backend_source_path "$root" "$backend")" || return 1
+  # Anything already at the target path is never replaced, only judged.
+  if [ -e "$source" ] || [ -L "$source" ]; then
+    CX_BACKEND_SOURCE_STEP="existing source validation"
+    cx_backend_source_is_valid "$backend" "$source"
+    return
+  fi
+  if [ -n "$seed_root" ]; then
+    CX_BACKEND_SOURCE_STEP="source seed validation"
+    [[ "$seed_root" = /* ]] && [ -d "$seed_root" ] && [ ! -L "$seed_root" ] \
+      || return 1
+    seed_mode="$(stat -c '%a' "$seed_root" 2>/dev/null)" || return 1
+    case "$seed_mode" in 700|[1-7]700) ;; *) return 1 ;; esac
+    seed="$(cx_backend_source_path "$seed_root" "$backend")" || return 1
+    cx_backend_source_is_valid "$backend" "$seed" || return 1
+    CX_BACKEND_SOURCE_STEP="source seed copy"
+    # Copy into a sibling temporary directory and publish with mv so the target
+    # path only ever appears fully populated and validated.
+    temporary="$(mktemp -d "$root/.${backend}.XXXXXX")" || return 1
+    if ! cp -R -- "$seed/." "$temporary/" \
+        || ! cx_backend_source_is_valid "$backend" "$temporary" \
+        || ! mv -- "$temporary" "$source"; then
+      rm -rf -- "$temporary"
+      return 1
+    fi
+    return
+  fi
+  # Canonical GHA runs must use a seed; reaching this point without one fails.
+  if [ "${COLLECTIVEX_CANONICAL_GHA:-0}" = 1 ]; then
+    CX_BACKEND_SOURCE_STEP="source seed validation"
+    return 1
+  fi
+  CX_BACKEND_SOURCE_STEP="source checkout creation"
+  temporary="$(mktemp -d "$root/.${backend}.XXXXXX")" || return 1
+  CX_BACKEND_SOURCE_STEP="source pin resolution"
+  pin="$(cx_backend_source_pin "$backend")" || {
+    rm -rf -- "$temporary"
+    return 1
+  }
+  IFS='|' read -r revision tree fmt nccl <<< "$pin"
+  CX_BACKEND_SOURCE_STEP="revision fetch"
+  if ! cx_fetch_revision \
+      https://github.com/deepseek-ai/DeepEP "$revision" "$temporary"; then
+    rm -rf -- "$temporary"
+    return 1
+  fi
+  CX_BACKEND_SOURCE_STEP="submodule fetch"
+  # Only submodules that carry a pin are initialised; each one's directory is
+  # added to safe.directory for that command.
+  if [ -n "$fmt" ] && ! cx_git_in_tree "$temporary" \
+      -c "safe.directory=$temporary/third-party/fmt" \
+      submodule update -q --init --depth 1 third-party/fmt; then
+    rm -rf -- "$temporary"
+    return 1
+  fi
+  if [ -n "$nccl" ] && ! cx_git_in_tree "$temporary" \
+      -c "safe.directory=$temporary/third-party/nccl" \
+      submodule update -q --init --depth 1 third-party/nccl; then
+    rm -rf -- "$temporary"
+    return 1
+  fi
+  CX_BACKEND_SOURCE_STEP="source publication validation"
+  if ! cx_backend_source_is_valid "$backend" "$temporary" \
+      || ! mv -- "$temporary" "$source"; then
+    rm -rf -- "$temporary"
+    return 1
+  fi
+}
+
+cx_prepare_backend_source() {
+  local log backend="$2" CX_BACKEND_SOURCE_STEP="initialization"
+  log="$(cx_private_log_path "backend-source-$backend")" || return 1
+  if _cx_prepare_backend_source "$@" > "$log" 2>&1; then
+    return 0
+  fi
+  printf '%s failed\n' "$CX_BACKEND_SOURCE_STEP" >> "$log"
+  cx_log "ERROR: backend-source-step=${CX_BACKEND_SOURCE_STEP// /-}"
+  cx_fail_stage backend-setup "$log"
+}
+
+cx_materialize_backend_source() {
+  local backend="$1" destination="$2" source parent temporary
+  [ -n "${CX_BACKEND_SOURCE_ROOT:-}" ] || return 1
+  source="$(cx_backend_source_path "$CX_BACKEND_SOURCE_ROOT" "$backend")" || return 1
+  cx_backend_source_is_valid "$backend" "$source" || return 1
+  parent="${destination%/*}"
+  [ "$parent" != "$destination" ] && [ -d "$parent" ] && [ ! -L "$parent" ] \
+    || return 1
+  temporary="$(mktemp -d "$parent/.collectivex-source.XXXXXX")" || return 1
+  if ! cp -R -- "$source/." "$temporary/" \
+      || ! cx_backend_source_is_valid "$backend" "$temporary"; then
+    rm -rf -- "$temporary"
+    return 1
+  fi
+  if ! rm -rf -- "$destination" || ! mv -- "$temporary" "$destination"; then
+    rm -rf -- "$temporary"
+    return 1
+  fi
+  if ! cx_backend_source_is_valid "$backend" "$destination"; then
+    rm -rf -- "$destination"
+    return 1
+  fi
+  return 0
+}
+
+cx_lock_canonical_gha_env() {
+  local runner="$1" expected_nodes expected_gpn expected_world trusted_lock_dir=""
+  local trusted_stage_dir=""
+  local trusted_socket_ifname="" trusted_rdma_devices=""
+  local trusted_ib_gid_index="" trusted_rdma_service_level=""
+  local trusted_audit_salt=""
+  [ "${COLLECTIVEX_CANONICAL_GHA:-0}" = 1 ] || return 0
+  [ "${GITHUB_ACTIONS:-}" = true ] \
+    || cx_die "canonical CollectiveX execution requires GitHub Actions"
+  [ -n "${CX_SHARD_FILE:-}" ] && [ "${CX_SHARD_SKU:-}" = "$runner" ] \
+    || cx_die "canonical CollectiveX execution requires a matched shard"
+  [[ "${GITHUB_RUN_ID:-}" =~ ^[1-9][0-9]*$ \
+    && "${GITHUB_RUN_ATTEMPT:-}" =~ ^[1-9][0-9]*$ \
+    && "${COLLECTIVEX_SOURCE_SHA:-}" =~ ^[0-9a-f]{40,64}$ ]] \
+    || cx_die "canonical CollectiveX workflow identity is incomplete"
+
+  # cx_load_operator_config clears inherited values before setting this process marker.
+  # Preserve only values parsed from that private strict document.
+  if [ "${COLLECTIVEX_OPERATOR_CONFIG_LOADED:-}" = "$$" ]; then
+    trusted_lock_dir="${CX_LOCK_DIR:-}"
+    trusted_stage_dir="${CX_STAGE_DIR:-}"
+    trusted_socket_ifname="${CX_SOCKET_IFNAME:-}"
+    trusted_rdma_devices="${CX_RDMA_DEVICES:-}"
+    trusted_ib_gid_index="${CX_IB_GID_INDEX:-}"
+    trusted_rdma_service_level="${CX_RDMA_SERVICE_LEVEL:-}"
+    trusted_audit_salt="${CX_AUDIT_SALT:-}"
+  fi
+  unset CX_NCCL_HOME CX_MASTER_PORT CX_MORI_KERNEL_TYPE CX_LOCK_DIR CX_STAGE_DIR
+  unset MASTER_ADDR MASTER_PORT RANK WORLD_SIZE LOCAL_RANK LOCAL_WORLD_SIZE
+  unset CX_SOCKET_IFNAME CX_RDMA_DEVICES CX_IB_GID_INDEX CX_RDMA_SERVICE_LEVEL
+  unset CX_AUDIT_SALT
+  unset NCCL_NET NCCL_SOCKET_IFNAME GLOO_SOCKET_IFNAME NCCL_IB_HCA
+  unset NCCL_IB_GID_INDEX NCCL_IB_SL
+  unset NVSHMEM_HCA_LIST NVSHMEM_IB_GID_INDEX NVSHMEM_IB_SL
+  unset NVSHMEM_IB_ENABLE_IBGDA NVSHMEM_IBGDA_NIC_HANDLER
+  unset EP_NIC_NAME EP_OVERRIDE_RDMA_SL
+  unset UCCL_SOCKET_IFNAME UCCL_IB_GID_INDEX UCCL_IB_SL MORI_RDMA_DEVICES
+  unset HYBRID_EP_MULTINODE USE_NIXL RDMA_CORE_HOME DEEPEP_HYBRID_BUILD_MODE
+  unset MORI_COMMIT MORI_DISABLE_AUTO_XGMI MORI_ENABLE_SDMA
+  unset MORI_APP_LOG_LEVEL MORI_SHMEM_LOG_LEVEL MORI_IO_LOG_LEVEL
+  unset NCCL_CUMEM_ENABLE NCCL_MNNVL_ENABLE MC_FORCE_MNNVL
+  unset CX_BACKEND_CACHE_ROOT CX_BACKEND_CACHE_SENTINEL_SHA256
+  unset CX_PREPARED_BACKEND_CACHE CX_BACKEND_SOURCE_ROOT
+
+  [ -n "${CX_SQUASH_DIR:-}" ] \
+    || cx_die "canonical CollectiveX execution requires shared container storage"
+  [ -n "$trusted_stage_dir" ] \
+    || cx_die "canonical CollectiveX execution requires a configured shared stage directory"
+  [[ "$trusted_audit_salt" =~ ^[0-9a-f]{64}$ ]] \
+    || cx_die "canonical CollectiveX execution requires a private audit salt"
+
+  case "$runner" in
+    h100-dgxc|h200-dgxc|b200-dgxc|b300)
+      expected_nodes="${CX_NODES:-}"; expected_gpn=8
+      [ "$expected_nodes" = 1 ] || [ "$expected_nodes" = 2 ] \
+        || cx_die "canonical NVIDIA execution requires one or two nodes"
+      CX_IMAGE="$CX_IMAGE_MULTIARCH"
+      CX_IMAGE_DIGEST="$CX_IMAGE_MULTIARCH_DIGEST"
+      CX_NCCL_HOME=/usr
+      ;;
+    gb200|gb300)
+      expected_nodes="${CX_NODES:-}"; expected_gpn=4
+      [ "$expected_nodes" = 2 ] || [ "$expected_nodes" = 4 ] \
+        || cx_die "canonical GB execution requires two or four trays"
+      CX_IMAGE="$CX_IMAGE_MULTIARCH"
+      CX_IMAGE_DIGEST="$CX_IMAGE_MULTIARCH_DIGEST"
+      CX_NCCL_HOME=/usr
+      CX_MASTER_PORT=29551
+      ;;
+    mi325x)
+      expected_nodes="${CX_NODES:-}"; expected_gpn=8
+      [ "$expected_nodes" = 1 ] || [ "$expected_nodes" = 2 ] \
+        || cx_die "canonical AMD execution requires one or two nodes"
+      CX_IMAGE="$CX_IMAGE_AMD_MORI_MI325"
+      CX_IMAGE_DIGEST="$CX_IMAGE_AMD_MORI_MI325_DIGEST"
+      if [ "$expected_nodes" = 2 ]; then
+        CX_MORI_KERNEL_TYPE=internode-v1
+      else
+        CX_MORI_KERNEL_TYPE=asyncll
+      fi
+      MORI_COMMIT="$CX_MORI_COMMIT_MI325"
+      MORI_DISABLE_AUTO_XGMI=0
+      MORI_ENABLE_SDMA=1
+      MORI_APP_LOG_LEVEL=info
+      MORI_SHMEM_LOG_LEVEL=info
+      MORI_IO_LOG_LEVEL=info
+      ;;
+    mi355x)
+      expected_nodes="${CX_NODES:-}"; expected_gpn=8
+      [ "$expected_nodes" = 1 ] || [ "$expected_nodes" = 2 ] \
+        || cx_die "canonical AMD execution requires one or two nodes"
+      CX_IMAGE="$CX_IMAGE_AMD_MORI"
+      CX_IMAGE_DIGEST="$CX_IMAGE_AMD_MORI_DIGEST"
+      if [ "$expected_nodes" = 2 ]; then
+        CX_MORI_KERNEL_TYPE=internode-v1
+      else
+        CX_MORI_KERNEL_TYPE=intranode
+      fi
+      MORI_COMMIT="$CX_MORI_COMMIT_MI355"
+      ;;
+    *) cx_die "canonical CollectiveX runner is not registered" ;;
+  esac
+  case "$runner:$trusted_lock_dir" in
+    mi325x:?*|mi355x:?*) export CX_LOCK_DIR="$trusted_lock_dir" ;;
+  esac
+  CX_STAGE_DIR="$trusted_stage_dir"
+  [ -z "$trusted_socket_ifname" ] \
+    || export CX_SOCKET_IFNAME="$trusted_socket_ifname"
+  [ -z "$trusted_rdma_devices" ] \
+    || export CX_RDMA_DEVICES="$trusted_rdma_devices"
+  [ -z "$trusted_ib_gid_index" ] \
+    || export CX_IB_GID_INDEX="$trusted_ib_gid_index"
+  [ -z "$trusted_rdma_service_level" ] \
+    || export CX_RDMA_SERVICE_LEVEL="$trusted_rdma_service_level"
+  CX_AUDIT_SALT="$trusted_audit_salt"
+  export CX_STAGE_DIR CX_AUDIT_SALT
+  [ "${CX_NODES:-}" = "$expected_nodes" ] \
+    && [ "${CX_GPUS_PER_NODE:-}" = "$expected_gpn" ] \
+    || cx_die "canonical CollectiveX placement differs from the shard"
+  expected_world=$((expected_nodes * expected_gpn))
+  CX_NGPUS="$expected_world"
+  CX_SEED=67
+  case "$runner" in mi325x|mi355x) CX_RUN_TIMEOUT=1800 ;; *) CX_RUN_TIMEOUT=900 ;; esac
+  unset CX_PUBLIC_RUNNER CX_GB_PRODUCT CX_DRYRUN CX_TIMING CX_ALLOW_MNNVL
+  unset CX_ENROOT_LOCAL_IMPORT COLLECTIVEX_IMAGE COLLECTIVEX_IMAGE_DIGEST
+  unset COLLECTIVEX_IMAGE_DIGEST_VERIFIED COLLECTIVEX_SQUASH_SHA256
+  export CX_IMAGE CX_IMAGE_DIGEST CX_NGPUS CX_SEED CX_RUN_TIMEOUT
+  case "$runner" in
+    h100-dgxc|h200-dgxc|b200-dgxc|b300) export CX_NCCL_HOME ;;
+    gb200|gb300) export CX_NCCL_HOME CX_MASTER_PORT ;;
+    mi325x)
+      export CX_MORI_KERNEL_TYPE MORI_COMMIT MORI_DISABLE_AUTO_XGMI MORI_ENABLE_SDMA
+      export MORI_APP_LOG_LEVEL MORI_SHMEM_LOG_LEVEL MORI_IO_LOG_LEVEL
+      ;;
+    mi355x) export CX_MORI_KERNEL_TYPE MORI_COMMIT ;;
+  esac
+}
+
+cx_reverify_registry_image() {
+  local image="$1" actual
+  [[ "${COLLECTIVEX_IMAGE_DIGEST:-}" =~ ^sha256:[0-9a-f]{64}$ ]] \
+    && [ "${COLLECTIVEX_IMAGE_DIGEST_VERIFIED:-0}" = 1 ] || return 1
+  actual="$(cx_resolve_registry_digest "$image")" || return 1
+  [ "$actual" = "$COLLECTIVEX_IMAGE_DIGEST" ] || {
+    cx_log "ERROR: configured image tag changed during container import"
+    return 1
+  }
+}
+
+cx_export_squash_identity() {
+  local image="$1" digest log
+  log="$(cx_private_log_path container-hash)"
+  digest="$(sha256sum "$image" 2>> "$log" | awk '{print $1}')"
+  [[ "$digest" =~ ^[0-9a-f]{64}$ ]] \
+    || { cx_fail_stage container-hash "$log"; return 1; }
+  export COLLECTIVEX_SQUASH_SHA256="$digest"
+}
+
+cx_squash_path() {
+  local squash_dir="$1" image="$2" key platform
+  [[ "${COLLECTIVEX_IMAGE_DIGEST:-}" =~ ^sha256:[0-9a-f]{64}$ ]] \
+    || return 1
+  case "${CX_IMAGE_PLATFORM:-}" in
+    linux/amd64) platform="" ;;
+    linux/arm64) platform="_linux_arm64" ;;
+    *) return 1 ;;
+  esac
+  key="${CX_SQUASH_FORMAT_VERSION}${platform}_${COLLECTIVEX_IMAGE_DIGEST#sha256:}_$(
+    printf '%s' "$image" | sed 's#[/:@#]#_#g'
+  )"
+  printf '%s' "$squash_dir/${key}.sqsh"
+}
+
+# _cx_import_squash_locked <squash_path> <image> <log>
+# Private helper for cx_ensure_squash; the caller must already hold the
+# per-image lock. Reuses the squash when `unsquashfs -l` accepts it, otherwise
+# removes it and imports the image with Enroot, then re-verifies the registry
+# digest. Returns nonzero on any failure and leaves stage reporting and lock
+# release to the caller.
+_cx_import_squash_locked() {
+  local sq="$1" image="$2" log="$3" enroot_local="" import_rc=0
+  if unsquashfs -l "$sq" >/dev/null 2>&1; then
+    cx_log "container squash ready"
+  else
+    cx_log "importing configured container image"
+    rm -f -- "$sq" 2>> "$log" || return 1
+    # </dev/null: never block on an interactive password prompt.
+    if [ "${CX_ENROOT_LOCAL_IMPORT:-0}" = 1 ]; then
+      enroot_local="$(mktemp -d /tmp/inferencex-collectivex-enroot.XXXXXX)" \
+        || return 1
+      (
+        # Keep every Enroot working directory under one private temp root that
+        # is removed when the subshell exits.
+        trap 'rm -rf -- "$enroot_local"' EXIT
+        export ENROOT_TEMP_PATH="$enroot_local/tmp"
+        export ENROOT_CACHE_PATH="$enroot_local/cache"
+        export ENROOT_DATA_PATH="$enroot_local/data"
+        export ENROOT_RUNTIME_PATH="$enroot_local/run"
+        mkdir -p "$ENROOT_TEMP_PATH" "$ENROOT_CACHE_PATH" \
+          "$ENROOT_DATA_PATH" "$ENROOT_RUNTIME_PATH"
+        SOURCE_DATE_EPOCH="$CX_SQUASH_SOURCE_DATE_EPOCH" \
+          enroot import -o "$sq" "docker://$image" </dev/null
+      ) >> "$log" 2>&1 || import_rc=$?
+      rm -rf -- "$enroot_local" >/dev/null 2>&1 || true
+      [ "$import_rc" = 0 ] || return 1
+    else
+      SOURCE_DATE_EPOCH="$CX_SQUASH_SOURCE_DATE_EPOCH" \
+        enroot import -o "$sq" "docker://$image" </dev/null >> "$log" 2>&1 \
+        || return 1
+    fi
+    # An import that exits zero but leaves an unreadable squash is a failure.
+    unsquashfs -l "$sq" >> "$log" 2>&1 || return 1
+  fi
+  cx_reverify_registry_image "$image" >> "$log" 2>&1 || return 1
+}
+
+# cx_ensure_squash <squash_dir> <image>  ->  echoes the squash file path.
+# Imports via Enroot only if a valid squash is not already present, under a lock.
+# Fails (container-import stage, return 1) when CX_IMAGE_PLATFORM does not match
+# `uname -m`, a directory or the lock cannot be prepared, the lock is not
+# acquired within 900 seconds, the import or validation fails, or the registry
+# digest no longer matches. The lock is unlocked and its descriptor closed on
+# every path after acquisition, so a failed import in a long-lived shell does
+# not keep blocking other importers of the same image.
+cx_ensure_squash() {
+  local squash_dir="$1" image="$2" key sq locks lock_fd log machine rc=0
+  log="$(cx_private_log_path container-import)"
+  machine="$(uname -m)"
+  case "${CX_IMAGE_PLATFORM:-}:$machine" in
+    linux/amd64:x86_64|linux/amd64:amd64|linux/arm64:aarch64|linux/arm64:arm64) ;;
+    *) cx_fail_stage container-import "$log"; return 1 ;;
+  esac
+  mkdir -p "$squash_dir" 2>> "$log" \
+    || { cx_fail_stage container-import "$log"; return 1; }
+  sq="$(cx_squash_path "$squash_dir" "$image")" \
+    || { cx_fail_stage container-import "$log"; return 1; }
+  # The lock file shares the squash file's base name.
+  key="${sq##*/}"
+  key="${key%.sqsh}"
+  locks="$squash_dir/.locks"
+  mkdir -p "$locks" 2>> "$log" \
+    || { cx_fail_stage container-import "$log"; return 1; }
+  { exec {lock_fd}>"$locks/${key}.lock"; } 2>> "$log" \
+    || { cx_fail_stage container-import "$log"; return 1; }
+  if ! flock -w 900 "$lock_fd" 2>> "$log"; then
+    exec {lock_fd}>&-
+    cx_fail_stage container-import "$log"
+    return 1
+  fi
+  _cx_import_squash_locked "$sq" "$image" "$log" || rc=$?
+  # Single release point for success and failure alike.
+  flock -u "$lock_fd" >/dev/null 2>&1 || true
+  exec {lock_fd}>&-
+  if [ "$rc" != 0 ]; then
+    cx_fail_stage container-import "$log"
+    return 1
+  fi
+  echo "$sq"
+}
+
+# Import on an allocated compute node so multiarch tags resolve for the target
+# architecture. The squash directory must be shared with the submit host.
+#
+# cx_ensure_squash_on_job <job_id> <squash_dir> <image> [lock_dir]
+#   job_id      numeric Slurm job id; anything else returns 1 immediately
+#   squash_dir  directory holding the squash file (see cx_squash_path)
+#   image       image reference handed to `enroot import` as docker://<image>
+#   lock_dir    optional lock directory; defaults to <squash_dir>/.locks
+# Prints the squash path (no trailing newline) on success. A failed remote step
+# or a failed registry re-verification is reported as a container-import stage
+# failure and returns 1.
+#
+# The remote script runs as a single task inside the allocation. It exits 13
+# when the requested platform does not match `uname -m`, points HOME and the
+# Enroot directories at a private temp directory under /tmp, takes the lock
+# (900 s timeout), and imports only when `unsquashfs -l` rejects the file.
+cx_ensure_squash_on_job() {
+  local job_id="$1" squash_dir="$2" image="$3" lock_dir="${4:-}" sq key lock log
+  [[ "$job_id" =~ ^[0-9]+$ ]] || return 1
+  sq="$(cx_squash_path "$squash_dir" "$image")" || return 1
+  # The lock file shares the squash file's base name.
+  key="${sq##*/}"
+  key="${key%.sqsh}"
+  [ -n "$lock_dir" ] || lock_dir="$squash_dir/.locks"
+  lock="$lock_dir/${key}.lock"
+  log="$(cx_private_log_path container-import)"
+  # The srun output replaces (not appends to) the private log.
+  if ! srun --jobid="$job_id" --nodes=1 --ntasks=1 --chdir=/tmp \
+      --export="$(cx_host_exports)" \
+      bash -s -- "$sq" "$lock" "$image" "$CX_SQUASH_SOURCE_DATE_EPOCH" \
+      "$CX_IMAGE_PLATFORM" \
+      > "$log" 2>&1 <<'BASH'
+set -euo pipefail
+sq="$1"; lock="$2"; image="$3"; source_date_epoch="$4"; platform="$5"
+machine="$(uname -m)"
+case "$platform:$machine" in
+  linux/amd64:x86_64|linux/amd64:amd64|linux/arm64:aarch64|linux/arm64:arm64) ;;
+  *) exit 13 ;;
+esac
+compute_home="$(mktemp -d /tmp/inferencex-collectivex-home.XXXXXX)"
+trap 'rm -rf -- "$compute_home"' EXIT
+export HOME="$compute_home" XDG_CACHE_HOME="$compute_home/.cache"
+export ENROOT_TEMP_PATH="$compute_home/enroot-tmp"
+export ENROOT_CACHE_PATH="$compute_home/enroot-cache"
+export ENROOT_DATA_PATH="$compute_home/enroot-data"
+export ENROOT_RUNTIME_PATH="$compute_home/enroot-run"
+mkdir -p "$(dirname "$sq")" "$(dirname "$lock")" \
+  "$ENROOT_TEMP_PATH" "$ENROOT_CACHE_PATH" "$ENROOT_DATA_PATH" "$ENROOT_RUNTIME_PATH"
+exec 9>"$lock"
+flock -w 900 9
+if unsquashfs -l "$sq" >/dev/null 2>&1; then
+  echo 'container squash ready'
+else
+  rm -f -- "$sq"
+  SOURCE_DATE_EPOCH="$source_date_epoch" \
+    enroot import -o "$sq" "docker://$image" </dev/null
+  unsquashfs -l "$sq" >/dev/null 2>&1
+fi
+BASH
+  then
+    cx_fail_stage container-import "$log"
+    return 1
+  fi
+  # Re-check the registry digest from the submit host after the remote import.
+  if ! cx_reverify_registry_image "$image" >> "$log" 2>&1; then
+    cx_fail_stage container-import "$log"
+    return 1
+  fi
+  printf '%s' "$sq"
+}
+
+# cx_preflight_allocation <job_id> <nodes> <mount_src> <squash> [shard]
+# Check, from every node of the allocation, that the staged tree and the squash
+# file are usable before any benchmark work starts.
+#   job_id     Slurm job id passed to srun
+#   nodes      node count; one task is launched per node
+#   mount_src  staged tree root; the runtime script and shard are resolved
+#              under <mount_src>/experimental/CollectiveX
+#   squash     squash file path
+#   shard      optional shard path relative to the CollectiveX directory
+# A probe directory <mount_src>/.collectivex-preflight is created with a token
+# file; each node copies the token to node-<SLURM_NODEID> and the host then
+# compares every copy against the token.
+# Per-node exit codes: 13 platform/architecture mismatch, 10 runtime script,
+# probe directory or node id problem, 11 shard unreadable, 12 squash unreadable
+# or rejected by `unsquashfs -s`.
+# Returns 0 on success; otherwise reports a stage failure (10|11 ->
+# repository-stage, 12 -> container-hash, anything else -> container-launch)
+# and returns 1. The probe directory is removed on every path past creation.
+cx_preflight_allocation() {
+  local job_id="$1" nodes="$2" mount_src="$3" squash="$4" shard="${5:-}"
+  local log rc=0 runtime shard_path="" probe_root probe_token index
+  runtime="$mount_src/experimental/CollectiveX/runtime/run_in_container.sh"
+  [ -z "$shard" ] || shard_path="$mount_src/experimental/CollectiveX/$shard"
+  log="$(cx_private_log_path allocation-preflight)"
+  probe_root="$mount_src/.collectivex-preflight"
+  probe_token="$probe_root/source"
+  # Refuse a pre-existing probe path (including a symlink); create it private.
+  if [ -e "$probe_root" ] || [ -L "$probe_root" ] \
+      || ! mkdir -m 700 "$probe_root"; then
+    cx_fail_stage repository-stage "$log"
+    return 1
+  fi
+  # The token is the execution id, or manual-<pid> when none is set.
+  if ! printf '%s\n' "${COLLECTIVEX_EXECUTION_ID:-manual-$$}" > "$probe_token" \
+      || ! chmod 600 "$probe_token"; then
+    chmod 700 "$probe_root" >/dev/null 2>&1 || true
+    rm -rf -- "$probe_root" >/dev/null 2>&1 || true
+    cx_fail_stage repository-stage "$log"
+    return 1
+  fi
+  # --input=all feeds the heredoc script to every task, one task per node.
+  srun --jobid="$job_id" --nodes="$nodes" --ntasks="$nodes" --ntasks-per-node=1 \
+    --chdir=/tmp --input=all \
+    --export="$(cx_host_exports)" bash -s -- "$runtime" "$shard_path" "$squash" \
+    "$CX_IMAGE_PLATFORM" "$probe_root" \
+    > "$log" 2>&1 <<'BASH' || rc=$?
+set -euo pipefail
+machine="$(uname -m)"
+case "$4:$machine" in
+  linux/amd64:x86_64|linux/amd64:amd64|linux/arm64:aarch64|linux/arm64:arm64) ;;
+  *) exit 13 ;;
+esac
+test -r "$1" || exit 10
+[ -z "$2" ] || test -r "$2" || exit 11
+test -r "$3" || exit 12
+unsquashfs -s "$3" >/dev/null 2>&1 || exit 12
+case "${SLURM_NODEID:-}" in ""|*[!0-9]*) exit 10 ;; esac
+[ -d "$5" ] && [ ! -L "$5" ] && [ -r "$5/source" ] || exit 10
+(set -C; cat "$5/source" > "$5/node-$SLURM_NODEID") || exit 10
+cmp -s -- "$5/source" "$5/node-$SLURM_NODEID" || exit 10
+BASH
+  # Host-side cross-check: every node index must have produced a matching copy.
+  if [ "$rc" = 0 ]; then
+    for ((index = 0; index < nodes; index++)); do
+      if ! cmp -s -- "$probe_token" "$probe_root/node-$index"; then
+        rc=10
+        break
+      fi
+    done
+  fi
+  # Restore owner permissions before removal; a cleanup failure also fails the
+  # preflight as a repository-stage error.
+  if [ -d "$probe_root" ] && [ ! -L "$probe_root" ]; then
+    chmod 700 "$probe_root" >/dev/null 2>&1 || rc=10
+  fi
+  rm -rf -- "$probe_root" >/dev/null 2>&1 || rc=10
+  [ "$rc" = 0 ] && return 0
+  case "$rc" in
+    10|11) cx_fail_stage repository-stage "$log" ;;
+    12) cx_fail_stage container-hash "$log" ;;
+    *) cx_fail_stage container-launch "$log" ;;
+  esac
+  return 1
+}
+
+# Resolve the exact per-execution child before any copy starts, so the parent
+# EXIT trap can remove an interrupted partial stage. The configured base must
+# already exist on compute-visible storage and must not traverse symlinks.
+#
+# cx_stage_path <repo_root> [stage_base]  ->  prints the stage path (no newline)
+# The execution tag is COLLECTIVEX_EXECUTION_ID, else GITHUB_RUN_ID, else
+# manual-<pid>. With no stage base (or one equal to the repo root) the path is
+# <CX_SQUASH_DIR>/.collectivex-stage-<tag>; otherwise it is <stage_base>/job_<tag>.
+# Dies via cx_die on an invalid tag, on canonical runs without a separate stage
+# base, or when the fallback needs CX_SQUASH_DIR and it is unset. Returns
+# nonzero when the Python validation below rejects the paths.
+cx_stage_path() {
+  local repo_root="$1" stage_base="${2:-}" tag safe_tag stage_path
+  tag="${COLLECTIVEX_EXECUTION_ID:-${GITHUB_RUN_ID:-manual-$$}}"
+  [[ "$tag" =~ ^[A-Za-z0-9][A-Za-z0-9._-]*$ ]] \
+    || cx_die "invalid staging execution identity"
+  # After the check above this substitution cannot change the tag; it yields
+  # the same form cx_cleanup_stage recomputes.
+  safe_tag="$(printf '%s' "$tag" | tr -c 'A-Za-z0-9._-' '_')"
+  if [ -z "$stage_base" ] || [ "$stage_base" = "$repo_root" ]; then
+    [ "${COLLECTIVEX_CANONICAL_GHA:-0}" != 1 ] \
+      || cx_die "canonical CollectiveX execution requires compute-visible staging"
+    [ -n "${CX_SQUASH_DIR:-}" ] \
+      || cx_die "manual CollectiveX staging requires CX_SQUASH_DIR"
+    stage_base="$CX_SQUASH_DIR"
+    stage_path="${stage_base%/}/.collectivex-stage-$safe_tag"
+  else
+    stage_path="${stage_base%/}/job_$safe_tag"
+  fi
+  # The Python check requires: repo and base are absolute and already
+  # symlink-free; the child is a direct, not-yet-existing entry of base; base
+  # does not lie inside the repo, CX_JOB_ROOT or GITHUB_WORKSPACE; and base is a
+  # directory owned by the current user, not group/other writable, and
+  # writable and searchable by this process.
+  python3 - "$repo_root" "$stage_base" "$stage_path" \
+    "${CX_JOB_ROOT:-}" "${GITHUB_WORKSPACE:-}" <<'PY'
+import os
+import stat
+import sys
+
+repo, base, child, job_root, workspace = sys.argv[1:]
+try:
+    if (
+        not os.path.isabs(repo)
+        or os.path.realpath(repo) != repo
+        or not os.path.isabs(base)
+        or os.path.realpath(base) != base
+        or not os.path.isabs(child)
+        or os.path.dirname(child) != base.rstrip("/")
+        or os.path.lexists(child)
+    ):
+        raise OSError
+    metadata = os.stat(base, follow_symlinks=False)
+    excluded = [repo]
+    excluded.extend(path for path in (job_root, workspace) if path)
+    for path in excluded:
+        resolved = os.path.realpath(path)
+        if os.path.commonpath((base, resolved)) == resolved:
+            raise OSError
+    if (
+        not stat.S_ISDIR(metadata.st_mode)
+        or metadata.st_uid != os.getuid()
+        or stat.S_IMODE(metadata.st_mode) & (stat.S_IWGRP | stat.S_IWOTH)
+        or not os.access(base, os.W_OK | os.X_OK)
+    ):
+        raise OSError
+except OSError:
+    raise SystemExit(1)
+print(child, end="")
+PY
+}
+
+# Stage only the public benchmark tree into a pre-resolved, private execution
+# child. A runner-owned marker makes recursive cleanup an explicit capability.
+#
+# cx_stage_repo <repo_root> <stage_dir>
+#   repo_root  checkout whose experimental/CollectiveX tree is copied
+#   stage_dir  must equal what cx_stage_path derives from CX_STAGE_DIR
+# Dies via cx_die when the stage base is unusable, the path differs from the
+# derived one, the directory already exists, or it cannot be created and
+# claimed. Returns 1 after removing the partial stage when rsync fails.
+cx_stage_repo() {
+  local repo_root="$1" stage_dir="$2" expected log tag marker
+  cx_validate_shard_control "$repo_root/experimental/CollectiveX"
+  expected="$(cx_stage_path "$repo_root" "${CX_STAGE_DIR:-}")" \
+    || cx_die "configured stage base is unavailable or unsafe"
+  [ "$stage_dir" = "$expected" ] \
+    || cx_die "execution stage differs from the configured stage base"
+  tag="${COLLECTIVEX_EXECUTION_ID:-${GITHUB_RUN_ID:-manual-$$}}"
+  if [ -e "$stage_dir" ] || [ -L "$stage_dir" ]; then
+    cx_die "refusing to reuse a pre-existing execution stage"
+  fi
+  mkdir -m 700 "$stage_dir" 2>/dev/null \
+    || cx_die "cannot create the configured stage directory"
+  chmod 700 "$stage_dir" 2>/dev/null \
+    || cx_die "cannot protect the configured stage directory"
+  marker="$stage_dir/.collectivex-stage-v1"
+  # NOTE(review): this umask is set in the calling shell and is not restored,
+  # so it stays in effect after the function returns - confirm that is intended.
+  umask 077
+  # noclobber (set -C) makes the claim fail if the marker already exists.
+  (set -C; printf 'collectivex-stage-v1\n%s\n' "$tag" > "$marker") 2>/dev/null \
+    || cx_die "cannot claim the configured stage directory"
+  chmod 600 "$marker" 2>/dev/null \
+    || cx_die "cannot protect the configured stage directory"
+  mkdir -m 700 "$stage_dir/experimental" 2>/dev/null \
+    || cx_die "cannot create the configured stage directory"
+  cx_log "staging CollectiveX on compute-visible storage"
+  log="$(cx_private_log_path repository-stage)"
+  # Generated caches, results and the listed private files are excluded from
+  # the copy.
+  if ! rsync -a --delete --delete-excluded \
+      --exclude='__pycache__/' --exclude='results/' --exclude='.cx_workloads/' \
+      --exclude='.cx_backend/' --exclude='.cx_sources/' \
+      --exclude='configs/platforms.yaml' --exclude='private-infra.md' \
+      --exclude='goal.md' --exclude='notes.md' \
+      "$repo_root/experimental/CollectiveX" "$stage_dir/experimental/" > "$log" 2>&1; then
+    rm -rf -- "$stage_dir" >/dev/null 2>&1 \
+      || cx_log "ERROR: cannot remove the incomplete execution stage"
+    cx_fail_stage repository-stage "$log" || true
+    return 1
+  fi
+}
+
+# cx_collect_results <mount_src> <repo_root>
+# When the run used a staged (compute-visible) mount, copy result JSONs back to
+# the original checkout's results/ so the workflow's upload-artifact (which reads
+# the checkout, not the stage dir) finds them. No-op when no staging was used.
+# Returns 1 when the destination cannot be created, the staged run produced no
+# *.json file, or the copy fails. The caller's nullglob setting is left exactly
+# as it was found.
+cx_collect_results() {
+  local mount_src="$1" repo_root="$2" dst log had_nullglob=0
+  local -a files
+  [ "$mount_src" = "$repo_root" ] && return 0
+  log="$(cx_private_log_path "artifact-collection-$$-${RANDOM}")"
+  dst="$repo_root/experimental/CollectiveX/results"
+  mkdir -p "$dst" 2>> "$log" \
+    || { cx_log "ERROR: cannot create checkout result directory"; return 1; }
+  # nullglob makes an empty match expand to an empty array instead of the
+  # literal pattern. Only switch it back off if this function switched it on.
+  if shopt -q nullglob; then had_nullglob=1; fi
+  shopt -s nullglob
+  files=("$mount_src/experimental/CollectiveX/results/"*.json)
+  [ "$had_nullglob" = 1 ] || shopt -u nullglob
+  [ "${#files[@]}" -gt 0 ] || { cx_log "ERROR: staged run produced no result JSON"; return 1; }
+  cp -- "${files[@]}" "$dst/" >> "$log" 2>&1 \
+    || { cx_log "ERROR: staged result collection failed"; return 1; }
+  cx_log "collected staged results for artifact validation"
+}
+
+# cx_cleanup_stage <mount_src> <repo_root>
+# Remove the per-execution stage directory, but only when it is provably the
+# one this execution generated. No-op (returns 0) when the run used the
+# checkout itself. Returns 1 without deleting anything when the path does not
+# match the expected stage location, the ownership check fails, or the removal
+# itself fails.
+cx_cleanup_stage() {
+  local mount_src="$1" repo_root="$2" base="${CX_STAGE_DIR:-}" tag safe_tag expected
+  tag="${COLLECTIVEX_EXECUTION_ID:-${GITHUB_RUN_ID:-manual-$$}}"
+  safe_tag="$(printf '%s' "$tag" | tr -c 'A-Za-z0-9._-' '_')"
+  [ "$mount_src" != "$repo_root" ] || return 0
+  # Recompute the stage path using the same naming cx_stage_path uses.
+  if [ -n "$base" ] && [ "$base" != "$repo_root" ]; then
+    expected="${base%/}/job_$safe_tag"
+  else
+    [ -n "${CX_SQUASH_DIR:-}" ] \
+      || { cx_log "ERROR: cannot identify the generated stage directory"; return 1; }
+    expected="${CX_SQUASH_DIR%/}/.collectivex-stage-$safe_tag"
+  fi
+  # Never remove anything but the expected child; "/" and the stage base itself
+  # are rejected explicitly.
+  if [ "$mount_src" != "$expected" ] || [ "$mount_src" = / ] \
+      || { [ -n "$base" ] && [ "$mount_src" = "$base" ]; }; then
+    cx_log "ERROR: refusing to remove an unrecognized stage directory"
+    return 1
+  fi
+  # Ownership check: the root must be a real directory owned by this user with
+  # mode exactly 0700. If the marker exists it must be a regular, user-owned,
+  # mode-0600 file, and either carry this execution's tag or be the only entry.
+  # Without a marker the directory must be empty.
+  if ! python3 - "$mount_src" "$tag" <<'PY'
+import os
+from pathlib import Path
+import stat
+import sys
+
+root = Path(sys.argv[1])
+expected = f"collectivex-stage-v1\n{sys.argv[2]}\n"
+try:
+    metadata = os.stat(root, follow_symlinks=False)
+    marker = root / ".collectivex-stage-v1"
+    if (
+        not stat.S_ISDIR(metadata.st_mode)
+        or metadata.st_uid != os.getuid()
+        or (stat.S_IMODE(metadata.st_mode) & 0o777) != 0o700
+    ):
+        raise OSError
+    entries = list(root.iterdir())
+    if marker.exists():
+        marker_metadata = os.stat(marker, follow_symlinks=False)
+        if (
+            not stat.S_ISREG(marker_metadata.st_mode)
+            or marker_metadata.st_uid != os.getuid()
+            or stat.S_IMODE(marker_metadata.st_mode) != 0o600
+        ):
+            raise OSError
+        marker_content = marker.read_text()
+        if marker_content != expected and entries != [marker]:
+            raise OSError
+    elif entries:
+        raise OSError
+except (OSError, UnicodeError):
+    raise SystemExit(1)
+PY
+  then
+    cx_log "ERROR: refusing to remove an unowned stage directory"
+    return 1
+  fi
+  rm -rf -- "$mount_src" >/dev/null 2>&1 || {
+    cx_log "ERROR: cannot remove generated stage directory"
+    return 1
+  }
+  cx_log "removed generated per-execution stage directory"
+}
+
+# Return success only when a benchmark output is a complete JSON result object.
+# Callers use this before synthesizing a terminal outcome so an emitted invalid result
+# is not shadowed by a second record for the same attempt.
+cx_has_result_doc() {
+  local path="$1"
+  python3 "$_CX_COMMON_ROOT/contracts.py" probe "$path" >/dev/null 2>&1
+}
+
+cx_result_doc_is() {
+  local path="$1" expected="$2"
+  python3 "$_CX_COMMON_ROOT/contracts.py" probe "$path" --status "$expected" \
+    >/dev/null 2>&1
+}
+
+# A rank-zero result can be written before another rank or backend teardown fails. Preserve its
+# measurements, but make the distributed command's nonzero terminal status authoritative.
+cx_demote_result_doc() {
+  local path="$1" rc="$2"
+  python3 "$_CX_COMMON_ROOT/contracts.py" demote "$path" --return-code "$rc"
+}
+
+cx_quarantine_result_doc() {
+  python3 "$_CX_COMMON_ROOT/contracts.py" quarantine-invalid "$1"
+}
+
+# cx_emit_ep_failed_case <out> <backend> <phase> <return-code>
+# Record a terminal failure for rack launchers that call run_ep.py directly and
+# so cannot rely on run_in_container.sh's emitter. The case identity comes from
+# the exported CX_* variables; CX_FAILURE_MODE, when non-empty, is forwarded as
+# --failure-mode. Returns 1 (after logging) when the record cannot be written.
+cx_emit_ep_failed_case() {
+  local out="$1" backend="$2" phase="$3" rc="$4"
+  local -a emit=(python3 "$_CX_COMMON_ROOT/contracts.py" emit-terminal
+    --out "$out" --backend "$backend" --phase "$phase" --return-code "$rc")
+  if [ -n "${CX_FAILURE_MODE:-}" ]; then
+    emit+=(--failure-mode "$CX_FAILURE_MODE")
+  fi
+  "${emit[@]}" && return 0
+  cx_log "ERROR: could not preserve terminal outcome"
+  return 1
+}
+
+# cx_case_attempt_exists <out_dir> <case_id>
+# Succeeds when <out_dir> already holds a raw-attempt or terminal document whose
+# identity.case_id equals <case_id>.
+# Side effects while scanning <out_dir>/*.json: a document that fails loading or
+# validation (ContractError, OSError, ValueError) is renamed with a
+# ".quarantine" suffix, together with the sample file it names when that name
+# is a bare file name in the same directory. Sample documents that no raw
+# attempt references are quarantined the same way. Documents in any other
+# format are ignored.
+cx_case_attempt_exists() {
+  local out_dir="$1" case_id="$2"
+  python3 - "$_CX_COMMON_ROOT" "$out_dir" "$case_id" <<'PY'
+import pathlib, sys
+
+sys.path.insert(0, sys.argv[1])
+import contracts
+
+sample_paths = set()
+referenced_samples = set()
+found = False
+
+def quarantine(path, document):
+    sample = document.get("sample_artifact") if isinstance(document, dict) else None
+    if (
+        isinstance(sample, dict)
+        and isinstance(sample.get("path"), str)
+        and pathlib.Path(sample["path"]).name == sample["path"]
+    ):
+        sample_path = path.with_name(sample["path"])
+        if sample_path.is_file():
+            sample_path.replace(sample_path.with_name(sample_path.name + ".quarantine"))
+    if path.is_file():
+        path.replace(path.with_name(path.name + ".quarantine"))
+
+for path in pathlib.Path(sys.argv[2]).glob("*.json"):
+    document = None
+    try:
+        document = contracts.strict_load(path)
+        if not isinstance(document, dict):
+            continue
+        if document.get("format") == contracts.RAW_FORMAT:
+            document = contracts.load_raw_attempt(path)
+            referenced_samples.add(path.with_name(document["sample_artifact"]["path"]))
+        elif document.get("format") == contracts.TERMINAL_FORMAT:
+            document = contracts.validate_terminal_document(document)
+        elif document.get("format") == contracts.SAMPLES_FORMAT:
+            contracts.validate_samples_document(document)
+            sample_paths.add(path)
+            continue
+        else:
+            continue
+    except (contracts.ContractError, OSError, ValueError):
+        quarantine(path, document)
+        continue
+    if document["identity"]["case_id"] == sys.argv[3]:
+        found = True
+for orphan in sample_paths - referenced_samples:
+    quarantine(orphan, {})
+raise SystemExit(0 if found else 1)
+PY
+}
+
+# Emit one setup-failure record per requested case. Rack launchers call this when
+# backend preparation fails before rank processes can start.
+cx_emit_setup_failures() {
+  local root="$1" out_dir="$2" backend="$3" rc="$4" shard="${CX_SHARD_FILE:-}" path
+  local phase case_id suite workload required routing eplb ep hidden topk experts nodes
+  local gpn domain ladder canonical timing mode scope scale_up_transport scale_out_transport
+  local warmup_semantics precision_profile
+  local transport topology_class
+  local cases_file expected emitted=0 covered=0
+  mkdir -p "$out_dir" || return 1
+  export CX_FAILURE_MODE="${CX_FAILSAFE_MODE:-setup}" CX_ATTEMPT_ID=1
+  if [ -z "$shard" ]; then
+    local phases="${CX_PHASE:-decode}"
+    [ "$phases" = both ] && phases="decode prefill"
+    for phase in $phases; do
+      if [ -n "${CX_CASE_ID:-}" ] && cx_case_attempt_exists "$out_dir" "$CX_CASE_ID"; then
+        continue
+      fi
+      cx_emit_ep_failed_case "$out_dir/failed_${backend}_${phase}_${CX_TS:-setup}-a01.json" \
+        "$backend" "$phase" "$rc" || return 1
+    done
+    unset CX_FAILURE_MODE
+    return 0
+  fi
+  path="$shard"
+  [ -f "$path" ] || path="${root%/}/$shard"
+  [ -f "$path" ] || {
+    unset CX_FAILURE_MODE
+    cx_log "ERROR: cannot emit setup failures without shard control"
+    return 1
+  }
+  export COLLECTIVEX_CONTROL_SHA256
+  COLLECTIVEX_CONTROL_SHA256="$(sha256sum "$path" | awk '{print $1}')"
+  [[ "$COLLECTIVEX_CONTROL_SHA256" =~ ^[0-9a-f]{64}$ ]] || {
+    unset CX_FAILURE_MODE COLLECTIVEX_CONTROL_SHA256
+    cx_log "ERROR: cannot hash shard for setup-failure records"
+    return 1
+  }
+  cases_file="$(mktemp)" || return 1
+  if ! python3 - "$path" > "$cases_file" <<'PY'
+import json, sys
+
+with open(sys.argv[1]) as handle:
+    cases = json.load(handle)["cases"]
+for case in cases:
+    fields = (
+        case["phase"], case["mode"], case["case_id"], case["suite"], case["workload"],
+        case["required_publication"], case["routing"], "1" if case["eplb"] else "",
+        case["ep"], case["hidden"], case["topk"], case["experts"], case["nodes"],
+        case["gpus_per_node"], case["scale_up_domain"], case["scope"],
+        case["scale_up_transport"], case.get("scale_out_transport") or "",
+        case["transport"], case["topology_class"], case["ladder"],
+        case["warmup_semantics"],
+        "1" if case["canonical"] else "", case["timing"],
+        case.get("precision_profile") or "",
+    )
+    print("|".join(map(str, fields)))
+PY
+  then
+    rm -f "$cases_file"
+    unset CX_FAILURE_MODE
+    return 1
+  fi
+  expected="$(wc -l < "$cases_file" | tr -d ' ')"
+  [ "$expected" -gt 0 ] || { rm -f "$cases_file"; unset CX_FAILURE_MODE; return 1; }
+  while IFS='|' read -r phase mode case_id suite workload required routing eplb ep hidden topk \
+      experts nodes gpn domain scope scale_up_transport scale_out_transport transport \
+      topology_class ladder warmup_semantics canonical timing precision_profile; do
+    export CX_CASE_ID="$case_id" CX_SUITE="$suite" CX_WORKLOAD_NAME="$workload"
+    export CX_REQUIRED_PUBLICATION="$required" CX_ROUTING="$routing" CX_EPLB="$eplb"
+    export CX_EP="$ep" CX_NGPUS="$ep" CX_HIDDEN="$hidden" CX_TOPK="$topk" CX_EXPERTS="$experts"
+    export CX_MODE="$mode" CX_NODES="$nodes" CX_GPUS_PER_NODE="$gpn"
+    export CX_SCALE_UP_DOMAIN="$domain" CX_SCOPE="$scope"
+    export CX_SCALE_UP_TRANSPORT="$scale_up_transport"
+    export CX_SCALE_OUT_TRANSPORT="$scale_out_transport"
+    export CX_TRANSPORT="$transport" CX_TOPO="$topology_class"
+    export CX_TOKENS_LADDER="$ladder" CX_CANONICAL="$canonical"
+    export CX_PRECISION_PROFILE="$precision_profile"
+    export CX_WARMUP_SEMANTICS="$warmup_semantics"
+    IFS=: read -r CX_ITERS CX_TRIALS CX_WARMUP <<< "$timing"
+    export CX_ITERS CX_TRIALS CX_WARMUP CX_SAMPLES_PER_POINT="$((CX_ITERS * CX_TRIALS))"
+    if cx_case_attempt_exists "$out_dir" "$case_id"; then
+      covered=$((covered + 1))
+      continue
+    fi
+    cx_emit_ep_failed_case "$out_dir/failed_${case_id}-a01.json" "$backend" "$phase" "$rc" || return 1
+    emitted=$((emitted + 1))
+  done < "$cases_file"
+  rm -f "$cases_file"
+  unset CX_FAILURE_MODE
+  [ "$((emitted + covered))" -eq "$expected" ] || {
+    cx_log "ERROR: covered $((emitted + covered))/$expected terminal cases"
+    return 1
+  }
+}
+
+# Run one validated shard with one Slurm task per GPU. Launchers provide only
+# allocation/container policy through globals and CX_DISTRIBUTED_CONTAINER_ARGS.
+# shellcheck disable=SC2153
+cx_run_distributed_shard() {
+  local build_log build_rc cases_file expected_cases ci=0 failed_cases=0
+  local ph mode routing eplb hidden topk experts ladder suite workload required_pub
+  local canonical case_id ep timing case_iters case_trials case_warmup case_stem
+  local scope scale_up_transport scale_out_transport transport topology_class nodes gpn domain
+  local precision_profile
+  local workload_dir workload_ladder workload_log stage_rc attempt_tag out failure_out
+  local runtime_log run_rc expected_out case_ok summary_log
+  local -a container_args workload_args ep_args
+  [ "${NODES:-0}" -gt 1 ] && [ "${NGPUS:-0}" = "$((NODES * GPN))" ] \
+    || cx_die "invalid distributed launcher placement"
+  [ -n "${JOB_ID:-}" ] && [ -n "${SQUASH_FILE:-}" ] \
+    && [ -n "${CONTAINER_MOUNTS:-}" ] || cx_die "distributed launcher is incomplete"
+  [ -n "${SOURCE_BACKEND_ENV:-}" ] && [ -n "${BACKEND_PROBE:-}" ] \
+    && [ -n "${WRAP:-}" ] || cx_die "distributed rank wrapper is incomplete"
+
+  cx_resolve_slurm_rendezvous "$JOB_ID"
+  mkdir -p "$MOUNT_SRC/experimental/CollectiveX/results"
+  container_args=(--container-mounts="$CONTAINER_MOUNTS" --no-container-mount-home
+    --container-workdir=/ix/experimental/CollectiveX --no-container-entrypoint)
+  if declare -p CX_DISTRIBUTED_CONTAINER_ARGS >/dev/null 2>&1; then
+    container_args+=("${CX_DISTRIBUTED_CONTAINER_ARGS[@]}")
+  fi
+  local container_name="cxep_${JOB_ID}"
+
+  cx_log "distributed backend preparation: bench=$CX_BENCH nodes=$NODES"
+  cx_set_failure_stage backend-setup
+  build_log="$(cx_private_log_path backend-prepare)"
+  set +e
+  srun --jobid="$JOB_ID" --nodes="$NODES" --ntasks-per-node=1 --chdir=/tmp \
+    --container-name="$container_name" --container-image="$SQUASH_FILE" \
+    "${container_args[@]}" --export="$(cx_container_exports),CX_BUILD_ONLY=1" \
+    bash /ix/experimental/CollectiveX/runtime/run_in_container.sh \
+    </dev/null >"$build_log" 2>&1
+  build_rc=$?
+  if [ "$build_rc" = 0 ]; then
+    srun --jobid="$JOB_ID" --nodes="$NODES" --ntasks-per-node=1 --chdir=/tmp \
+      --container-name="$container_name" "${container_args[@]}" \
+      --export="$(cx_container_exports)" bash -c "$BACKEND_PROBE" \
+      </dev/null >>"$build_log" 2>&1
+    build_rc=$?
+  fi
+  set -e
+  if [ "$build_rc" != 0 ]; then
+    cx_fail_stage backend-setup "$build_log" || true
+    [ "${CX_PRECISION_PROBE:-0}" != 1 ] || return "$build_rc"
+    cx_emit_setup_failures "$CX_DIR" "$MOUNT_SRC/experimental/CollectiveX/results" \
+      "$CX_BENCH" "$build_rc"
+    return "$build_rc"
+  fi
+  cx_set_failure_stage execution
+
+  if [ "${CX_PRECISION_PROBE:-0}" = 1 ]; then
+    local fields probe_id backend sku ep mode profile
+    fields="$(cx_precision_probe_control_fields "$CX_DIR")" || return 1
+    IFS='|' read -r probe_id backend sku ep mode profile <<< "$fields"
+    [ "$backend" = "$CX_BENCH" ] && [ "$sku" = "$RUNNER" ] && [ "$ep" = "$NGPUS" ] \
+      || cx_die "precision probe control differs from runtime placement"
+    out="results/${probe_id}.json"
+    expected_out="$MOUNT_SRC/experimental/CollectiveX/$out"
+    runtime_log="$(cx_private_log_path precision-probe)"
+    set +e
+    timeout -k 30 "${CX_RUN_TIMEOUT:-900}" srun --jobid="$JOB_ID" --nodes="$NODES" \
+      --ntasks="$NGPUS" --ntasks-per-node="$GPN" --chdir=/tmp \
+      --container-name="$container_name" "${container_args[@]}" \
+      --export="$(cx_container_exports)" \
+      bash -c "$WRAP" _ --backend "$backend" --sku "$sku" --ep "$ep" \
+      --mode "$mode" --precision-profile "$profile" --out "$out" \
+      </dev/null >"$runtime_log" 2>&1
+    run_rc=$?
+    set -e
+    if [ "$run_rc" != 0 ] || ! python3 "$CX_DIR/tests/probe_precision.py" \
+        --validate-manifest "$expected_out" >/dev/null 2>&1; then
+      [ "$run_rc" != 0 ] || run_rc=1
+      cx_fail_stage execution "$runtime_log" || true
+      return "$run_rc"
+    fi
+    return 0
+  fi
+
+  cases_file="$(mktemp)" || return 1
+  local shard="${CX_SHARD_FILE:-}"
+  [ -z "$shard" ] || [ -f "$shard" ] || shard="$CX_DIR/$shard"
+  if [ -n "$shard" ]; then
+    if [ ! -f "$shard" ] || ! python3 - "$shard" > "$cases_file" <<'PY'
+import json
+import sys
+
+with open(sys.argv[1]) as handle:
+    cases = json.load(handle)["cases"]
+for case in cases:
+    get = lambda key, default="": str(case.get(key) or default)
+    fields = (
+        get("phase", "decode"), get("mode", "normal"), get("routing", "uniform"),
+        "1" if case.get("eplb") else "", get("hidden", "7168"),
+        get("topk", "8"), get("experts", "256"), get("ladder"),
+        get("suite"), get("workload"), get("required_publication"),
+        "1" if case.get("canonical") else "", get("case_id"), get("ep"),
+        get("timing", "8:64:32"), get("nodes"), get("gpus_per_node"),
+        get("scale_up_domain"), get("scope"), get("scale_up_transport"),
+        get("scale_out_transport"), get("transport"), get("topology_class"),
+        get("precision_profile"),
+    )
+    print("|".join(fields))
+PY
+    then
+      rm -f "$cases_file"
+      cx_die "could not enumerate validated shard cases"
+    fi
+  else
+    local phases="${CX_PHASE:-decode}" phase
+    [ "$phases" = both ] && phases="decode prefill"
+    cx_require_record_safe "$phases" "${CX_MODE:-normal}" "${CX_ROUTING:-uniform}" \
+      "${CX_EPLB:-}" "${CX_HIDDEN:-7168}" "${CX_TOPK:-8}" "${CX_EXPERTS:-256}" \
+      "${CX_TOKENS_LADDER:-}" "${CX_SUITE:-}" "${CX_WORKLOAD_NAME:-}" \
+      "${CX_REQUIRED_PUBLICATION:-}" "${CX_CANONICAL:-}" "${CX_CASE_ID:-}" \
+      "${CX_PRECISION_PROFILE:-}" \
+      "${CX_ITERS:-8}" "${CX_TRIALS:-64}" "${CX_WARMUP:-32}" \
+      "${CX_SCOPE:-scale-up}" \
+      "${CX_SCALE_UP_TRANSPORT:-unknown}" "${CX_SCALE_OUT_TRANSPORT:-}" \
+      "${CX_TRANSPORT:-unknown}" "${CX_TOPO:-manual}"
+    for phase in $phases; do
+      (IFS='|'; printf '%s\n' "$phase|${CX_MODE:-normal}|${CX_ROUTING:-uniform}|${CX_EPLB:-}|${CX_HIDDEN:-7168}|${CX_TOPK:-8}|${CX_EXPERTS:-256}|${CX_TOKENS_LADDER:-}|${CX_SUITE:-}|${CX_WORKLOAD_NAME:-}|${CX_REQUIRED_PUBLICATION:-}|${CX_CANONICAL:-}|${CX_CASE_ID:-}|$NGPUS|${CX_ITERS:-8}:${CX_TRIALS:-64}:${CX_WARMUP:-32}|$NODES|$GPN|$SCALE_UP_DOMAIN|${CX_SCOPE:-scale-up}|${CX_SCALE_UP_TRANSPORT:-unknown}|${CX_SCALE_OUT_TRANSPORT:-}|${CX_TRANSPORT:-unknown}|${CX_TOPO:-manual}|${CX_PRECISION_PROFILE:-}")
+    done > "$cases_file"
+  fi
+  expected_cases="$(wc -l < "$cases_file" | tr -d ' ')"
+  [ "$expected_cases" -gt 0 ] \
+    || { rm -f "$cases_file"; cx_die "distributed case list is empty"; }
+
+  while IFS='|' read -r ph mode routing eplb hidden topk experts ladder suite workload \
+      required_pub canonical case_id ep timing nodes gpn domain scope scale_up_transport \
+      scale_out_transport transport topology_class precision_profile; do
+    [ -n "$ph" ] || continue
+    ci=$((ci + 1))
+    case_stem="${RUNNER}_${CX_BENCH}_${ph}_${TS}-c$(printf '%03d' "$ci")"
+    IFS=: read -r case_iters case_trials case_warmup <<< "${timing:-8:64:32}"
+    case_iters="${case_iters:-8}"
+    case_trials="${case_trials:-64}"
+    case_warmup="${case_warmup:-32}"
+    ep="${ep:-$NGPUS}"
+    export CX_MODE="$mode" CX_CASE_ID="$case_id" CX_SUITE="$suite" CX_WORKLOAD_NAME="$workload"
+    export CX_REQUIRED_PUBLICATION="$required_pub" CX_CANONICAL="$canonical" CX_EP="$ep"
+    export CX_PRECISION_PROFILE="$precision_profile"
+    export CX_ROUTING="$routing" CX_EPLB="$eplb" CX_TOKENS_LADDER="$ladder"
+    export CX_HIDDEN="$hidden" CX_TOPK="$topk" CX_EXPERTS="$experts"
+    export CX_NODES="$nodes" CX_GPUS_PER_NODE="$gpn" CX_SCALE_UP_DOMAIN="$domain"
+    export CX_SCOPE="$scope" CX_SCALE_UP_TRANSPORT="$scale_up_transport"
+    export CX_SCALE_OUT_TRANSPORT="$scale_out_transport"
+    export CX_TRANSPORT="$transport" CX_TOPO="$topology_class"
+    export CX_ITERS="$case_iters" CX_TRIALS="$case_trials" CX_WARMUP="$case_warmup"
+    export CX_SAMPLES_PER_POINT="$((case_iters * case_trials))"
+    export CX_WARMUP_SEMANTICS="full-roundtrip-before-each-component-trial-point-v1"
+    cx_log "EP${NGPUS}[$ci] id=${case_id:-manual} $mode/$ph $CX_BENCH"
+    if [ "$ep" != "$NGPUS" ] || [ "$nodes" != "$NODES" ] || [ "$gpn" != "$GPN" ] \
+        || [ "$domain" != "$SCALE_UP_DOMAIN" ]; then
+      export CX_ATTEMPT_ID=1
+      failure_out="$MOUNT_SRC/experimental/CollectiveX/results/failed_${case_stem}-a01.json"
+      cx_emit_ep_failed_case "$failure_out" "$CX_BENCH" "$ph" 5
+      failed_cases=$((failed_cases + 1))
+      continue
+    fi
+
+    workload_dir=""
+    if cx_bool_enabled "$canonical"; then
+      workload_dir=".cx_workloads/c$(printf '%03d' "$ci")"
+      workload_ladder="$ladder"
+      [ -n "$workload_ladder" ] \
+        || workload_ladder="1 2 4 8 16 32 64 128 256 512 1024 2048 4096"
+      workload_args=(python3 tests/make_workloads.py --out-dir "$workload_dir"
+        --routing "$routing" --ep "$ep" --hidden "$hidden" --topk "$topk"
+        --experts "$experts" --seed "${CX_SEED:-67}" --tokens-ladder "$workload_ladder")
+      workload_log="$(cx_private_log_path "workload-c$(printf '%03d' "$ci")")"
+      set +e
+      srun --jobid="$JOB_ID" --nodes=1 --ntasks=1 --chdir=/tmp \
+        --container-name="$container_name" "${container_args[@]}" \
+        --export="$(cx_container_exports)" "${workload_args[@]}" \
+        </dev/null >"$workload_log" 2>&1
+      stage_rc=$?
+      set -e
+      if [ "$stage_rc" != 0 ]; then
+        export CX_ATTEMPT_ID=1
+        failure_out="$MOUNT_SRC/experimental/CollectiveX/results/failed_${case_stem}-a01.json"
+        cx_emit_ep_failed_case "$failure_out" "$CX_BENCH" "$ph" "$stage_rc"
+        failed_cases=$((failed_cases + 1))
+        continue
+      fi
+    fi
+
+    ep_args=(--backend "$CX_BENCH" --mode "$mode" --phase "$ph" --routing "$routing"
+      --precision-profile "$precision_profile"
+      --gpus-per-node "$gpn" --scale-up-domain "$domain" --scope "$scope"
+      --scale-up-transport "$scale_up_transport" --scale-out-transport "$scale_out_transport"
+      --tokens-ladder "$ladder" --hidden "$hidden" --topk "$topk" --experts "$experts"
+      --warmup "$case_warmup" --iters "$case_iters" --trials "$case_trials"
+      --seed "${CX_SEED:-67}" --runner "$RUNNER" --topology-class "$topology_class"
+      --transport "$transport" --case-id "$case_id" --suite "$suite"
+      --workload-name "$workload" --required-publication "$required_pub"
+      --qualification-index "${CX_QUALIFICATION_INDEX:-1}")
+    cx_bool_enabled "$eplb" && ep_args+=(--eplb)
+    [ -z "$workload_dir" ] || ep_args+=(--workload-dir "$workload_dir")
+    export CX_ATTEMPT_ID=1
+    attempt_tag=a01
+    out="results/${case_stem}_${attempt_tag}.json"
+    failure_out="$MOUNT_SRC/experimental/CollectiveX/results/failed_${case_stem}-${attempt_tag}.json"
+    runtime_log="$(cx_private_log_path "runtime-c$(printf '%03d' "$ci")-$attempt_tag")"
+    set +e
+    timeout -k 30 "${CX_RUN_TIMEOUT:-900}" srun --jobid="$JOB_ID" --nodes="$NODES" \
+      --ntasks="$NGPUS" --ntasks-per-node="$GPN" --chdir=/tmp \
+      --container-name="$container_name" "${container_args[@]}" \
+      --export="$(cx_container_exports)" \
+      bash -c "$WRAP" _ "${ep_args[@]}" --out "$out" \
+      </dev/null >"$runtime_log" 2>&1
+    run_rc=$?
+    set -e
+    expected_out="$MOUNT_SRC/experimental/CollectiveX/$out"
+    case_ok=0
+    if [ "$run_rc" = 0 ] && cx_result_doc_is "$expected_out" success; then
+      case_ok=1
+    elif [ "$run_rc" = 0 ] && cx_result_doc_is "$expected_out" invalid; then
+      cx_log "ERROR: EP${NGPUS}[$ci] completed with invalid semantic evidence"
+    else
+      [ "$run_rc" != 0 ] || run_rc=1
+      if cx_has_result_doc "$expected_out"; then
+        cx_demote_result_doc "$expected_out" "$run_rc" \
+          || { cx_quarantine_result_doc "$expected_out"; cx_emit_ep_failed_case "$failure_out" "$CX_BENCH" "$ph" "$run_rc"; }
+      else
+        cx_quarantine_result_doc "$expected_out"
+        cx_emit_ep_failed_case "$failure_out" "$CX_BENCH" "$ph" "$run_rc"
+      fi
+    fi
+    if [ "$case_ok" = 0 ]; then
+      [ "$run_rc" = 0 ] || cx_fail_stage execution "$runtime_log" || true
+      failed_cases=$((failed_cases + 1))
+    fi
+  done < "$cases_file"
+  rm -f "$cases_file"
+  [ "$ci" -eq "$expected_cases" ] \
+    || cx_die "enumerated $expected_cases cases but executed $ci"
+  if [ "$failed_cases" -ne 0 ]; then
+    summary_log="$(cx_private_log_path shard-summary)"
+    printf 'SHARD done: %s/%s case(s) failed\n' "$failed_cases" "$expected_cases" \
+      > "$summary_log"
+    cx_fail_stage execution "$summary_log" || true
+    return 1
+  fi
+  return 0
+}
+
+# cx_launcher_cleanup <rc>
+# EXIT-trap handler armed by cx_install_launcher_fail_safe. Releases the allocation,
+# records whether cleanup is safe, emits failure records for non-probe runs, removes the
+# staged tree when it is safe to do so, and finally exits with <rc> (forced to 1 when a
+# cleanup step itself fails after an otherwise successful run).
+cx_launcher_cleanup() {
+  local rc="$1" stage_root="${MOUNT_SRC:-}" source_root out_dir allocation_stopped=1
+  source_root="${stage_root:-${REPO_ROOT:-}}"
+  # Disarm the trap first so the `exit` at the end does not re-enter this handler.
+  trap - EXIT
+  # Best-effort removal of the ephemeral config file, then forget its path.
+  if [ -n "${COLLECTIVEX_EPHEMERAL_CONFIG_PATH:-}" ]; then
+    rm -f -- "$COLLECTIVEX_EPHEMERAL_CONFIG_PATH" >/dev/null 2>&1 || true
+    unset COLLECTIVEX_EPHEMERAL_CONFIG_PATH
+  fi
+  # A known job must be cancelled; a failed cancel, or an allocation whose state is
+  # flagged uncertain, marks the allocation as not stopped and forces a non-zero rc.
+  if [ -n "${JOB_ID:-}" ]; then
+    if ! cx_cancel_job "$JOB_ID"; then
+      allocation_stopped=0
+      [ "$rc" != 0 ] || rc=1
+    fi
+  elif [ "${CX_ALLOCATION_UNCERTAIN:-0}" = 1 ]; then
+    allocation_stopped=0
+    [ "$rc" != 0 ] || rc=1
+  fi
+  # Record the outcome of the allocation teardown; a guard-write failure is tolerated.
+  if [ "$allocation_stopped" = 1 ]; then
+    cx_write_cleanup_guard safe || true
+  else
+    cx_write_cleanup_guard unsafe || true
+  fi
+  # With the allocation possibly still alive, prefer REPO_ROOT over the staged tree.
+  [ "$allocation_stopped" = 1 ] || source_root="${REPO_ROOT:-$source_root}"
+  # Failure path (skipped for precision probes): log the failure class, emit setup-failure
+  # records under the chosen source root, and copy results back when that root is the stage.
+  if [ "$rc" != 0 ] && [ "${CX_PRECISION_PROBE:-0}" != 1 ] \
+      && [ -n "${REPO_ROOT:-}" ] && [ -n "${CX_BENCH:-}" ]; then
+    cx_log "ERROR: terminal-failure-class=${CX_FAILSAFE_MODE:-setup}"
+    [ -d "$source_root/experimental/CollectiveX" ] || source_root="$REPO_ROOT"
+    out_dir="$source_root/experimental/CollectiveX/results"
+    cx_emit_setup_failures \
+      "$source_root/experimental/CollectiveX" "$out_dir" "$CX_BENCH" "$rc" || true
+    [ "$source_root" = "$REPO_ROOT" ] \
+      || cx_collect_results "$source_root" "$REPO_ROOT" || true
+  fi
+  # Only tear down the staged tree when the allocation is confirmed stopped and the stage
+  # is a location distinct from the repository; a failed teardown forces a non-zero rc.
+  if [ "$allocation_stopped" = 1 ] && [ -n "${REPO_ROOT:-}" ] \
+      && [ -n "$stage_root" ] && [ "$stage_root" != "$REPO_ROOT" ]; then
+    if ! cx_cleanup_stage "$stage_root" "$REPO_ROOT"; then
+      [ "$rc" != 0 ] || rc=1
+    fi
+  fi
+  # Private logs are cleaned here unless COLLECTIVEX_CANONICAL_GHA=1.
+  [ "${COLLECTIVEX_CANONICAL_GHA:-0}" = 1 ] || cx_cleanup_private_logs "$rc"
+  exit "$rc"
+}
+
+# Arm the launcher fail-safe: every shell exit is routed through cx_launcher_cleanup with
+# the exit status that triggered it, and the allocation-uncertainty flag starts cleared.
+cx_install_launcher_fail_safe() {
+  trap 'cx_launcher_cleanup "$?"' EXIT
+  CX_ALLOCATION_UNCERTAIN=0
+}
diff --git a/experimental/CollectiveX/runtime/run_in_container.sh b/experimental/CollectiveX/runtime/run_in_container.sh
new file mode 100644
index 000000000..eeb8f632f
--- /dev/null
+++ b/experimental/CollectiveX/runtime/run_in_container.sh
@@ -0,0 +1,1116 @@
+#!/usr/bin/env bash
+# CollectiveX — generic in-container benchmark dispatcher (single-node).
+#
+# Runs INSIDE the container under `srun` for single-node shards. The GB EP8 launcher invokes
+# run_ep.py directly across nodes. The SKU adapter handles allocation/container/transport-env;
+# this script selects one EP backend from CX_BENCH and writes result JSON under results/.
+#
+# Required env (exported by the adapter): CX_RUNNER CX_NGPUS CX_TS CX_TOPO
+# Selector: CX_BENCH = deepep | deepep-v2 | mori | uccl | nccl-ep | deepep-hybrid
+# EP knobs passed to tests/run_ep.py:
+#   CX_PHASE = decode | prefill | both (default decode)   <- picks the token sweep
+#   CX_TOKENS_LADDER (space/comma sep; blank = phase default)
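+#   CX_HIDDEN CX_TOPK CX_EXPERTS CX_ROUTING CX_SEED CX_ITERS CX_TRIALS CX_WARMUP
+#   CX_MODE CX_EPLB CX_PRECISION_PROFILE CX_CANONICAL CX_WORKLOAD_DIR CX_RUN_TIMEOUT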
+set -euo pipefail
+
+cd /ix/experimental/CollectiveX
+# shellcheck source=../runtime/common.sh
+source runtime/common.sh
+mkdir -p results
+cx_write_runtime_stage backend-setup || cx_die "cannot record runtime stage"
+
+: "${CX_RUNNER:?CX_RUNNER not set}"
+: "${CX_NGPUS:?CX_NGPUS not set}"
+: "${CX_TS:?CX_TS not set}"
+: "${CX_TOPO:?CX_TOPO not set}"
+CX_BENCH="${CX_BENCH:-deepep}"
+CX_TRANSPORT="${CX_TRANSPORT:-}"
+
+cx_apply_timing_profile
+
+cx_log "in-container: runner=$CX_RUNNER ngpus=$CX_NGPUS bench=$CX_BENCH topo=$CX_TOPO"
+
+# Blank ladders use the phase default in tests/run_ep.py.
+cx_ep_ladder() {
+  printf '%s' "${CX_TOKENS_LADDER:-}"
+}
+
+# Canonical workload staging. Every SKU/backend generates identical canonical array bytes and
+# content IDs in-container; the NPZ container bytes themselves are not an identity boundary. When CX_CANONICAL=1
+# (and CX_WORKLOAD_DIR not already provided) we generate routing traces for the run's ladder
+# into a NON-results dir (.cx_workloads/ — so the *.manifest.json never pollute the results glob) and
+# point run_ep at it. Raw attempts remain diagnostic until the publisher validates full coverage.
+cx_stage_canonical() {
+  cx_bool_enabled "${CX_CANONICAL:-0}" || return 0
+  [ -n "${CX_WORKLOAD_DIR:-}" ] && return 0
+  local dir="$PWD/.cx_workloads"
+  local ladder; ladder="$(cx_ep_ladder)"
+  # cover both phase ladders when none is given, so either phase finds its files.
+  [ -z "$ladder" ] && ladder="1 2 4 8 16 32 64 128 256 512 1024 2048 4096"
+  cx_log "staging canonical workloads (routing=${CX_ROUTING:-uniform} ep=$CX_NGPUS ladder='$ladder')"
+  python3 tests/make_workloads.py --out-dir "$dir" --routing "${CX_ROUTING:-uniform}" \
+    --ep "$CX_NGPUS" --hidden "${CX_HIDDEN:-7168}" --topk "${CX_TOPK:-8}" \
+    --experts "${CX_EXPERTS:-256}" --seed "${CX_SEED:-67}" --tokens-ladder "$ladder" \
+    || { cx_log "ERROR: canonical workload staging failed"; return 1; }
+  export CX_WORKLOAD_DIR="$dir"
+  cx_log "canonical workloads staged at $dir"
+}
+
+# run_ep_suite <backend>
+# One tests/run_ep.py invocation per phase (decode/prefill/both); dispatch and
+# combine are timed separately inside it. One JSON per (backend, phase).
+# Preserve a failed case with its full scheduled identity instead of letting it vanish.
+emit_failed_case() {  # backend phase rc
+  cx_emit_ep_failed_case \
+    "results/failed_${CX_RUNNER}_${1}_${2}_${CX_TS}.json" "$1" "$2" "$3" || true
+}
+
+run_ep_suite() {
+  # $1 = backend name forwarded to tests/run_ep.py as --backend.
+  # Runs every phase selected by CX_PHASE (decode | prefill | both) under torchrun with a
+  # CX_RUN_TIMEOUT limit. Returns 0 only if every phase left a result document classified
+  # as "success"; any invalid, failed, or timed-out phase makes the suite return 1 while
+  # the remaining phases still run.
+  local backend="$1" phase phases ladder failure_kind rc=0 rc_run
+  ladder="$(cx_ep_ladder)"
+  phases="${CX_PHASE:-decode}"
+  [ "$phases" = "both" ] && phases="decode prefill"
+  # Without canonical workloads no phase can run: record each one as failed and stop.
+  if ! cx_stage_canonical; then
+    for phase in $phases; do
+      emit_failed_case "$backend" "$phase" 2
+    done
+    return 1
+  fi
+  for phase in $phases; do
+    cx_log "ep backend=$backend phase=$phase ngpus=$CX_NGPUS ladder='${ladder:-<phase-default>}'"
+    local out="results/${CX_RUNNER}_${backend}_${phase}_${CX_TS}.json"
+    local -a EPARGS=(--backend "$backend" --mode "${CX_MODE:-normal}" --phase "$phase"
+      --precision-profile "${CX_PRECISION_PROFILE:-}"
+      --tokens-ladder "$ladder"
+      --hidden "${CX_HIDDEN:-7168}" --topk "${CX_TOPK:-8}" --experts "${CX_EXPERTS:-256}"
+      --routing "${CX_ROUTING:-uniform}" --seed "${CX_SEED:-67}" --iters "${CX_ITERS:-8}"
+      --trials "${CX_TRIALS:-64}" --warmup "${CX_WARMUP:-32}"
+      --gpus-per-node "${CX_GPUS_PER_NODE:-0}" --scale-up-domain "${CX_SCALE_UP_DOMAIN:-0}"
+      --scope "${CX_SCOPE:-scale-up}" --scale-up-transport "${CX_SCALE_UP_TRANSPORT:-unknown}"
+      --scale-out-transport "${CX_SCALE_OUT_TRANSPORT:-}"
+      --case-id "${CX_CASE_ID:-}" --suite "${CX_SUITE:-}" --workload-name "${CX_WORKLOAD_NAME:-}"
+      --required-publication "${CX_REQUIRED_PUBLICATION:-}"
+      --qualification-index "${CX_QUALIFICATION_INDEX:-1}"
+      --runner "$CX_RUNNER" --topology-class "$CX_TOPO" --transport "$CX_TRANSPORT"
+      --out "$out")
+    cx_bool_enabled "${CX_EPLB:-0}" && EPARGS+=(--eplb)
+    [ -n "${CX_WORKLOAD_DIR:-}" ] && EPARGS+=(--workload-dir "$CX_WORKLOAD_DIR")
+    cx_write_runtime_stage execution || cx_die "cannot record runtime stage"
+    # Capture the exit status through if/else so a non-zero status is observed, not fatal.
+    if timeout -k 30 "${CX_RUN_TIMEOUT:-900}" \
+      torchrun --nproc_per_node="$CX_NGPUS" tests/run_ep.py "${EPARGS[@]}"; then
+      rc_run=0
+    else
+      rc_run=$?
+    fi
+    # A clean exit with an "invalid" document is kept as written but fails the suite.
+    if [ "$rc_run" = 0 ] && cx_result_doc_is "$out" invalid; then
+      cx_log "WARN: $backend $phase completed with invalid semantic evidence"
+      rc=1
+      continue
+    fi
+    # A clean exit without a "success" document is treated as a failed run.
+    if [ "$rc_run" = 0 ] && ! cx_result_doc_is "$out" success; then
+      rc_run=1
+    fi
+    if [ "$rc_run" != 0 ]; then
+      # 124 = timeout expired; 137 = killed by the -k follow-up SIGKILL.
+      failure_kind=failed
+      [ "$rc_run" != 124 ] && [ "$rc_run" != 137 ] || failure_kind="timed out"
+      if [ "$failure_kind" = "timed out" ]; then
+        cx_log "WARN: $backend $phase run timed out rc=$rc_run (limit=${CX_RUN_TIMEOUT:-900}s)"
+      else
+        cx_log "WARN: $backend $phase run failed rc=$rc_run"
+      fi
+      if cx_has_result_doc "$out"; then
+        # Report preservation only when the demotion really happened; if it fails the
+        # document is quarantined and replaced by a synthetic failure record instead.
+        if cx_demote_result_doc "$out" "$rc_run"; then
+          cx_log "preserved benchmark output as a failed attempt"
+        else
+          cx_quarantine_result_doc "$out"
+          emit_failed_case "$backend" "$phase" "$rc_run"
+        fi
+      else
+        cx_quarantine_result_doc "$out"
+        emit_failed_case "$backend" "$phase" "$rc_run"
+      fi
+      rc=1
+    fi
+  done
+  return "$rc"
+}
+
+# Resolve and verify the actual CUDA target before compiling source kernels.
+cx_cuda_arch() {
+  local expected detected
+  case "$CX_RUNNER" in
+    h100*|h200*) expected="9.0" ;;
+    b200*|gb200*) expected="10.0" ;;
+    b300*|gb300*) expected="10.3" ;;
+    *) cx_log "ERROR: no CUDA target registered for $CX_RUNNER"; return 1 ;;
+  esac
+  detected="$(python3 - <<'PY'
+import torch
+
+major, minor = torch.cuda.get_device_capability()
+print(f"{major}.{minor}")
+PY
+)" || return 1
+  [ "$detected" = "$expected" ] || {
+    cx_log "ERROR: $CX_RUNNER expected CUDA target $expected, detected $detected"
+    return 1
+  }
+  printf '%s' "$detected"
+}
+
+# cx_nvidia_package_root <package> <component>
+# Print (no trailing newline) the resolved nvidia/<component> directory of the installed
+# Python distribution <package>. Exits non-zero without output when the distribution is
+# not installed, records no file under nvidia/<component>/, or the directory is missing.
+cx_nvidia_package_root() {
+  local package="$1" component="$2"
+  python3 - "$package" "$component" <<'PY'
+from importlib import metadata
+from pathlib import Path, PurePosixPath
+import sys
+
+package, component = sys.argv[1:]
+try:
+    distribution = metadata.distribution(package)
+    prefix = f"nvidia/{component}/"
+    entries = [str(entry).replace("\\", "/") for entry in distribution.files or ()]
+    if not any(entry.startswith(prefix) for entry in entries):
+        raise ValueError
+    root = Path(distribution.locate_file(PurePosixPath("nvidia") / component)).resolve()
+    if not root.is_dir():
+        raise ValueError
+except (metadata.PackageNotFoundError, OSError, TypeError, ValueError):
+    raise SystemExit(1)
+print(root, end="")
+PY
+}
+
+# Locate the active CUDA toolkit from the nvcc found on PATH and expose its bundled CCCL
+# headers to the compilers. On success exports CUDA_HOME, CX_CUDA_CCCL, CPATH and
+# NVCC_PREPEND_FLAGS. Returns 1 after logging when nvcc is missing or unresolvable, its
+# path does not end in bin/nvcc, the toolkit root lacks bin/nvcc, include/ or lib64/, or
+# no targets/*/include/cccl directory exists.
+cx_prepare_cuda_cccl() {
+  local cccl="" candidate cuda_home nvcc
+  nvcc="$(command -v nvcc)" \
+    || { cx_log "ERROR: CUDA nvcc is unavailable"; return 1; }
+  # Resolve symlinks so the toolkit root is derived from the real install location.
+  nvcc="$(readlink -f -- "$nvcc")" \
+    || { cx_log "ERROR: CUDA nvcc cannot be resolved"; return 1; }
+  case "$nvcc" in
+    */bin/nvcc) cuda_home="${nvcc%/bin/nvcc}" ;;
+    *) cx_log "ERROR: CUDA nvcc has an unexpected path"; return 1 ;;
+  esac
+  [ -x "$cuda_home/bin/nvcc" ] && [ -d "$cuda_home/include" ] \
+    && [ -d "$cuda_home/lib64" ] \
+    || { cx_log "ERROR: CUDA toolkit root is incomplete"; return 1; }
+  # The first existing per-target CCCL directory wins.
+  for candidate in "$cuda_home"/targets/*/include/cccl; do
+    if [ -d "$candidate" ]; then
+      cccl="$candidate"
+      break
+    fi
+  done
+  [ -n "$cccl" ] || { cx_log "ERROR: CUDA CCCL headers are unavailable"; return 1; }
+  export CUDA_HOME="$cuda_home" CX_CUDA_CCCL="$cccl"
+  # Add the ':' separator only when CPATH already has content: GCC treats an empty CPATH
+  # element (such as a trailing ':') as the current working directory, which would put
+  # whatever directory the build runs from on the include path.
+  export CPATH="$cccl${CPATH:+:$CPATH}"
+  export NVCC_PREPEND_FLAGS="-I$cccl${NVCC_PREPEND_FLAGS:+ $NVCC_PREPEND_FLAGS}"
+}
+
+# Prepare the build/run toolchain for DeepEP V2: create (once, under an exclusive flock)
+# an overlay directory of symlinks onto the pip-packaged NVSHMEM so that an unversioned
+# libnvshmem_host.so exists, validate the overlay, export NVSHMEM_DIR, expose the CUDA
+# CCCL headers, and put the overlay's lib/ on LD_LIBRARY_PATH.
+# Returns 1 after logging when the NVSHMEM package or a valid overlay is unavailable.
+cx_prepare_deepep_toolchain() {
+  local packaged overlay path root temporary
+  packaged="$(cx_nvidia_package_root nvidia-nvshmem-cu12 nvshmem)" \
+    || { cx_log "ERROR: nvidia.nvshmem is unavailable"; return 1; }
+  root="$(cx_deepep_v2_root)" || return 1
+  overlay="$root/nvshmem-overlay"
+  # The subshell scopes the umask and the lock descriptor; the lock is released when the
+  # subshell exits.
+  if ! (
+    umask 077
+    exec 8>"$root/nvshmem-overlay.lock" || exit 1
+    flock 8 || exit 1
+    if [ ! -d "$overlay" ]; then
+      # Assemble in a scratch directory and rename into place so a partially built
+      # overlay never appears under its final name.
+      temporary="$root/.nvshmem-overlay.$$"
+      rm -rf "$temporary" || exit 1
+      mkdir -p "$temporary/lib" || exit 1
+      ln -s "$packaged/include" "$temporary/include" || exit 1
+      for path in "$packaged"/lib/*; do
+        ln -s "$path" "$temporary/lib/${path##*/}" || exit 1
+      done
+      # Provide the unversioned host library name when only the .so.3 is packaged.
+      [ ! -e "$packaged/lib/libnvshmem_host.so.3" ] \
+        || ln -sf "$packaged/lib/libnvshmem_host.so.3" \
+          "$temporary/lib/libnvshmem_host.so" || exit 1
+      mv "$temporary" "$overlay" || exit 1
+    fi
+    # The overlay (fresh or pre-existing) must be a real directory that points at this
+    # package's headers and provides both the host and device libraries.
+    [ ! -L "$overlay" ] \
+      && [ "$(readlink -f "$overlay/include")" = "$(readlink -f "$packaged/include")" ] \
+      && [ -e "$overlay/lib/libnvshmem_host.so" ] \
+      && [ -e "$overlay/lib/libnvshmem_device.a" ]
+  ); then
+    cx_log "ERROR: DeepEP V2 NVSHMEM overlay is invalid"
+    return 1
+  fi
+  NVSHMEM_DIR="$overlay"
+  export NVSHMEM_DIR
+  cx_prepare_cuda_cccl || return 1
+  # Add the ':' separator only when LD_LIBRARY_PATH already has content: the dynamic
+  # loader treats an empty element (such as a trailing ':') as the current directory.
+  export LD_LIBRARY_PATH="$NVSHMEM_DIR/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
+}
+
+# Verify that the container's preinstalled DeepEP (V1) build matches the pinned image
+# contract for this runner, and export DEEPEP_COMMIT for the selected pin.
+# Returns 1 after logging when the image identity is not the verified pinned
+# multi-architecture image, the CUDA target check fails, or the installed package differs
+# from the expected version/hashes.
+cx_probe_deepep() {
+  local expected_record_sha256 expected_version expected_wheel_sha256
+  # The image name, its digest, and the "digest verified" flag must all match the pins.
+  if [ "${COLLECTIVEX_IMAGE:-}" != "$CX_IMAGE_MULTIARCH" ] \
+      || [ "${COLLECTIVEX_IMAGE_DIGEST:-}" != "$CX_IMAGE_MULTIARCH_DIGEST" ] \
+      || [ "${COLLECTIVEX_IMAGE_DIGEST_VERIFIED:-0}" != 1 ]; then
+    cx_log "ERROR: DeepEP V1 requires the exact pinned multi-architecture image"
+    return 1
+  fi
+  # Only the pass/fail outcome is needed here; the printed capability is discarded.
+  cx_cuda_arch >/dev/null || return 1
+  # gb200/gb300 runners pin a different DeepEP build than every other runner.
+  case "$CX_RUNNER" in
+    gb200|gb300)
+      expected_version="1.1.0+814e508"
+      expected_wheel_sha256="784dabec0877b6cf72619b7e93eda7e2f365648487bd37fc3ff6960e53669313"
+      expected_record_sha256="2671cff7baf8c2c214ff4bac721af875d513130670bec57601998bd1aae82882"
+      DEEPEP_COMMIT="814e508537c6ffc775d59f6f1b9ba43f3a65968c"
+      ;;
+    *)
+      expected_version="1.2.1"
+      expected_wheel_sha256="7c02c29306ea0fe2dd474618e72e0f310f260187a9c0700a656d2f6964e8c307"
+      expected_record_sha256="6548e9c504a12b2471af4b7f4d9546321210a57a456b5dc55bd4a8dad0f932ac"
+      DEEPEP_COMMIT="9af0e0d0e74f3577af1979c9b9e1ac2cad0104ee"
+      ;;
+  esac
+  export DEEPEP_COMMIT
+  # The embedded check asserts, in order: the installed version; that the imported deep_ep
+  # module and the module defining Buffer are files recorded by the distribution; the wheel
+  # archive hash from direct_url.json; the sha256 of RECORD itself; and the sha256 (plus
+  # size, when recorded) of every hashed RECORD entry.
+  python3 - "$expected_version" "$expected_wheel_sha256" "$expected_record_sha256" <<'PY' || {
+import base64
+import csv
+import hashlib
+import importlib.metadata as metadata
+import io
+import json
+from pathlib import Path
+import sys
+
+import deep_ep
+from deep_ep import Buffer
+
+distribution = metadata.distribution("deep_ep")
+assert distribution.version == sys.argv[1]
+assert Buffer.__name__ == "Buffer"
+recorded_files = {
+    Path(distribution.locate_file(entry)).resolve() for entry in distribution.files or ()
+}
+buffer_module = sys.modules.get(Buffer.__module__)
+assert Path(deep_ep.__file__).resolve() in recorded_files
+assert buffer_module is not None and Path(buffer_module.__file__).resolve() in recorded_files
+direct_url = json.loads(distribution.read_text("direct_url.json"))
+assert direct_url["archive_info"]["hashes"]["sha256"] == sys.argv[2]
+record_entry = next(
+    entry for entry in distribution.files or ()
+    if str(entry).endswith(".dist-info/RECORD")
+)
+record = distribution.locate_file(record_entry).read_bytes()
+assert hashlib.sha256(record).hexdigest() == sys.argv[3]
+for path, encoded_digest, size in csv.reader(io.StringIO(record.decode())):
+    if not encoded_digest:
+        continue
+    algorithm, expected = encoded_digest.split("=", 1)
+    assert algorithm == "sha256"
+    payload = distribution.locate_file(path).read_bytes()
+    observed = base64.urlsafe_b64encode(hashlib.sha256(payload).digest()).decode().rstrip("=")
+    assert observed == expected
+    assert not size or len(payload) == int(size)
+PY
+    cx_log "ERROR: container DeepEP build does not match its pinned image contract"
+    return 1
+  }
+  cx_log "DeepEP image build ready ($DEEPEP_COMMIT)"
+}
+
+# DeepEP V2 is PR #605's ElasticBuffer implementation with upstream PR #630's pure scale-up
+# initialization fix. Canonical launchers stage the pinned source and mount a private cluster-local
+# build cache at /cx-cache.
+cx_deepep_v2_root() {
+  local arch cpu base identity key image_digest
+  arch="$(cx_cuda_arch)" || return 1
+  cpu="$(uname -m)"
+  [[ "$cpu" =~ ^[A-Za-z0-9._-]+$ ]] || return 1
+  base="${CX_BACKEND_CACHE_ROOT:-}"
+  [[ "$base" = /* ]] || return 1
+  image_digest="${COLLECTIVEX_IMAGE_DIGEST:-manual-unverified}"
+  [[ "$image_digest" = manual-unverified || "$image_digest" =~ ^sha256:[0-9a-f]{64}$ ]] \
+    || return 1
+  # Bump the recipe generation whenever the build procedure changes. Benchmark-only
+  # source revisions must reuse the same immutable environment instead of leaking GBs.
+  identity="deepep-v2-cache-v2|$cpu|sm${arch/./}|image=$image_digest|recipe=aot-persistent-nvshmem-active-cuda-maxjobs16-v2|$CX_DEEPEP_V2_COMMIT|$CX_DEEPEP_V2_TREE|$CX_DEEPEP_V2_FMT_COMMIT|pip=26.1.2|setuptools=82.0.1|wheel=0.47.0|ninja=1.13.0|numpy=2.2.6|torch=2.10.0+cu130|nccl=2.30.4|nvshmem=3.3.9|max-jobs=16"
+  key="$(printf '%s' "$identity" | sha256sum | awk '{print $1}')"
+  [[ "$key" =~ ^[0-9a-f]{64}$ ]] || return 1
+  printf '%s/deepep-v2-%s' "$base" "$key"
+}
+
+# Activate the cached DeepEP V2 environment in the current shell: put the cache venv first
+# on PATH, export the NCCL/NVSHMEM package roots and library paths, point the JIT cache at
+# the job-local stage, and export the pinned source identity.
+# Returns 1 after logging when the venv interpreter, a package root, the job-local stage,
+# or a safe JIT cache directory is unavailable.
+cx_activate_deepep_v2() {
+  local root venv stage_root
+  root="$(cx_deepep_v2_root)" || return 1
+  venv="$root/venv"
+  [ -x "$venv/bin/python" ] \
+    || { cx_log "ERROR: DeepEP V2 venv interpreter is unavailable"; return 1; }
+  export VIRTUAL_ENV="$venv"
+  # Strip an existing leading "$venv/bin:" first so repeated activation does not stack it.
+  export PATH="$venv/bin:${PATH#"$venv/bin:"}"
+  EP_NCCL_ROOT_DIR="$(cx_nvidia_package_root nvidia-nccl-cu13 nccl)" \
+    || { cx_log "ERROR: DeepEP V2 NCCL package root is unavailable"; return 1; }
+  EP_NVSHMEM_ROOT_DIR="$(cx_nvidia_package_root nvidia-nvshmem-cu12 nvshmem)" \
+    || { cx_log "ERROR: DeepEP V2 NVSHMEM package root is unavailable"; return 1; }
+  export EP_NCCL_ROOT_DIR EP_NVSHMEM_ROOT_DIR
+  # Add the ':' separator only when LD_LIBRARY_PATH already has content: the dynamic
+  # loader treats an empty element (such as a trailing ':') as the current directory.
+  export LD_LIBRARY_PATH="$EP_NCCL_ROOT_DIR/lib:$EP_NVSHMEM_ROOT_DIR/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
+  # The stage root is the parent of an absolute CX_BACKEND_SOURCE_ROOT ending in /.cx_sources.
+  case "${CX_BACKEND_SOURCE_ROOT:-}" in
+    /*/.cx_sources) stage_root="${CX_BACKEND_SOURCE_ROOT%/.cx_sources}" ;;
+    *) cx_log "ERROR: DeepEP V2 job-local source root is unavailable"; return 1 ;;
+  esac
+  [ -d "$stage_root" ] && [ ! -L "$stage_root" ] \
+    || { cx_log "ERROR: DeepEP V2 job-local stage is invalid"; return 1; }
+  # JIT CUBINs are evidence from this shard, not part of the persistent AOT environment.
+  # Keeping them on the isolated staged tree prevents a prior driver/topology attempt
+  # from seeding a later run; all ranks and cases in this shard still share one cold build.
+  export EP_JIT_CACHE_DIR="$stage_root/.cx_backend/deepep-v2-jit"
+  export EP_REUSE_NCCL_COMM=1
+  export DEEPEP_V2_PR=605 DEEPEP_V2_FIX_PR=630
+  DEEPEP_V2_COMMIT="$CX_DEEPEP_V2_COMMIT"
+  DEEPEP_V2_TREE="$CX_DEEPEP_V2_TREE"
+  DEEPEP_V2_FMT_COMMIT="$CX_DEEPEP_V2_FMT_COMMIT"
+  export DEEPEP_V2_COMMIT DEEPEP_V2_TREE DEEPEP_V2_FMT_COMMIT
+  # Refuse to create or chmod the cache through a symlink.
+  [ ! -L "$stage_root/.cx_backend" ] && [ ! -L "$EP_JIT_CACHE_DIR" ] \
+    || { cx_log "ERROR: DeepEP V2 JIT cache path is unsafe"; return 1; }
+  if ! mkdir -p "$EP_JIT_CACHE_DIR" \
+      || ! chmod 700 "$stage_root/.cx_backend" "$EP_JIT_CACHE_DIR"; then
+    cx_log "ERROR: DeepEP V2 JIT cache is unavailable"
+    return 1
+  fi
+  # cx_probe_deepep_v2 asserts that this variable is absent from the environment.
+  unset EP_SUPPRESS_NCCL_CHECK
+}
+
+cx_enable_deepep_v2_jit_reproducibility() {
+  local seed="collectivex-deepep-v2-fa8a9b1" cccl
+  [ -n "${CUDA_HOME:-}" ] \
+    || { cx_log "ERROR: active CUDA toolkit is unavailable"; return 1; }
+  cccl="${CX_CUDA_CCCL:-}"
+  case "$cccl" in
+    "$CUDA_HOME"/targets/*/include/cccl) ;;
+    *) cx_log "ERROR: CUDA CCCL headers differ from the active toolkit"; return 1 ;;
+  esac
+  [ -d "$cccl" ] || { cx_log "ERROR: CUDA CCCL headers are unavailable"; return 1; }
+  CPATH="$cccl"
+  NVCC_PREPEND_FLAGS="--frandom-seed=$seed -I$cccl"
+  DEEPEP_V2_JIT_RANDOM_SEED="$seed"
+  EP_JIT_DUMP_SASS=1
+  unset EP_JIT_DEBUG EP_JIT_DUMP_ASM EP_JIT_DUMP_PTX EP_JIT_WITH_LINEINFO
+  unset EP_JIT_PTXAS_VERBOSE EP_JIT_PRINT_COMPILER_COMMAND EP_JIT_NVCC_COMPILER
+  unset EP_JIT_CPP_STANDARD EP_JIT_PTXAS_CHECK EP_GIN_GDAKI_DEBUG EP_NUM_TOPK_IDX_BITS
+  export CPATH DEEPEP_V2_JIT_RANDOM_SEED EP_JIT_DUMP_SASS NVCC_PREPEND_FLAGS
+}
+
+# Assert that the active Python environment is the pinned DeepEP V2 stack. Checks the exact
+# torch, nvidia-nccl-cu13, nvidia-nvshmem-cu12, numpy and deep_ep versions, that
+# deep_ep.ElasticBuffer is a class, that EP_SUPPRESS_NCCL_CHECK is not set, and that exactly
+# one libnccl.so is mapped into the process and reports runtime version 23004.
+# Returns the Python interpreter's exit status (non-zero when any assertion fails).
+cx_probe_deepep_v2() {
+  python3 - <<'PY'
+import ctypes
+import importlib.metadata as metadata
+import inspect
+import os
+
+import torch
+
+assert torch.__version__ == "2.10.0+cu130", torch.__version__
+assert metadata.version("nvidia-nccl-cu13") == "2.30.4"
+assert metadata.version("nvidia-nvshmem-cu12") == "3.3.9"
+assert metadata.version("numpy") == "2.2.6"
+
+import deep_ep
+assert deep_ep.__version__ == "2.0.0", deep_ep.__version__
+assert metadata.version("deep_ep") == "2.0.0+fa8a9b1"
+assert inspect.isclass(deep_ep.ElasticBuffer)
+assert deep_ep.ElasticBuffer.__name__ == "ElasticBuffer"
+assert os.environ.get("EP_SUPPRESS_NCCL_CHECK") is None
+with open("/proc/self/maps", encoding="utf-8") as handle:
+    loaded_nccl = {
+        os.path.realpath(line.rstrip().split()[-1])
+        for line in handle
+        if "libnccl.so" in line and os.path.isfile(line.rstrip().split()[-1])
+    }
+assert len(loaded_nccl) == 1
+runtime_version = ctypes.c_int()
+assert ctypes.CDLL(loaded_nccl.pop()).ncclGetVersion(ctypes.byref(runtime_version)) == 0
+assert runtime_version.value == 23004, runtime_version.value
+PY
+}
+
+# Print (no trailing newline) a sha256 digest covering every file recorded for the
+# installed deep_ep distribution. Entries are visited in sorted path order and each one
+# contributes its relative name, size, and file sha256 (NUL-separated) to the digest.
+# Exits 1 when: no files are recorded; VIRTUAL_ENV is a symlink or not a directory; an
+# entry is absolute, contains "..", or lies outside deep_ep/ and deep_ep-*.dist-info/; a
+# path or its resolved target escapes the venv; a parent directory is a symlink; the
+# entry is not a regular file; the file opened differs from the one lstat'ed; or no
+# deep_ep/*.so extension is present. Requires VIRTUAL_ENV to be set.
+cx_deepep_v2_content_sha256() {
+  python3 - <<'PY'
+import hashlib
+from importlib import metadata
+import os
+from pathlib import Path, PurePosixPath
+import stat
+
+distribution = metadata.distribution("deep_ep")
+entries = sorted(distribution.files or (), key=lambda entry: entry.as_posix())
+if not entries:
+    raise SystemExit(1)
+venv_path = Path(os.environ["VIRTUAL_ENV"]).absolute()
+if venv_path.is_symlink() or not venv_path.is_dir():
+    raise SystemExit(1)
+venv = venv_path.resolve(strict=True)
+digest = hashlib.sha256()
+extension = False
+for entry in entries:
+    relative = PurePosixPath(entry.as_posix())
+    if (
+        relative.is_absolute()
+        or ".." in relative.parts
+        or not relative.parts
+        or not (
+            relative.parts[0] == "deep_ep"
+            or relative.parts[0].startswith("deep_ep-")
+            and relative.parts[0].endswith(".dist-info")
+        )
+    ):
+        raise SystemExit(1)
+    path = Path(distribution.locate_file(entry)).absolute()
+    resolved = path.resolve(strict=True)
+    try:
+        path.relative_to(venv_path)
+        resolved.relative_to(venv)
+    except ValueError:
+        raise SystemExit(1)
+    parent = path.parent
+    while parent != venv_path:
+        if parent.is_symlink():
+            raise SystemExit(1)
+        parent = parent.parent
+    item = os.lstat(path)
+    if not stat.S_ISREG(item.st_mode):
+        raise SystemExit(1)
+    descriptor = os.open(path, os.O_RDONLY | getattr(os, "O_NOFOLLOW", 0))
+    try:
+        opened = os.fstat(descriptor)
+        if (opened.st_dev, opened.st_ino) != (item.st_dev, item.st_ino):
+            raise SystemExit(1)
+        file_digest = hashlib.sha256()
+        while chunk := os.read(descriptor, 1024 * 1024):
+            file_digest.update(chunk)
+    finally:
+        os.close(descriptor)
+    name = relative.as_posix()
+    extension |= name.startswith("deep_ep/") and name.endswith(".so")
+    digest.update(name.encode())
+    digest.update(b"\0")
+    digest.update(str(item.st_size).encode())
+    digest.update(b"\0")
+    digest.update(file_digest.digest())
+if not extension:
+    raise SystemExit(1)
+print(digest.hexdigest(), end="")
+PY
+}
+
+# cx_deepep_v2_marker_content_sha256 <root> <marker> <revision> <tree> <fmt_revision> <cache_key>
+# Validate the cache root and its completion marker, then print (no trailing newline) the
+# content digest stored on the marker's fifth line. Exits 1 unless all of these hold:
+#   - <root> is a directory with mode 0700;
+#   - <marker> is a regular file owned by the root's uid, mode 0600, at most 1024 bytes,
+#     and still the same inode once opened;
+#   - <root>/source and <root>/venv are directories owned by the root's uid and not
+#     group- or other-writable;
+#   - the marker is ASCII with exactly five lines, the first four equal to <revision>,
+#     <tree>, <fmt_revision>, <cache_key>, and the fifth 64 lowercase hex digits.
+cx_deepep_v2_marker_content_sha256() {
+  local root="$1" marker="$2" revision="$3" tree="$4" fmt_revision="$5" cache_key="$6"
+  python3 - "$root" "$marker" "$revision" "$tree" "$fmt_revision" "$cache_key" <<'PY'
+import os
+import re
+import stat
+import sys
+
+root, marker, revision, tree, fmt_revision, cache_key = sys.argv[1:]
+try:
+    root_item = os.lstat(root)
+    marker_item = os.lstat(marker)
+    children = [os.lstat(os.path.join(root, name)) for name in ("source", "venv")]
+    if (
+        not stat.S_ISDIR(root_item.st_mode)
+        or stat.S_IMODE(root_item.st_mode) & 0o777 != 0o700
+        or not stat.S_ISREG(marker_item.st_mode)
+        or marker_item.st_uid != root_item.st_uid
+        or stat.S_IMODE(marker_item.st_mode) & 0o777 != 0o600
+        or marker_item.st_size > 1024
+        or any(
+            not stat.S_ISDIR(child.st_mode)
+            or child.st_uid != root_item.st_uid
+            or stat.S_IMODE(child.st_mode) & 0o022
+            for child in children
+        )
+    ):
+        raise OSError
+    descriptor = os.open(marker, os.O_RDONLY | getattr(os, "O_NOFOLLOW", 0))
+    try:
+        opened = os.fstat(descriptor)
+        if (opened.st_dev, opened.st_ino) != (marker_item.st_dev, marker_item.st_ino):
+            raise OSError
+        payload = os.read(descriptor, 1025)
+    finally:
+        os.close(descriptor)
+    lines = payload.decode("ascii").splitlines()
+    if lines[:4] != [revision, tree, fmt_revision, cache_key] or len(lines) != 5:
+        raise ValueError
+    if not re.fullmatch(r"[0-9a-f]{64}", lines[4]):
+        raise ValueError
+except (OSError, UnicodeError, ValueError):
+    raise SystemExit(1)
+print(lines[4], end="")
+PY
+}
+
+cx_deepep_v2_cache_is_valid() {
+  local root="$1" marker="$2" revision="$3" tree="$4" fmt_revision="$5" cache_key="$6"
+  local expected_content actual_content
+  expected_content="$(
+    cx_deepep_v2_marker_content_sha256 \
+      "$root" "$marker" "$revision" "$tree" "$fmt_revision" "$cache_key"
+  )" || return 1
+  [ -d "$root/source" ] && [ ! -L "$root/source" ] \
+    && [ "$(cx_git_in_tree "$root/source" rev-parse 'HEAD^{tree}' 2>/dev/null)" = "$tree" ] \
+    && [ "$(cx_git_in_tree "$root/source/third-party/fmt" rev-parse HEAD 2>/dev/null)" = "$fmt_revision" ] \
+    || return 1
+  cx_activate_deepep_v2 || return 1
+  actual_content="$(cx_deepep_v2_content_sha256)" || return 1
+  [ "$actual_content" = "$expected_content" ]
+}
+
+# Prepare the shared DeepEP V2 environment (private venv + source build) in its cache root and
+# activate it in the calling shell.
+#
+# Flow:
+#   1. Verify the cache mount, resolve the CUDA arch and the cache root; the cache key is the
+#      64-hex suffix of the root directory name (deepep-v2-<key>).
+#   2. Inside a subshell, take an exclusive flock on "<root>.lock" and either validate the
+#      already-published cache or rebuild it from scratch and publish a completion marker.
+#   3. Back in the calling shell, activate the environment, prepare the toolchain, enable JIT
+#      reproducibility and run the runtime probe.
+# Returns 0 when the environment is ready, 1 on any failure (each failure logs its step).
+cx_build_deepep_v2() {
+  local root venv source marker marker_tmp lock_path arch cache_key cache_ready content_sha256
+  # Pinned identity: commit, the tree object of that commit, and the third-party/fmt commit.
+  local revision="fa8a9b16898204afd347c663b89e65ef87dc6ce6"
+  local tree="29809e75c5874e6609dac4804e7b651d5226959f"
+  local fmt_revision="a4c7e17133ee9cb6a2f45545f6e974dd3c393efa"
+  cx_verify_backend_cache_mount \
+    || { cx_log "ERROR: DeepEP V2 cache mount identity validation failed"; return 1; }
+  arch="$(cx_cuda_arch)" || return 1
+  root="$(cx_deepep_v2_root)" || return 1
+  # Strip everything up to and including ".../deepep-v2-"; what remains must be the 64-hex key.
+  cache_key="${root##*/deepep-v2-}"
+  [[ "$cache_key" =~ ^[0-9a-f]{64}$ ]] || return 1
+  venv="$root/venv"; source="$root/source"; marker="$root/.collectivex-complete"
+  lock_path="${root}.lock"
+  command -v flock >/dev/null || { cx_log "ERROR: flock is required for DeepEP V2"; return 1; }
+  mkdir -p "${root%/*}" || return 1
+  cx_log "DeepEP V2: preparing PR #605 implementation with upstream PR #630 fix ($revision)"
+  # Everything that touches the cache runs in a subshell: fd 9 (and the flock held on it) is
+  # released when the subshell exits, and "exit 1" aborts only the subshell.
+  if ! (
+    # Refuse a symlinked lock path, then create the lock file owner-only.
+    [ ! -L "$lock_path" ] \
+      || { cx_log "ERROR: DeepEP V2 cache lock is unsafe"; exit 1; }
+    (umask 077; : >> "$lock_path") && chmod 600 "$lock_path" \
+      || { cx_log "ERROR: DeepEP V2 cache-lock-create failed"; exit 1; }
+    exec 9<>"$lock_path" \
+      || { cx_log "ERROR: DeepEP V2 cache-lock-open failed"; exit 1; }
+    flock 9 \
+      || { cx_log "ERROR: DeepEP V2 cache-lock-acquire failed"; exit 1; }
+    cache_ready=0
+    # A marker (or a symlink in its place) means a cache was published. It must validate; a
+    # published cache that fails validation is never deleted automatically.
+    if [ -e "$marker" ] || [ -L "$marker" ]; then
+      # Validation runs in its own subshell; presumably this keeps the activation it performs
+      # from leaking into the build steps below — TODO confirm.
+      if (
+        cx_deepep_v2_cache_is_valid \
+          "$root" "$marker" "$revision" "$tree" "$fmt_revision" "$cache_key"
+      ); then
+        cache_ready=1
+      else
+        cx_log "ERROR: published DeepEP V2 cache failed integrity validation; refusing reset"
+        exit 1
+      fi
+    fi
+    if [ "$cache_ready" != 1 ]; then
+      # No marker: whatever exists at the root is an unpublished leftover, so start clean.
+      if [ -e "$root" ] || [ -L "$root" ]; then
+        rm -rf "$root" \
+          || { cx_log "ERROR: incomplete DeepEP V2 cache-reset failed"; exit 1; }
+      fi
+      mkdir -m 700 "$root" \
+        || { cx_log "ERROR: DeepEP V2 cache-create failed"; exit 1; }
+      python3 -m venv "$venv" \
+        || { cx_log "ERROR: DeepEP V2 venv creation failed"; exit 1; }
+      # ">&2 2>&1" sends both pip streams to this shell's stderr.
+      "$venv/bin/python" -m pip install -q --disable-pip-version-check --no-input \
+        "pip==26.1.2" "setuptools==82.0.1" "wheel==0.47.0" "ninja==1.13.0" \
+        "numpy==2.2.6" "nvidia-nvshmem-cu12==3.3.9" >&2 2>&1 \
+        || { cx_log "ERROR: DeepEP V2 build-tool installation failed"; exit 1; }
+      "$venv/bin/python" -m pip install -q --disable-pip-version-check --no-input \
+        --index-url https://download.pytorch.org/whl/cu130 \
+        --extra-index-url https://pypi.org/simple "torch==2.10.0" >&2 2>&1 \
+        || { cx_log "ERROR: torch 2.10.0+cu130 installation failed"; exit 1; }
+      # Torch pins NCCL 2.28.9; the PR #605 ElasticBuffer implementation requires 2.30.4.
+      "$venv/bin/python" -m pip install -q --disable-pip-version-check --no-input \
+        --force-reinstall --no-deps "nvidia-nccl-cu13==2.30.4" >&2 2>&1 \
+        || { cx_log "ERROR: NCCL 2.30.4 installation failed"; exit 1; }
+      cx_activate_deepep_v2 \
+        || { cx_log "ERROR: DeepEP V2 environment activation failed"; exit 1; }
+      cx_prepare_deepep_toolchain \
+        || { cx_log "ERROR: DeepEP V2 toolchain preparation failed"; exit 1; }
+      EP_NVSHMEM_ROOT_DIR="$NVSHMEM_DIR"
+      export EP_NVSHMEM_ROOT_DIR
+      cx_materialize_backend_source deepep-v2 "$source" \
+        || { cx_log "ERROR: DeepEP V2 staged source is invalid"; exit 1; }
+      # Build with SOURCE_DATE_EPOCH set to the commit timestamp of HEAD, for the resolved arch.
+      (cd "$source" && SOURCE_DATE_EPOCH="$(cx_git_in_tree "$source" show -s --format=%ct HEAD)" \
+        TORCH_CUDA_ARCH_LIST="$arch" MAX_JOBS=16 \
+        python3 -m pip install -q --no-build-isolation --no-deps --force-reinstall .) >&2 2>&1 \
+        || { cx_log "ERROR: DeepEP V2 build failed"; exit 1; }
+      cx_probe_deepep_v2 \
+        || { cx_log "ERROR: DeepEP V2 ElasticBuffer/runtime probe failed"; exit 1; }
+      content_sha256="$(cx_deepep_v2_content_sha256)" \
+        || { cx_log "ERROR: DeepEP V2 installed-content hashing failed"; exit 1; }
+      # Publish: write the five-line marker to a temp file in the same directory, then rename
+      # it into place so the marker only ever appears fully written.
+      marker_tmp="$(mktemp "$root/.collectivex-complete.tmp.XXXXXX")" \
+        || { cx_log "ERROR: DeepEP V2 cache-marker-create failed"; exit 1; }
+      chmod 600 "$marker_tmp" \
+        || { cx_log "ERROR: DeepEP V2 cache-marker-permission failed"; exit 1; }
+      printf '%s\n%s\n%s\n%s\n%s\n' \
+        "$revision" "$tree" "$fmt_revision" "$cache_key" "$content_sha256" > "$marker_tmp" \
+        || { cx_log "ERROR: DeepEP V2 cache-marker-write failed"; exit 1; }
+      mv -f -- "$marker_tmp" "$marker" \
+        || { cx_log "ERROR: DeepEP V2 cache-marker-publish failed"; exit 1; }
+    fi
+    # Final gate for both the reuse path and the fresh-build path.
+    cx_deepep_v2_cache_is_valid \
+      "$root" "$marker" "$revision" "$tree" "$fmt_revision" "$cache_key" \
+      || { cx_log "ERROR: DeepEP V2 cache validation failed"; exit 1; }
+  ); then
+    cx_log "ERROR: shared DeepEP V2 environment is incomplete"
+    return 1
+  fi
+  # Environment changes made inside the subshell do not reach this shell, so activate again here.
+  cx_activate_deepep_v2 || return 1
+  cx_prepare_deepep_toolchain || return 1
+  cx_enable_deepep_v2_jit_reproducibility || return 1
+  EP_NVSHMEM_ROOT_DIR="$NVSHMEM_DIR"
+  export EP_NVSHMEM_ROOT_DIR
+  cx_probe_deepep_v2 || { cx_log "ERROR: DeepEP V2 shared runtime probe failed"; return 1; }
+  cx_log "DeepEP V2 ready ($DEEPEP_V2_COMMIT, ElasticBuffer, NCCL Device API; LSA/Gin selected by adapter)"
+}
+
+# Build the pinned DeepEP `hybrid-ep` implementation. MNNVL remains one scale-up
+# domain; true x86 scale-out uses the upstream DOCA/RDMA build explicitly.
+#
+# cx_configure_deepep_hybrid_build selects the build mode and exports it as
+# DEEPEP_HYBRID_BUILD_MODE:
+#   - "intradomain" when CX_NODES is at most 1 (default 1) or CX_TRANSPORT is "mnnvl";
+#   - "multinode-doca" otherwise, after checking the host is x86_64, both GLOO_SOCKET_IFNAME and
+#     NCCL_IB_HCA are set, every listed interface/device exists under /sys/class, `make` is on
+#     PATH, the verbs/mlx5dv headers are readable and the ibverbs/mlx5 libraries resolve. In this
+#     mode HYBRID_EP_MULTINODE=1, USE_NIXL=0 and RDMA_CORE_HOME=/usr are exported as well.
+# Returns 1 (after logging) when a scale-out prerequisite is missing.
+cx_configure_deepep_hybrid_build() {
+  local interface device rdma_name
+  local -a interfaces devices
+  # Start from a clean slate so a value left over from an earlier call cannot leak through.
+  unset HYBRID_EP_MULTINODE USE_NIXL RDMA_CORE_HOME DEEPEP_HYBRID_BUILD_MODE
+  if [ "${CX_NODES:-1}" -le 1 ] || [ "${CX_TRANSPORT:-}" = mnnvl ]; then
+    export DEEPEP_HYBRID_BUILD_MODE=intradomain
+    return 0
+  fi
+  [ "$(uname -m)" = x86_64 ] \
+    || { cx_log "ERROR: hybrid-ep scale-out is registered only on x86_64"; return 1; }
+  [ -n "${GLOO_SOCKET_IFNAME:-}" ] && [ -n "${NCCL_IB_HCA:-}" ] \
+    || { cx_log "ERROR: hybrid-ep scale-out network selectors are unavailable"; return 1; }
+  # Both selectors are comma-separated lists.
+  IFS=, read -r -a interfaces <<< "$GLOO_SOCKET_IFNAME"
+  for interface in "${interfaces[@]}"; do
+    [ -d "/sys/class/net/$interface" ] \
+      || { cx_log "ERROR: configured hybrid-ep socket interface is absent"; return 1; }
+  done
+  IFS=, read -r -a devices <<< "$NCCL_IB_HCA"
+  for device in "${devices[@]}"; do
+    # Drop everything from the first ":" on (e.g. a port suffix) to get the device name.
+    rdma_name="${device%%:*}"
+    [ -d "/sys/class/infiniband/$rdma_name" ] \
+      || { cx_log "ERROR: configured hybrid-ep RDMA device is absent"; return 1; }
+  done
+  command -v make >/dev/null \
+    || { cx_log "ERROR: make is required for hybrid-ep scale-out"; return 1; }
+  [ -r /usr/include/infiniband/verbs.h ] && [ -r /usr/include/infiniband/mlx5dv.h ] \
+    || { cx_log "ERROR: pinned hybrid-ep RDMA headers are unavailable"; return 1; }
+  # The heredoc body is the Python program; the "|| { ... }" group that continues after the
+  # PY terminator runs only when that program exits non-zero.
+  python3 - <<'PY' >/dev/null 2>&1 || {
+import ctypes.util
+import sys
+sys.exit(0 if all(ctypes.util.find_library(name) for name in ("ibverbs", "mlx5")) else 1)
+PY
+    cx_log "ERROR: pinned hybrid-ep RDMA libraries are unavailable"
+    return 1
+  }
+  export HYBRID_EP_MULTINODE=1 USE_NIXL=0 RDMA_CORE_HOME=/usr
+  export DEEPEP_HYBRID_BUILD_MODE=multinode-doca
+}
+
+# Read and verify the hybrid-ep cache completion marker, printing the content hash it records.
+#
+# Arguments: root marker revision tree [build_mode]
+# Succeeds only when all of the following hold (the embedded Python exits 1 otherwise):
+#   - root is a real directory (lstat, so a symlink is rejected) with mode exactly 0700;
+#   - marker is a regular file owned by the same uid as root, mode exactly 0600, size <= 512;
+#   - the descriptor opened (with O_NOFOLLOW where the platform defines it) has the same device
+#     and inode as the lstat-ed marker;
+#   - the ASCII payload consists of revision, tree, build_mode (that line is expected only when
+#     a non-empty build_mode was passed) and a final 64-character lowercase hex digest.
+# Output: the final line (the recorded digest) on stdout, without a trailing newline.
+cx_deepep_hybrid_marker_content_sha256() {
+  # "${5:-}" makes the build mode optional; an empty value selects the three-line marker form.
+  python3 - "$1" "$2" "$3" "$4" "${5:-}" <<'PY'
+import os
+import re
+import stat
+import sys
+
+root, marker, revision, tree, build_mode = sys.argv[1:]
+try:
+    root_item = os.lstat(root)
+    marker_item = os.lstat(marker)
+    if (
+        not stat.S_ISDIR(root_item.st_mode)
+        or stat.S_IMODE(root_item.st_mode) & 0o777 != 0o700
+        or not stat.S_ISREG(marker_item.st_mode)
+        or marker_item.st_uid != root_item.st_uid
+        or stat.S_IMODE(marker_item.st_mode) & 0o777 != 0o600
+        or marker_item.st_size > 512
+    ):
+        raise OSError
+    descriptor = os.open(marker, os.O_RDONLY | getattr(os, "O_NOFOLLOW", 0))
+    try:
+        opened = os.fstat(descriptor)
+        if (opened.st_dev, opened.st_ino) != (marker_item.st_dev, marker_item.st_ino):
+            raise OSError
+        payload = os.read(descriptor, 513)
+    finally:
+        os.close(descriptor)
+    lines = payload.decode("ascii").splitlines()
+    expected = [revision, tree, build_mode] if build_mode else [revision, tree]
+    if len(lines) != len(expected) + 1 or lines[:-1] != expected:
+        raise ValueError
+    if not re.fullmatch(r"[0-9a-f]{64}", lines[-1]):
+        raise ValueError
+except (OSError, UnicodeError, ValueError):
+    raise SystemExit(1)
+print(lines[-1], end="")
+PY
+}
+
+cx_deepep_hybrid_cache_is_valid() {
+  local root="$1" marker="$2" revision="$3" tree="$4" build_mode="${5:-}"
+  local expected actual status extra
+  expected="$(cx_deepep_hybrid_marker_content_sha256 \
+    "$root" "$marker" "$revision" "$tree" "$build_mode")" || return 1
+  [ "$(cx_git_in_tree "$root" rev-parse HEAD 2>/dev/null)" = "$revision" ] \
+    && [ "$(cx_git_in_tree "$root" rev-parse 'HEAD^{tree}' 2>/dev/null)" = "$tree" ] \
+    || return 1
+  status="$(cx_git_in_tree "$root" status --porcelain --untracked-files=no \
+    --ignore-submodules=none 2>/dev/null)" || return 1
+  [ -z "$status" ] || return 1
+  extra="$(cx_git_in_tree "$root" ls-files --others --exclude-standard -- \
+    'deep_ep/*.py' 'deep_ep/*.so' 2>/dev/null)" || return 1
+  [ -z "$extra" ] || return 1
+  extra="$(cx_git_in_tree "$root" ls-files --others --ignored --exclude-standard -- \
+    'deep_ep/*.py' 'deep_ep/*.so' 2>/dev/null)" || return 1
+  [ -z "$extra" ] || return 1
+  actual="$(cx_extension_pair_sha256 "$root" 'deep_ep_cpp*.so' 'hybrid_ep_cpp*.so')" \
+    || return 1
+  [ "$actual" = "$expected" ]
+}
+
+# Build (or reuse) the pinned DeepEP hybrid-ep extensions and expose them on PYTHONPATH.
+#
+# The build directory is $PWD/.cx_backend/deepep-hybrid-<arch>-<build_mode>, so each CUDA target
+# and each build mode gets its own cache. Under an exclusive flock on "<build_root>.lock" the
+# function either validates an already-published build or stages the source, builds in place,
+# hashes the two extensions and publishes a completion marker. A published build that fails
+# validation is reported and left untouched.
+# Every failing step logs its own message, so a lock problem, an integrity failure and a build
+# failure are distinguishable in the job log.
+# Returns 0 when deep_ep with HybridEPBuffer imports from the build directory, 1 otherwise.
+cx_build_deepep_hybrid() {
+  local arch revision="$CX_DEEPEP_HYBRID_COMMIT" tree="$CX_DEEPEP_HYBRID_TREE"
+  local build_root marker marker_tmp lock_path content_sha256 cache_ready build_mode
+  export DEEPEP_COMMIT="$revision" DEEPEP_TREE="$tree"
+  arch="$(cx_cuda_arch)" || return 1
+  cx_configure_deepep_hybrid_build || return 1
+  build_mode="$DEEPEP_HYBRID_BUILD_MODE"
+  # "${arch/./}" removes the first dot from the arch string for use in the directory name.
+  build_root="$PWD/.cx_backend/deepep-hybrid-${arch/./}-${build_mode}"
+  marker="$build_root/.collectivex-complete"
+  lock_path="${build_root}.lock"
+  cx_log "DeepEP hybrid-ep: building $revision for CUDA target $arch"
+  unset NVSHMEM_DIR
+  cx_prepare_cuda_cccl || return 1
+  command -v flock >/dev/null || { cx_log "ERROR: flock is required for hybrid-ep"; return 1; }
+  mkdir -p "$PWD/.cx_backend" || return 1
+  # The cache work runs in a subshell: fd 9 and its flock are released when the subshell exits,
+  # and "exit 1" aborts only the subshell.
+  if ! (
+    [ ! -L "$lock_path" ] \
+      || { cx_log "ERROR: hybrid-ep cache lock is unsafe"; exit 1; }
+    (umask 077; : >> "$lock_path") && chmod 600 "$lock_path" \
+      || { cx_log "ERROR: hybrid-ep cache-lock-create failed"; exit 1; }
+    exec 9<>"$lock_path" \
+      || { cx_log "ERROR: hybrid-ep cache-lock-open failed"; exit 1; }
+    flock 9 \
+      || { cx_log "ERROR: hybrid-ep cache-lock-acquire failed"; exit 1; }
+    cache_ready=0
+    # A marker (or a symlink in its place) means a build was published; it must validate.
+    if [ -e "$marker" ] || [ -L "$marker" ]; then
+      cx_deepep_hybrid_cache_is_valid \
+        "$build_root" "$marker" "$revision" "$tree" "$build_mode" \
+        || { cx_log "ERROR: published hybrid-ep cache failed integrity validation; refusing reset"; exit 1; }
+      cache_ready=1
+    fi
+    if [ "$cache_ready" != 1 ]; then
+      cx_materialize_backend_source deepep-hybrid "$build_root" \
+        || { cx_log "ERROR: hybrid-ep staged source is invalid"; exit 1; }
+      if [ "$build_mode" = multinode-doca ]; then
+        [ "$(cx_git_in_tree "$build_root/third-party/nccl" rev-parse HEAD 2>/dev/null)" \
+          = "$CX_DEEPEP_HYBRID_NCCL_COMMIT" ] \
+          || { cx_log "ERROR: pinned hybrid-ep NCCL transport source is absent"; exit 1; }
+      fi
+      # Build in place with SOURCE_DATE_EPOCH set to the commit timestamp of HEAD.
+      (cd "$build_root" && \
+        SOURCE_DATE_EPOCH="$(cx_git_in_tree "$build_root" show -s --format=%ct HEAD)" \
+        TORCH_CUDA_ARCH_LIST="$arch" MAX_JOBS=16 \
+        python3 setup.py build_ext --inplace) >&2 2>&1 \
+        || { cx_log "ERROR: hybrid-ep build failed"; exit 1; }
+      content_sha256="$(cx_extension_pair_sha256 \
+        "$build_root" 'deep_ep_cpp*.so' 'hybrid_ep_cpp*.so')" \
+        || { cx_log "ERROR: hybrid-ep extension hashing failed"; exit 1; }
+      # Publish: write the marker to a temp file in the same directory, then rename into place.
+      marker_tmp="$(mktemp "$build_root/.collectivex-complete.tmp.XXXXXX")" \
+        || { cx_log "ERROR: hybrid-ep cache-marker-create failed"; exit 1; }
+      chmod 600 "$marker_tmp" \
+        || { cx_log "ERROR: hybrid-ep cache-marker-permission failed"; exit 1; }
+      printf '%s\n%s\n%s\n%s\n' \
+        "$revision" "$tree" "$build_mode" "$content_sha256" > "$marker_tmp" \
+        || { cx_log "ERROR: hybrid-ep cache-marker-write failed"; exit 1; }
+      mv -f -- "$marker_tmp" "$marker" \
+        || { cx_log "ERROR: hybrid-ep cache-marker-publish failed"; exit 1; }
+    fi
+    # Final gate for both the reuse path and the fresh-build path.
+    cx_deepep_hybrid_cache_is_valid \
+      "$build_root" "$marker" "$revision" "$tree" "$build_mode" \
+      || { cx_log "ERROR: hybrid-ep cache validation failed"; exit 1; }
+  ); then
+    cx_log "ERROR: shared hybrid-ep build is incomplete"
+    return 1
+  fi
+  export PYTHONPATH="$build_root:${PYTHONPATH:-}"
+  python3 -c "import deep_ep; assert hasattr(deep_ep,'HybridEPBuffer'); print('built hybrid-ep deep_ep', getattr(deep_ep,'__version__','?'))" >&2 \
+    || { cx_log "ERROR: hybrid-ep import / HybridEPBuffer missing after build"; return 1; }
+  cx_log "DeepEP hybrid-ep ready ($DEEPEP_COMMIT, mode=$build_mode)"
+}
+
+# UCCL EP (uccl.ep.Buffer is a DeepEP-API clone). The prebuilt wheel is cu12; on a cu13
+# image its kernels need a cu12 CUDA runtime on LD_LIBRARY_PATH (probe-confirmed). PEP-668
+# images need PIP_BREAK_SYSTEM_PACKAGES. Best-effort; failure to import fails loudly.
+#
+# cx_build_uccl installs the hash-pinned uccl wheel plus a CUDA 12 runtime shim, checks that
+# uccl.ep imports, vendors the upstream DeepEP-API wrapper as the `uccl_deepep` package on
+# PYTHONPATH, and exports UCCL_COMMIT, UCCL_WRAPPER_COMMIT and CX_UCCL_WRAPPER. A sentinel file
+# (/tmp/.cx_built_uccl) turns later calls into a quick import check.
+# Returns 0 when the wrapper imports, 1 (after logging) on any failure.
+cx_build_uccl() {
+  # Fast path: a previous call already completed. Only the wrapper import is re-checked; the
+  # exports made by that earlier call are not repeated here.
+  if [ -f /tmp/.cx_built_uccl ]; then
+    cx_log "UCCL EP already prepared this allocation — skip rebuild"
+    python3 -c "import torch; from uccl_deepep import Buffer" 2>/dev/null || return 1
+    return 0
+  fi
+  local version="0.1.1" tag="v0.1.1"
+  local wheel_sha256="390c1320918972206546e44d79b132988f2818ec07e23afcd0595f7183916cec"
+  cx_log "UCCL EP: installing uccl==$version + cu12 runtime shim"
+  export PIP_BREAK_SYSTEM_PACKAGES=1
+  pip install -q --no-deps "sortedcontainers==2.4.0" "intervaltree==3.1.0" >&2 2>&1 \
+    || { cx_log "ERROR: UCCL support dependency installation failed"; return 1; }
+  # The requirement line is fed through stdin so that --require-hashes can pin the wheel digest.
+  printf 'uccl==%s --hash=sha256:%s\n' "$version" "$wheel_sha256" \
+    | pip install -q --no-deps --only-binary=:all: --require-hashes -r /dev/stdin >&2 2>&1 \
+    || { cx_log "ERROR: pip install uccl==$version failed"; return 1; }
+  pip install -q --no-deps "nvidia-cuda-runtime-cu12==12.9.79" >&2 2>&1 \
+    || { cx_log "ERROR: CUDA 12 runtime shim install failed"; return 1; }
+  # Locate the shim's lib directory; when the lookup prints nothing LD_LIBRARY_PATH is left
+  # untouched and the uccl.ep import check below decides whether that matters.
+  local cu12lib
+  cu12lib="$(python3 -c "import nvidia.cuda_runtime as m, os; print(os.path.join(os.path.dirname(m.__file__),'lib'))" 2>/dev/null)"
+  [ -n "$cu12lib" ] && export LD_LIBRARY_PATH="$cu12lib:${LD_LIBRARY_PATH:-}"
+  # Confirm that the version actually installed is the pinned one.
+  local installed
+  installed="$(python3 -c 'import importlib.metadata as m; print(m.version("uccl"))')" \
+    || { cx_log "ERROR: cannot read installed UCCL version"; return 1; }
+  [ "$installed" = "$version" ] \
+    || { cx_log "ERROR: expected UCCL $version, installed $installed"; return 1; }
+  UCCL_COMMIT="pkg-$installed"
+  export UCCL_COMMIT
+  # import torch FIRST: uccl.ep's C extension links libc10.so (torch), which is only on the loader
+  # path once torch is imported (rpath). The adapter (ep_uccl.py) imports torch before uccl.ep too.
+  python3 -c "import torch; from uccl.ep import Buffer; print('uccl.ep ready')" >&2 \
+    || { cx_log "ERROR: uccl.ep import failed (cu12 runtime on LD_LIBRARY_PATH?)"; return 1; }
+  # Vendor UCCL's DeepEP-API wrapper (ep/deep_ep_wrapper/deep_ep) under a NON-conflicting name
+  # (uccl_deepep) so it doesn't shadow the container's real deep_ep. Its Buffer(group, num_nvl_bytes,
+  # ...) takes a torch ProcessGroup (matching DeepEP + ep_uccl.py's calls) and runs the full
+  # proxy/IPC-handle/runtime.sync bootstrap that the low-level uccl.ep.Buffer(rank,num_ranks) lacks.
+  rm -rf /tmp/uccl_src /tmp/uccl_deepep_pkg
+  # Pin the wrapper to the SAME tag as the installed wheel (pkg-0.1.1 -> v0.1.1): the wrapper's
+  # dispatch calls into uccl.ep (get_rdma_buffer etc.), so a main-branch wrapper vs a 0.1.1 wheel
+  # mismatches signatures. Match them.
+  # The clone is accepted only when the tag resolves to the expected commit and the wrapper
+  # directory is present.
+  if git clone --depth 1 --branch "$tag" https://github.com/uccl-project/uccl /tmp/uccl_src >&2 2>&1 \
+     && [ "$(git -C /tmp/uccl_src rev-parse HEAD)" = "73ee4f12ba71717d6de34ba06806e1baaabe3f42" ] \
+     && [ -d /tmp/uccl_src/ep/deep_ep_wrapper/deep_ep ]; then
+    mkdir -p /tmp/uccl_deepep_pkg/uccl_deepep
+    # A failed copy is not checked here; the import check right below catches it.
+    cp /tmp/uccl_src/ep/deep_ep_wrapper/deep_ep/*.py /tmp/uccl_deepep_pkg/uccl_deepep/ 2>/dev/null
+    export PYTHONPATH="/tmp/uccl_deepep_pkg:${PYTHONPATH:-}"
+    python3 -c "import torch; from uccl_deepep import Buffer; print('uccl_deepep wrapper ready')" >&2 \
+      || { cx_log "ERROR: uccl_deepep wrapper import failed"; return 1; }
+    export CX_UCCL_WRAPPER=1
+    export UCCL_WRAPPER_COMMIT="73ee4f12ba71717d6de34ba06806e1baaabe3f42"
+  else
+    cx_log "ERROR: uccl deep_ep_wrapper not available"
+    return 1
+  fi
+  # Drop the sentinel only after every step succeeded.
+  : > /tmp/.cx_built_uccl
+  cx_log "UCCL EP ready ($UCCL_COMMIT, wrapper=${CX_UCCL_WRAPPER:-0})"
+}
+
+# Snapshot this node's backend environment into $PWD/.cx_backend/env/node-<id>.sh.
+#
+# The build step and the rank steps of a rack launch may run in different container instances,
+# so the loader/import paths and build identity are written as `export NAME=value` lines (values
+# shell-quoted with %q) for later use. Only variables that are currently declared are written.
+# The node id comes from SLURM_NODEID (default 0) and must be numeric. The file is assembled in
+# an owner-only temp file and renamed into place. Returns 1 on any failure.
+cx_persist_backend_env() {
+  local env_dir="$PWD/.cx_backend/env" node="${SLURM_NODEID:-0}" staged var
+  local -a persisted=(PATH VIRTUAL_ENV LD_LIBRARY_PATH PYTHONPATH CUDA_HOME CPATH NVCC_PREPEND_FLAGS
+    NVSHMEM_DIR DEEPEP_COMMIT DEEPEP_TREE
+    EP_NCCL_ROOT_DIR EP_NVSHMEM_ROOT_DIR EP_JIT_CACHE_DIR EP_REUSE_NCCL_COMM
+    EP_JIT_DUMP_SASS
+    DEEPEP_V2_PR DEEPEP_V2_FIX_PR DEEPEP_V2_COMMIT DEEPEP_V2_TREE DEEPEP_V2_FMT_COMMIT
+    DEEPEP_V2_JIT_RANDOM_SEED
+    HYBRID_EP_MULTINODE USE_NIXL RDMA_CORE_HOME DEEPEP_HYBRID_BUILD_MODE
+    UCCL_COMMIT UCCL_WRAPPER_COMMIT CX_UCCL_WRAPPER)
+  if ! [[ "$node" =~ ^[0-9]+$ ]]; then
+    return 1
+  fi
+  mkdir -p "$env_dir" || return 1
+  chmod 700 "$env_dir" || return 1
+  staged="$(mktemp "$env_dir/.node-${node}.XXXXXX")" || return 1
+  if ! chmod 600 "$staged"; then
+    rm -f "$staged"
+    return 1
+  fi
+  for var in "${persisted[@]}"; do
+    # Skip names that are not declared in this shell.
+    declare -p "$var" >/dev/null 2>&1 || continue
+    if ! printf 'export %s=%q\n' "$var" "${!var}" >> "$staged"; then
+      rm -f "$staged"
+      return 1
+    fi
+  done
+  if ! mv -f -- "$staged" "$env_dir/node-${node}.sh"; then
+    rm -f "$staged"
+    return 1
+  fi
+}
+
+# Validate private scale-out selectors on every allocated compute node before a
+# backend can initialize or build transport code.
+cx_probe_scaleout_network() {
+  local interface device rdma_name
+  local -a interfaces devices
+  if [ "${CX_NODES:-1}" -le 1 ] || [ "${CX_TRANSPORT:-}" = mnnvl ]; then
+    return 0
+  fi
+  [ -n "${GLOO_SOCKET_IFNAME:-}" ] && [ -n "${NCCL_IB_HCA:-}" ] \
+    || { cx_log "ERROR: scale-out network selectors are unavailable"; return 1; }
+  IFS=, read -r -a interfaces <<< "$GLOO_SOCKET_IFNAME"
+  for interface in "${interfaces[@]}"; do
+    [ -d "/sys/class/net/$interface" ] \
+      || { cx_log "ERROR: configured scale-out socket interface is absent"; return 1; }
+  done
+  IFS=, read -r -a devices <<< "$NCCL_IB_HCA"
+  for device in "${devices[@]}"; do
+    rdma_name="${device%%:*}"
+    [ -d "/sys/class/infiniband/$rdma_name" ] \
+      || { cx_log "ERROR: configured scale-out RDMA device is absent"; return 1; }
+  done
+}
+
+# Make one backend usable without running any benchmark. Used both by ordinary in-container
+# runs and by the rack launchers' persistent build-only step.
+#
+# Argument: backend name. deepep is probed; deepep-v2, deepep-hybrid and uccl are built; mori
+# must be importable; nccl-ep needs no preparation. Any other name is logged and rejected.
+# Returns 0 on success, 1 on failure.
+cx_prepare_backend() {
+  local backend="${1:-}"
+  if [ "$backend" = deepep ]; then
+    cx_probe_deepep || return 1
+  elif [ "$backend" = deepep-v2 ]; then
+    cx_build_deepep_v2 || return 1
+  elif [ "$backend" = deepep-hybrid ]; then
+    cx_build_deepep_hybrid || return 1
+  elif [ "$backend" = uccl ]; then
+    cx_build_uccl || return 1
+  elif [ "$backend" = mori ]; then
+    python3 -c "import mori" 2>/dev/null || return 1
+  elif [ "$backend" = nccl-ep ]; then
+    :
+  else
+    cx_log "ERROR: unknown backend preparation request"
+    return 1
+  fi
+}
+
+prepare_backend_or_record() {
+  local backend="$1" phases="${CX_PHASE:-decode}" phase
+  cx_write_runtime_stage backend-setup || return 1
+  if cx_prepare_backend "$backend"; then
+    return 0
+  fi
+  cx_log "WARN: $backend preparation failed"
+  [ "$phases" = "both" ] && phases="decode prefill"
+  for phase in $phases; do
+    CX_FAILURE_MODE=backend-setup emit_failed_case "$backend" "$phase" 6
+  done
+  return 1
+}
+
+# Run the benchmark selected by CX_BENCH (with the current CX_* configuration) exactly once.
+# In shard mode the sweep calls this repeatedly inside one allocation, so the container and the
+# backend prepared for it are shared across cases.
+# nccl-ep runs directly; every other known backend is prepared first and the suite only runs
+# when preparation succeeded. An unknown CX_BENCH is reported through cx_die.
+dispatch_bench() {
+  case "$CX_BENCH" in
+    deepep|deepep-v2|deepep-hybrid|mori|uccl)
+      # Propagate the preparation status unchanged when it fails.
+      prepare_backend_or_record "$CX_BENCH" || return
+      ;;
+    nccl-ep)
+      ;;
+    *)
+      cx_die "unknown CX_BENCH=$CX_BENCH (want deepep|deepep-v2|mori|uccl|nccl-ep|deepep-hybrid)"
+      return
+      ;;
+  esac
+  run_ep_suite "$CX_BENCH"
+}
+
+run_precision_probe() {
+  local fields probe_id backend sku ep mode profile out rc_run
+  fields="$(cx_precision_probe_control_fields "$PWD")" || return 1
+  IFS='|' read -r probe_id backend sku ep mode profile <<< "$fields"
+  [ "$backend" = "$CX_BENCH" ] && [ "$sku" = "$CX_RUNNER" ] && [ "$ep" = "$CX_NGPUS" ] \
+    || { cx_log "ERROR: precision probe control differs from runtime placement"; return 1; }
+  out="results/${probe_id}.json"
+  cx_write_runtime_stage execution || return 1
+  if timeout -k 30 "${CX_RUN_TIMEOUT:-900}" torchrun --nproc_per_node="$CX_NGPUS" \
+      tests/probe_precision.py --backend "$backend" --sku "$sku" --ep "$ep" \
+      --mode "$mode" --precision-profile "$profile" --out "$out"; then
+    rc_run=0
+  else
+    rc_run=$?
+  fi
+  [ "$rc_run" = 0 ] || return "$rc_run"
+  python3 tests/probe_precision.py --validate-manifest "$out"
+}
+
+rc=0
+cx_validate_shard_control "$PWD"
+# Build-only mode: rack launchers run the shared backend preparation hook once per
+# node inside a persistent named container, then direct rank processes reuse it.
+if [ -n "${CX_BUILD_ONLY:-}" ]; then
+  if cx_probe_scaleout_network && cx_prepare_backend "${CX_BENCH:-}"; then
+    cx_persist_backend_env || rc=1
+  else
+    rc=1
+  fi
+  cx_log "backend preparation: bench=${CX_BENCH:-unknown} rc=$rc"
+  exit "$rc"
+fi
+if [ "${CX_PRECISION_PROBE:-0}" = 1 ]; then
+  if cx_probe_scaleout_network && cx_prepare_backend "${CX_BENCH:-}"; then
+    run_precision_probe || rc=$?
+  else
+    rc=1
+  fi
+elif [ -n "${CX_SHARD_FILE:-}" ]; then
+  # SHARD/SWEEP mode (collectivex-sweep.yml): run EVERY case of this shard in THIS one allocation.
+  # All cases share (sku, backend, nodes), so backend preparation is paid once and cached.
+  ncases="$(python3 -c "import json;print(len(json.load(open('$CX_SHARD_FILE'))['cases']))")"
+  cx_log "SHARD mode: $ncases case(s) in one allocation (shard=$CX_SHARD_FILE)"
+  _cx_ts_base="$CX_TS"   # per-case CX_TS suffix below keeps each case's result file UNIQUE (else
+                         # cases sharing backend+phase overwrite each other at the same timestamp).
+  ci=0
+  failed_cases=0
+  while [ "$ci" -lt "$ncases" ]; do
+    CX_TS="${_cx_ts_base}-c$(printf '%03d' "$ci")"
+    export CX_TS
+    # Map varying case fields plus the frozen v1 defaults into CX_* env.
+    _exports="$(python3 - "$CX_SHARD_FILE" "$ci" <<'PY'
+import json, sys, shlex
+c = json.load(open(sys.argv[1]))["cases"][int(sys.argv[2])]
+def g(k, d=""):
+    v = c.get(k, d); return "" if v is None else str(v)
+env = {
+  "CX_BENCH": g("backend"),
+  "CX_MODE": g("mode", "normal"),
+  "CX_ROUTING": g("routing", "uniform"), "CX_PHASE": g("phase", "decode"),
+  "CX_EP": g("ep", "1"),
+  "CX_EPLB": "1" if c.get("eplb") else "",
+  "CX_CASE_ID": g("case_id"), "CX_SUITE": g("suite"), "CX_WORKLOAD_NAME": g("workload"),
+  "CX_REQUIRED_PUBLICATION": g("required_publication"),
+  "CX_HIDDEN": g("hidden"), "CX_TOPK": g("topk"), "CX_EXPERTS": g("experts"),
+  "CX_TOKENS_LADDER": g("ladder"), "CX_CANONICAL": ("1" if c.get("canonical") else ""),
+  "CX_NODES": g("nodes"), "CX_GPUS_PER_NODE": g("gpus_per_node"),
+  "CX_SCALE_UP_DOMAIN": g("scale_up_domain"), "CX_SCOPE": g("scope"),
+  "CX_SCALE_UP_TRANSPORT": g("scale_up_transport"),
+  "CX_SCALE_OUT_TRANSPORT": g("scale_out_transport"),
+  "CX_TRANSPORT": g("transport"), "CX_TOPO": g("topology_class"),
+  "CX_SAMPLES_PER_POINT": g("samples_per_point"),
+  "CX_WARMUP_SEMANTICS": g("warmup_semantics"),
+}
+lines = [f"export {k}={shlex.quote(v)}" for k, v in env.items()]
+# Per-case timing "iters:trials:warmup" (fixed-512-v1 requires 8:64:32 everywhere);
+# cases without one must fall back to the harness defaults, so UNSET rather than export-empty
+# (an empty CX_ITERS would defeat the 8-iter default and break the run_ep argparse; NOTE no
+# apostrophes in this heredoc — bash command-substitution scanning chokes on unbalanced quotes).
+timing = g("timing")
+if timing:
+    parts = (timing.split(":") + ["", "", ""])[:3]
+    for k, v in zip(("CX_ITERS", "CX_TRIALS", "CX_WARMUP"), parts):
+        if v:
+            lines.append(f"export {k}={shlex.quote(v)}")
+else:
+    lines.append("unset CX_ITERS CX_TRIALS CX_WARMUP 2>/dev/null || true")
+print("\n".join(lines))
+PY
+)"
+    eval "$_exports"
+    # Each case has its OWN routing/dims -> its own canonical workload manifest. cx_stage_canonical
+    # short-circuits when CX_WORKLOAD_DIR is already set, so without this unset the first case's
+    # staged dir is reused for the rest and run_ep.py can't find the later cases' manifests
+    # (FileNotFoundError .cx_workloads/<wid>.manifest.json). Unset so every case re-stages its own.
+    unset CX_WORKLOAD_DIR 2>/dev/null || true
+    cx_log "  [$((ci+1))/$ncases] $CX_BENCH $CX_MODE/$CX_PHASE routing=$CX_ROUTING eplb=${CX_EPLB:-0}"
+    _cx_case_ts="$CX_TS"
+    CX_TS="${_cx_case_ts}-a01"
+    export CX_ATTEMPT_ID=1 CX_TS
+    dispatch_bench || {
+      failed_cases=$((failed_cases+1))
+      cx_log "  [$((ci+1))/$ncases] $CX_BENCH case FAILED; failed-case record preserved"
+    }
+    export CX_TS="$_cx_case_ts"
+    ci=$((ci + 1))
+  done
+  if [ "${failed_cases:-0}" -gt 0 ]; then
+    cx_log "SHARD done: $failed_cases/$ncases case(s) failed"
+    rc=1
+  fi
+  # The base timestamp matches every per-case file, so the final summary covers the whole shard.
+  export CX_TS="$_cx_ts_base"
+else
+  _cx_single_ts="$CX_TS"
+  CX_TS="${_cx_single_ts}-a01"
+  export CX_ATTEMPT_ID=1 CX_TS
+  dispatch_bench || rc=1
+fi
+
+# Summary table for the log; also fails the job if no valid results were produced.
+if [ "${CX_PRECISION_PROBE:-0}" != 1 ]; then
+  python3 summarize.py --results-dir results --runner "$CX_RUNNER" --ts "$CX_TS" || rc=1
+fi
+exit "$rc"
diff --git a/experimental/CollectiveX/schemas/channel-v1.schema.json b/experimental/CollectiveX/schemas/channel-v1.schema.json
new file mode 100644
index 000000000..87ffa86b0
--- /dev/null
+++ b/experimental/CollectiveX/schemas/channel-v1.schema.json
@@ -0,0 +1,23 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://inferencex.com/schemas/collectivex/channel-v1.schema.json",
+  "title": "CollectiveX public channel v1",
+  "type": "object",
+  "additionalProperties": false,
+  "required": ["format","channel","dataset","generated_at"],
+  "properties": {
+      "format": {"const": "collectivex.channel.v1"},
+      "channel": {"const": "dev-latest"},
+      "dataset": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": ["path","sha256","bytes"],
+            "properties": {
+                    "path": {"type": "string","pattern": "^datasets/[0-9a-f]{64}/dataset\\.json$"},
+                    "sha256": {"type": "string","pattern": "^[0-9a-f]{64}$"},
+                    "bytes": {"type": "integer","minimum": 1,"maximum": 33554432}
+                  }
+          },
+      "generated_at": {"type": "string","format": "date-time"}
+    }
+}
diff --git a/experimental/CollectiveX/schemas/private-bundle-v1.schema.json b/experimental/CollectiveX/schemas/private-bundle-v1.schema.json
new file mode 100644
index 000000000..789119692
--- /dev/null
+++ b/experimental/CollectiveX/schemas/private-bundle-v1.schema.json
@@ -0,0 +1,163 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://inferencex.com/schemas/collectivex/private-bundle-v1.schema.json",
+  "title": "CollectiveX private attempt bundle v1",
+  "type": "object",
+  "additionalProperties": false,
+  "required": [
+      "format",
+      "schema_version",
+      "created_at",
+      "ingest_id",
+      "run",
+      "matrix",
+      "sources",
+      "attempts",
+      "coverage",
+      "runtime_fingerprints",
+      "checksums",
+      "validation"
+    ],
+  "properties": {
+      "format": {"const": "collectivex.private.bundle.v1"},
+      "schema_version": {"const": 1},
+      "created_at": {"type": "string","format": "date-time","pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}[Tt][0-9]{2}:[0-9]{2}:[0-9]{2}(?:[.][0-9]+)?(?:[Zz]|[+-][0-9]{2}:[0-9]{2})$"},
+      "ingest_id": {"$ref": "#/$defs/sha256"},
+      "run": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": ["repository","run_id","run_attempt","qualification_index","source_sha"],
+            "properties": {
+                    "repository": {"type": "string","pattern": "^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$"},
+                    "run_id": {"type": "string","pattern": "^[1-9][0-9]*$"},
+                    "run_attempt": {"type": "integer","minimum": 1},
+                    "qualification_index": {"type":"integer","minimum":1,"maximum":3},
+                    "source_sha": {"type": "string","pattern": "^[0-9a-f]{40}$"}
+                  }
+          },
+      "matrix": {"$ref": "#/$defs/file"},
+      "sources": {"type": "array","minItems": 1,"uniqueItems": true,"items": {"$ref": "#/$defs/source"}},
+      "attempts": {
+            "type": "array",
+            "minItems": 1,
+            "items": {
+                    "type": "object",
+                    "additionalProperties": false,
+                    "required": [
+                              "attempt_id",
+                              "allocation_id",
+                              "case_id",
+                              "outcome",
+                              "reason",
+                              "selected",
+                              "document",
+                              "samples",
+                              "runtime_fingerprint_sha256",
+                              "series_ids",
+                              "evidence_ids"
+                            ],
+                    "properties": {
+                              "attempt_id": {"$ref": "#/$defs/attemptId"},
+                              "allocation_id": {"$ref": "#/$defs/allocationId"},
+                              "case_id": {"$ref": "#/$defs/caseId"},
+                              "outcome": {"$ref": "#/$defs/outcome"},
+                              "reason": {"$ref": "#/$defs/reason"},
+                              "selected": {"type": "boolean"},
+                              "document": {"$ref": "#/$defs/file"},
+                              "samples": {"oneOf": [{"type": "null"},{"$ref": "#/$defs/file"}]},
+                              "runtime_fingerprint_sha256": {"oneOf": [{"type": "null"},{"$ref": "#/$defs/sha256"}]},
+                              "series_ids": {"type": "array","uniqueItems": true,"items": {"$ref": "#/$defs/seriesId"}},
+                              "evidence_ids": {"type": "array","uniqueItems": true,"items": {"$ref": "#/$defs/evidenceId"}}
+                            }
+                  }
+          },
+      "coverage": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": ["expected_cases","terminal_cases","complete","outcome_counts","selections"],
+            "properties": {
+                    "expected_cases": {"type": "integer","minimum": 1},
+                    "terminal_cases": {"type": "integer","minimum": 0},
+                    "complete": {"type": "boolean"},
+                    "outcome_counts": {"$ref": "#/$defs/outcomeCounts"},
+                    "selections": {
+                              "type": "array",
+                              "minItems": 1,
+                              "items": {
+                                          "type": "object",
+                                          "additionalProperties": false,
+                                          "required": ["case_id","selected_attempt_id","outcome"],
+                                          "properties": {
+                                                        "case_id": {"$ref": "#/$defs/caseId"},
+                                                        "selected_attempt_id": {"$ref": "#/$defs/attemptId"},
+                                                        "outcome": {"$ref": "#/$defs/outcome"}
+                                                      }
+                                        }
+                            }
+                  }
+          },
+      "runtime_fingerprints": {"type": "array","uniqueItems": true,"items": {"$ref": "#/$defs/sha256"}},
+      "checksums": {"$ref": "#/$defs/file"},
+      "validation": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": ["policy","passed","checks"],
+            "properties": {
+                    "policy": {"const": "collectivex-publisher-v1"},
+                    "passed": {"const": true},
+                    "checks": {
+                              "type": "array",
+                              "minItems": 1,
+                              "uniqueItems": true,
+                              "items": {"type": "string","pattern": "^[a-z0-9][a-z0-9.-]*$"}
+                            }
+                  }
+          }
+    },
+  "$defs": {
+      "sha256": {"type": "string","pattern": "^[0-9a-f]{64}$"},
+      "caseId": {"type": "string","pattern": "^cxcase-v1-[0-9a-f]{64}$"},
+      "seriesId": {"type": "string","pattern": "^cxseries-v1-[0-9a-f]{64}$"},
+      "evidenceId": {"type": "string","pattern": "^cxevidence-v1-[0-9a-f]{64}$"},
+      "allocationId": {"type": "string","pattern": "^cxallocation-v1-[0-9a-f]{64}$"},
+      "attemptId": {"type": "string","pattern": "^cxattempt-v1-[0-9a-f]{64}$"},
+      "reason": {"oneOf": [{"type": "null"},{"type": "string","pattern": "^[a-z0-9][a-z0-9.-]*$","maxLength": 96}]},
+      "outcome": {"enum": ["success","unsupported","failed","invalid","diagnostic"]},
+      "outcomeCounts": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": ["success","unsupported","failed","invalid","diagnostic"],
+            "properties": {
+                    "success": {"type": "integer","minimum": 0},
+                    "unsupported": {"type": "integer","minimum": 0},
+                    "failed": {"type": "integer","minimum": 0},
+                    "invalid": {"type": "integer","minimum": 0},
+                    "diagnostic": {"type": "integer","minimum": 0}
+                  }
+          },
+      "file": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": ["path","sha256","bytes"],
+            "properties": {
+                    "path": {"type": "string","pattern": "^(?![.]{1,2}(?:/|$))[A-Za-z0-9_.-]+(?:/(?![.]{1,2}(?:/|$))[A-Za-z0-9_.-]+)*$"},
+                    "sha256": {"$ref": "#/$defs/sha256"},
+                    "bytes": {"type": "integer","minimum": 1}
+                  }
+          },
+      "source": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": ["path","sha256","bytes","artifact_name"],
+            "properties": {
+                    "path": {"type": "string","pattern": "^(?![.]{1,2}(?:/|$))[A-Za-z0-9_.-]+(?:/(?![.]{1,2}(?:/|$))[A-Za-z0-9_.-]+)*$"},
+                    "sha256": {"$ref": "#/$defs/sha256"},
+                    "bytes": {"type": "integer","minimum": 1},
+                    "artifact_name": {
+                              "type": "string",
+                              "pattern": "^cx(?:unsupported|shard-[a-z0-9][a-z0-9_.-]{0,127})-[1-9][0-9]*-[1-9][0-9]*$"
+                            }
+                  }
+          }
+    }
+}
diff --git a/experimental/CollectiveX/schemas/public-dataset-v1.schema.json b/experimental/CollectiveX/schemas/public-dataset-v1.schema.json
new file mode 100644
index 000000000..cf5a5eed4
--- /dev/null
+++ b/experimental/CollectiveX/schemas/public-dataset-v1.schema.json
@@ -0,0 +1,880 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://inferencex.com/schemas/collectivex/public-dataset-v1.schema.json",
+  "title": "CollectiveX sanitized public dataset v1",
+  "type": "object",
+  "additionalProperties": false,
+  "required": [
+      "format",
+      "schema_version",
+      "generated_at",
+      "source_bundle_ids",
+      "promotion",
+      "coverage",
+      "attempts",
+      "series",
+      "cohorts",
+      "rankings",
+      "recommendations",
+      "sensitivities"
+    ],
+  "properties": {
+      "format": {"const": "collectivex.public.v1"},
+      "schema_version": {"const": 1},
+      "generated_at": {"type": "string","format": "date-time","pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}[Tt][0-9]{2}:[0-9]{2}:[0-9]{2}(?:[.][0-9]+)?(?:[Zz]|[+-][0-9]{2}:[0-9]{2})$"},
+      "source_bundle_ids": {"type": "array","uniqueItems": true,"items": {"$ref": "#/$defs/sha256"}},
+      "promotion": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": [
+                    "status",
+                    "reason",
+                    "matrix_id",
+                    "allocation_ids",
+                    "required_allocations",
+                    "qualification_indices",
+                    "requested_cases",
+                    "terminal_cases",
+                    "measured_cases",
+                    "unsupported_cases",
+                    "requested_points",
+                    "terminal_points",
+                    "measured_points",
+                    "unsupported_points",
+                    "policy"
+                  ],
+            "properties": {
+                    "status": {"enum": ["promoted","diagnostic","quarantined"]},
+                    "reason": {"$ref": "#/$defs/reason"},
+                    "matrix_id": {"oneOf": [{"type": "null"},{"$ref": "#/$defs/sha256"}]},
+                    "allocation_ids": {"type": "array","uniqueItems": true,"items": {"$ref": "#/$defs/allocationId"}},
+                    "required_allocations": {"const": 3},
+                    "qualification_indices": {"type":"array","minItems":0,"maxItems":3,"uniqueItems":true,"items":{"enum":[1,2,3]}},
+                    "requested_cases": {"type": "integer","minimum": 0},
+                    "terminal_cases": {"type": "integer","minimum": 0},
+                    "measured_cases": {"type": "integer","minimum": 0},
+                    "unsupported_cases": {"type": "integer","minimum": 0},
+                    "requested_points": {"type": "integer","minimum": 0},
+                    "terminal_points": {"type": "integer","minimum": 0},
+                    "measured_points": {"type": "integer","minimum": 0},
+                    "unsupported_points": {"type": "integer","minimum": 0},
+                    "policy": {"const": "collectivex-decision-grade-v1"}
+                  }
+          },
+      "coverage": {"type": "array","uniqueItems":true,"items": {"$ref": "#/$defs/coverage"}},
+      "attempts": {"type": "array","items": {"$ref": "#/$defs/attempt"}},
+      "series": {"type": "array","items": {"$ref": "#/$defs/series"}},
+      "cohorts": {"type": "array","items": {"$ref": "#/$defs/cohort"}},
+      "rankings": {"type": "array","items": {"$ref": "#/$defs/ranking"}},
+      "recommendations": {"type": "array","items": {"$ref": "#/$defs/recommendation"}},
+      "sensitivities": {"type": "array","items": {"$ref": "#/$defs/sensitivity"}}
+    },
+  "$defs": {
+      "sha256": {"type": "string","pattern": "^[0-9a-f]{64}$"},
+      "caseId": {"type": "string","pattern": "^cxcase-v1-[0-9a-f]{64}$"},
+      "workloadId": {"type": "string","pattern": "^cxwork-v1-[0-9a-f]{64}$"},
+      "seriesId": {"type": "string","pattern": "^cxseries-v1-[0-9a-f]{64}$"},
+      "pointId": {"type": "string","pattern": "^cxpoint-v1-[0-9a-f]{64}$"},
+      "evidenceId": {"type": "string","pattern": "^cxevidence-v1-[0-9a-f]{64}$"},
+      "allocationId": {"type": "string","pattern": "^cxallocation-v1-[0-9a-f]{64}$"},
+      "attemptId": {"type": "string","pattern": "^cxattempt-v1-[0-9a-f]{64}$"},
+      "safeId": {"type": "string","pattern": "^[a-z0-9][a-z0-9_.-]*$","maxLength": 128},
+      "communicationAxis": {
+            "type":"object",
+            "additionalProperties":false,
+            "required": [
+                    "alignment_contract","api_input_dtype","api_output_dtype","communication_format",
+                    "conversion_boundary","padding_contract","quantization_origin","scale_dtype",
+                    "scale_group_size","scale_layout"
+                  ],
+            "properties": {
+                    "alignment_contract":{"enum":["native-bf16-vector-alignment","hidden-block-128","native-fp8-vector-alignment","value-block-64"]},
+                    "api_input_dtype":{"enum":["bf16","fp8-e4m3fn-with-f32-scale","fp8-e4m3fnuz-with-f32-scale"]},
+                    "api_output_dtype":{"enum":["bf16","fp8-e4m3fn-with-f32-scale","fp8-e4m3fnuz-with-f32-scale"]},
+                    "communication_format":{"enum":["bf16","fp8-e4m3fn","fp8-e4m3fnuz","logfmt10"]},
+                    "conversion_boundary":{"enum":["none","before-dispatch-timing","inside-dispatch-timing","inside-combine-timing"]},
+                    "padding_contract":{"enum":["none","right-zero-pad-hidden-to-128","right-zero-pad-values-to-64"]},
+                    "quantization_origin":{"enum":["none","caller-prequantized","backend-fused","backend-internal","backend-internal-direct-cast"]},
+                    "scale_dtype":{"oneOf":[{"type":"null"},{"enum":["f32","implicit-logfmt10"]}]},
+                    "scale_group_size":{"oneOf":[{"type":"null"},{"enum":[64,128]}]},
+                    "scale_layout":{"enum":["none","per-token-hidden-block","dynamic-per-64-values"]}
+                  }
+          },
+      "precisionProfile": {
+            "enum": [
+                    "d-bf16.c-bf16",
+                    "d-fp8-e4m3fn-b128-f32-prequantized.c-bf16",
+                    "d-fp8-e4m3fnuz-b128-f32-prequantized.c-bf16",
+                    "d-fp8-e4m3fn-b128-f32-fused.c-bf16",
+                    "d-bf16.c-logfmt10-dynamic64",
+                    "d-fp8-e4m3fn-b128-f32-fused.c-logfmt10-dynamic64",
+                    "d-bf16.c-fp8-e4m3fn-direct-cast-noscale",
+                    "d-fp8-e4m3fn-b128-f32-prequantized.c-fp8-e4m3fn-direct-cast-noscale",
+                    "d-bf16.c-fp8-e4m3fnuz-direct-cast-noscale",
+                    "d-fp8-e4m3fnuz-b128-f32-prequantized.c-fp8-e4m3fnuz-direct-cast-noscale"
+                  ]
+          },
+      "byteAccounting": {
+            "type":"object",
+            "additionalProperties":false,
+            "required":["accounting_contract","activation_data_bytes","scale_bytes","total_logical_bytes"],
+            "properties": {
+                    "accounting_contract":{"const":"activation-data-plus-scales-v1"},
+                    "activation_data_bytes":{"type":"integer","minimum":0},
+                    "scale_bytes":{"type":"integer","minimum":0},
+                    "total_logical_bytes":{"type":"integer","minimum":0}
+                  }
+          },
+      "publicationTier": {"enum": ["official","comparable-experimental"]},
+      "label": {"type": "string","minLength": 1,"maxLength": 160},
+      "nullableLabel": {"oneOf": [{"type": "null"},{"$ref": "#/$defs/label"}]},
+      "reason": {"oneOf": [{"type": "null"},{"$ref": "#/$defs/reasonId"}]},
+      "reasonId": {"type":"string","pattern":"^[a-z0-9][a-z0-9.-]*$","maxLength":96},
+      "outcome": {"enum": ["success","unsupported","failed","invalid","diagnostic"]},
+      "precisionAxisEvidence": {
+            "type":"object",
+            "additionalProperties":false,
+            "required":[
+                    "dequantized_semantics","encoded_payload_valid","max_abs_error","max_rel_error",
+                    "passed","saturation_count","saturation_rate","scales_finite","scales_positive"
+                  ],
+            "properties": {
+                    "dequantized_semantics":{"type":"boolean"},
+                    "encoded_payload_valid":{"type":"boolean"},
+                    "max_abs_error":{"type":"number","minimum":0},
+                    "max_rel_error":{"type":"number","minimum":0},
+                    "passed":{"type":"boolean"},
+                    "saturation_count":{"type":"integer","minimum":0},
+                    "saturation_rate":{"type":"number","minimum":0,"maximum":1},
+                    "scales_finite":{"oneOf":[{"type":"null"},{"type":"boolean"}]},
+                    "scales_positive":{"oneOf":[{"type":"null"},{"type":"boolean"}]}
+                  }
+          },
+      "precisionEvidence": {
+            "type":"object",
+            "additionalProperties":false,
+            "required":["combine","dispatch","passed","profile_id"],
+            "properties": {
+                    "combine":{"$ref":"#/$defs/precisionAxisEvidence"},
+                    "dispatch":{"$ref":"#/$defs/precisionAxisEvidence"},
+                    "passed":{"type":"boolean"},
+                    "profile_id":{"$ref":"#/$defs/precisionProfile"}
+                  }
+          },
+      "pointCorrectness": {
+            "type":"object",
+            "additionalProperties":false,
+            "required":["semantic_pass","precision"],
+            "properties": {
+                    "semantic_pass":{"type":"boolean"},
+                    "precision":{"$ref":"#/$defs/precisionEvidence"}
+                  }
+          },
+      "pointStability": {
+            "type":"object",
+            "additionalProperties":false,
+            "allOf":[
+                    {
+                              "if":{"properties":{"complete":{"const":true}},"required":["complete"]},
+                              "then":{"properties":{
+                                      "qualification_indices":{"const":[1,2,3]},
+                                      "p50_max_min_ratio":{"type":"number","minimum":1},
+                                      "p99_max_min_ratio":{"type":"number","minimum":1}
+                                    }},
+                              "else":{"properties":{
+                                      "p50_max_min_ratio":{"type":"null"},
+                                      "p99_max_min_ratio":{"type":"null"},
+                                      "stable_p50":{"const":false},
+                                      "stable_p99":{"const":false}
+                                    }}
+                            }
+                  ],
+            "required":[
+                    "complete","qualification_indices","p50_max_min_ratio","p99_max_min_ratio",
+                    "stable_p50","stable_p99"
+                  ],
+            "properties": {
+                    "complete":{"type":"boolean"},
+                    "qualification_indices":{"type":"array","minItems":1,"maxItems":3,"uniqueItems":true,"items":{"enum":[1,2,3]}},
+                    "p50_max_min_ratio":{"oneOf":[{"type":"null"},{"type":"number","minimum":1}]},
+                    "p99_max_min_ratio":{"oneOf":[{"type":"null"},{"type":"number","minimum":1}]},
+                    "stable_p50":{"type":"boolean"},
+                    "stable_p99":{"type":"boolean"}
+                  }
+          },
+      "coverageTopology": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": [
+                    "ep_size",
+                    "nodes",
+                    "gpus_per_node",
+                    "scale_up_domain",
+                    "scope",
+                    "scale_up_transport",
+                    "scale_out_transport",
+                    "transport",
+                    "topology_class"
+                  ],
+            "properties": {
+                    "ep_size": {"type": "integer","minimum": 1},
+                    "nodes": {"type": "integer","minimum": 1},
+                    "gpus_per_node": {"type": "integer","minimum": 1},
+                    "scale_up_domain": {"type": "integer","minimum": 1},
+                    "scope": {"enum": ["scale-up","scale-out"]},
+                    "scale_up_transport": {"$ref": "#/$defs/safeId"},
+                    "scale_out_transport": {"oneOf": [{"type": "null"},{"$ref": "#/$defs/safeId"}]},
+                    "transport": {"$ref": "#/$defs/safeId"},
+                    "topology_class": {"$ref": "#/$defs/safeId"}
+                  }
+          },
+      "coverageResource": {
+            "type":"object",
+            "additionalProperties":false,
+            "required":["mode","profile","comm_units_kind","configured_units"],
+            "properties": {
+                    "mode":{"const":"fixed-profile"},
+                    "profile":{"oneOf":[{"type":"null"},{"$ref":"#/$defs/safeId"}]},
+                    "comm_units_kind":{"$ref":"#/$defs/nullableLabel"},
+                    "configured_units":{"oneOf":[{"type":"null"},{"type":"integer","minimum":1}]}
+                  }
+          },
+      "coveragePoint": {
+            "type":"object",
+            "additionalProperties":false,
+            "allOf":[
+                    {
+                              "if":{"properties":{"terminal_status":{"const":"measured"}},"required":["terminal_status"]},
+                              "then":{"properties":{"point_id":{"$ref":"#/$defs/pointId"},"series_id":{"$ref":"#/$defs/seriesId"},"reason":{"type":"null"}}}
+                            },
+                    {
+                              "if":{"properties":{"terminal_status":{"const":"unsupported"}},"required":["terminal_status"]},
+                              "then":{"properties":{"point_id":{"type":"null"},"series_id":{"type":"null"},"reason":{"$ref":"#/$defs/reasonId"}}}
+                            },
+                    {
+                              "if":{"properties":{"terminal_status":{"enum":["failed","invalid"]}},"required":["terminal_status"]},
+                              "then":{"properties":{"reason":{"$ref":"#/$defs/reasonId"}}}
+                            }
+                  ],
+            "required":["point_id","series_id","tokens_per_rank","global_tokens","terminal_status","reason"],
+            "properties": {
+                    "point_id":{"oneOf":[{"type":"null"},{"$ref":"#/$defs/pointId"}]},
+                    "series_id":{"oneOf":[{"type":"null"},{"$ref":"#/$defs/seriesId"}]},
+                    "tokens_per_rank":{"type":"integer","minimum":1},
+                    "global_tokens":{"type":"integer","minimum":1},
+                    "terminal_status":{"enum":["measured","unsupported","failed","invalid","diagnostic"]},
+                    "reason":{"$ref":"#/$defs/reason"}
+                  }
+          },
+      "coverage": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": [
+                    "case_id",
+                    "label",
+                    "required",
+                    "sku",
+                    "suite",
+                    "workload",
+                    "publication_tier",
+                    "backend",
+                    "backend_generation",
+                    "mode",
+                    "phase",
+                    "routing",
+                    "eplb",
+                    "precision_profile",
+                    "dispatch_precision",
+                    "combine_precision",
+                    "resource",
+                    "topology",
+                    "points",
+                    "disposition",
+                    "selected_attempt_id",
+                    "outcome",
+                    "failure_mode",
+                    "reason",
+                    "attempt_ids"
+                  ],
+            "properties": {
+                    "case_id": {"$ref": "#/$defs/caseId"},
+                    "label": {"$ref": "#/$defs/label"},
+                    "required": {"type": "boolean"},
+                    "sku": {"$ref": "#/$defs/safeId"},
+                    "suite": {"$ref":"#/$defs/safeId"},
+                    "workload": {"$ref":"#/$defs/safeId"},
+                    "publication_tier": {"$ref":"#/$defs/publicationTier"},
+                    "backend": {"$ref": "#/$defs/safeId"},
+                    "backend_generation": {"$ref":"#/$defs/nullableLabel"},
+                    "mode": {"enum": ["normal","low-latency"]},
+                    "phase": {"enum": ["decode","prefill"]},
+                    "routing": {"enum":["uniform","zipf"]},
+                    "eplb": {"type":"boolean"},
+                    "precision_profile": {"$ref":"#/$defs/precisionProfile"},
+                    "dispatch_precision": {"$ref":"#/$defs/communicationAxis"},
+                    "combine_precision": {"$ref":"#/$defs/communicationAxis"},
+                    "resource": {"$ref":"#/$defs/coverageResource"},
+                    "topology": {"$ref": "#/$defs/coverageTopology"},
+                    "points": {"type":"array","minItems":1,"uniqueItems":true,"items":{"$ref":"#/$defs/coveragePoint"}},
+                    "disposition": {"enum": ["runnable","unsupported"]},
+                    "selected_attempt_id": {"oneOf": [{"type": "null"},{"$ref": "#/$defs/attemptId"}]},
+                    "outcome": {"$ref": "#/$defs/outcome"},
+                    "failure_mode": {"$ref": "#/$defs/reason"},
+                    "reason": {"$ref": "#/$defs/reason"},
+                    "attempt_ids": {"type": "array","uniqueItems": true,"items": {"$ref": "#/$defs/attemptId"}}
+                  }
+          },
+      "attempt": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": [
+                    "attempt_id",
+                    "evidence",
+                    "case_id",
+                    "allocation_id",
+                    "run_id",
+                    "run_attempt",
+                    "qualification_index",
+                    "attempt_index",
+                    "selected",
+                    "outcome",
+                    "failure_mode",
+                    "reason",
+                    "series_id",
+                    "completed_at"
+                  ],
+            "properties": {
+                    "attempt_id": {"$ref": "#/$defs/attemptId"},
+                    "evidence": {
+                              "type": "array",
+                              "uniqueItems": true,
+                              "items": {
+                                          "type": "object",
+                                          "additionalProperties": false,
+                                          "required": ["evidence_id","point_id"],
+                                          "properties": {"evidence_id": {"$ref": "#/$defs/evidenceId"},"point_id": {"$ref": "#/$defs/pointId"}}
+                                        }
+                            },
+                    "case_id": {"$ref": "#/$defs/caseId"},
+                    "allocation_id": {"$ref": "#/$defs/allocationId"},
+                    "run_id": {"type": "string","pattern": "^[1-9][0-9]*$"},
+                    "run_attempt": {"type": "integer","minimum": 1},
+                    "qualification_index": {"type":"integer","minimum":1,"maximum":3},
+                    "attempt_index": {"type": "integer","minimum": 1},
+                    "selected": {"type": "boolean"},
+                    "outcome": {"$ref": "#/$defs/outcome"},
+                    "failure_mode": {"$ref": "#/$defs/reason"},
+                    "reason": {"$ref": "#/$defs/reason"},
+                    "series_id": {"oneOf": [{"type": "null"},{"$ref": "#/$defs/seriesId"}]},
+                    "completed_at": {"oneOf": [{"type": "null"},{"type": "string","format": "date-time","pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}[Tt][0-9]{2}:[0-9]{2}:[0-9]{2}(?:[.][0-9]+)?(?:[Zz]|[+-][0-9]{2}:[0-9]{2})$"}]}
+                  }
+          },
+      "eligibility": {
+            "type": "object",
+            "additionalProperties": false,
+            "allOf": [{
+                    "if": {"properties": {"decision_grade": {"const": true}},"required": ["decision_grade"]},
+                    "then": {"properties": {"reasons": {"maxItems": 0}}},
+                    "else": {"properties": {"reasons": {"minItems": 1}}}
+                  }],
+            "required": [
+                    "decision_grade",
+                    "allocation_ids",
+                    "complete",
+                    "correct",
+                    "measured_roundtrip_p99",
+                    "stable_p50",
+                    "stable_p99",
+                    "stable_ordering",
+                    "p50_max_min_ratio",
+                    "p99_max_min_ratio",
+                    "reasons"
+                  ],
+            "properties": {
+                    "decision_grade": {"type": "boolean"},
+                    "allocation_ids": {"type": "array","uniqueItems": true,"items": {"$ref": "#/$defs/allocationId"}},
+                    "complete": {"type": "boolean"},
+                    "correct": {"type": "boolean"},
+                    "measured_roundtrip_p99": {"type": "boolean"},
+                    "stable_p50": {"type": "boolean"},
+                    "stable_p99": {"type": "boolean"},
+                    "stable_ordering": {"type": "boolean"},
+                    "p50_max_min_ratio": {"oneOf": [{"type": "null"},{"type": "number","minimum": 1}]},
+                    "p99_max_min_ratio": {"oneOf": [{"type": "null"},{"type": "number","minimum": 1}]},
+                    "reasons": {
+                              "uniqueItems": true,
+                              "type": "array",
+                              "items": {"$ref": "#/$defs/reasonId"}
+                            }
+                  }
+          },
+      "percentiles": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": ["p50","p90","p95","p99"],
+            "properties": {
+                    "p50": {"type": "number","exclusiveMinimum": 0},
+                    "p90": {"type": "number","exclusiveMinimum": 0},
+                    "p95": {"type": "number","exclusiveMinimum": 0},
+                    "p99": {"type": "number","exclusiveMinimum": 0}
+                  }
+          },
+      "component": {
+            "type": "object",
+            "additionalProperties": false,
+            "allOf":[
+                    {
+                              "if":{"properties":{"origin":{"const":"measured"}},"required":["origin"]},
+                              "then":{"properties":{"sample_count":{"const":512}}}
+                            },
+                    {
+                              "if":{"properties":{"origin":{"const":"derived"}},"required":["origin"]},
+                              "then":{"properties":{"sample_count":{"type":"null"}}}
+                            }
+                  ],
+            "required": ["origin","latency_us","byte_provenance","activation_data_rate_gbps_at_latency_percentile","total_logical_data_rate_gbps_at_latency_percentile","sample_count"],
+            "properties": {
+                    "origin": {"enum": ["measured","derived"]},
+                    "latency_us": {"$ref": "#/$defs/percentiles"},
+                    "byte_provenance": {"$ref":"#/$defs/byteAccounting"},
+                    "activation_data_rate_gbps_at_latency_percentile": {"oneOf":[{"type":"null"},{"$ref":"#/$defs/percentiles"}]},
+                    "total_logical_data_rate_gbps_at_latency_percentile": {"oneOf":[{"type":"null"},{"$ref":"#/$defs/percentiles"}]},
+                    "sample_count": {"oneOf": [{"type": "null"},{"type": "integer","minimum": 1}]}
+                  }
+          },
+      "nullableComponent": {"oneOf": [{"type": "null"},{"$ref": "#/$defs/component"}]},
+      "trialDiagnosticComponent": {
+            "type":"object",
+            "additionalProperties":false,
+            "required":["drift_flagged","first_last_median_ratio","outlier_flagged","robust_outlier_fraction","trial_count"],
+            "properties": {
+                    "drift_flagged":{"type":"boolean"},
+                    "first_last_median_ratio":{"type":"number","minimum":1},
+                    "outlier_flagged":{"type":"boolean"},
+                    "robust_outlier_fraction":{"type":"number","minimum":0,"maximum":1},
+                    "trial_count":{"const":192}
+                  }
+          },
+      "nullableTrialDiagnosticComponent": {"oneOf":[{"type":"null"},{"$ref":"#/$defs/trialDiagnosticComponent"}]},
+      "trialDiagnostics": {
+            "type":"object",
+            "additionalProperties":false,
+            "required":["components","flagged","reasons"],
+            "properties": {
+                    "components": {
+                              "type":"object",
+                              "additionalProperties":false,
+                              "required":["dispatch","stage","combine","roundtrip"],
+                              "properties": {
+                                      "dispatch":{"$ref":"#/$defs/nullableTrialDiagnosticComponent"},
+                                      "stage":{"$ref":"#/$defs/nullableTrialDiagnosticComponent"},
+                                      "combine":{"$ref":"#/$defs/nullableTrialDiagnosticComponent"},
+                                      "roundtrip":{"$ref":"#/$defs/nullableTrialDiagnosticComponent"}
+                                    }
+                            },
+                    "flagged":{"type":"boolean"},
+                    "reasons":{"type":"array","maxItems":2,"uniqueItems":true,"items":{"enum":["trial-drift","trial-outliers"]}}
+                  }
+          },
+      "point": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": [
+                    "point_id",
+                    "tokens_per_rank",
+                    "global_tokens",
+                    "anomalies",
+                    "correctness",
+                    "stability",
+                    "trial_diagnostics",
+                    "routing",
+                    "components",
+                    "roundtrip_token_rate_at_latency_percentile",
+                    "evidence_ids"
+                  ],
+            "properties": {
+                    "point_id": {"$ref": "#/$defs/pointId"},
+                    "tokens_per_rank": {"type": "integer","minimum": 1},
+                    "global_tokens": {"type": "integer","minimum": 1},
+                    "anomalies": {"type":"array","maxItems":16,"uniqueItems":true,"items":{"$ref":"#/$defs/reasonId"}},
+                    "correctness": {"$ref":"#/$defs/pointCorrectness"},
+                    "stability": {"$ref":"#/$defs/pointStability"},
+                    "trial_diagnostics":{"$ref":"#/$defs/trialDiagnostics"},
+                    "routing": {
+                              "type": "object",
+                              "additionalProperties": false,
+                              "required": [
+                                          "fanout_mean",
+                                          "recv_tokens_max",
+                                          "expert_load_cv",
+                                          "payload_rank_cv",
+                                          "hotspot_ratio",
+                                          "empty_expert_count",
+                                          "empty_rank_count",
+                                          "routed_copies"
+                                        ],
+                              "properties": {
+                                          "fanout_mean": {"type": "number","minimum": 0},
+                                          "recv_tokens_max": {"type": "integer","minimum": 0},
+                                          "expert_load_cv": {"type": "number","minimum": 0},
+                                          "payload_rank_cv": {"type": "number","minimum": 0},
+                                          "hotspot_ratio": {"type": "number","minimum": 0},
+                                          "empty_expert_count": {"type": "integer","minimum": 0},
+                                          "empty_rank_count": {"type": "integer","minimum": 0},
+                                          "routed_copies": {"type": "integer","minimum": 1}
+                                        }
+                            },
+                    "components": {
+                              "type": "object",
+                              "additionalProperties": false,
+                              "required": ["dispatch","stage","combine","roundtrip","isolated_sum"],
+                              "properties": {
+                                          "dispatch": {"$ref": "#/$defs/nullableComponent"},
+                                          "stage": {"$ref":"#/$defs/nullableComponent"},
+                                          "combine": {"$ref": "#/$defs/nullableComponent"},
+                                          "roundtrip": {"$ref": "#/$defs/nullableComponent"},
+                                          "isolated_sum": {"$ref": "#/$defs/nullableComponent"}
+                                        }
+                            },
+                    "roundtrip_token_rate_at_latency_percentile": {"$ref": "#/$defs/percentiles"},
+                    "evidence_ids": {"type": "array","minItems":1,"maxItems":3,"uniqueItems": true,"items": {"$ref": "#/$defs/evidenceId"}}
+                  }
+          },
+      "series": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": [
+                    "series_id",
+                    "label",
+                    "status",
+                    "case_ids",
+                    "allocation_ids",
+                    "model",
+                    "suite",
+                    "mode",
+                    "phase",
+                    "publication_tier",
+                    "backend",
+                    "build",
+                    "system",
+                    "workload",
+                    "eplb",
+                    "resource",
+                    "measurement",
+                    "points",
+                    "eligibility"
+                  ],
+            "properties": {
+                    "series_id": {"$ref": "#/$defs/seriesId"},
+                    "label": {"$ref": "#/$defs/label"},
+                    "status": {"enum": ["decision-grade","diagnostic"]},
+                    "case_ids": {"type": "array","minItems": 1,"uniqueItems": true,"items": {"$ref": "#/$defs/caseId"}},
+                    "allocation_ids": {"type": "array","minItems": 1,"uniqueItems": true,"items": {"$ref": "#/$defs/allocationId"}},
+                    "model": {"$ref": "#/$defs/safeId"},
+                    "suite": {"$ref": "#/$defs/safeId"},
+                    "mode": {"enum": ["normal","low-latency"]},
+                    "phase": {"enum": ["decode","prefill"]},
+                    "publication_tier": {"$ref": "#/$defs/publicationTier"},
+                    "backend": {
+                              "type": "object",
+                              "additionalProperties": false,
+                              "required": ["id","label","role","generation","version"],
+                              "properties": {
+                                          "id": {"$ref": "#/$defs/safeId"},
+                                          "label": {"$ref": "#/$defs/label"},
+                                          "role": {"enum": ["library","reference"]},
+                                          "generation": {"$ref": "#/$defs/nullableLabel"},
+                                          "version": {"$ref": "#/$defs/nullableLabel"}
+                                        }
+                            },
+                    "build": {
+                              "type": "object",
+                              "additionalProperties": false,
+                              "required": ["implementation_contract_sha256","public_config_sha256","routing_control_sha256","runtime_fingerprint_sha256","image_digest","source_sha","squash_sha256"],
+                              "properties": {
+                                          "implementation_contract_sha256": {"$ref": "#/$defs/sha256"},
+                                          "public_config_sha256": {"$ref": "#/$defs/sha256"},
+                                          "routing_control_sha256": {"$ref": "#/$defs/sha256"},
+                                          "runtime_fingerprint_sha256": {"$ref": "#/$defs/sha256"},
+                                          "image_digest": {"type": "string","pattern": "^sha256:[0-9a-f]{64}$"},
+                                          "source_sha": {"type": "string","pattern": "^[0-9a-f]{40,64}$"},
+                                          "squash_sha256": {"$ref": "#/$defs/sha256"}
+                                        }
+                            },
+                    "system": {
+                              "type": "object",
+                              "additionalProperties": false,
+                              "required": ["sku","label","vendor","topology_class","transport","scale_up_transport","scale_out_transport","scope","nodes","gpus_per_node","scale_up_domain","world_size","ep_size","placement"],
+                              "properties": {
+                                          "sku": {"$ref": "#/$defs/safeId"},
+                                          "label": {"$ref": "#/$defs/label"},
+                                          "vendor": {"enum": ["nvidia","amd"]},
+                                          "topology_class": {"$ref": "#/$defs/safeId"},
+                                          "transport": {"$ref": "#/$defs/safeId"},
+                                          "scale_up_transport": {"$ref": "#/$defs/safeId"},
+                                          "scale_out_transport": {"oneOf": [{"type": "null"},{"$ref": "#/$defs/safeId"}]},
+                                          "scope": {"enum": ["scale-up","scale-out"]},
+                                          "nodes": {"type": "integer","minimum": 1},
+                                          "gpus_per_node": {"type": "integer","minimum": 1},
+                                          "scale_up_domain": {"type": "integer","minimum": 1},
+                                          "world_size": {"type": "integer","minimum": 1},
+                                          "ep_size": {"type": "integer","minimum": 1},
+                                          "placement": {"enum": ["packed"]}
+                                        }
+                            },
+                    "workload": {
+                              "type": "object",
+                              "additionalProperties": false,
+                              "required": [
+                                          "workload_id",
+                                          "hidden",
+                                          "top_k",
+                                          "experts",
+                                          "routing",
+                                          "eplb",
+                                          "precision_profile",
+                                          "dispatch_precision",
+                                          "combine_precision",
+                                          "activation_profile"
+                                        ],
+                              "properties": {
+                                          "workload_id": {"$ref": "#/$defs/workloadId"},
+                                          "hidden": {"type": "integer","minimum": 1},
+                                          "top_k": {"type": "integer","minimum": 1},
+                                          "experts": {"type": "integer","minimum": 1},
+                                          "routing": {"enum": ["uniform","zipf"]},
+                                          "eplb": {"type": "boolean"},
+                                          "precision_profile": {"$ref":"#/$defs/precisionProfile"},
+                                          "dispatch_precision": {"$ref":"#/$defs/communicationAxis"},
+                                          "combine_precision": {"$ref":"#/$defs/communicationAxis"},
+                                          "activation_profile": {"const": "canonical-counter-source-v4"}
+                                        }
+                            },
+                    "eplb": {
+                              "type": "object",
+                              "additionalProperties": false,
+                              "allOf":[
+                                      {
+                                                "if":{"properties":{"enabled":{"const":true}},"required":["enabled"]},
+                                                "then":{"properties":{
+                                                        "calibration_workload_id":{"$ref":"#/$defs/workloadId"},
+                                                        "calibration_trace_sha256":{"$ref":"#/$defs/sha256"},
+                                                        "calibration_window":{"const":"collectivex-eplb-calibration-window-v1"},
+                                                        "calibration_token_offset":{"type":"integer","minimum":0}
+                                                      }},
+                                                "else":{"properties":{
+                                                        "calibration_workload_id":{"type":"null"},
+                                                        "calibration_trace_sha256":{"type":"null"},
+                                                        "calibration_window":{"type":"null"},
+                                                        "calibration_token_offset":{"type":"null"}
+                                                      }}
+                                              }
+                                    ],
+                              "required": [
+                                          "enabled",
+                                          "calibration_workload_id",
+                                          "calibration_trace_sha256",
+                                          "calibration_window",
+                                          "calibration_token_offset",
+                                          "planner",
+                                          "mapping_sha256",
+                                          "logical_experts",
+                                          "physical_experts",
+                                          "redundant_experts",
+                                          "reference_tokens_per_rank",
+                                          "replicated_experts",
+                                          "max_replicas",
+                                          "imbalance_before",
+                                          "imbalance_after"
+                                        ],
+                              "properties": {
+                                          "enabled": {"type": "boolean"},
+                                          "calibration_workload_id": {"oneOf":[{"type":"null"},{"$ref":"#/$defs/workloadId"}]},
+                                          "calibration_trace_sha256": {"oneOf":[{"type":"null"},{"$ref":"#/$defs/sha256"}]},
+                                          "calibration_window": {"oneOf":[{"type":"null"},{"const":"collectivex-eplb-calibration-window-v1"}]},
+                                          "calibration_token_offset": {"oneOf":[{"type":"null"},{"type":"integer","minimum":0}]},
+                                          "planner": {"$ref": "#/$defs/nullableLabel"},
+                                          "mapping_sha256": {"oneOf": [{"type": "null"},{"$ref": "#/$defs/sha256"}]},
+                                          "logical_experts": {"type": "integer","minimum": 1},
+                                          "physical_experts": {"type": "integer","minimum": 1},
+                                          "redundant_experts": {"type": "integer","minimum": 0},
+                                          "reference_tokens_per_rank": {"oneOf": [{"type": "null"},{"type": "integer","minimum": 1}]},
+                                          "replicated_experts": {"type": "integer","minimum": 0},
+                                          "max_replicas": {"oneOf": [{"type": "null"},{"type": "integer","minimum": 0}]},
+                                          "imbalance_before": {"oneOf": [{"type": "null"},{"type": "number","minimum": 0}]},
+                                          "imbalance_after": {"oneOf": [{"type": "null"},{"type": "number","minimum": 0}]}
+                                        }
+                            },
+                    "resource": {
+                              "type": "object",
+                              "additionalProperties": false,
+                              "required": ["mode","profile","comm_units_kind","configured_units"],
+                              "properties": {
+                                          "mode": {"const": "fixed-profile"},
+                                          "profile": {"$ref": "#/$defs/safeId"},
+                                          "comm_units_kind": {"$ref": "#/$defs/nullableLabel"},
+                                          "configured_units": {"oneOf": [{"type": "null"},{"type": "integer","minimum": 1}]}
+                                        }
+                            },
+                    "measurement": {
+                              "type": "object",
+                              "additionalProperties": false,
+                              "required": [
+                                          "contract",
+                                          "component_order_contract",
+                                          "combine_semantics",
+                                          "payload_unit",
+                                          "sampling_contract",
+                                          "iters",
+                                          "trials",
+                                          "warmups",
+                                          "samples_per_component",
+                                          "qualification_indices",
+                                          "headline_component",
+                                          "headline_percentile"
+                                        ],
+                              "properties": {
+                                          "contract": {"enum": ["layout-and-dispatch-v1","expert-packed-weighted-combine-v1"]},
+                                          "component_order_contract": {"const":"qualification-hash-rotated-components-v1"},
+                                          "combine_semantics": {"enum": ["activation-only","gate-weighted"]},
+                                          "payload_unit": {"enum": ["token-rank","token-expert"]},
+                                          "sampling_contract": {"const": "fixed-512-v1"},
+                                          "iters": {"const": 8},
+                                          "trials": {"const": 64},
+                                          "warmups": {"const": 32},
+                                          "samples_per_component": {"const": 512},
+                                          "qualification_indices": {"type":"array","minItems":1,"maxItems":3,"uniqueItems":true,"items":{"enum":[1,2,3]}},
+                                          "headline_component": {"const": "roundtrip"},
+                                          "headline_percentile": {"const": "p99"}
+                                        }
+                            },
+                    "points": {"type": "array","minItems": 1,"items": {"$ref": "#/$defs/point"}},
+                    "eligibility": {"$ref": "#/$defs/eligibility"}
+                  }
+          },
+      "cohort": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": [
+                    "cohort_id",
+                    "kind",
+                    "label",
+                    "description",
+                    "series_ids",
+                    "controlled_factors",
+                    "varying_factors",
+                    "publication_tier",
+                    "eligibility"
+                  ],
+            "properties": {
+                    "cohort_id": {"type": "string","pattern": "^cxcohort-v1-[0-9a-f]{64}$"},
+                    "kind": {"enum": ["library","chip","system","routing","dispatch-precision","combine-precision","precision-pair"]},
+                    "label": {"$ref": "#/$defs/label"},
+                    "description": {"$ref": "#/$defs/label"},
+                    "series_ids": {"type": "array","minItems": 2,"uniqueItems": true,"items": {"$ref": "#/$defs/seriesId"}},
+                    "controlled_factors": {"type": "array","minItems": 1,"uniqueItems": true,"items": {"$ref": "#/$defs/safeId"}},
+                    "varying_factors": {"type": "array","minItems": 1,"uniqueItems": true,"items": {"$ref": "#/$defs/safeId"}},
+                    "publication_tier": {"$ref": "#/$defs/publicationTier"},
+                    "eligibility": {"$ref": "#/$defs/eligibility"}
+                  }
+          },
+      "metric": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": ["operation","statistic","measure","objective","tokens_per_rank","phase"],
+            "properties": {
+                    "operation": {"const": "roundtrip"},
+                    "statistic": {"enum": ["p50","p99"]},
+                    "measure": {"enum": ["latency_us","activation_data_rate_gbps_at_latency_percentile","total_logical_data_rate_gbps_at_latency_percentile"]},
+                    "objective": {"enum": ["min","max"]},
+                    "tokens_per_rank": {"type": "integer","minimum": 1},
+                    "phase": {"enum": ["decode","prefill"]}
+                  }
+          },
+      "ranking": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": ["ranking_id","cohort_id","label","metric","entries","publication_tier","eligibility"],
+            "properties": {
+                    "ranking_id": {"type": "string","pattern": "^cxranking-v1-[0-9a-f]{64}$"},
+                    "cohort_id": {"type": "string","pattern": "^cxcohort-v1-[0-9a-f]{64}$"},
+                    "label": {"$ref": "#/$defs/label"},
+                    "metric": {"$ref": "#/$defs/metric"},
+                    "entries": {
+                              "type": "array",
+                              "minItems": 2,
+                              "items": {
+                                          "type": "object",
+                                          "additionalProperties": false,
+                                          "required": ["rank","series_id","point_id","value","unit"],
+                                          "properties": {
+                                                        "rank": {"type": "integer","minimum": 1},
+                                                        "series_id": {"$ref": "#/$defs/seriesId"},
+                                                        "point_id": {"$ref": "#/$defs/pointId"},
+                                                        "value": {"type": "number","exclusiveMinimum": 0},
+                                                        "unit": {"enum": ["us","GB/s"]}
+                                                      }
+                                        }
+                            },
+                    "publication_tier": {"$ref": "#/$defs/publicationTier"},
+                    "eligibility": {"$ref": "#/$defs/eligibility"}
+                  }
+          },
+      "recommendation": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": [
+                    "recommendation_id",
+                    "cohort_id",
+                    "label",
+                    "objective",
+                    "series_id",
+                    "point_id",
+                    "value",
+                    "unit",
+                    "rationale",
+                    "publication_tier",
+                    "eligibility"
+                  ],
+            "properties": {
+                    "recommendation_id": {"type": "string","pattern": "^cxrecommendation-v1-[0-9a-f]{64}$"},
+                    "cohort_id": {"type": "string","pattern": "^cxcohort-v1-[0-9a-f]{64}$"},
+                    "label": {"$ref": "#/$defs/label"},
+                    "objective": {"enum": ["min-p50-latency","min-p99-latency","max-activation-data-rate-at-p50-latency","max-activation-data-rate-at-p99-latency","max-total-logical-data-rate-at-p50-latency","max-total-logical-data-rate-at-p99-latency"]},
+                    "series_id": {"$ref": "#/$defs/seriesId"},
+                    "point_id": {"$ref": "#/$defs/pointId"},
+                    "value": {"type": "number","exclusiveMinimum": 0},
+                    "unit": {"enum": ["us","GB/s"]},
+                    "rationale": {"$ref": "#/$defs/label"},
+                    "publication_tier": {"const": "official"},
+                    "eligibility": {"$ref": "#/$defs/eligibility"}
+                  }
+          },
+      "sensitivity": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": [
+                    "sensitivity_id",
+                    "cohort_id",
+                    "label",
+                    "baseline_series_id",
+                    "candidate_series_id",
+                    "metric",
+                    "signed_change_ratio",
+                    "publication_tier",
+                    "eligibility"
+                  ],
+            "properties": {
+                    "sensitivity_id": {"type": "string","pattern": "^cxsensitivity-v1-[0-9a-f]{64}$"},
+                    "cohort_id": {"type": "string","pattern": "^cxcohort-v1-[0-9a-f]{64}$"},
+                    "label": {"$ref": "#/$defs/label"},
+                    "baseline_series_id": {"$ref": "#/$defs/seriesId"},
+                    "candidate_series_id": {"$ref": "#/$defs/seriesId"},
+                    "metric": {"$ref": "#/$defs/metric"},
+                    "signed_change_ratio": {"type": "number"},
+                    "publication_tier": {"$ref": "#/$defs/publicationTier"},
+                    "eligibility": {"$ref": "#/$defs/eligibility"}
+                  }
+          }
+    }
+}
diff --git a/experimental/CollectiveX/schemas/raw-case-v1.schema.json b/experimental/CollectiveX/schemas/raw-case-v1.schema.json
new file mode 100644
index 000000000..d5c8a73a5
--- /dev/null
+++ b/experimental/CollectiveX/schemas/raw-case-v1.schema.json
@@ -0,0 +1,1381 @@
+{
+  "$id": "https://inferencex.com/schemas/collectivex/raw-case-v1.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$defs": {
+    "deepep_v2_jit_cubin": {
+      "additionalProperties": false,
+      "properties": {
+        "cache_key": {
+          "pattern":"^kernel\\.[A-Za-z0-9_+-]+\\.[0-9a-f]{32}$",
+          "type":"string"
+        },
+        "cubin_sha256": {"pattern":"^[0-9a-f]{64}$","type":"string"},
+        "sass_sha256": {"pattern":"^[0-9a-f]{64}$","type":"string"},
+        "source_sha256": {"pattern":"^[0-9a-f]{64}$","type":"string"}
+      },
+      "required": ["cache_key","cubin_sha256","sass_sha256","source_sha256"],
+      "type": "object"
+    },
+    "hybrid_jit_rank_artifact": {
+      "additionalProperties": false,
+      "properties": {
+        "bytes": {"minimum":1,"type":"integer"},
+        "rank": {"minimum":0,"type":"integer"},
+        "sha256": {"pattern":"^[0-9a-f]{64}$","type":"string"}
+      },
+      "required": ["bytes","rank","sha256"],
+      "type": "object"
+    },
+    "hybrid_realized_config": {
+      "additionalProperties": false,
+      "properties": {
+        "backward_combine_api": {"type":"boolean"},
+        "device_side_sync_combine_api": {"type":"boolean"},
+        "device_side_sync_dispatch_api": {"type":"boolean"},
+        "forward_dispatch_api": {"type":"boolean"},
+        "hidden_dim": {"minimum":1,"type":"integer"},
+        "max_num_of_tokens_per_rank": {"minimum":1,"type":"integer"},
+        "num_of_additional_in_flight_s2g_combine_api": {"minimum":0,"type":"integer"},
+        "num_of_additional_in_flight_s2g_dispatch_api": {"minimum":0,"type":"integer"},
+        "num_of_blocks_combine_api": {"minimum":0,"type":"integer"},
+        "num_of_blocks_dispatch_api": {"minimum":0,"type":"integer"},
+        "num_of_blocks_permute": {"minimum":0,"type":"integer"},
+        "num_of_blocks_preprocessing_api": {"minimum":0,"type":"integer"},
+        "num_of_blocks_unpermute": {"minimum":0,"type":"integer"},
+        "num_of_experts_per_rank": {"minimum":1,"type":"integer"},
+        "num_of_in_flight_s2g_dispatch_api": {"minimum":0,"type":"integer"},
+        "num_of_in_flight_s2g_permute_block_dispatch_api": {"minimum":0,"type":"integer"},
+        "num_of_nodes": {"minimum":1,"type":"integer"},
+        "num_of_ranks_per_node": {"minimum":1,"type":"integer"},
+        "num_of_stages_dispatch_api": {"minimum":0,"type":"integer"},
+        "num_of_stages_g2s_combine_api": {"minimum":0,"type":"integer"},
+        "num_of_stages_permute_block_dispatch_api": {"minimum":0,"type":"integer"},
+        "num_of_stages_s2g_combine_api": {"minimum":0,"type":"integer"},
+        "num_of_threads_per_block_preprocessing_api": {"minimum":0,"type":"integer"},
+        "num_of_tokens_per_chunk_combine_api": {"minimum":0,"type":"integer"},
+        "num_of_tokens_per_chunk_dispatch_api": {"minimum":0,"type":"integer"},
+        "num_of_tokens_per_chunk_preprocessing_api": {"minimum":0,"type":"integer"},
+        "num_of_tokens_per_group_combine_api": {"minimum":0,"type":"integer"},
+        "pad_multiple": {"minimum":0,"type":"integer"},
+        "token_data_type": {"enum":["UINT8","UINT16"]}
+      },
+      "required": [
+        "backward_combine_api","device_side_sync_combine_api","device_side_sync_dispatch_api",
+        "forward_dispatch_api","hidden_dim","max_num_of_tokens_per_rank",
+        "num_of_additional_in_flight_s2g_combine_api",
+        "num_of_additional_in_flight_s2g_dispatch_api","num_of_blocks_combine_api",
+        "num_of_blocks_dispatch_api","num_of_blocks_permute","num_of_blocks_preprocessing_api",
+        "num_of_blocks_unpermute","num_of_experts_per_rank",
+        "num_of_in_flight_s2g_dispatch_api","num_of_in_flight_s2g_permute_block_dispatch_api",
+        "num_of_nodes","num_of_ranks_per_node","num_of_stages_dispatch_api",
+        "num_of_stages_g2s_combine_api","num_of_stages_permute_block_dispatch_api",
+        "num_of_stages_s2g_combine_api","num_of_threads_per_block_preprocessing_api",
+        "num_of_tokens_per_chunk_combine_api","num_of_tokens_per_chunk_dispatch_api",
+        "num_of_tokens_per_chunk_preprocessing_api","num_of_tokens_per_group_combine_api",
+        "pad_multiple","token_data_type"
+      ],
+      "type": "object"
+    },
+    "nullable_sha256": {"oneOf":[{"type":"null"},{"pattern":"^[0-9a-f]{64}$","type":"string"}]},
+    "precision_profile": {
+      "enum": [
+        "d-bf16.c-bf16",
+        "d-fp8-e4m3fn-b128-f32-prequantized.c-bf16",
+        "d-fp8-e4m3fnuz-b128-f32-prequantized.c-bf16",
+        "d-fp8-e4m3fn-b128-f32-fused.c-bf16",
+        "d-bf16.c-logfmt10-dynamic64",
+        "d-fp8-e4m3fn-b128-f32-fused.c-logfmt10-dynamic64",
+        "d-bf16.c-fp8-e4m3fn-direct-cast-noscale",
+        "d-fp8-e4m3fn-b128-f32-prequantized.c-fp8-e4m3fn-direct-cast-noscale",
+        "d-bf16.c-fp8-e4m3fnuz-direct-cast-noscale",
+        "d-fp8-e4m3fnuz-b128-f32-prequantized.c-fp8-e4m3fnuz-direct-cast-noscale"
+      ]
+    },
+    "communication_axis": {
+      "additionalProperties": false,
+      "properties": {
+        "alignment_contract": {"enum":["native-bf16-vector-alignment","hidden-block-128","native-fp8-vector-alignment","value-block-64"]},
+        "api_input_dtype": {"enum":["bf16","fp8-e4m3fn-with-f32-scale","fp8-e4m3fnuz-with-f32-scale"]},
+        "api_output_dtype": {"enum":["bf16","fp8-e4m3fn-with-f32-scale","fp8-e4m3fnuz-with-f32-scale"]},
+        "communication_format": {"enum":["bf16","fp8-e4m3fn","fp8-e4m3fnuz","logfmt10"]},
+        "conversion_boundary": {"enum":["none","before-dispatch-timing","inside-dispatch-timing","inside-combine-timing"]},
+        "padding_contract": {"enum":["none","right-zero-pad-hidden-to-128","right-zero-pad-values-to-64"]},
+        "quantization_origin": {"enum":["none","caller-prequantized","backend-fused","backend-internal","backend-internal-direct-cast"]},
+        "scale_dtype": {"oneOf":[{"type":"null"},{"enum":["f32","implicit-logfmt10"]}]},
+        "scale_group_size": {"oneOf":[{"type":"null"},{"enum":[64,128]}]},
+        "scale_layout": {"enum":["none","per-token-hidden-block","dynamic-per-64-values"]}
+      },
+      "required": [
+        "alignment_contract","api_input_dtype","api_output_dtype","communication_format",
+        "conversion_boundary","padding_contract","quantization_origin","scale_dtype",
+        "scale_group_size","scale_layout"
+      ],
+      "type": "object"
+    },
+    "communication_precision": {
+      "additionalProperties": false,
+      "properties": {
+        "combine": {"$ref":"#/$defs/communication_axis"},
+        "dispatch": {"$ref":"#/$defs/communication_axis"},
+        "modes": {
+          "items":{"enum":["normal","low-latency"]},
+          "minItems":1,
+          "type":"array",
+          "uniqueItems":true
+        },
+        "profile_id": {"$ref":"#/$defs/precision_profile"}
+      },
+      "required": ["combine","dispatch","modes","profile_id"],
+      "type": "object"
+    },
+    "byte_accounting": {
+      "additionalProperties": false,
+      "properties": {
+        "accounting_contract": {"const":"activation-data-plus-scales-v1"},
+        "activation_data_bytes": {"minimum":0,"type":"integer"},
+        "scale_bytes": {"minimum":0,"type":"integer"},
+        "total_logical_bytes": {"minimum":0,"type":"integer"}
+      },
+      "required": [
+        "accounting_contract","activation_data_bytes","scale_bytes","total_logical_bytes"
+      ],
+      "type": "object"
+    },
+    "precision_axis_evidence": {
+      "additionalProperties": false,
+      "properties": {
+        "dequantized_semantics": {"type":"boolean"},
+        "encoded_payload_valid": {"type":"boolean"},
+        "max_abs_error": {"minimum":0,"type":"number"},
+        "max_rel_error": {"minimum":0,"type":"number"},
+        "passed": {"type":"boolean"},
+        "saturation_count": {"minimum":0,"type":"integer"},
+        "saturation_rate": {"maximum":1,"minimum":0,"type":"number"},
+        "scales_finite": {"oneOf":[{"type":"null"},{"type":"boolean"}]},
+        "scales_positive": {"oneOf":[{"type":"null"},{"type":"boolean"}]}
+      },
+      "required": [
+        "dequantized_semantics","encoded_payload_valid","max_abs_error","max_rel_error",
+        "passed","saturation_count","saturation_rate","scales_finite","scales_positive"
+      ],
+      "type":"object"
+    },
+    "precision_evidence": {
+      "additionalProperties": false,
+      "properties": {
+        "combine": {"$ref":"#/$defs/precision_axis_evidence"},
+        "dispatch": {"$ref":"#/$defs/precision_axis_evidence"},
+        "passed": {"type":"boolean"},
+        "profile_id": {"$ref":"#/$defs/precision_profile"}
+      },
+      "required": ["combine","dispatch","passed","profile_id"],
+      "type":"object"
+    },
+    "case_profile": {
+      "additionalProperties": false,
+      "allOf": [
+        {
+          "if": {"properties":{"mode":{"const":"normal"}},"required":["mode"]},
+          "then": {"properties": {
+            "combine_semantics":{"const":"activation-only"},
+            "component_order_contract":{"const":"qualification-hash-rotated-components-v1"},
+            "contract":{"const":"layout-and-dispatch-v1"},
+            "correctness_scope":{"const":"dispatch-metadata-and-transformed-combine"},
+            "oracle_contract":{"const":"expert-specific-transform-v1"},
+            "payload_unit":{"const":"token-rank"}
+          }}
+        },
+        {
+          "if": {"properties":{"mode":{"const":"low-latency"}},"required":["mode"]},
+          "then": {"properties": {
+            "combine_semantics":{"const":"gate-weighted"},
+            "component_order_contract":{"const":"qualification-hash-rotated-components-v1"},
+            "contract":{"const":"expert-packed-weighted-combine-v1"},
+            "correctness_scope":{"const":"expert-assignment-and-weighted-combine"},
+            "oracle_contract":{"const":"expert-assignment-transform-v1"},
+            "payload_unit":{"const":"token-expert"}
+          }}
+        }
+      ],
+      "properties": {
+        "activation_generator": {"const":"collectivex-activation-counter-v4"},
+        "activation_profile": {"const":"canonical-counter-source-v4"},
+        "combine_dtype": {"const":"bf16"},
+        "combine_quant_mode": {"const":"none"},
+        "combine_semantics": {"enum":["activation-only","gate-weighted"]},
+        "communication_precision": {"$ref":"#/$defs/communication_precision"},
+        "component_order_contract": {"const":"qualification-hash-rotated-components-v1"},
+        "conditioning_contract": {"const":"fixed-phase-ramp-8-roundtrips-v1"},
+        "contract": {"enum":["layout-and-dispatch-v1","expert-packed-weighted-combine-v1"]},
+        "correctness_scope": {"enum":["dispatch-metadata-and-transformed-combine","expert-assignment-and-weighted-combine"]},
+        "dtype": {"const":"bf16"},
+        "eplb_planner": {"const":"greedy-rank-major-v1"},
+        "eplb_redundant_experts": {"const":32},
+        "eplb_reference_tokens_per_rank": {"const":2048},
+        "mode": {"enum":["normal","low-latency"]},
+        "oracle_contract": {"enum":["expert-specific-transform-v1","expert-assignment-transform-v1"]},
+        "oracle_tolerances": {"const":"rtol=0.05,atol=0.02"},
+        "payload_unit": {"enum":["token-rank","token-expert"]},
+        "placement": {"const":"packed"},
+        "percentile_method": {"const":"nearest-rank"},
+        "rank_reduction": {"const":"cross-rank-max-per-iteration"},
+        "resource_mode": {"const":"fixed-profile"},
+        "routing_generator": {"const":"collectivex-routing-counter-v3"},
+        "sampling_contract": {"const":"fixed-512-v1"},
+        "seed": {"const":67},
+        "source_identity_contract": {"const":"bounded-sign-bit-source-v1"}
+      },
+      "required": [
+        "activation_generator","activation_profile","combine_dtype","combine_quant_mode",
+        "combine_semantics","component_order_contract","conditioning_contract","contract",
+        "correctness_scope","dtype","eplb_planner","eplb_redundant_experts",
+        "eplb_reference_tokens_per_rank","mode","oracle_contract","oracle_tolerances",
+        "payload_unit","placement","percentile_method","rank_reduction","resource_mode",
+        "routing_generator","sampling_contract","seed","source_identity_contract"
+      ],
+      "type": "object"
+    },
+    "oracle": {
+      "additionalProperties": false,
+      "properties": {
+        "checks": {
+          "additionalProperties": false,
+          "properties": {
+            "combine_values": {"type":"boolean"},
+            "counts": {"type":"boolean"},
+            "metadata": {"type":"boolean"},
+            "multiplicity": {"type":"boolean"},
+            "payload": {"type":"boolean"},
+            "source_set": {"type":"boolean"},
+            "weights": {"type":"boolean"}
+          },
+          "required": ["combine_values","counts","metadata","multiplicity","payload","source_set","weights"],
+          "type": "object"
+        },
+        "atol": {"const":0.02},
+        "combine_weight_semantics": {"enum":["unweighted-rank-sum","gate-weighted-sum"]},
+        "contract": {"enum":["expert-specific-transform-v1","expert-assignment-transform-v1"]},
+        "dispatch_sha256": {"$ref":"#/$defs/nullable_sha256"},
+        "max_absolute_error": {"oneOf":[{"type":"null"},{"minimum":0,"type":"number"}]},
+        "max_elementwise_relative_error": {"oneOf":[{"type":"null"},{"minimum":0,"type":"number"}]},
+        "max_relative_error": {"oneOf":[{"type":"null"},{"minimum":0,"type":"number"}]},
+        "max_weight_error": {"oneOf":[{"type":"null"},{"minimum":0,"type":"number"}]},
+        "order_sha256": {"$ref":"#/$defs/nullable_sha256"},
+        "ordering_contract": {"minLength":1,"type":"string"},
+        "passed": {"type":"boolean"},
+        "receive_count": {"minimum":0,"type":"integer"},
+        "rtol": {"const":0.05}
+      },
+      "required": [
+        "atol",
+        "checks",
+        "combine_weight_semantics",
+        "contract",
+        "dispatch_sha256",
+        "max_absolute_error",
+        "max_elementwise_relative_error",
+        "max_relative_error",
+        "max_weight_error",
+        "order_sha256",
+        "ordering_contract",
+        "passed",
+        "receive_count",
+        "rtol"
+      ],
+      "type": "object"
+    },
+    "percentiles": {
+      "additionalProperties": false,
+      "properties": {
+        "p50": {"minimum":0,"type":"number"},
+        "p90": {"minimum":0,"type":"number"},
+        "p95": {"minimum":0,"type":"number"},
+        "p99": {"minimum":0,"type":"number"}
+      },
+      "required": ["p50","p90","p95","p99"],
+      "type": "object"
+    },
+    "component": {
+      "additionalProperties": false,
+      "allOf": [
+        {
+          "if": {"properties":{"availability":{"const":"measured"}},"required":["availability"]},
+          "then": {
+            "properties": {
+              "origin": {"const":"measured"},
+              "percentiles_us": {"$ref":"#/$defs/percentiles"},
+              "sample_count": {"const":512}
+            }
+          }
+        },
+        {
+          "if": {"properties":{"availability":{"const":"unavailable"}},"required":["availability"]},
+          "then": {
+            "properties": {
+              "percentiles_us": {"type":"null"},
+              "sample_count": {"const":0}
+            }
+          }
+        }
+      ],
+      "properties": {
+        "availability": {"enum":["measured","derived","unavailable"]},
+        "origin": {"oneOf":[{"type":"null"},{"minLength":1,"type":"string"}]},
+        "percentiles_us": {"oneOf":[{"type":"null"},{"$ref":"#/$defs/percentiles"}]},
+        "sample_count": {"minimum":0,"type":"integer"}
+      },
+      "required": ["availability","origin","percentiles_us","sample_count"],
+      "type": "object"
+    },
+    "histogram": {
+      "additionalProperties": false,
+      "properties": {
+        "bins": {"minimum":1,"type":"integer"},
+        "counts": {"items":{"minimum":0,"type":"integer"},"minItems":1,"type":"array"},
+        "max": {"minimum":0,"type":"number"},
+        "min": {"minimum":0,"type":"number"},
+        "n": {"minimum":1,"type":"integer"}
+      },
+      "required": ["n","min","max","bins","counts"],
+      "type": "object"
+    },
+    "scheduled_case": {
+      "additionalProperties": false,
+      "properties": {
+        "backend": {"maxLength":128,"pattern":"^[a-z0-9][a-z0-9_.-]*$","type":"string"},
+        "canonical": {"const":true},
+        "ep": {"minimum":1,"type":"integer"},
+        "eplb": {"type":"boolean"},
+        "experts": {"minimum":1,"type":"integer"},
+        "gpus_per_node": {"minimum":1,"type":"integer"},
+        "hidden": {"minimum":1,"type":"integer"},
+        "ladder": {"pattern":"^[1-9][0-9]*( [1-9][0-9]*)*$","type":"string"},
+        "mode": {"enum":["normal","low-latency"]},
+        "nodes": {"minimum":1,"type":"integer"},
+        "phase": {"enum":["decode","prefill"]},
+        "precision_profile": {"$ref":"#/$defs/precision_profile"},
+        "required_publication": {"enum":["official","comparable-experimental"]},
+        "routing": {"enum":["uniform","zipf"]},
+        "samples_per_point": {"const":512},
+        "scale_out_transport": {"oneOf":[{"type":"null"},{"maxLength":128,"pattern":"^[a-z0-9][a-z0-9_.-]*$","type":"string"}]},
+        "scale_up_domain": {"minimum":1,"type":"integer"},
+        "scale_up_transport": {"maxLength":128,"pattern":"^[a-z0-9][a-z0-9_.-]*$","type":"string"},
+        "scope": {"enum":["scale-up","scale-out"]},
+        "suite": {"maxLength":128,"pattern":"^[a-z0-9][a-z0-9_.-]*$","type":"string"},
+        "timing": {"const":"8:64:32"},
+        "topk": {"minimum":1,"type":"integer"},
+        "topology_class": {"maxLength":128,"pattern":"^[a-z0-9][a-z0-9_.-]*$","type":"string"},
+        "transport": {"maxLength":128,"pattern":"^[a-z0-9][a-z0-9_.-]*$","type":"string"},
+        "warmup_semantics": {"const":"full-roundtrip-before-each-component-trial-point-v1"},
+        "workload": {"maxLength":128,"pattern":"^[a-z0-9][a-z0-9_.-]*$","type":"string"}
+      },
+      "required": [
+        "backend",
+        "canonical",
+        "eplb",
+        "ep",
+        "experts",
+        "gpus_per_node",
+        "hidden",
+        "ladder",
+        "mode",
+        "nodes",
+        "phase",
+        "required_publication",
+        "routing",
+        "samples_per_point",
+        "scale_out_transport",
+        "scale_up_domain",
+        "scale_up_transport",
+        "scope",
+        "suite",
+        "timing",
+        "topk",
+        "topology_class",
+        "transport",
+        "warmup_semantics",
+        "workload"
+      ],
+      "type": "object"
+    },
+    "git_run": {
+      "additionalProperties": false,
+      "properties": {
+        "artifact": {"minLength":1,"type":"string"},
+        "job": {"minLength":1,"type":"string"},
+        "ref": {"minLength":1,"type":"string"},
+        "repo": {"pattern":"^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$","type":"string"},
+        "run_attempt": {"pattern":"^[1-9][0-9]*$","type":"string"},
+        "run_id": {"pattern":"^[1-9][0-9]*$","type":"string"},
+        "qualification_index": {"maximum":3,"minimum":1,"type":"integer"},
+        "source_sha": {"pattern":"^[0-9a-f]{40}$","type":"string"}
+      },
+      "required": ["artifact","job","qualification_index","ref","repo","run_attempt","run_id","source_sha"],
+      "type": "object"
+    }
+  },
+  "additionalProperties": false,
+  "allOf": [
+    {
+      "if": {
+        "properties": {
+          "workload": {
+            "properties": {"source": {"const":"canonical-serialized"}},
+            "required": ["source"]
+          }
+        },
+        "required": ["workload"]
+      },
+      "then": {
+        "properties": {
+          "provenance": {
+            "properties": {
+              "allocation_stratum_sha256": {
+                "pattern":"^[0-9a-f]{64}$",
+                "type":"string"
+              }
+            },
+            "required": ["allocation_stratum_sha256"]
+          }
+        }
+      }
+    }
+  ],
+  "properties": {
+    "case": {
+      "additionalProperties": false,
+      "properties": {
+        "attempt_ordinal": {"minimum":1,"type":"integer"},
+        "backend": {"maxLength":128,"pattern":"^[a-z0-9][a-z0-9_.-]*$","type":"string"},
+        "ep_size": {"minimum":1,"type":"integer"},
+        "eplb": {
+          "additionalProperties": false,
+          "allOf": [
+            {
+              "if": {"properties":{"enabled":{"const":true}},"required":["enabled"]},
+              "then": {
+                "properties": {
+                  "calibration_token_offset": {"minimum":0,"type":"integer"},
+                  "calibration_trace_sha256": {"pattern":"^[0-9a-f]{64}$","type":"string"},
+                  "calibration_window": {"const":"collectivex-eplb-calibration-window-v1"},
+                  "calibration_workload_id": {"pattern":"^cxwork-v1-[0-9a-f]{64}$","type":"string"}
+                }
+              },
+              "else": {
+                "properties": {
+                  "calibration_token_offset": {"type":"null"},
+                  "calibration_trace_sha256": {"type":"null"},
+                  "calibration_window": {"type":"null"},
+                  "calibration_workload_id": {"type":"null"}
+                }
+              }
+            }
+          ],
+          "properties": {
+            "calibration_token_offset": {"oneOf":[{"type":"null"},{"minimum":0,"type":"integer"}]},
+            "calibration_trace_sha256": {"$ref":"#/$defs/nullable_sha256"},
+            "calibration_window": {"oneOf":[{"type":"null"},{"const":"collectivex-eplb-calibration-window-v1"}]},
+            "calibration_workload_id": {"oneOf":[{"type":"null"},{"pattern":"^cxwork-v1-[0-9a-f]{64}$","type":"string"}]},
+            "enabled": {"type":"boolean"},
+            "imbalance_after": {"oneOf":[{"type":"null"},{"minimum":0,"type":"number"}]},
+            "imbalance_before": {"oneOf":[{"type":"null"},{"minimum":0,"type":"number"}]},
+            "mapping_hash": {"$ref":"#/$defs/nullable_sha256"},
+            "max_replicas": {"oneOf":[{"type":"null"},{"minimum":0,"type":"integer"}]},
+            "num_logical_experts": {"minimum":1,"type":"integer"},
+            "num_physical_experts": {"minimum":1,"type":"integer"},
+            "num_redundant": {"minimum":0,"type":"integer"},
+            "planner": {"oneOf":[{"type":"null"},{"minLength":1,"type":"string"}]},
+            "reference_tokens_per_rank": {"oneOf":[{"type":"null"},{"minimum":1,"type":"integer"}]},
+            "replicated_experts": {"minimum":0,"type":"integer"}
+          },
+          "required": [
+            "calibration_token_offset",
+            "calibration_trace_sha256",
+            "calibration_window",
+            "calibration_workload_id",
+            "enabled",
+            "imbalance_after",
+            "imbalance_before",
+            "mapping_hash",
+            "max_replicas",
+            "num_logical_experts",
+            "num_physical_experts",
+            "num_redundant",
+            "planner",
+            "reference_tokens_per_rank",
+            "replicated_experts"
+          ],
+          "type": "object"
+        },
+        "mode": {"enum":["normal","low-latency"]},
+        "phase": {"enum":["decode","prefill"]},
+        "required_publication": {"enum":["official","comparable-experimental"]},
+        "resource_mode": {"const":"fixed-profile"},
+        "runner": {"maxLength":128,"pattern":"^[a-z0-9][a-z0-9_.-]*$","type":"string"},
+        "shape": {
+          "additionalProperties": false,
+          "properties": {
+            "activation_profile": {"const":"canonical-counter-source-v4"},
+            "combine_precision": {"$ref":"#/$defs/communication_axis"},
+            "dispatch_precision": {"$ref":"#/$defs/communication_axis"},
+            "eplb": {"type":"boolean"},
+            "experts": {"minimum":1,"type":"integer"},
+            "experts_per_rank": {"minimum":1,"type":"integer"},
+            "hidden": {"minimum":1,"type":"integer"},
+            "kernel_gen": {"oneOf":[{"type":"null"},{"minLength":1,"type":"string"}]},
+            "num_logical_experts": {"minimum":1,"type":"integer"},
+            "precision_profile": {"$ref":"#/$defs/precision_profile"},
+            "routing": {"enum":["uniform","zipf"]},
+            "topk": {"minimum":1,"type":"integer"}
+          },
+          "required": [
+            "activation_profile",
+            "combine_precision",
+            "dispatch_precision",
+            "eplb",
+            "experts",
+            "experts_per_rank",
+            "hidden",
+            "kernel_gen",
+            "num_logical_experts",
+            "precision_profile",
+            "routing",
+            "topk"
+          ],
+          "type": "object"
+        },
+        "suite": {"maxLength":128,"pattern":"^[a-z0-9][a-z0-9_.-]*$","type":"string"},
+        "workload_name": {"maxLength":128,"pattern":"^[a-z0-9][a-z0-9_.-]*$","type":"string"}
+      },
+      "required": [
+        "attempt_ordinal",
+        "backend",
+        "eplb",
+        "ep_size",
+        "mode",
+        "phase",
+        "required_publication",
+        "resource_mode",
+        "runner",
+        "shape",
+        "suite",
+        "workload_name"
+      ],
+      "type": "object"
+    },
+    "format": {"const":"collectivex.ep.v1"},
+    "generated_at": {"format":"date-time","type":"string"},
+    "identity": {
+      "additionalProperties": false,
+      "properties": {
+        "allocation_factors": {
+          "additionalProperties": false,
+          "properties": {
+            "artifact": {"oneOf":[{"type":"null"},{"minLength":1,"type":"string"}]},
+            "execution_id": {"oneOf":[{"type":"null"},{"minLength":1,"type":"string"}]},
+            "job": {"oneOf":[{"type":"null"},{"minLength":1,"type":"string"}]},
+            "repo": {"oneOf":[{"type":"null"},{"minLength":1,"type":"string"}]},
+            "run_attempt": {"oneOf":[{"type":"null"},{"minLength":1,"type":"string"}]},
+            "run_id": {"oneOf":[{"type":"null"},{"minLength":1,"type":"string"}]},
+            "qualification_index": {"maximum":3,"minimum":1,"type":"integer"},
+            "runner": {"oneOf":[{"type":"null"},{"minLength":1,"type":"string"}]},
+            "source_sha": {"oneOf":[{"type":"null"},{"minLength":1,"type":"string"}]}
+          },
+          "required": ["artifact","execution_id","job","qualification_index","repo","run_attempt","run_id","runner","source_sha"],
+          "type": "object"
+        },
+        "allocation_id": {"pattern":"^cxallocation-v1-[0-9a-f]{64}$","type":"string"},
+        "attempt_id": {"pattern":"^cxattempt-v1-[0-9a-f]{64}$","type":"string"},
+        "attempt_ordinal": {"minimum":1,"type":"integer"},
+        "case_factors": {
+          "additionalProperties": false,
+          "properties": {
+            "case": {"$ref":"#/$defs/scheduled_case"},
+            "profile": {"$ref":"#/$defs/case_profile"},
+            "sku": {"maxLength":128,"pattern":"^[a-z0-9][a-z0-9_.-]*$","type":"string"}
+          },
+          "required": ["case","profile","sku"],
+          "type": "object"
+        },
+        "case_id": {"pattern":"^cxcase-v1-[0-9a-f]{64}$","type":"string"},
+        "series_factors": {
+          "additionalProperties": false,
+          "properties": {
+            "backend": {"maxLength":128,"pattern":"^[a-z0-9][a-z0-9_.-]*$","type":"string"},
+            "implementation_contract_sha256": {"pattern":"^[0-9a-f]{64}$","type":"string"},
+            "public_config_sha256": {"pattern":"^[0-9a-f]{64}$","type":"string"},
+            "routing_control_sha256": {"pattern":"^[0-9a-f]{64}$","type":"string"},
+            "case_id": {"pattern":"^cxcase-v1-[0-9a-f]{64}$","type":"string"},
+            "image_digest": {"oneOf":[{"type":"null"},{"minLength":1,"type":"string"}]},
+            "runtime_fingerprint_sha256": {"pattern":"^[0-9a-f]{64}$","type":"string"},
+            "source_sha": {"oneOf":[{"type":"null"},{"pattern":"^[0-9a-f]{40}$","type":"string"}]},
+            "squash_sha256": {"$ref":"#/$defs/nullable_sha256"},
+            "workload_id": {"pattern":"^cxwork-v1-[0-9a-f]{64}$","type":"string"}
+          },
+          "required": [
+            "backend",
+            "implementation_contract_sha256",
+            "public_config_sha256",
+            "routing_control_sha256",
+            "case_id",
+            "image_digest",
+            "runtime_fingerprint_sha256",
+            "source_sha",
+            "squash_sha256",
+            "workload_id"
+          ],
+          "type": "object"
+        },
+        "series_id": {"pattern":"^cxseries-v1-[0-9a-f]{64}$","type":"string"}
+      },
+      "required": [
+        "allocation_factors",
+        "allocation_id",
+        "attempt_id",
+        "attempt_ordinal",
+        "case_factors",
+        "case_id",
+        "series_factors",
+        "series_id"
+      ],
+      "type": "object"
+    },
+    "implementation": {
+      "additionalProperties": false,
+      "properties": {
+        "kernel_generation": {"oneOf":[{"type":"null"},{"minLength":1,"type":"string"}]},
+        "name": {"maxLength":128,"pattern":"^[a-z0-9][a-z0-9_.-]*$","type":"string"},
+        "provenance": {
+          "properties": {
+            "allow_hybrid_mode": {"type":"boolean"},
+            "communication_backend": {"enum":["nccl-device-lsa","nccl-gin"]},
+            "deepep_fix_pr": {"const":630},
+            "deepep_pr": {"const":605},
+            "deterministic": {"type": "boolean"},
+            "gin_enabled": {"type":"boolean"},
+            "jit_cubins": {
+              "items": {"$ref":"#/$defs/deepep_v2_jit_cubin"},
+              "maxItems": 5,
+              "minItems": 5,
+              "type": "array",
+              "uniqueItems": true
+            },
+            "jit_kernel_keys": {
+              "items": {"maxLength":512,"pattern":"^[A-Za-z0-9][A-Za-z0-9_.+-]*$","type":"string"},
+              "maxItems": 3,
+              "minItems": 3,
+              "type": "array",
+              "uniqueItems": true
+            },
+            "jit_random_seed": {"const":"collectivex-deepep-v2-fa8a9b1"},
+            "jit_shared_objects": {
+              "items": {
+                "additionalProperties": false,
+                "properties": {
+                  "kernel_key": {"maxLength":512,"pattern":"^[A-Za-z0-9][A-Za-z0-9_.+-]*$","type":"string"},
+                  "rank_artifacts": {
+                    "items": {"$ref":"#/$defs/hybrid_jit_rank_artifact"},
+                    "minItems": 1,
+                    "type": "array"
+                  }
+                },
+                "required": ["kernel_key","rank_artifacts"],
+                "type": "object"
+              },
+              "maxItems": 3,
+              "minItems": 3,
+              "type": "array"
+            },
+            "num_experts": {"minimum": 1, "type": "integer"},
+            "num_nvl_bytes": {"minimum": 0, "type": "integer"},
+            "num_qps_per_rank": {"minimum": 1, "type": "integer"},
+            "num_rdma_bytes": {"minimum": 0, "type": "integer"},
+            "rdma_block_num": {"minimum": 0, "type": "integer"},
+            "realized_config": {"$ref":"#/$defs/hybrid_realized_config"},
+            "tuning_num_experts": {"minimum": 1, "type": "integer"},
+            "uccl_dependency_versions": {
+              "additionalProperties": false,
+              "properties": {
+                "intervaltree": {"const":"3.1.0"},
+                "nvidia-cuda-runtime-cu12": {"const":"12.9.79"},
+                "sortedcontainers": {"const":"2.4.0"}
+              },
+              "required": ["intervaltree","nvidia-cuda-runtime-cu12","sortedcontainers"],
+              "type": "object"
+            },
+            "use_external_inp_buf": {"type": "boolean"}
+          },
+          "type": "object",
+          "propertyNames": {
+            "enum": [
+              "allocated_qps",
+              "allow_hybrid_mode",
+              "allow_mnnvl",
+              "allow_multiple_reduction",
+              "api",
+              "api_signature_sha256",
+              "backend",
+              "backend_lineage",
+              "block_num",
+              "block_num_floored",
+              "block_num_target",
+              "branch",
+              "collective_library",
+              "combine_dtype",
+              "combine_warps",
+              "communication_backend",
+              "cuda_version",
+              "deepep_commit",
+              "deepep_distribution_version",
+              "deepep_fix_pr",
+              "deepep_pr",
+              "deepep_tree",
+              "deepep_version",
+              "deterministic",
+              "device_cus",
+              "device_sms",
+              "dispatch_dtype",
+              "dispatch_warps",
+              "enable_sdma",
+              "fmt_commit",
+              "gpus_per_node",
+              "gin_enabled",
+              "heap_size",
+              "impl",
+              "jit_cache_key",
+              "jit_cubins",
+              "jit_kernel_keys",
+              "jit_random_seed",
+              "jit_shared_objects",
+              "kernel_type",
+              "loaded_libraries",
+              "local_experts",
+              "logical_scaleout_ranks",
+              "logical_scaleup_ranks",
+              "mapping_variant",
+              "max_num_inp_token_per_rank",
+              "max_num_tokens",
+              "max_total_recv_tokens",
+              "mnnvl_comm",
+              "mode",
+              "mori_commit",
+              "nccl_communicator",
+              "nccl_package_version",
+              "nccl_version",
+              "num_experts",
+              "num_max_tokens_per_rank",
+              "num_nvl_bytes",
+              "num_qps",
+              "num_qps_per_rank",
+              "num_rdma_bytes",
+              "num_sms",
+              "nvshmem_package_version",
+              "path",
+              "physical_nvlink_ranks",
+              "physical_rdma_ranks",
+              "prefer_overlap_with_compute",
+              "rdma_block_num",
+              "reference_semantics",
+              "realized_config",
+              "requested_num_sms",
+              "resource_mode",
+              "routing_factor",
+              "routing_metadata",
+              "sm_fraction",
+              "top_k",
+              "torch_git_version",
+              "torch_version",
+              "transport",
+              "trtllm",
+              "tuned_source",
+              "tuning_num_experts",
+              "uccl_commit",
+              "uccl_dependency_versions",
+              "uccl_version",
+              "uccl_wrapper_commit",
+              "use_external_inp_buf",
+              "workspace"
+            ]
+          }
+        },
+        "resource_profile": {
+          "additionalProperties": false,
+          "properties": {
+            "achieved_fraction": {},
+            "comm_units_kind": {},
+            "configured_units": {},
+            "conformance_class": {},
+            "device_units": {},
+            "fixed_kernel": {},
+            "nonconforming": {},
+            "pareto_eligible": {},
+            "persistent_bytes": {},
+            "qps_per_rank": {},
+            "requested_fraction": {},
+            "tuned_source": {},
+            "target_achieved_within_tol": {},
+            "tolerance": {},
+            "resource_class": {},
+            "warps_combine": {},
+            "warps_dispatch": {}
+          },
+          "required": [
+            "comm_units_kind",
+            "requested_fraction",
+            "configured_units",
+            "device_units",
+            "achieved_fraction",
+            "warps_dispatch",
+            "warps_combine",
+            "qps_per_rank",
+            "persistent_bytes",
+            "tuned_source",
+            "resource_class",
+            "conformance_class",
+            "tolerance",
+            "target_achieved_within_tol",
+            "nonconforming",
+            "fixed_kernel",
+            "pareto_eligible"
+          ],
+          "type": "object"
+        }
+      },
+      "required": ["kernel_generation","name","provenance","resource_profile"],
+      "type": "object"
+    },
+    "measurement": {
+      "additionalProperties": false,
+      "properties": {
+        "component_order_contract": {"const":"qualification-hash-rotated-components-v1"},
+        "conditioning": {
+          "additionalProperties": false,
+          "properties": {
+            "contract": {"const":"fixed-phase-ramp-8-roundtrips-v1"},
+            "ladder": {"items":{"minimum":1,"type":"integer"},"minItems":1,"type":"array"},
+            "roundtrips_per_shape": {"const":8}
+          },
+          "required": ["contract","ladder","roundtrips_per_shape"],
+          "type": "object"
+        },
+        "contract": {"enum":["layout-and-dispatch-v1","expert-packed-weighted-combine-v1"]},
+        "execution_order_sha256": {"pattern":"^[0-9a-f]{64}$","type":"string"},
+        "qualification_index": {"maximum":3,"minimum":1,"type":"integer"},
+        "rows": {
+          "items": {
+            "additionalProperties": false,
+            "properties": {
+              "anomalies": {
+                "items": {
+                  "additionalProperties": false,
+                  "properties": {
+                    "T": {"minimum":1,"type":"integer"},
+                    "component_floor_p50": {"minimum":0,"type":"number"},
+                    "isolated_sum_p99": {"minimum":0,"type":"number"},
+                    "ratio": {"minimum":0,"type":"number"},
+                    "roundtrip_p50": {"minimum":0,"type":"number"},
+                    "roundtrip_p99": {"minimum":0,"type":"number"},
+                    "threshold": {"minimum":0,"type":"number"},
+                    "type": {"enum":["roundtrip_gt_isolated_sum","roundtrip_lt_component_floor"]}
+                  },
+                  "required": ["type","T"],
+                  "type": "object"
+                },
+                "type": "array"
+              },
+              "components": {
+                "additionalProperties": false,
+                "properties": {
+                  "combine": {"$ref":"#/$defs/component"},
+                  "dispatch": {"$ref":"#/$defs/component"},
+                  "isolated_sum": {"$ref":"#/$defs/component"},
+                  "roundtrip": {"$ref":"#/$defs/component"},
+                  "stage": {"$ref":"#/$defs/component"}
+                },
+                "required": ["combine","dispatch","isolated_sum","roundtrip","stage"],
+                "type": "object"
+              },
+              "correctness": {
+                "additionalProperties": false,
+                "properties": {
+                  "contract": {"enum":["expert-specific-transform-v1","expert-assignment-transform-v1"]},
+                  "max_relative_error": {"minimum":0,"type":"number"},
+                  "passed": {"type":"boolean"},
+                  "precision": {"$ref":"#/$defs/precision_evidence"},
+                  "rank_evidence": {
+                    "items": {
+                      "additionalProperties": false,
+                      "properties": {
+                        "input_unchanged": {"type":"boolean"},
+                        "order_stable": {"type":"boolean"},
+                        "post_timing": {"$ref":"#/$defs/oracle"},
+                        "pre_timing": {"$ref":"#/$defs/oracle"},
+                        "rank": {"minimum":0,"type":"integer"}
+                      },
+                      "required": ["input_unchanged","order_stable","post_timing","pre_timing","rank"],
+                      "type": "object"
+                    },
+                    "minItems": 1,
+                    "type": "array"
+                  },
+                  "scope": {"enum":["dispatch-metadata-and-transformed-combine","expert-assignment-and-weighted-combine"]}
+                },
+                "required": ["contract","max_relative_error","passed","precision","rank_evidence","scope"],
+                "type": "object"
+              },
+              "evidence_id": {"pattern":"^cxevidence-v1-[0-9a-f]{64}$","type":"string"},
+              "global_tokens": {"minimum":1,"type":"integer"},
+              "byte_provenance": {
+                "additionalProperties": false,
+                "properties": {
+                  "combine": {"$ref":"#/$defs/byte_accounting"},
+                  "dispatch": {"$ref":"#/$defs/byte_accounting"},
+                  "roundtrip": {"$ref":"#/$defs/byte_accounting"},
+                  "stage": {"$ref":"#/$defs/byte_accounting"}
+                },
+                "required": ["combine","dispatch","roundtrip","stage"],
+                "type": "object"
+              },
+              "point_id": {"pattern":"^cxpoint-v1-[0-9a-f]{64}$","type":"string"},
+              "receive": {
+                "additionalProperties": false,
+                "properties": {
+                  "max": {"minimum":0,"type":"integer"},
+                  "mean": {"minimum":0,"type":"number"},
+                  "min": {"minimum":0,"type":"integer"},
+                  "total": {"minimum":0,"type":"integer"}
+                },
+                "required": ["max","mean","min","total"],
+                "type": "object"
+              },
+              "routing": {
+                "additionalProperties": false,
+                "properties": {
+                  "empty_expert_count": {"minimum":0,"type":"integer"},
+                  "empty_rank_count": {"minimum":0,"type":"integer"},
+                  "expert_assignment_rank_cv": {"minimum":0,"type":"number"},
+                  "expert_assignments_per_rank": {"items":{"minimum":0,"type":"integer"},"type":"array"},
+                  "expert_load_cv": {"minimum":0,"type":"number"},
+                  "expert_load_max": {"minimum":0,"type":"integer"},
+                  "expert_load_mean": {"minimum":0,"type":"number"},
+                  "expert_load_min": {"minimum":0,"type":"integer"},
+                  "fanout_histogram": {"items":{"minimum":0,"type":"integer"},"type":"array"},
+                  "fanout_max": {"minimum":1,"type":"integer"},
+                  "fanout_mean": {"minimum":0,"type":"number"},
+                  "fanout_min": {"minimum":1,"type":"integer"},
+                  "hash": {"pattern":"^[0-9a-f]{64}$","type":"string"},
+                  "hotspot_ratio": {"minimum":0,"type":"number"},
+                  "locality": {
+                    "oneOf": [
+                      {"type":"null"},
+                      {
+                        "additionalProperties": false,
+                        "properties": {
+                          "copies": {"minimum":0,"type":"integer"},
+                          "cross_domain_fraction": {"minimum":0,"type":"number"},
+                          "cross_node_fraction": {"minimum":0,"type":"number"},
+                          "gpus_per_node": {"minimum":1,"type":"integer"},
+                          "local_rank_fraction": {"minimum":0,"type":"number"},
+                          "placement": {"const":"packed"},
+                          "same_node_fraction": {"minimum":0,"type":"number"},
+                          "same_scaleup_domain_fraction": {"minimum":0,"type":"number"},
+                          "scale_up_domain": {"minimum":1,"type":"integer"}
+                        },
+                        "required": [
+                          "placement",
+                          "local_rank_fraction",
+                          "same_node_fraction",
+                          "same_scaleup_domain_fraction",
+                          "cross_node_fraction",
+                          "cross_domain_fraction",
+                          "gpus_per_node",
+                          "scale_up_domain",
+                          "copies"
+                        ],
+                        "type": "object"
+                      }
+                    ]
+                  },
+                  "payload_copies_per_rank": {"items":{"minimum":0,"type":"integer"},"type":"array"},
+                  "payload_rank_cv": {"minimum":0,"type":"number"},
+                  "routed_copies": {"minimum":1,"type":"integer"},
+                  "source_token_stats": {
+                    "oneOf": [
+                      {"type":"null"},
+                      {
+                        "additionalProperties": false,
+                        "properties": {
+                          "cv": {"minimum":0,"type":"number"},
+                          "empty_ranks": {"minimum":0,"type":"integer"},
+                          "max": {"minimum":0,"type":"integer"},
+                          "mean": {"minimum":0,"type":"number"},
+                          "min": {"minimum":0,"type":"integer"},
+                          "ranks": {"minimum":1,"type":"integer"},
+                          "total": {"minimum":0,"type":"integer"}
+                        },
+                        "required": ["min","mean","max","cv","empty_ranks","total","ranks"],
+                        "type": "object"
+                      }
+                    ]
+                  }
+                },
+                "required": [
+                  "empty_expert_count",
+                  "empty_rank_count",
+                  "expert_assignment_rank_cv",
+                  "expert_assignments_per_rank",
+                  "expert_load_cv",
+                  "expert_load_max",
+                  "expert_load_mean",
+                  "expert_load_min",
+                  "fanout_histogram",
+                  "fanout_max",
+                  "fanout_mean",
+                  "fanout_min",
+                  "hash",
+                  "hotspot_ratio",
+                  "locality",
+                  "payload_copies_per_rank",
+                  "payload_rank_cv",
+                  "routed_copies",
+                  "source_token_stats"
+                ],
+                "type": "object"
+              },
+              "sample_histograms": {
+                "additionalProperties": false,
+                "properties": {
+                  "combine": {"oneOf":[{"type":"null"},{"$ref":"#/$defs/histogram"}]},
+                  "dispatch": {"oneOf":[{"type":"null"},{"$ref":"#/$defs/histogram"}]},
+                  "roundtrip": {"$ref":"#/$defs/histogram"},
+                  "stage": {"oneOf":[{"type":"null"},{"$ref":"#/$defs/histogram"}]}
+                },
+                "required": ["dispatch","combine","roundtrip","stage"],
+                "type": "object"
+              },
+              "sample_sha256": {"pattern":"^[0-9a-f]{64}$","type":"string"},
+              "token_rate_at_latency_percentile": {"$ref":"#/$defs/percentiles"},
+              "tokens_per_rank": {"minimum":1,"type":"integer"}
+            },
+            "required": [
+              "anomalies",
+              "components",
+              "correctness",
+              "byte_provenance",
+              "evidence_id",
+              "global_tokens",
+              "point_id",
+              "receive",
+              "routing",
+              "sample_histograms",
+              "sample_sha256",
+              "token_rate_at_latency_percentile",
+              "tokens_per_rank"
+            ],
+            "type": "object"
+          },
+          "minItems": 1,
+          "type": "array"
+        },
+        "sampling": {
+          "additionalProperties": false,
+          "properties": {
+            "contract": {"const":"fixed-512-v1"},
+            "iterations_per_trial": {"const":8},
+            "percentile_method": {"const":"nearest-rank"},
+            "reduction": {"const":"cross-rank-max-per-iteration"},
+            "samples_per_component": {"const":512},
+            "trials": {"const":64},
+            "warmup_iterations": {"const":32},
+            "warmup_semantics": {"const":"full-roundtrip-before-each-component-trial-point-v1"}
+          },
+          "required": [
+            "contract",
+            "iterations_per_trial",
+            "percentile_method",
+            "reduction",
+            "samples_per_component",
+            "trials",
+            "warmup_iterations",
+            "warmup_semantics"
+          ],
+          "type": "object"
+        },
+        "source_allocation": {"const":"even"}
+      },
+      "required": [
+        "component_order_contract",
+        "conditioning",
+        "contract",
+        "execution_order_sha256",
+        "qualification_index",
+        "rows",
+        "sampling",
+        "source_allocation"
+      ],
+      "type": "object"
+    },
+    "outcome": {
+      "additionalProperties": false,
+      "properties": {
+        "publication_status": {"enum":["diagnostic","invalid"]},
+        "reasons": {"items":{"type":"string"},"type":"array"},
+        "status": {"enum":["success","invalid"]},
+        "validity": {
+          "additionalProperties": false,
+          "properties": {
+            "anomaly_free": {"type":"boolean"},
+            "execution_status": {"enum":["complete","failed"]},
+            "measurement_conformance": {"enum":["conformant","nonconformant"]},
+            "provenance_complete": {"type":"boolean"},
+            "resource_conformance": {"minLength":1,"type":"string"},
+            "sampling_conformance": {"enum":["conformant","nonconformant"]},
+            "semantic_correctness": {"enum":["pass","fail"]},
+            "workload_identity": {"enum":["consistent-across-ranks","inconsistent"]},
+            "workload_source": {"enum":["canonical-serialized","seeded-runtime"]}
+          },
+          "required": [
+            "execution_status",
+            "semantic_correctness",
+            "workload_identity",
+            "workload_source",
+            "measurement_conformance",
+            "sampling_conformance",
+            "resource_conformance",
+            "provenance_complete",
+            "anomaly_free"
+          ],
+          "type": "object"
+        }
+      },
+      "required": ["publication_status","reasons","status","validity"],
+      "type": "object"
+    },
+    "provenance": {
+      "additionalProperties": false,
+      "properties": {
+        "allocation_stratum_sha256": {
+          "oneOf": [
+            {"type":"null"},
+            {"pattern":"^[0-9a-f]{64}$","type":"string"}
+          ]
+        },
+        "command": {"minLength":1,"type":"string"},
+        "distributed_launcher": {"oneOf":[{"type":"null"},{"minLength":1,"type":"string"}]},
+        "git_run": {"oneOf":[{"type":"null"},{"$ref":"#/$defs/git_run"}]},
+        "image": {
+          "additionalProperties": false,
+          "properties": {
+            "arch": {"maxLength":128,"pattern":"^[a-z0-9][a-z0-9_.-]*$","type":"string"},
+            "digest": {
+              "oneOf": [{"type":"null"},{"pattern":"^sha256:[0-9a-f]{64}$","type":"string"}]
+            },
+            "digest_verified": {"type":"boolean"},
+            "reference": {"oneOf":[{"type":"null"},{"minLength":1,"type":"string"}]},
+            "squash_sha256": {"oneOf":[{"type":"null"},{"pattern":"^[0-9a-f]{64}$","type":"string"}]}
+          },
+          "required": ["arch","digest","digest_verified","reference","squash_sha256"],
+          "type": "object"
+        },
+        "redaction": {"const":"sanitized-v1"}
+      },
+      "required": ["allocation_stratum_sha256","command","distributed_launcher","git_run","image","redaction"],
+      "type": "object"
+    },
+    "record_type": {"const":"case-attempt"},
+    "runtime_fingerprint": {
+      "additionalProperties": false,
+      "properties": {
+        "accelerator_runtime": {
+          "additionalProperties": false,
+          "properties": {
+            "kind": {"enum":["cuda","hip"]},
+            "version": {"oneOf":[{"type":"null"},{"minLength":1,"type":"string"}]}
+          },
+          "required": ["kind","version"],
+          "type": "object"
+        },
+        "collective_library": {
+          "additionalProperties": false,
+          "properties": {
+            "kind": {"enum":["nccl","rccl"]},
+            "version": {"oneOf":[{"type":"null"},{"minLength":1,"type":"string"}]}
+          },
+          "required": ["kind","version"],
+          "type": "object"
+        },
+        "device": {
+          "additionalProperties": false,
+          "properties": {
+            "arch": {"minLength":1,"type":"string"},
+            "compute_units": {"minimum":1,"type":"integer"},
+            "memory_bytes": {"minimum":1,"type":"integer"},
+            "product": {"minLength":1,"type":"string"},
+            "warp_size": {"minimum":1,"type":"integer"}
+          },
+          "required": ["arch","compute_units","memory_bytes","product","warp_size"],
+          "type": "object"
+        },
+        "driver_version": {"oneOf":[{"type":"null"},{"minLength":1,"type":"string"}]},
+        "framework": {
+          "additionalProperties": false,
+          "properties": {"kind":{"const":"torch"},"version":{"minLength":1,"type":"string"}},
+          "required": ["kind","version"],
+          "type": "object"
+        },
+        "machine": {"maxLength":128,"pattern":"^[a-z0-9][a-z0-9_.-]*$","type":"string"},
+        "python_version": {"minLength":1,"type":"string"},
+        "vendor": {"enum":["nvidia","amd"]}
+      },
+      "required": [
+        "accelerator_runtime",
+        "collective_library",
+        "device",
+        "driver_version",
+        "framework",
+        "machine",
+        "python_version",
+        "vendor"
+      ],
+      "type": "object"
+    },
+    "sample_artifact": {
+      "additionalProperties": false,
+      "properties": {
+        "bytes": {"minimum":1,"type":"integer"},
+        "format": {"const":"collectivex.samples.v1"},
+        "path": {"pattern":"^[A-Za-z0-9_.-]+$","type":"string"},
+        "sha256": {"pattern":"^[0-9a-f]{64}$","type":"string"}
+      },
+      "required": ["bytes","format","path","sha256"],
+      "type": "object"
+    },
+    "schema_version": {"const":1},
+    "topology": {
+      "additionalProperties": false,
+      "properties": {
+        "device_count": {"minimum":1,"type":"integer"},
+        "device_product": {"minLength":1,"type":"string"},
+        "gpus_per_node": {"minimum":1,"type":"integer"},
+        "nodes": {"minimum":1,"type":"integer"},
+        "placement": {"const":"packed"},
+        "realized_placement": {
+          "additionalProperties": false,
+          "properties": {
+            "gpus_per_node": {"minimum":1,"type":"integer"},
+            "nodes": {"minimum":1,"type":"integer"},
+            "ranks_per_node": {"minimum":1,"type":"integer"},
+            "unique_local_ranks": {"const":true},
+            "valid": {"const":true}
+          },
+          "required": ["gpus_per_node","nodes","ranks_per_node","unique_local_ranks","valid"],
+          "type": "object"
+        },
+        "scale_out_transport": {"oneOf":[{"type":"null"},{"maxLength":128,"pattern":"^[a-z0-9][a-z0-9_.-]*$","type":"string"}]},
+        "scale_up_domain": {"minimum":1,"type":"integer"},
+        "scale_up_transport": {"maxLength":128,"pattern":"^[a-z0-9][a-z0-9_.-]*$","type":"string"},
+        "scope": {"enum":["scale-up","scale-out"]},
+        "topology_class": {"maxLength":128,"pattern":"^[a-z0-9][a-z0-9_.-]*$","type":"string"},
+        "transport": {"maxLength":128,"pattern":"^[a-z0-9][a-z0-9_.-]*$","type":"string"},
+        "world_size": {"minimum":1,"type":"integer"}
+      },
+      "required": [
+        "device_count",
+        "device_product",
+        "gpus_per_node",
+        "nodes",
+        "placement",
+        "realized_placement",
+        "scale_out_transport",
+        "scale_up_domain",
+        "scale_up_transport",
+        "scope",
+        "topology_class",
+        "transport",
+        "world_size"
+      ],
+      "type": "object"
+    },
+    "workload": {
+      "additionalProperties": false,
+      "properties": {
+        "activation_generator": {"const":"collectivex-activation-counter-v4"},
+        "activation_identity": {"pattern":"^[0-9a-f]{64}$","type":"string"},
+        "activation_profile": {"const":"canonical-counter-source-v4"},
+        "cross_rank_consistent": {"const":true},
+        "manifest_checksums": {
+          "oneOf": [
+            {"type":"null"},
+            {
+              "additionalProperties": {
+                "additionalProperties": false,
+                "properties": {
+                  "topk_idx": {"pattern":"^[0-9a-f]{64}$","type":"string"},
+                  "topk_weights": {"pattern":"^[0-9a-f]{64}$","type":"string"},
+                  "trace": {"pattern":"^[0-9a-f]{64}$","type":"string"}
+                },
+                "required": ["topk_idx", "topk_weights", "trace"],
+                "type": "object"
+              },
+              "type": "object"
+            }
+          ]
+        },
+        "members": {
+          "oneOf": [
+            {"type":"null"},
+            {
+              "items": {"pattern":"^cxwork-v1-[0-9a-f]{64}$","type":"string"},
+              "minItems": 1,
+              "uniqueItems": true,
+              "type": "array"
+            }
+          ]
+        },
+        "routing_generator": {"const":"collectivex-routing-counter-v3"},
+        "source": {"enum":["canonical-serialized","seeded-runtime"]},
+        "trace_hashes": {
+          "items": {"pattern":"^[0-9a-f]{64}$","type":"string"},
+          "minItems": 1,
+          "type": "array"
+        },
+        "trace_signature": {"pattern":"^[0-9a-f]{64}$","type":"string"},
+        "workload_id": {
+          "oneOf": [{"type":"null"},{"pattern":"^cxwork-v1-[0-9a-f]{64}$","type":"string"}]
+        }
+      },
+      "required": [
+        "activation_generator",
+        "activation_identity",
+        "activation_profile",
+        "cross_rank_consistent",
+        "manifest_checksums",
+        "members",
+        "routing_generator",
+        "source",
+        "trace_hashes",
+        "trace_signature",
+        "workload_id"
+      ],
+      "type": "object"
+    }
+  },
+  "required": [
+    "case",
+    "format",
+    "generated_at",
+    "identity",
+    "implementation",
+    "measurement",
+    "outcome",
+    "provenance",
+    "record_type",
+    "runtime_fingerprint",
+    "sample_artifact",
+    "schema_version",
+    "topology",
+    "workload"
+  ],
+  "title": "CollectiveX raw case attempt v1",
+  "type": "object"
+}
diff --git a/experimental/CollectiveX/schemas/samples-v1.schema.json b/experimental/CollectiveX/schemas/samples-v1.schema.json
new file mode 100644
index 000000000..f216860a8
--- /dev/null
+++ b/experimental/CollectiveX/schemas/samples-v1.schema.json
@@ -0,0 +1,93 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://inferencex.com/schemas/collectivex/samples-v1.schema.json",
+  "title": "CollectiveX exact private samples v1",
+  "type": "object",
+  "additionalProperties": false,
+  "required": ["allocation_id","attempt_id","case_id","format","points","qualification_index","sampling","schema_version","series_id"],
+  "properties": {
+      "allocation_id": {"$ref": "#/$defs/allocationId"},
+      "attempt_id": {"$ref": "#/$defs/attemptId"},
+      "case_id": {"$ref": "#/$defs/caseId"},
+      "format": {"const": "collectivex.samples.v1"},
+      "points": {
+            "type": "array",
+            "minItems": 1,
+            "items": {
+                    "type": "object",
+                    "additionalProperties": false,
+                    "required": ["components","evidence_id","point_id","sample_sha256","tokens_per_rank"],
+                    "properties": {
+                              "components": {
+                                          "type": "object",
+                                          "additionalProperties": false,
+                                          "required": ["combine","dispatch","roundtrip","stage"],
+                                          "properties": {
+                                                        "combine": {"$ref": "#/$defs/component"},
+                                                        "dispatch": {"$ref": "#/$defs/component"},
+                                                        "roundtrip": {"$ref": "#/$defs/component"},
+                                                        "stage": {"$ref":"#/$defs/component"}
+                                                      }
+                                        },
+                              "evidence_id": {"$ref": "#/$defs/evidenceId"},
+                              "point_id": {"$ref": "#/$defs/pointId"},
+                              "sample_sha256": {"$ref": "#/$defs/sha256"},
+                              "tokens_per_rank": {"type": "integer","minimum": 1}
+                            }
+                  }
+          },
+      "qualification_index": {"type":"integer","minimum":1,"maximum":3},
+      "sampling": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": ["iterations_per_trial","reduction","trials"],
+            "properties": {
+                    "iterations_per_trial": {"const": 8},
+                    "reduction": {"const": "cross-rank-max-per-iteration"},
+                    "trials": {"const": 64}
+                  }
+          },
+      "schema_version": {"const": 1},
+      "series_id": {"$ref": "#/$defs/seriesId"}
+    },
+  "$defs": {
+      "sha256": {"type": "string","pattern": "^[0-9a-f]{64}$"},
+      "caseId": {"type": "string","pattern": "^cxcase-v1-[0-9a-f]{64}$"},
+      "seriesId": {"type": "string","pattern": "^cxseries-v1-[0-9a-f]{64}$"},
+      "pointId": {"type": "string","pattern": "^cxpoint-v1-[0-9a-f]{64}$"},
+      "evidenceId": {"type": "string","pattern": "^cxevidence-v1-[0-9a-f]{64}$"},
+      "allocationId": {"type": "string","pattern": "^cxallocation-v1-[0-9a-f]{64}$"},
+      "attemptId": {"type": "string","pattern": "^cxattempt-v1-[0-9a-f]{64}$"},
+      "trials": {
+            "type":"array",
+            "minItems":64,
+            "maxItems":64,
+            "items":{"type":"array","minItems":8,"maxItems":8,"items":{"type":"number","minimum":0}}
+          },
+      "measuredComponent": {
+            "type":"object",
+            "additionalProperties":false,
+            "required":["availability","sample_count","trials"],
+            "properties": {
+                    "availability":{"const":"measured"},
+                    "sample_count":{"const":512},
+                    "trials":{"$ref":"#/$defs/trials"}
+                  }
+          },
+      "component": {
+            "oneOf": [
+                    {"$ref":"#/$defs/measuredComponent"},
+                    {
+                              "type":"object",
+                              "additionalProperties":false,
+                              "required":["availability","sample_count","trials"],
+                              "properties": {
+                                      "availability":{"const":"unavailable"},
+                                      "sample_count":{"const":0},
+                                      "trials":{"type":"null"}
+                                    }
+                            }
+                  ]
+          }
+    }
+}
diff --git a/experimental/CollectiveX/schemas/terminal-outcome-v1.schema.json b/experimental/CollectiveX/schemas/terminal-outcome-v1.schema.json
new file mode 100644
index 000000000..f4a9d99ca
--- /dev/null
+++ b/experimental/CollectiveX/schemas/terminal-outcome-v1.schema.json
@@ -0,0 +1,337 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://inferencex.com/schemas/collectivex/terminal-outcome-v1.schema.json",
+  "title": "CollectiveX terminal outcome v1",
+  "type": "object",
+  "additionalProperties": false,
+  "required": ["case","format","generated_at","identity","outcome","provenance","record_type","schema_version"],
+  "properties": {
+      "case": {"$ref": "#/$defs/case"},
+      "format": {"const": "collectivex.terminal.v1"},
+      "generated_at": {"type": "string","format": "date-time"},
+      "identity": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": ["allocation_factors","allocation_id","attempt_id","attempt_ordinal","case_factors","case_id"],
+            "properties": {
+                    "allocation_factors": {"$ref": "#/$defs/allocationFactors"},
+                    "allocation_id": {"$ref": "#/$defs/allocationId"},
+                    "attempt_id": {"$ref": "#/$defs/attemptId"},
+                    "attempt_ordinal": {"type": "integer","minimum": 1},
+                    "case_factors": {
+                              "type": "object",
+                              "additionalProperties": false,
+                              "required": ["case","profile","sku"],
+                              "properties": {
+                                          "case": {"$ref": "#/$defs/case"},
+                                          "profile": {"$ref":"#/$defs/caseProfile"},
+                                          "sku": {"$ref": "#/$defs/safeId"}
+                                        }
+                            },
+                    "case_id": {"$ref": "#/$defs/caseId"}
+                  }
+          },
+      "outcome": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": ["failure_mode","reason","return_code","status"],
+            "properties": {
+                    "failure_mode": {"$ref": "#/$defs/safeId"},
+                    "reason": {"type": "string","minLength": 1,"maxLength": 240},
+                    "return_code": {"type": "integer","minimum": 0},
+                    "status": {"enum": ["failed","invalid","unsupported"]}
+                  }
+          },
+      "provenance": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": ["git_run","control_sha256","redaction","source"],
+            "properties": {
+                    "git_run": {"oneOf": [{"type": "null"},{"$ref": "#/$defs/gitRun"}]},
+                    "control_sha256": {"oneOf": [{"type": "null"},{"$ref": "#/$defs/sha256"}]},
+                    "redaction": {"const": "sanitized-v1"},
+                    "source": {
+                              "enum": [
+                                          "runtime-emitter",
+                                          "post-emit-command",
+                                          "matrix-capability-resolver"
+                                        ]
+                            }
+                  }
+          },
+      "record_type": {"const": "terminal-outcome"},
+      "schema_version": {"const": 1}
+    },
+  "allOf": [
+      {
+            "oneOf": [
+                    {
+                          "properties": {
+                                  "provenance": {
+                                          "properties": {"source": {"const": "runtime-emitter"}}
+                                        },
+                                  "outcome": {"$ref": "#/$defs/runtimeOutcome"}
+                                }
+                        },
+                    {
+                          "properties": {
+                                  "provenance": {
+                                          "properties": {"source": {"const": "post-emit-command"}}
+                                        },
+                                  "outcome": {"$ref": "#/$defs/postEmitOutcome"}
+                                }
+                        },
+                    {
+                          "properties": {
+                                  "provenance": {
+                                          "properties": {"source": {"const": "matrix-capability-resolver"}}
+                                        },
+                                  "outcome": {"$ref": "#/$defs/capabilityOutcome"}
+                                }
+                        }
+                  ]
+          }
+    ],
+  "$defs": {
+      "runtimeOutcome": {
+            "type": "object",
+            "properties": {"status": {"const": "failed"}},
+            "allOf": [
+                    {
+                          "oneOf": [
+                                  {"properties": {"failure_mode": {"const": "setup"}, "reason": {"const": "launcher-setup-failed"}}},
+                                  {"properties": {"failure_mode": {"const": "repository-stage"}, "reason": {"const": "repository-staging-failed"}}},
+                                  {"properties": {"failure_mode": {"const": "registry-verification"}, "reason": {"const": "container-registry-verification-failed"}}},
+                                  {"properties": {"failure_mode": {"const": "scheduler-allocation"}, "reason": {"const": "scheduler-allocation-failed"}}},
+                                  {"properties": {"failure_mode": {"const": "container-import"}, "reason": {"const": "container-image-preparation-failed"}}},
+                                  {"properties": {"failure_mode": {"const": "container-hash"}, "reason": {"const": "container-image-identity-failed"}}},
+                                  {"properties": {"failure_mode": {"const": "container-launch"}, "reason": {"const": "container-runtime-launch-failed"}}},
+                                  {"properties": {"failure_mode": {"const": "backend-setup"}, "reason": {"const": "backend-setup-failed"}}},
+                                  {"properties": {"failure_mode": {"const": "artifact-collection"}, "reason": {"const": "artifact-collection-failed"}}},
+                                  {"properties": {"failure_mode": {"const": "runtime-identity"}, "reason": {"const": "runtime-identity-mismatch"}}},
+                                  {"properties": {"failure_mode": {"const": "timeout"}, "reason": {"const": "execution-timeout"}}},
+                                  {"properties": {"failure_mode": {"const": "deadlock"}, "reason": {"const": "execution-deadlock"}}},
+                                  {"properties": {"failure_mode": {"const": "execution"}, "reason": {"const": "distributed-command-failed"}}}
+                                ]
+                        }
+                  ]
+          },
+      "postEmitOutcome": {
+            "type": "object",
+            "properties": {
+                    "status": {"const": "failed"},
+                    "failure_mode": {"enum": ["runtime-identity", "timeout", "deadlock", "execution"]},
+                    "reason": {"const": "post-emit-distributed-command-failed"}
+                  }
+          },
+      "capabilityOutcome": {
+            "type": "object",
+            "properties": {
+                    "status": {"const": "unsupported"},
+                    "failure_mode": {"const": "capability"},
+                    "reason": {
+                              "enum": [
+                                          "backend-platform-unsupported",
+                                          "backend-token-capacity",
+                                          "precision-profile-unsupported"
+                                        ]
+                            }
+                  }
+          },
+      "sha256": {"type": "string","pattern": "^[0-9a-f]{64}$"},
+      "safeId": {"type": "string","pattern": "^[a-z0-9][a-z0-9_.-]*$","maxLength": 128},
+      "precisionProfile": {
+            "enum":[
+                    "d-bf16.c-bf16",
+                    "d-fp8-e4m3fn-b128-f32-prequantized.c-bf16",
+                    "d-fp8-e4m3fnuz-b128-f32-prequantized.c-bf16",
+                    "d-fp8-e4m3fn-b128-f32-fused.c-bf16",
+                    "d-bf16.c-logfmt10-dynamic64",
+                    "d-fp8-e4m3fn-b128-f32-fused.c-logfmt10-dynamic64",
+                    "d-bf16.c-fp8-e4m3fn-direct-cast-noscale",
+                    "d-fp8-e4m3fn-b128-f32-prequantized.c-fp8-e4m3fn-direct-cast-noscale",
+                    "d-bf16.c-fp8-e4m3fnuz-direct-cast-noscale",
+                    "d-fp8-e4m3fnuz-b128-f32-prequantized.c-fp8-e4m3fnuz-direct-cast-noscale"
+                  ]
+          },
+      "caseId": {"type": "string","pattern": "^cxcase-v1-[0-9a-f]{64}$"},
+      "allocationId": {"type": "string","pattern": "^cxallocation-v1-[0-9a-f]{64}$"},
+      "attemptId": {"type": "string","pattern": "^cxattempt-v1-[0-9a-f]{64}$"},
+      "nullableText": {"oneOf": [{"type": "null"},{"type": "string","minLength": 1}]},
+      "communicationAxis": {
+            "type":"object",
+            "additionalProperties":false,
+            "required":["alignment_contract","api_input_dtype","api_output_dtype","communication_format","conversion_boundary","padding_contract","quantization_origin","scale_dtype","scale_group_size","scale_layout"],
+            "properties": {
+                    "alignment_contract":{"enum":["native-bf16-vector-alignment","hidden-block-128","native-fp8-vector-alignment","value-block-64"]},
+                    "api_input_dtype":{"enum":["bf16","fp8-e4m3fn-with-f32-scale","fp8-e4m3fnuz-with-f32-scale"]},
+                    "api_output_dtype":{"enum":["bf16","fp8-e4m3fn-with-f32-scale","fp8-e4m3fnuz-with-f32-scale"]},
+                    "communication_format":{"enum":["bf16","fp8-e4m3fn","fp8-e4m3fnuz","logfmt10"]},
+                    "conversion_boundary":{"enum":["none","before-dispatch-timing","inside-dispatch-timing","inside-combine-timing"]},
+                    "padding_contract":{"enum":["none","right-zero-pad-hidden-to-128","right-zero-pad-values-to-64"]},
+                    "quantization_origin":{"enum":["none","caller-prequantized","backend-fused","backend-internal","backend-internal-direct-cast"]},
+                    "scale_dtype":{"oneOf":[{"type":"null"},{"enum":["f32","implicit-logfmt10"]}]},
+                    "scale_group_size":{"oneOf":[{"type":"null"},{"enum":[64,128]}]},
+                    "scale_layout":{"enum":["none","per-token-hidden-block","dynamic-per-64-values"]}
+                  }
+          },
+      "communicationPrecision": {
+            "type":"object",
+            "additionalProperties":false,
+            "required":["combine","dispatch","modes","profile_id"],
+            "properties": {
+                    "combine":{"$ref":"#/$defs/communicationAxis"},
+                    "dispatch":{"$ref":"#/$defs/communicationAxis"},
+                    "modes":{"type":"array","minItems":1,"uniqueItems":true,"items":{"enum":["normal","low-latency"]}},
+                    "profile_id":{"$ref":"#/$defs/precisionProfile"}
+                  }
+          },
+      "caseProfile": {
+            "type":"object",
+            "additionalProperties":false,
+            "allOf":[
+                    {
+                              "if":{"properties":{"mode":{"const":"normal"}},"required":["mode"]},
+                              "then":{"properties":{
+                                      "combine_semantics":{"const":"activation-only"},
+                                      "component_order_contract":{"const":"qualification-hash-rotated-components-v1"},
+                                      "contract":{"const":"layout-and-dispatch-v1"},
+                                      "correctness_scope":{"const":"dispatch-metadata-and-transformed-combine"},
+                                      "oracle_contract":{"const":"expert-specific-transform-v1"},
+                                      "payload_unit":{"const":"token-rank"}
+                                    }}
+                            },
+                    {
+                              "if":{"properties":{"mode":{"const":"low-latency"}},"required":["mode"]},
+                              "then":{"properties":{
+                                      "combine_semantics":{"const":"gate-weighted"},
+                                      "component_order_contract":{"const":"qualification-hash-rotated-components-v1"},
+                                      "contract":{"const":"expert-packed-weighted-combine-v1"},
+                                      "correctness_scope":{"const":"expert-assignment-and-weighted-combine"},
+                                      "oracle_contract":{"const":"expert-assignment-transform-v1"},
+                                      "payload_unit":{"const":"token-expert"}
+                                    }}
+                            }
+                  ],
+            "required":["activation_generator","activation_profile","combine_dtype","combine_quant_mode","combine_semantics","component_order_contract","conditioning_contract","contract","correctness_scope","dtype","eplb_planner","eplb_redundant_experts","eplb_reference_tokens_per_rank","mode","oracle_contract","oracle_tolerances","payload_unit","placement","percentile_method","rank_reduction","resource_mode","routing_generator","sampling_contract","seed","source_identity_contract"],
+            "properties": {
+                    "activation_generator":{"const":"collectivex-activation-counter-v4"},
+                    "activation_profile":{"const":"canonical-counter-source-v4"},
+                    "combine_dtype":{"const":"bf16"},
+                    "combine_quant_mode":{"const":"none"},
+                    "combine_semantics":{"enum":["activation-only","gate-weighted"]},
+                    "communication_precision":{"$ref":"#/$defs/communicationPrecision"},
+                    "component_order_contract":{"const":"qualification-hash-rotated-components-v1"},
+                    "conditioning_contract":{"const":"fixed-phase-ramp-8-roundtrips-v1"},
+                    "contract":{"enum":["layout-and-dispatch-v1","expert-packed-weighted-combine-v1"]},
+                    "correctness_scope":{"enum":["dispatch-metadata-and-transformed-combine","expert-assignment-and-weighted-combine"]},
+                    "dtype":{"const":"bf16"},
+                    "eplb_planner":{"const":"greedy-rank-major-v1"},
+                    "eplb_redundant_experts":{"const":32},
+                    "eplb_reference_tokens_per_rank":{"const":2048},
+                    "mode":{"enum":["normal","low-latency"]},
+                    "oracle_contract":{"enum":["expert-specific-transform-v1","expert-assignment-transform-v1"]},
+                    "oracle_tolerances":{"const":"rtol=0.05,atol=0.02"},
+                    "payload_unit":{"enum":["token-rank","token-expert"]},
+                    "placement":{"const":"packed"},
+                    "percentile_method":{"const":"nearest-rank"},
+                    "rank_reduction":{"const":"cross-rank-max-per-iteration"},
+                    "resource_mode":{"const":"fixed-profile"},
+                    "routing_generator":{"const":"collectivex-routing-counter-v3"},
+                    "sampling_contract":{"const":"fixed-512-v1"},
+                    "seed":{"const":67},
+                    "source_identity_contract":{"const":"bounded-sign-bit-source-v1"}
+                  }
+          },
+      "allocationFactors": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": ["artifact","execution_id","job","qualification_index","repo","run_attempt","run_id","runner","source_sha"],
+            "properties": {
+                    "artifact": {"$ref": "#/$defs/nullableText"},
+                    "execution_id": {"$ref": "#/$defs/nullableText"},
+                    "job": {"$ref": "#/$defs/nullableText"},
+                    "qualification_index": {"type":"integer","minimum":1,"maximum":3},
+                    "repo": {"$ref": "#/$defs/nullableText"},
+                    "run_attempt": {"$ref": "#/$defs/nullableText"},
+                    "run_id": {"$ref": "#/$defs/nullableText"},
+                    "runner": {"$ref": "#/$defs/nullableText"},
+                    "source_sha": {"$ref": "#/$defs/nullableText"}
+                  }
+          },
+      "gitRun": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": ["artifact","job","qualification_index","ref","repo","run_attempt","run_id","source_sha"],
+            "properties": {
+                    "artifact": {"type": "string","minLength": 1},
+                    "job": {"type": "string","minLength": 1},
+                    "qualification_index": {"type":"integer","minimum":1,"maximum":3},
+                    "ref": {"type": "string","minLength": 1},
+                    "repo": {"type": "string","pattern": "^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$"},
+                    "run_attempt": {"type": "string","pattern": "^[1-9][0-9]*$"},
+                    "run_id": {"type": "string","pattern": "^[1-9][0-9]*$"},
+                    "source_sha": {"type": "string","pattern": "^[0-9a-f]{40}$"}
+                  }
+          },
+      "case": {
+            "type": "object",
+            "additionalProperties": false,
+            "required": [
+                    "backend",
+                    "canonical",
+                    "eplb",
+                    "ep",
+                    "experts",
+                    "gpus_per_node",
+                    "hidden",
+                    "ladder",
+                    "mode",
+                    "nodes",
+                    "phase",
+                    "required_publication",
+                    "routing",
+                    "samples_per_point",
+                    "scale_out_transport",
+                    "scale_up_domain",
+                    "scale_up_transport",
+                    "scope",
+                    "suite",
+                    "timing",
+                    "topk",
+                    "topology_class",
+                    "transport",
+                    "warmup_semantics",
+                    "workload"
+                  ],
+            "properties": {
+                    "backend": {"$ref": "#/$defs/safeId"},
+                    "canonical": {"type": "boolean"},
+                    "eplb": {"type": "boolean"},
+                    "ep": {"type": "integer","minimum": 1},
+                    "experts": {"type": "integer","minimum": 1},
+                    "gpus_per_node": {"type": "integer","minimum": 1},
+                    "hidden": {"type": "integer","minimum": 1},
+                    "ladder": {"type": "string","pattern": "^[1-9][0-9]*( [1-9][0-9]*)*$"},
+                    "mode": {"enum": ["normal","low-latency"]},
+                    "nodes": {"type": "integer","minimum": 1},
+                    "phase": {"enum": ["decode","prefill"]},
+                    "precision_profile": {"$ref":"#/$defs/precisionProfile"},
+                    "required_publication": {"enum": ["official","comparable-experimental","diagnostic"]},
+                    "routing": {"enum": ["uniform","zipf"]},
+                    "samples_per_point": {"const": 512},
+                    "scale_out_transport": {"oneOf": [{"type": "null"},{"$ref": "#/$defs/safeId"}]},
+                    "scale_up_domain": {"type": "integer","minimum": 1},
+                    "scale_up_transport": {"$ref": "#/$defs/safeId"},
+                    "scope": {"enum": ["scale-up","scale-out"]},
+                    "suite": {"$ref": "#/$defs/safeId"},
+                    "timing": {"const": "8:64:32"},
+                    "topk": {"type": "integer","minimum": 1},
+                    "topology_class": {"$ref": "#/$defs/safeId"},
+                    "transport": {"$ref": "#/$defs/safeId"},
+                    "warmup_semantics": {"const": "full-roundtrip-before-each-component-trial-point-v1"},
+                    "workload": {"$ref": "#/$defs/safeId"}
+                  }
+          }
+    }
+}
diff --git a/experimental/CollectiveX/source_archive.py b/experimental/CollectiveX/source_archive.py
new file mode 100644
index 000000000..c027490a6
--- /dev/null
+++ b/experimental/CollectiveX/source_archive.py
@@ -0,0 +1,349 @@
+#!/usr/bin/env python3
+"""Validate and extract one pinned backend from a shared source tar."""
+from __future__ import annotations
+
+import argparse
+import os
+from pathlib import Path, PurePosixPath
+import stat
+import tarfile
+from typing import Optional, Sequence
+
+
+PathParts = tuple[str, ...]  # an archive path split into its components
+_DIRECTORY_FLAGS = os.O_RDONLY | os.O_DIRECTORY | os.O_NOFOLLOW | os.O_CLOEXEC  # existing directory, never via a symlink
+_FILE_FLAGS = os.O_WRONLY | os.O_CREAT | os.O_EXCL | os.O_NOFOLLOW | os.O_CLOEXEC  # exclusive create of a new file
+MAX_ARCHIVE_MEMBERS = 20_000  # cap on members returned by tarfile iteration
+MAX_MEMBER_BYTES = 512 * 1024 * 1024  # 512 MiB cap on a single regular file
+MAX_EXPANDED_BYTES = 2 * 1024 * 1024 * 1024  # 2 GiB cap on the sum of regular-file sizes
+MAX_ARCHIVE_BYTES = 4 * 1024 * 1024 * 1024  # 4 GiB cap on the raw archive file
+MAX_ARCHIVE_HEADERS = 40_000  # cap on header blocks walked by the preflight
+MAX_EXTENSION_BYTES = 64 * 1024 * 1024  # 64 MiB cap on all extension payloads combined
+MAX_EXTENSION_MEMBER_BYTES = 1024 * 1024  # 1 MiB cap on one extension payload
+MAX_EXTENSION_CHAIN = 8  # cap on consecutive extension headers
+_TAR_BLOCK = 512  # header and payload-padding block size, in bytes
+_EXTENSION_TYPES = {b"L", b"K", b"x", b"g", b"X"}  # type flags the preflight counts as extension metadata
+
+
+class SourceArchiveError(ValueError):
+    """Raised when the backend source archive fails validation or cannot be extracted safely."""
+
+
+def _tar_size(field: bytes) -> int:
+    if field[0] in (0o200, 0o377):
+        value = int.from_bytes(field[1:], "big")
+        if field[0] == 0o377:
+            value -= 256 ** (len(field) - 1)
+        return value
+    try:
+        text = field.split(b"\0", 1)[0].decode("ascii").strip()
+        return int(text or "0", 8)
+    except (UnicodeDecodeError, ValueError) as exc:
+        raise SourceArchiveError("archive contains an invalid size field") from exc
+
+
+def _preflight_archive(descriptor: int, archive_size: int) -> None:
+    if archive_size <= 0 or archive_size > MAX_ARCHIVE_BYTES:
+        raise SourceArchiveError("backend source archive exceeds the raw size limit")
+    offset = headers = extension_bytes = extension_chain = 0
+    while offset < archive_size:
+        header = os.pread(descriptor, _TAR_BLOCK, offset)
+        if len(header) != _TAR_BLOCK:
+            raise SourceArchiveError("archive header is truncated")
+        if not any(header):
+            return
+        headers += 1
+        if headers > MAX_ARCHIVE_HEADERS:
+            raise SourceArchiveError("archive has too many physical headers")
+        size = _tar_size(header[124:136])
+        if size < 0:
+            raise SourceArchiveError("archive contains a negative payload size")
+        type_flag = header[156:157]
+        if type_flag in _EXTENSION_TYPES:
+            extension_chain += 1
+            extension_bytes += size
+            if (
+                extension_chain > MAX_EXTENSION_CHAIN
+                or size > MAX_EXTENSION_MEMBER_BYTES
+                or extension_bytes > MAX_EXTENSION_BYTES
+            ):
+                raise SourceArchiveError("archive extension metadata exceeds its limit")
+            if type_flag in {b"x", b"g", b"X"}:
+                payload = os.pread(descriptor, size, offset + _TAR_BLOCK)
+                if len(payload) != size:
+                    raise SourceArchiveError("archive extension metadata is truncated")
+                if b"GNU.sparse." in payload:
+                    raise SourceArchiveError("archive contains sparse extension metadata")
+        else:
+            extension_chain = 0
+            if type_flag == b"S":
+                raise SourceArchiveError("archive contains a sparse member")
+        blocks = (size + _TAR_BLOCK - 1) // _TAR_BLOCK
+        offset += _TAR_BLOCK + blocks * _TAR_BLOCK
+        if offset > archive_size:
+            raise SourceArchiveError("archive payload is truncated")
+
+
+def _member_parts(name: str) -> PathParts:
+    if not name or "\\" in name or "\0" in name:
+        raise SourceArchiveError("archive contains a noncanonical member path")
+    path = PurePosixPath(name)
+    if (
+        path.is_absolute()
+        or path.as_posix() != name
+        or not path.parts
+        or path.parts[0] != ".cx_sources"
+        or any(part in {"", ".", ".."} for part in path.parts)
+    ):
+        raise SourceArchiveError("archive contains a noncanonical member path")
+    return path.parts
+
+
+def _root_parts(root_basename: str) -> PathParts:
+    path = PurePosixPath(root_basename)
+    if (
+        not root_basename
+        or "\\" in root_basename
+        or "\0" in root_basename
+        or path.is_absolute()
+        or path.as_posix() != root_basename
+        or len(path.parts) != 1
+        or path.parts[0] in {"", ".", ".."}
+    ):
+        raise SourceArchiveError("invalid backend source root")
+    return (".cx_sources", root_basename)
+
+
+def _read_members(archive: tarfile.TarFile) -> list[tarfile.TarInfo]:
+    members: list[tarfile.TarInfo] = []
+    for member in archive:
+        if len(members) >= MAX_ARCHIVE_MEMBERS:
+            raise SourceArchiveError("archive has an invalid member count")
+        members.append(member)
+    return members
+
+
+def _validate_members(
+    members: list[tarfile.TarInfo], selected_root: PathParts
+) -> dict[PathParts, tarfile.TarInfo]:  # validates every member; returns them keyed by path parts
+    if not members or len(members) > MAX_ARCHIVE_MEMBERS:
+        raise SourceArchiveError("archive has an invalid member count")
+    entries: dict[PathParts, tarfile.TarInfo] = {}
+    expanded_bytes = 0  # running total of regular-file sizes
+    for member in members:
+        parts = _member_parts(member.name)  # raises on a noncanonical member name
+        if parts in entries:
+            raise SourceArchiveError("archive contains duplicate member paths")
+        if member.sparse is not None:
+            raise SourceArchiveError("archive contains a sparse member")
+        if member.isdir():
+            if member.size != 0:
+                raise SourceArchiveError("archive contains an invalid directory")
+        elif member.isfile():
+            if member.size < 0 or member.size > MAX_MEMBER_BYTES:
+                raise SourceArchiveError("archive member exceeds the size limit")
+            expanded_bytes += member.size
+            if expanded_bytes > MAX_EXPANDED_BYTES:
+                raise SourceArchiveError("archive exceeds the expanded size limit")
+        elif member.issym():
+            if member.size != 0:
+                raise SourceArchiveError("archive contains an invalid symbolic link")
+        else:  # anything that is not a directory, regular file, or symlink is refused
+            raise SourceArchiveError("archive contains a non-file member")
+        entries[parts] = member
+
+    source_parent = entries.get((".cx_sources",))
+    selected = entries.get(selected_root)
+    if source_parent is None or not source_parent.isdir():
+        raise SourceArchiveError("archive is missing its source directory")
+    if selected is None or not selected.isdir():
+        raise SourceArchiveError("archive is missing the selected backend source")
+
+    for parts in entries:
+        for depth in range(1, len(parts)):  # every proper ancestor prefix of this entry
+            parent = entries.get(parts[:depth])
+            if parent is None or not parent.isdir():  # ancestor must be an explicit directory entry
+                raise SourceArchiveError("archive member has an unsafe parent")
+
+    for parts, member in entries.items():
+        if not member.issym():
+            continue
+        target_name = member.linkname
+        target_path = PurePosixPath(target_name)
+        if (
+            not target_name
+            or "\\" in target_name
+            or "\0" in target_name
+            or target_path.is_absolute()
+            or target_path.as_posix() != target_name
+        ):
+            raise SourceArchiveError("archive contains an unsafe symbolic link")
+        target = list(parts[:-1])  # resolve the link text lexically, starting at the link's parent
+        for component in target_path.parts:
+            if component == "..":
+                if len(target) <= 2:  # ".." may not climb above (".cx_sources", <backend root>)
+                    raise SourceArchiveError("symbolic link escapes its backend source")
+                target.pop()
+            else:
+                target.append(component)
+        resolved = tuple(target)
+        if resolved[:2] != parts[:2]:  # target must stay under the link's own backend root
+            raise SourceArchiveError("symbolic link crosses backend sources")
+        target_member = entries.get(resolved)
+        if target_member is None or not target_member.isfile():  # links may only point at regular files
+            raise SourceArchiveError("symbolic link target is not a regular archive file")
+    return entries
+
+
+def _open_directory(root_fd: int, parts: PathParts) -> int:
+    descriptor = os.dup(root_fd)
+    try:
+        for part in parts:
+            child = os.open(part, _DIRECTORY_FLAGS, dir_fd=descriptor)
+            os.close(descriptor)
+            descriptor = child
+        return descriptor
+    except BaseException:
+        os.close(descriptor)
+        raise
+
+
+def _create_directory(root_fd: int, parts: PathParts) -> None:
+    parent_fd = _open_directory(root_fd, parts[:-1])
+    try:
+        os.mkdir(parts[-1], mode=0o700, dir_fd=parent_fd)
+    finally:
+        os.close(parent_fd)
+
+
+def _extract_file(
+    archive: tarfile.TarFile, root_fd: int, parts: PathParts, member: tarfile.TarInfo
+) -> None:
+    parent_fd = _open_directory(root_fd, parts[:-1])
+    descriptor = -1
+    source = None
+    try:
+        mode = 0o700 if member.mode & 0o111 else 0o600
+        descriptor = os.open(parts[-1], _FILE_FLAGS, mode, dir_fd=parent_fd)
+        source = archive.extractfile(member)
+        if source is None:
+            raise SourceArchiveError("archive file has no readable payload")
+        remaining = member.size
+        while remaining:
+            chunk = source.read(min(1024 * 1024, remaining))
+            if not chunk:
+                raise SourceArchiveError("archive file payload is truncated")
+            view = memoryview(chunk)
+            while view:
+                written = os.write(descriptor, view)
+                view = view[written:]
+            remaining -= len(chunk)
+        os.fchmod(descriptor, mode)
+    finally:
+        if source is not None:
+            source.close()
+        if descriptor >= 0:
+            os.close(descriptor)
+        os.close(parent_fd)
+
+
+def _extract_symlink(root_fd: int, parts: PathParts, member: tarfile.TarInfo) -> None:
+    parent_fd = _open_directory(root_fd, parts[:-1])
+    try:
+        os.symlink(member.linkname, parts[-1], dir_fd=parent_fd)
+    finally:
+        os.close(parent_fd)
+
+
+def _extract_selected(
+    archive: tarfile.TarFile,
+    destination_fd: int,
+    entries: dict[PathParts, tarfile.TarInfo],
+    selected_root: PathParts,
+) -> None:
+    try:
+        os.stat(".cx_sources", dir_fd=destination_fd, follow_symlinks=False)
+    except FileNotFoundError:
+        pass
+    else:
+        raise SourceArchiveError("backend source output already exists")
+
+    selected = {
+        parts: member
+        for parts, member in entries.items()
+        if parts[: len(selected_root)] == selected_root
+    }
+    _create_directory(destination_fd, (".cx_sources",))
+    directories = sorted(
+        (parts for parts, member in selected.items() if member.isdir()),
+        key=lambda parts: (len(parts), parts),
+    )
+    for parts in directories:
+        _create_directory(destination_fd, parts)
+    for parts, member in sorted(selected.items()):
+        if member.isfile():
+            _extract_file(archive, destination_fd, parts, member)
+    for parts, member in sorted(selected.items()):
+        if member.issym():
+            _extract_symlink(destination_fd, parts, member)
+
+
+def extract_source_archive(
+    archive_path: Path, destination: Path, root_basename: str
+) -> None:
+    """Validate the whole tar, then extract only root_basename beneath destination/.cx_sources."""
+    selected_root = _root_parts(root_basename)  # raises SourceArchiveError on an invalid basename
+    archive_fd = os.open(archive_path, os.O_RDONLY | os.O_NOFOLLOW | os.O_CLOEXEC)
+    try:
+        metadata = os.fstat(archive_fd)  # inspect the opened descriptor, not the path
+        if (
+            not stat.S_ISREG(metadata.st_mode)
+            or metadata.st_uid != os.getuid()
+            or stat.S_IMODE(metadata.st_mode) & 0o022
+        ):  # must be a regular file owned by this user, not group- or world-writable
+            raise SourceArchiveError("backend source archive has unsafe metadata")
+        _preflight_archive(archive_fd, metadata.st_size)  # raw header walk before tarfile parses anything
+        with os.fdopen(os.dup(archive_fd), "rb") as stream:  # tarfile reads a duplicate; archive_fd is closed below
+            try:
+                with tarfile.open(fileobj=stream, mode="r:") as archive:
+                    entries = _validate_members(_read_members(archive), selected_root)
+                    destination_fd = os.open(destination, _DIRECTORY_FLAGS)
+                    try:
+                        destination_metadata = os.fstat(destination_fd)
+                        if (
+                            destination_metadata.st_uid != os.getuid()
+                            or stat.S_IMODE(destination_metadata.st_mode) != 0o700
+                        ):  # destination must be owned by this user with mode exactly 0o700
+                            raise SourceArchiveError("backend source destination is unsafe")
+                        previous_umask = os.umask(0o077)  # restrictive umask only while extracting
+                        try:
+                            _extract_selected(
+                                archive, destination_fd, entries, selected_root
+                            )
+                        finally:
+                            os.umask(previous_umask)
+                    finally:
+                        os.close(destination_fd)
+            except RecursionError as exc:  # NOTE(review): presumably tarfile recursing on chained headers — confirm
+                raise SourceArchiveError("archive extension metadata is recursive") from exc
+    finally:
+        os.close(archive_fd)
+
+
+def main(argv: Optional[Sequence[str]] = None) -> int:
+    parser = argparse.ArgumentParser(
+        description="Safely install one pinned backend source archive"
+    )
+    parser.add_argument("archive", type=Path)
+    parser.add_argument("destination", type=Path)
+    parser.add_argument("root_basename")
+    args = parser.parse_args(argv)
+    try:
+        extract_source_archive(args.archive, args.destination, args.root_basename)
+    except (OSError, SourceArchiveError, tarfile.TarError) as exc:
+        parser.error(f"backend source archive rejected: {exc}")
+    return 0
+
+
+if __name__ == "__main__":  # run as a script: exit with main()'s return value
+    raise SystemExit(main())
diff --git a/experimental/CollectiveX/summarize.py b/experimental/CollectiveX/summarize.py
new file mode 100644
index 000000000..3752db6b9
--- /dev/null
+++ b/experimental/CollectiveX/summarize.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python3
+"""Render a small native-v1 shard summary and gate on a successful case."""
+from __future__ import annotations
+
+import argparse
+from pathlib import Path
+
+import contracts
+
+
+def load_results(directory: str, runner: str | None, timestamp: str | None) -> list[dict]:
+    """Collect the contract-valid outcome documents found in ``directory``.
+
+    ``*.json`` files are visited in sorted path order. When ``runner`` is
+    given, only names starting with ``"<runner>_"`` are considered; when
+    ``timestamp`` is given, it must occur somewhere in the file name.
+
+    Raw-attempt files are re-read through ``contracts.load_raw_attempt`` and
+    terminal documents pass through ``contracts.validate_terminal_document``.
+    Files with any other ``format`` are ignored, and a file that raises
+    ``contracts.ContractError`` or ``OSError`` is skipped (best effort).
+    """
+    def selected(candidate: Path) -> bool:
+        if runner and not candidate.name.startswith(f"{runner}_"):
+            return False
+        return not (timestamp and timestamp not in candidate.name)
+
+    collected: list[dict] = []
+    for candidate in filter(selected, sorted(Path(directory).glob("*.json"))):
+        try:
+            parsed = contracts.strict_load(candidate)
+            kind = parsed.get("format")
+            if kind == contracts.RAW_FORMAT:
+                collected.append(contracts.load_raw_attempt(candidate))
+            elif kind == contracts.TERMINAL_FORMAT:
+                collected.append(contracts.validate_terminal_document(parsed))
+        except (contracts.ContractError, OSError):
+            # Unreadable or contract-violating files do not abort the summary.
+            continue
+    return collected
+
+
+def _identity(document: dict) -> tuple[str, str, str, str, bool, str, int]:
+    """Return the identity tuple used to sort and label one document.
+
+    The tuple is ``(sku, suite, routing, phase, eplb, required_publication,
+    ep)``. Raw documents keep routing under ``case.shape.routing`` and the
+    EPLB flag under ``case.eplb.enabled``; every other format stores both
+    directly on ``case``. The EP degree is read from ``ep_size``, then
+    ``ep``, and defaults to ``0``.
+    """
+    case = document["case"]
+    is_raw = document["format"] == contracts.RAW_FORMAT
+    routing = case["shape"]["routing"] if is_raw else case["routing"]
+    eplb = case["eplb"]["enabled"] if is_raw else case["eplb"]
+    sku = document["identity"]["case_factors"]["sku"]
+    suite, phase, tier = case["suite"], case["phase"], case["required_publication"]
+    ep_degree = case.get("ep_size", case.get("ep", 0))
+    return (sku, suite, routing, phase, eplb, tier, ep_degree)
+
+
+def _headline(document: dict) -> tuple[int | str, float | str, float | str]:
+    """Pick the headline token point and roundtrip latency of one document.
+
+    Returns:
+        ``(tokens_per_rank, p50, p99)`` taken from the measurement row with
+        ``tokens_per_rank == 64`` or, when no such row exists, from the row
+        at the middle index. ``("-", "-", "-")`` is returned for documents
+        that are not raw attempts and for raw attempts without any rows.
+    """
+    placeholder = ("-", "-", "-")
+    if document["format"] != contracts.RAW_FORMAT:
+        return placeholder
+    rows = document["measurement"]["rows"]
+    if not rows:
+        # Without this guard the middle-row fallback would index an empty
+        # list and abort the whole summary with IndexError.
+        return placeholder
+    row = next((item for item in rows if item["tokens_per_rank"] == 64), None)
+    if row is None:
+        row = rows[len(rows) // 2]
+    latency = row["components"]["roundtrip"]["percentiles_us"]
+    return row["tokens_per_rank"], latency["p50"], latency["p99"]
+
+
+def render(documents: list[dict], markdown: bool) -> str:
+    """Format outcome documents as a Markdown table or a plain-text listing.
+
+    Documents are ordered by ``_identity``. The Markdown form emits a heading,
+    a table with one row per document and, when there are no documents, a
+    trailing note. The plain-text form emits a two-line banner followed by
+    one line per document that reports only the roundtrip p99 latency.
+    """
+    ordered = sorted(documents, key=_identity)
+
+    def cells(entry: dict) -> tuple:
+        # Shared by both layouts: identity, backend, headline, then status.
+        sku, suite, routing, phase, eplb, tier, ep = _identity(entry)
+        backend = entry["case"]["backend"]
+        token, p50, p99 = _headline(entry)
+        route = f"{routing}{'+eplb' if eplb else ''}"
+        status = entry["outcome"]["status"]
+        return sku, backend, suite, phase, route, tier, ep, status, token, p50, p99
+
+    if not markdown:
+        listing = ["CollectiveX EP results", "======================"]
+        for entry in ordered:
+            sku, backend, suite, phase, route, tier, ep, status, token, _, p99 = cells(entry)
+            listing.append(
+                f"  {sku:<10} {backend:<16} {suite:<13} {phase:<7} "
+                f"{route} {tier} ep{ep} "
+                f"{status} T={token} roundtrip_p99_us={p99}"
+            )
+        return "\n".join(listing)
+
+    table = [
+        "## CollectiveX EP results", "",
+        "| sku | backend | suite | phase | routing | tier | ep | outcome | T* | p50 us | p99 us |",
+        "|---|---|---|---|---|---|--:|---|--:|--:|--:|",
+    ]
+    for entry in ordered:
+        sku, backend, suite, phase, route, tier, ep, status, token, p50, p99 = cells(entry)
+        table.append(
+            f"| {sku} | `{backend}` | {suite} | {phase} | "
+            f"{route} | {tier} | {ep} | "
+            f"{status} | {token} | {p50} | {p99} |"
+        )
+    if not ordered:
+        table.append("\n> No valid native v1 outcome documents found.")
+    return "\n".join(table)
+
+
+def main() -> int:
+    """Print the shard summary and gate on at least one successful raw case.
+
+    Returns:
+        ``0`` whenever ``--markdown`` is given. Otherwise ``0`` only if some
+        loaded raw-format document has outcome status ``"success"``, else ``1``.
+    """
+    cli = argparse.ArgumentParser(description="Summarize CollectiveX native v1 outcomes")
+    cli.add_argument("--results-dir", default="results")
+    cli.add_argument("--runner")
+    cli.add_argument("--ts")
+    cli.add_argument("--markdown", action="store_true")
+    options = cli.parse_args()
+    outcomes = load_results(options.results_dir, options.runner, options.ts)
+    print(render(outcomes, options.markdown))
+    if options.markdown:
+        # The Markdown rendering is report-only and never fails the step.
+        return 0
+    for outcome in outcomes:
+        if (
+            outcome["format"] == contracts.RAW_FORMAT
+            and outcome["outcome"]["status"] == "success"
+        ):
+            return 0
+    return 1
+
+
+# Script entry point: the integer returned by main() becomes the exit status.
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/experimental/CollectiveX/sweep_matrix.py b/experimental/CollectiveX/sweep_matrix.py
new file mode 100644
index 000000000..309199fe2
--- /dev/null
+++ b/experimental/CollectiveX/sweep_matrix.py
@@ -0,0 +1,1453 @@
+#!/usr/bin/env python3
+"""Resolve CollectiveX v1 suites and extract validated execution shards.
+
+Mode changes measurement semantics and therefore participates in case identity.
+Precision sensitivity uses allowlisted communication profiles; provisional native
+paths remain outside the executable matrix until their probes are resolved.
+"""
+from __future__ import annotations
+
+import argparse
+import datetime as dt
+import hashlib
+import itertools
+import json
+import os
+from pathlib import Path
+import re
+import sys
+from typing import Any
+
+HERE = Path(__file__).resolve().parent
+sys.path.insert(0, str(HERE))
+sys.path.insert(0, str(HERE / "tests"))
+
+try:  # Shard extraction on GPU runners is intentionally stdlib-only.
+    import yaml  # type: ignore
+except ModuleNotFoundError:  # pragma: no cover - exercised by the workflow environment
+    yaml = None
+
+import capability as cap  # noqa: E402
+import contracts  # noqa: E402
+import ep_harness  # noqa: E402
+import identity  # noqa: E402
+
+
+# Colon-separated timing profile built from the harness constants:
+# "<timed iters per trial>:<trials per point>:<warmup iters per trial>".
+EP_TIMING_PROFILE = (
+    f"{ep_harness.TIMED_ITERS_PER_TRIAL}:{ep_harness.TRIALS_PER_POINT}:"
+    f"{ep_harness.WARMUP_ITERS_PER_TRIAL}"
+)
+# The single v1 workload as (workload name, hidden, topk, experts); extracted
+# cases are compared against this tuple in _v1_requested_ladder.
+V1_WORKLOAD = ("deepseek-v3-v1", 7168, 8, 256)
+# Frozen v1 suite catalog. Per suite:
+#   mode               - the mode the suite must declare
+#   publication        - the required_publication the suite must declare
+#   backends           - optional; when present the suite's backends must equal this set
+#   precision_profiles - optional; when present the suite's profiles must equal this value
+#   coordinates        - allowed (mode, phase, routing, eplb) tuples
+#   ladders            - tokens-per-rank points per phase
+V1_SUITE_CONTRACTS = {
+    "ep-core-v1": {
+        "mode": "normal",
+        "publication": "official",
+        "coordinates": {
+            ("normal", "decode", "uniform", False),
+            ("normal", "prefill", "uniform", False),
+        },
+        "ladders": {
+            "decode": tuple(ep_harness.DECODE_LADDER),
+            "prefill": (256, 512),
+        },
+    },
+    "ep-routing-v1": {
+        "mode": "normal",
+        "publication": "comparable-experimental",
+        "coordinates": {
+            ("normal", "decode", "zipf", False),
+            ("normal", "decode", "zipf", True),
+            ("normal", "prefill", "zipf", False),
+            ("normal", "prefill", "zipf", True),
+        },
+        "ladders": {"decode": (128,), "prefill": (512,)},
+    },
+    "ep-low-latency-v1": {
+        "mode": "low-latency",
+        "publication": "official",
+        "backends": {"deepep", "uccl"},
+        "coordinates": {("low-latency", "decode", "uniform", False)},
+        "ladders": {"decode": tuple(ep_harness.DECODE_LADDER)},
+    },
+    "ep-precision-normal-v1": {
+        "mode": "normal",
+        "publication": "comparable-experimental",
+        "backends": {"deepep", "deepep-v2", "uccl", "deepep-hybrid", "mori"},
+        "precision_profiles": identity.V1_NORMAL_PRECISION_PROFILE_IDS,
+        "coordinates": {
+            ("normal", "decode", "uniform", False),
+            ("normal", "prefill", "uniform", False),
+        },
+        "ladders": {"decode": (128,), "prefill": (512,)},
+    },
+    "ep-precision-low-latency-v1": {
+        "mode": "low-latency",
+        "publication": "comparable-experimental",
+        "backends": {"deepep", "uccl"},
+        "precision_profiles": identity.V1_LOW_LATENCY_PRECISION_PROFILE_IDS,
+        "coordinates": {("low-latency", "decode", "uniform", False)},
+        "ladders": {"decode": (128,)},
+    },
+}
+# Names (workloads, suites) must fully match this pattern: a lowercase letter
+# or digit followed by lowercase letters, digits, dots or hyphens.
+IDENTIFIER = re.compile(r"[a-z0-9][a-z0-9.-]*")
+# Every key a suite entry may carry.
+SUITE_FIELDS = {
+    "backends", "ep_degrees", "eplb", "mode", "phases", "platforms",
+    "precision_profiles", "provisional", "required_publication", "routings",
+    "token_points", "token_points_decode", "token_points_prefill", "workloads",
+}
+# Keys every suite entry must carry.
+SUITE_REQUIRED = {
+    "ep_degrees", "mode", "phases", "platforms", "required_publication", "routings",
+    "workloads",
+}
+# Topology keys copied from the registered topology into each case and from
+# the first case of a chunk into its shard.
+TOPOLOGY_FIELDS = (
+    "nodes", "gpus_per_node", "scale_up_domain", "scope", "scale_up_transport",
+    "scale_out_transport", "transport", "topology_class",
+)
+# The indices 1, 2 and 3; consumed outside the part of the file shown here.
+QUALIFICATION_INDICES = range(1, 4)
+
+
+class MatrixError(ValueError):
+    """A matrix or shard-control document violates the execution contract.
+
+    Raised by the case-binding helpers in this module (for example
+    ``_v1_requested_ladder`` and ``_expected_disposition``). As a
+    ``ValueError`` subclass it stays distinct from the ``SystemExit`` raised
+    for configuration errors.
+    """
+
+
+if yaml is not None:
+    class _UniqueKeyLoader(yaml.SafeLoader):
+        """SafeLoader whose mapping constructor rejects duplicate keys."""
+
+    def _unique_mapping(loader: Any, node: Any, deep: bool = False) -> dict[Any, Any]:
+        """Construct a mapping node, refusing duplicate or unhashable keys.
+
+        Args:
+            loader: The loader constructing the document.
+            node: The mapping node; ``node.value`` holds (key, value) node pairs.
+            deep: Forwarded to ``loader.construct_object``.
+
+        Returns:
+            The constructed ``dict``.
+
+        Raises:
+            SystemExit: A key repeats within the mapping, or a key is
+                unhashable (for example a sequence or mapping used as a key).
+        """
+        result: dict[Any, Any] = {}
+        for key_node, value_node in node.value:
+            key = loader.construct_object(key_node, deep=deep)
+            try:
+                duplicate = key in result
+            except TypeError:
+                # A list/dict key cannot be looked up; report it like other
+                # configuration errors instead of leaking a bare TypeError
+                # past _load's yaml.YAMLError handler.
+                raise SystemExit(
+                    f"unhashable YAML key at line {key_node.start_mark.line + 1}"
+                ) from None
+            if duplicate:
+                raise SystemExit(f"duplicate YAML key {key!r} at line {key_node.start_mark.line + 1}")
+            result[key] = loader.construct_object(value_node, deep=deep)
+        return result
+
+    # Route every plain mapping through the strict constructor above.
+    _UniqueKeyLoader.add_constructor(
+        yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, _unique_mapping
+    )
+
+
+def _load(name: str) -> dict[str, Any]:
+    """Parse ``configs/<name>`` next to this module into a dictionary.
+
+    The file is read with ``_UniqueKeyLoader`` (a ``yaml.SafeLoader``
+    subclass), so duplicate mapping keys are rejected.
+
+    Raises:
+        SystemExit: PyYAML is not installed, the file is not valid YAML, or
+            its top-level value is not a mapping.
+    """
+    if yaml is None:
+        raise SystemExit("matrix generation requires PyYAML; shard extraction does not")
+    source = HERE / "configs" / name
+    try:
+        with source.open() as stream:
+            parsed = yaml.load(stream, Loader=_UniqueKeyLoader)
+    except yaml.YAMLError as problem:
+        raise SystemExit(f"configs/{name} is not valid YAML: {problem}") from problem
+    if isinstance(parsed, dict):
+        return parsed
+    raise SystemExit(f"configs/{name} must contain a YAML object")
+
+
+def _workload_registry(workloads: dict[str, Any]) -> dict[str, dict[str, Any]]:
+    """Flatten the workload sections into one ``name -> config`` mapping.
+
+    ``synthetic`` entries are merged first, then ``model_derived``; a missing
+    or empty section contributes nothing, and a name present in both
+    sections keeps the ``model_derived`` config.
+    """
+    merged: dict[str, dict[str, Any]] = {}
+    for section in ("synthetic", "model_derived"):
+        entries = workloads.get(section) or {}
+        merged.update(entries.items())
+    return merged
+
+
+def _fields(value: Any, path: str, allowed: set[str], required: set[str]) -> dict[str, Any]:
+    """Check that ``value`` is a mapping with exactly the permitted keys.
+
+    Args:
+        value: Candidate object.
+        path: Label used as the prefix of every error message.
+        allowed: Keys that may appear.
+        required: Keys that must appear.
+
+    Returns:
+        ``value`` itself, unchanged.
+
+    Raises:
+        SystemExit: ``value`` is not a dict, a key is not a string, a key is
+            outside ``allowed``, or a ``required`` key is absent.
+    """
+    if not isinstance(value, dict):
+        raise SystemExit(f"{path} must be an object")
+    for key in value:
+        if not isinstance(key, str):
+            raise SystemExit(f"{path} field names must be strings")
+    present = set(value)
+    unknown = present - allowed
+    missing = required - present
+    if unknown or missing:
+        raise SystemExit(f"{path} fields: unknown={sorted(unknown)}, missing={sorted(missing)}")
+    return value
+
+
+def _list(value: Any, path: str, item_type: type, allowed: set[Any] | None = None) -> list[Any]:
+    """Check that ``value`` is a non-empty list of unique ``item_type`` items.
+
+    Item types are compared with ``type(item) is item_type``, so a ``bool``
+    is not accepted where ``int`` is requested. When ``allowed`` is given,
+    every item must also be a member of it.
+
+    Returns:
+        ``value`` itself, unchanged.
+
+    Raises:
+        SystemExit: Any of the above conditions is violated.
+    """
+    def acceptable() -> bool:
+        if not isinstance(value, list) or not value:
+            return False
+        if any(type(item) is not item_type for item in value):
+            return False
+        if len(set(value)) != len(value):
+            return False
+        return allowed is None or all(item in allowed for item in value)
+
+    if not acceptable():
+        raise SystemExit(f"{path} must be a non-empty unique list of valid {item_type.__name__}s")
+    return value
+
+
+def validate_config_documents(
+    suites_document: dict[str, Any], workloads: dict[str, Any]
+) -> None:
+    """Reject configuration that is ambiguous, unused, or outside the v1 grid.
+
+    Args:
+        suites_document: Parsed ``configs/suites.yaml``.
+        workloads: Parsed ``configs/workloads.yaml``.
+
+    Raises:
+        SystemExit: On the first violation found; the message names the
+            offending document, suite, or workload.
+    """
+    # Both documents may only carry their known top-level keys.
+    _fields(
+        suites_document, "configs/suites.yaml",
+        {"schema_version", "suites"}, {"schema_version", "suites"},
+    )
+    _fields(
+        workloads, "configs/workloads.yaml",
+        {"schema_version", "synthetic", "model_derived"}, {"schema_version"},
+    )
+    # `type(...) is not int` also rejects booleans, which would otherwise
+    # compare equal to 1.
+    if type(suites_document["schema_version"]) is not int or suites_document["schema_version"] != 1:
+        raise SystemExit("configs/suites.yaml schema_version must be integer 1")
+    if type(workloads["schema_version"]) is not int or workloads["schema_version"] != 1:
+        raise SystemExit("configs/workloads.yaml schema_version must be integer 1")
+    # Flat name -> config registry; a name may appear in only one section.
+    registry: dict[str, dict[str, Any]] = {}
+    for section, expert_field in (
+        ("synthetic", "experts"),
+        ("model_derived", "routed_experts"),
+    ):
+        entries = workloads.get(section, {})
+        if not isinstance(entries, dict):
+            raise SystemExit(f"workloads.{section} must be an object")
+        for name, value in entries.items():
+            if not isinstance(name, str) or not IDENTIFIER.fullmatch(name) or name in registry:
+                raise SystemExit(f"workloads.{section} has invalid or duplicate name {name!r}")
+            fields = {"hidden", "topk", expert_field, "verified_against"}
+            config = _fields(value, f"workload {name}", fields, fields - {"verified_against"})
+            # dimensions = [hidden, topk, expert count]
+            dimensions = [config[key] for key in ("hidden", "topk", expert_field)]
+            if any(type(item) is not int or item <= 0 for item in dimensions):
+                raise SystemExit(f"workload {name} dimensions must be positive integers")
+            if dimensions[1] > dimensions[2]:
+                raise SystemExit(f"workload {name}.topk exceeds its expert count")
+            source = config.get("verified_against")
+            if source is not None and (not isinstance(source, str) or not source.strip()):
+                raise SystemExit(f"workload {name}.verified_against must be a non-empty string")
+            registry[name] = config
+    if not registry:
+        raise SystemExit("configs/workloads.yaml must define at least one workload")
+
+    suites = suites_document["suites"]
+    if not isinstance(suites, dict) or not suites:
+        raise SystemExit("configs/suites.yaml suites must be a non-empty object")
+    # Workload names used by at least one suite; checked against the registry at the end.
+    referenced: set[str] = set()
+    for name, value in suites.items():
+        if not isinstance(name, str) or not IDENTIFIER.fullmatch(name):
+            raise SystemExit(f"invalid suite name {name!r}")
+        suite = _fields(value, f"suite {name}", SUITE_FIELDS, SUITE_REQUIRED)
+        # Every suite must exist in the frozen catalog and agree with it.
+        contract = V1_SUITE_CONTRACTS.get(name)
+        if contract is None:
+            raise SystemExit(f"suite {name} is outside the frozen v1 catalog")
+        mode = suite["mode"]
+        if mode not in identity.V1_CASE_PROFILES or mode != contract["mode"]:
+            raise SystemExit(f"suite {name}.mode differs from the frozen v1 catalog")
+        # An omitted `backends` key means every sweep backend.
+        suite_backends = _list(
+            suite.get("backends", list(cap.SWEEP_BACKENDS)),
+            f"suite {name}.backends",
+            str,
+            set(cap.SWEEP_BACKENDS),
+        )
+        expected_backends = contract.get("backends")
+        if expected_backends is not None and set(suite_backends) != expected_backends:
+            raise SystemExit(f"suite {name}.backends differs from the frozen v1 catalog")
+        if expected_backends is None and "backends" in suite:
+            raise SystemExit(f"suite {name}.backends must be omitted")
+        expected_profiles = contract.get("precision_profiles")
+        if expected_profiles is None:
+            # Baseline suite: no precision-related keys are allowed.
+            if "precision_profiles" in suite or "provisional" in suite:
+                raise SystemExit(
+                    f"suite {name} cannot add precision fields to a baseline suite"
+                )
+            precision_profiles: list[str] = []
+        else:
+            precision_profiles = _list(
+                suite.get("precision_profiles"),
+                f"suite {name}.precision_profiles",
+                str,
+                set(identity.V1_PRECISION_PROFILES),
+            )
+            # The tuple comparison makes the profile order significant.
+            if tuple(precision_profiles) != expected_profiles:
+                raise SystemExit(
+                    f"suite {name}.precision_profiles differs from the frozen v1 catalog"
+                )
+            if identity.V1_CONTROL_PRECISION_PROFILE in precision_profiles:
+                raise SystemExit(
+                    f"suite {name} must reference existing BF16 evidence, not duplicate it"
+                )
+            if any(
+                mode not in identity.V1_PRECISION_PROFILES[profile]["modes"]
+                for profile in precision_profiles
+            ):
+                raise SystemExit(f"suite {name} contains a precision profile for another mode")
+            if type(suite.get("provisional")) is not bool:
+                raise SystemExit(f"suite {name}.provisional must be a boolean")
+            # `provisional` must be true exactly when unresolved targets remain.
+            unresolved = cap.provisional_precision_targets(precision_profiles)
+            if suite["provisional"] != bool(unresolved):
+                raise SystemExit(
+                    f"suite {name}.provisional must track unresolved capability targets"
+                )
+            candidates = cap.precision_targets(precision_profiles)
+            # NOTE(review): suite["platforms"] and suite["ep_degrees"] are read
+            # here before the _list() checks further down validate them - confirm
+            # a malformed value cannot reach these membership tests.
+            covered_candidates = [
+                target for target in candidates
+                if target["backend"] in suite_backends
+                and target["sku"] in suite["platforms"]
+                and target["ep"] in suite["ep_degrees"]
+                and target["mode"] == mode
+            ]
+            if covered_candidates != candidates:
+                raise SystemExit(
+                    f"suite {name} does not cover every declared precision target"
+                )
+        suite_workloads = _list(suite["workloads"], f"suite {name}.workloads", str)
+        unknown = sorted(set(suite_workloads) - set(registry))
+        if unknown:
+            raise SystemExit(f"suite {name}: unknown workloads {unknown}")
+        referenced.update(suite_workloads)
+        platforms = _list(
+            suite["platforms"], f"suite {name}.platforms", str, set(cap.PLATFORMS)
+        )
+        phases = _list(suite["phases"], f"suite {name}.phases", str, {"decode", "prefill"})
+        routings = _list(suite["routings"], f"suite {name}.routings", str, {"uniform", "zipf"})
+        eplb = _list(suite.get("eplb", [False]), f"suite {name}.eplb", bool)
+        # EPLB may only be enabled when Zipf is the sole routing.
+        if True in eplb and routings != ["zipf"]:
+            raise SystemExit(f"suite {name}: EPLB is only valid for Zipf routing")
+        if suite["required_publication"] not in {"official", "comparable-experimental"}:
+            raise SystemExit(f"suite {name}.required_publication is invalid")
+        if suite["required_publication"] != contract["publication"]:
+            raise SystemExit(
+                f"suite {name}.required_publication differs from the frozen v1 catalog"
+            )
+        if suite["required_publication"] == "official":
+            unverified = [item for item in suite_workloads if not registry[item].get("verified_against")]
+            if unverified:
+                raise SystemExit(f"suite {name}: official workloads need verified_against: {unverified}")
+        degrees = _list(suite["ep_degrees"], f"suite {name}.ep_degrees", int)
+        if degrees != [8, 16]:
+            raise SystemExit(f"suite {name}.ep_degrees must be exactly [8, 16]")
+        for platform in platforms:
+            if not set(degrees).issubset(cap.PLATFORMS[platform]["ep_degrees"]):
+                raise SystemExit(f"suite {name}: invalid EP degree for {platform}")
+        # A per-phase ladder for a phase the suite does not run can never be used.
+        for phase in {"decode", "prefill"} - set(phases):
+            if f"token_points_{phase}" in suite:
+                raise SystemExit(f"suite {name}.token_points_{phase} is unreachable")
+        # The shared ladder is dead when every declared phase overrides it.
+        if "token_points" in suite and all(
+            f"token_points_{phase}" in suite for phase in phases
+        ):
+            raise SystemExit(f"suite {name}.token_points is unreachable")
+        # Called for validation only: _ladder exits on a malformed ladder.
+        for phase in phases:
+            _ladder(suite, phase)
+        # The declared cross product and ladders must equal the frozen contract.
+        coordinates = {
+            (mode, phase, routing, enabled)
+            for phase, routing, enabled in itertools.product(phases, routings, eplb)
+        }
+        if coordinates != contract["coordinates"] or any(
+            tuple(map(int, _ladder(suite, phase).split())) != contract["ladders"][phase]
+            for phase in phases
+        ):
+            raise SystemExit(f"suite {name} coordinates differ from the frozen v1 catalog")
+    # Every registered workload must be used by at least one suite.
+    unused = sorted(set(registry) - referenced)
+    if unused:
+        raise SystemExit(f"unreferenced workloads: {unused}")
+
+
+def _dims(workloads: dict[str, Any], name: str) -> tuple[int, int, int]:
+    """Return ``(hidden, topk, experts)`` for the workload called ``name``.
+
+    The expert count is read from ``experts`` and falls back to
+    ``routed_experts``. A field missing from the config comes back as ``None``.
+    """
+    entry = _workload_registry(workloads)[name]
+    hidden, topk = entry.get("hidden"), entry.get("topk")
+    experts = entry.get("experts", entry.get("routed_experts"))
+    return hidden, topk, experts  # type: ignore[return-value]
+
+
+def _ladder(suite: dict[str, Any], phase: str) -> str:
+    """Return the space-separated token ladder of ``suite`` for ``phase``.
+
+    Lookup order: ``token_points_<phase>``, then ``token_points``, then the
+    harness default (``DECODE_LADDER`` for ``"decode"``, ``PREFILL_LADDER``
+    otherwise).
+
+    Raises:
+        SystemExit: The ladder is not a non-empty, strictly ascending list of
+            positive integers (booleans are not accepted).
+    """
+    specific = f"token_points_{phase}"
+    points = suite[specific] if specific in suite else suite.get("token_points")
+    if points is None:
+        points = ep_harness.DECODE_LADDER if phase == "decode" else ep_harness.PREFILL_LADDER
+
+    def positive_int(point: Any) -> bool:
+        return not isinstance(point, bool) and isinstance(point, int) and point > 0
+
+    well_formed = (
+        isinstance(points, list)
+        and bool(points)
+        and all(positive_int(point) for point in points)
+        and points == sorted(set(points))
+    )
+    if not well_formed:
+        raise SystemExit(f"invalid {phase} token ladder: {points!r}")
+    return " ".join(str(point) for point in points)
+
+
+def _v1_requested_ladder(case: dict[str, Any]) -> str:
+    """Bind an extracted case to the frozen v1 catalog and return its ladder.
+
+    The case must name a catalogued suite, sit on one of that suite's
+    ``(mode, phase, routing, eplb)`` coordinates, carry the suite's
+    publication tier and the single v1 workload, and use a precision profile
+    only when the suite declares profiles (and then one of them).
+
+    Returns:
+        The suite's frozen ladder for the case's phase, space-separated.
+
+    Raises:
+        MatrixError: Any of the above does not hold.
+    """
+    contract = V1_SUITE_CONTRACTS.get(case.get("suite"))
+    profile = case.get("precision_profile")
+    matches = contract is not None
+    if matches:
+        allowed_profiles = contract.get("precision_profiles")
+        coordinate = (
+            case.get("mode"), case.get("phase"), case.get("routing"), case.get("eplb")
+        )
+        workload = (
+            case.get("workload"), case.get("hidden"), case.get("topk"), case.get("experts")
+        )
+        matches = (
+            coordinate in contract["coordinates"]
+            and case.get("required_publication") == contract["publication"]
+            and workload == V1_WORKLOAD
+            and (profile is None if allowed_profiles is None else profile in allowed_profiles)
+        )
+    if not matches:
+        raise MatrixError("case differs from the frozen v1 suite/workload catalog")
+    return " ".join(str(point) for point in contract["ladders"][case["phase"]])
+
+
+def _expected_disposition(
+    sku: str, case: dict[str, Any]
+) -> tuple[str, str | None, str | None]:
+    """Recompute the disposition a case must have on ``sku``.
+
+    Returns:
+        ``("runnable", None, None)`` for a supported case, or
+        ``("unsupported", reason, detail)`` where ``reason`` is
+        ``"precision-profile-unsupported"`` for precision cases and
+        ``"backend-platform-unsupported"`` otherwise.
+
+    Raises:
+        MatrixError: The case is outside the frozen catalog, its precision
+            profile is not a declared capability target, its ladder differs
+            from the frozen one, or its capability disposition is anything
+            other than supported/unsupported.
+    """
+    frozen_ladder = _v1_requested_ladder(case)
+    profile = case.get("precision_profile")
+    if profile is not None:
+        declared = cap.precision_target_declared(
+            profile,
+            sku=sku,
+            backend=case["backend"],
+            ep=case["ep"],
+            mode=case["mode"],
+        )
+        if not declared:
+            raise MatrixError("precision case is not an exact native capability target")
+    disposition, detail = cap.resolve_disposition(
+        sku, case["backend"], ep=case["ep"], nodes=case["nodes"],
+        routing=case["routing"], eplb=case["eplb"], mode=case["mode"],
+        precision_profile=profile,
+    )
+    ladder_differs = case["ladder"] != frozen_ladder
+    if disposition == "supported":
+        if ladder_differs:
+            raise MatrixError("case ladder differs from the frozen v1 suite catalog")
+        return "runnable", None, None
+    # The ladder is checked before the remaining dispositions are told apart.
+    if ladder_differs:
+        raise MatrixError("unsupported case ladder differs from the frozen v1 suite catalog")
+    if disposition == "unsupported":
+        if profile is not None:
+            reason = "precision-profile-unsupported"
+        else:
+            reason = "backend-platform-unsupported"
+        return "unsupported", reason, detail
+    if disposition == "provisional":
+        raise MatrixError("provisional precision target entered the executable matrix")
+    raise MatrixError("not-applicable precision tuple entered the requested matrix")
+
+
+def _case_id(sku: str, case: dict[str, Any]) -> str:
+    """Derive the identifier of ``case`` on ``sku`` through the identity module."""
+    profile = identity.profile_for_case(case)
+    return identity.case_id(sku=sku, profile=profile, case=case)
+
+
+def _semantic_points(sku: str, case: dict[str, Any]) -> list[str]:
+    """Return one canonical JSON signature per ladder point of ``case``.
+
+    Each signature holds ``sku``, the integer ``tokens_per_rank`` and every
+    case field except ``canonical``, ``case_id``, ``ladder``,
+    ``required_publication``, ``suite`` and ``workload``, serialized with
+    sorted keys and compact separators so equal points yield equal strings.
+    """
+    excluded = {"canonical", "case_id", "ladder", "required_publication", "suite", "workload"}
+    execution = {key: value for key, value in case.items() if key not in excluded}
+    signatures: list[str] = []
+    for point in case["ladder"].split():
+        payload: dict[str, Any] = {"sku": sku, "tokens_per_rank": int(point)}
+        payload.update(execution)
+        signatures.append(json.dumps(payload, sort_keys=True, separators=(",", ":")))
+    return signatures
+
+
+def _select_backends(backend: str, backends: str) -> list[str]:
+    """Resolve the ``--backend`` / ``--backends`` options into backend names.
+
+    Args:
+        backend: A single backend name, or ``""``.
+        backends: ``"all"``, a comma-separated list of names, or ``""``.
+
+    Returns:
+        The selected names: every sweep backend for ``"all"``, the listed
+        names for a comma-separated value, the single ``backend`` otherwise,
+        or ``["deepep"]`` when both options are empty.
+
+    Raises:
+        SystemExit: Both options are given, the selection is empty, or it
+            contains an unknown or repeated name.
+    """
+    available = list(cap.SWEEP_BACKENDS)
+    if backend and backends:
+        raise SystemExit("--backend and --backends are mutually exclusive")
+    if backends:
+        names = available if backends == "all" else [
+            value.strip() for value in backends.split(",") if value.strip()
+        ]
+    else:
+        names = [backend or "deepep"]
+    if not names:
+        # A value made only of separators/whitespace (e.g. ",") used to yield
+        # an empty selection and, silently, an empty matrix; reject it the
+        # same way an empty suite selection is rejected.
+        raise SystemExit("backend selection must be non-empty")
+    unknown = sorted(set(names) - set(available))
+    if unknown:
+        raise SystemExit(f"unknown backend values {unknown}; have {available}")
+    if len(names) != len(set(names)):
+        raise SystemExit("backend selection contains duplicates")
+    return names
+
+
+def resolve_matrix(
+    suites: str = "all",
+    backend: str = "",
+    backends: str = "",
+    only_sku: str = "",
+    min_nodes: int = 0,
+    max_nodes: int = 0,
+    max_cases: int = 128,
+) -> dict[str, Any]:
+    """Resolve suite configuration into allocation-sized workflow shards.
+
+    Args:
+        suites: ``"all"`` (every non-provisional suite) or a comma-separated
+            list of suite names.
+        backend: Single backend name; mutually exclusive with ``backends``.
+        backends: ``"all"`` or a comma-separated list of backend names.
+        only_sku: When non-empty, restrict the matrix to this platform.
+        min_nodes: Lower bound on topology nodes; ``0`` disables the bound.
+        max_nodes: Upper bound on topology nodes; ``0`` disables the bound.
+        max_cases: Maximum number of cases per shard.
+
+    Returns:
+        A ``collectivex.matrix.v1`` document. ``requested_cases`` lists every
+        runnable or unsupported case with its disposition; ``include`` lists
+        the shards, which contain runnable cases only.
+
+    Raises:
+        SystemExit: Invalid arguments, invalid configuration, a provisional
+            suite was selected, or two cases share a semantic point.
+    """
+    if max_cases <= 0:
+        raise SystemExit("--max-cases must be positive")
+    if min_nodes < 0 or max_nodes < 0 or (min_nodes and max_nodes and min_nodes > max_nodes):
+        raise SystemExit("invalid node bounds")
+    if only_sku and only_sku not in cap.PLATFORMS:
+        raise SystemExit(f"unknown --only-sku {only_sku!r}; have {sorted(cap.PLATFORMS)}")
+
+    workloads = _load("workloads.yaml")
+    suites_document = _load("suites.yaml")
+    validate_config_documents(suites_document, workloads)
+    registry = suites_document["suites"]
+    select_all = suites == "all"
+    # "all" silently leaves provisional suites out; naming one explicitly is
+    # rejected below.
+    names = (
+        [name for name, suite in registry.items() if not suite.get("provisional", False)]
+        if select_all
+        else [value.strip() for value in suites.split(",") if value.strip()]
+    )
+    if not names or len(names) != len(set(names)):
+        raise SystemExit("suite selection must be non-empty and unique")
+    unknown = sorted(set(names) - set(registry))
+    if unknown:
+        raise SystemExit(f"unknown suites {unknown}; have {sorted(registry)}")
+    blocked = [name for name in names if registry[name].get("provisional", False)]
+    if blocked:
+        unresolved = sum(
+            len(cap.provisional_precision_targets(registry[name]["precision_profiles"]))
+            for name in blocked
+        )
+        raise SystemExit(
+            f"provisional precision suites cannot be scheduled: {blocked}; "
+            f"resolve {unresolved} capability targets first"
+        )
+    targets = _select_backends(backend, backends)
+
+    # Runnable cases grouped by (sku, backend, nodes).
+    shards: dict[tuple[str, str, int], list[dict[str, Any]]] = {}
+    requested_cases: list[dict[str, Any]] = []
+    # Semantic point signatures seen so far, across all selected suites.
+    scheduled: set[str] = set()
+    for suite_name in names:
+        suite = registry[suite_name]
+        mode = suite["mode"]
+        phases = suite["phases"]
+        routings = suite["routings"]
+        eplb_values = suite.get("eplb", [False])
+        # [None] keeps the product below non-empty for baseline suites.
+        precision_profiles = suite.get("precision_profiles", [None])
+        suite_backends = set(suite.get("backends", cap.SWEEP_BACKENDS))
+        suite_targets = [target for target in targets if target in suite_backends]
+        if not suite_targets:
+            continue
+        for platform_name in suite["platforms"]:
+            if only_sku and platform_name != only_sku:
+                continue
+            ep_degrees = suite["ep_degrees"]
+            for workload, ep, phase, routing, eplb, target, precision_profile in itertools.product(
+                suite["workloads"], ep_degrees, phases, routings, eplb_values,
+                suite_targets, precision_profiles,
+            ):
+                # Precision tuples that are not declared targets are dropped
+                # without being recorded.
+                if precision_profile is not None and not cap.precision_target_declared(
+                    precision_profile,
+                    sku=platform_name,
+                    backend=target,
+                    ep=ep,
+                    mode=mode,
+                ):
+                    continue
+                topology = cap.topology_for(platform_name, ep)
+                if topology is None:
+                    raise SystemExit(
+                        f"suite {suite_name}: {platform_name} EP{ep} is not registered"
+                    )
+                nodes = int(topology["nodes"])
+                if min_nodes and nodes < min_nodes:
+                    continue
+                if max_nodes and nodes > max_nodes:
+                    continue
+                capability_disposition, capability_detail = cap.resolve_disposition(
+                    platform_name,
+                    target,
+                    ep=ep,
+                    nodes=nodes,
+                    routing=routing,
+                    eplb=bool(eplb),
+                    mode=mode,
+                    precision_profile=precision_profile,
+                )
+                hidden, topk, experts = _dims(workloads, workload)
+
+                # Closure over the current loop variables; it is only called
+                # within this same iteration.
+                def add_case(
+                    case_ladder: str,
+                    disposition: str,
+                    reason: str | None,
+                    detail: str | None,
+                ) -> None:
+                    case: dict[str, Any] = {
+                        "suite": suite_name,
+                        "workload": workload,
+                        "required_publication": suite["required_publication"],
+                        "backend": target,
+                        "routing": routing,
+                        "phase": phase,
+                        "ep": ep,
+                        "eplb": eplb,
+                        "hidden": hidden,
+                        "topk": topk,
+                        "experts": experts,
+                        "samples_per_point": ep_harness.TIMED_SAMPLES_PER_POINT,
+                        "warmup_semantics": ep_harness.WARMUP_SEMANTICS,
+                        "ladder": case_ladder,
+                        "mode": mode,
+                        "timing": EP_TIMING_PROFILE,
+                        "canonical": True,
+                        **{field: topology[field] for field in TOPOLOGY_FIELDS},
+                    }
+                    if precision_profile is not None:
+                        case["precision_profile"] = precision_profile
+                    # A semantic point may be scheduled only once.
+                    for signature in _semantic_points(platform_name, case):
+                        if signature in scheduled:
+                            raise SystemExit(
+                                f"suite {suite_name}: duplicate semantic point for {platform_name}"
+                            )
+                        scheduled.add(signature)
+                    # The id is attached after the signatures, which exclude it.
+                    case["case_id"] = _case_id(platform_name, case)
+                    requested_cases.append(
+                        {
+                            "sku": platform_name,
+                            "case": case,
+                            "disposition": disposition,
+                            "reason": reason,
+                            "detail": detail,
+                        }
+                    )
+                    # Only runnable cases are placed into a shard.
+                    if disposition == "runnable":
+                        shards.setdefault((platform_name, target, nodes), []).append(case)
+
+                requested_ladder = _ladder(suite, phase)
+                if capability_disposition == "not-applicable":
+                    continue
+                if capability_disposition == "provisional":
+                    raise SystemExit(
+                        f"suite {suite_name}: provisional target escaped its suite gate: "
+                        f"{precision_profile} {target} {platform_name} EP{ep}"
+                    )
+                if capability_disposition == "unsupported":
+                    add_case(
+                        requested_ladder,
+                        "unsupported",
+                        (
+                            "precision-profile-unsupported"
+                            if precision_profile is not None
+                            else "backend-platform-unsupported"
+                        ),
+                        capability_detail,
+                    )
+                    continue
+                if capability_disposition != "supported":
+                    raise SystemExit(
+                        f"suite {suite_name}: invalid capability disposition "
+                        f"{capability_disposition!r}"
+                    )
+                add_case(requested_ladder, "runnable", None, None)
+
+    # Split each group into chunks of at most max_cases cases; the "-p<part>"
+    # suffix is added only when a group needs more than one chunk.
+    shards_by_sku: dict[str, list[dict[str, Any]]] = {}
+    for (sku, target, nodes), cases in sorted(shards.items()):
+        chunk_size = max_cases
+        for offset in range(0, len(cases), chunk_size):
+            chunk = cases[offset:offset + chunk_size]
+            part = offset // chunk_size
+            shard_id = f"{sku}-{target}-n{nodes}"
+            if len(cases) > chunk_size:
+                shard_id += f"-p{part}"
+            shards_by_sku.setdefault(sku, []).append({
+                "id": shard_id,
+                "sku": sku,
+                "backend": target,
+                "launcher": cap.PLATFORMS[sku]["launcher"],
+                **{field: chunk[0][field] for field in TOPOLOGY_FIELDS},
+                "n": len(chunk),
+                # execution_weight is defined outside the part of the file shown here.
+                "execution_weight": execution_weight(chunk),
+                "case_ids": [case["case_id"] for case in chunk],
+            })
+    # Interleave shards round-robin across SKUs (sorted by name): first shard
+    # of every SKU, then the second of every SKU, and so on.
+    include = [
+        shards_by_sku[sku][round_index]
+        for round_index in range(max(map(len, shards_by_sku.values()), default=0))
+        for sku in sorted(shards_by_sku)
+        if round_index < len(shards_by_sku[sku])
+    ]
+    return {
+        "format": "collectivex.matrix.v1",
+        "schema_version": 1,
+        "requested_cases": requested_cases,
+        "include": include,
+    }
+
+
+def _strict_json_load(path: Path) -> Any:
+    """Load a matrix JSON file, rejecting non-finite numbers and duplicate keys.
+
+    Raises MatrixError when ``path`` is not a regular file, is empty, cannot be
+    read, is not UTF-8 text, or is not valid JSON under these stricter rules.
+    """
+
+    def reject_constant(value: str) -> None:
+        # json invokes parse_constant for the NaN / Infinity / -Infinity literals.
+        raise MatrixError(f"non-finite JSON number {value}")
+
+    def reject_duplicates(pairs: list[tuple[str, Any]]) -> dict[str, Any]:
+        # The stock decoder silently keeps the last duplicate key; fail instead.
+        result: dict[str, Any] = {}
+        for key, value in pairs:
+            if key in result:
+                raise MatrixError(f"duplicate JSON key {key!r}")
+            result[key] = value
+        return result
+
+    if not path.is_file():
+        raise MatrixError(f"matrix does not exist: {path}")
+    if path.stat().st_size == 0:
+        raise MatrixError(f"matrix is empty: {path}")
+    try:
+        # Decode explicitly as UTF-8 so the result does not depend on the
+        # runner's locale; undecodable bytes are reported as a MatrixError.
+        with path.open(encoding="utf-8") as fh:
+            return json.load(
+                fh, parse_constant=reject_constant, object_pairs_hook=reject_duplicates
+            )
+    except (OSError, UnicodeDecodeError, json.JSONDecodeError) as exc:
+        raise MatrixError(f"matrix is not valid JSON: {exc}") from exc
+
+
+def _positive_int(value: Any, field: str) -> int:
+    """Return ``value`` unchanged when it is a plain ``int`` greater than zero.
+
+    The exact-type test (rather than ``isinstance``) keeps ``bool`` and other
+    ``int`` subclasses out. ``field`` only labels the MatrixError message.
+    """
+    is_plain_int = type(value) is int
+    if not (is_plain_int and value > 0):
+        raise MatrixError(f"{field} must be a positive integer")
+    return value
+
+
+def _qualification_index(value: Any, field: str = "qualification_index") -> int:
+    """Return ``value`` when it is a plain ``int`` found in QUALIFICATION_INDICES.
+
+    Anything else raises MatrixError, with ``field`` naming the offending input.
+    """
+    if type(value) is int and value in QUALIFICATION_INDICES:
+        return value
+    raise MatrixError(f"{field} must be an integer in 1..3")
+
+
+def _requested_qualification_index(value: int | None = None) -> int:
+    """Resolve the qualification repeat from the argument or the environment.
+
+    An explicit ``value`` is checked by ``_qualification_index``. Without one,
+    ``CX_QUALIFICATION_INDEX`` is read (defaulting to ``"1"``) and must be
+    exactly ``"1"``, ``"2"`` or ``"3"``; any other text raises MatrixError.
+    """
+    if value is None:
+        text = os.environ.get("CX_QUALIFICATION_INDEX", "1")
+        if text in {"1", "2", "3"}:
+            return int(text)
+        raise MatrixError("CX_QUALIFICATION_INDEX must be an integer in 1..3")
+    return _qualification_index(value)
+
+
+def _case_precision_profile(case: dict[str, Any]) -> str:
+    """Return the case's precision profile, defaulting to the v1 control profile.
+
+    Raises MatrixError when the value is not a string listed in
+    ``identity.V1_PRECISION_PROFILES``.
+    """
+    fallback = identity.V1_CONTROL_PRECISION_PROFILE
+    chosen = case.get("precision_profile", fallback)
+    if isinstance(chosen, str) and chosen in identity.V1_PRECISION_PROFILES:
+        return chosen
+    raise MatrixError("qualification case has an invalid precision profile")
+
+
+def _qualification_digest(
+    shard_id: str,
+    case_id: str,
+    profile_id: str,
+    qualification_index: int,
+) -> bytes:
+    """Return the raw SHA-256 digest of the four inputs joined by NUL characters."""
+    fields = [shard_id, case_id, profile_id, str(qualification_index)]
+    material = "\0".join(fields).encode()
+    hasher = hashlib.sha256(material)
+    return hasher.digest()
+
+
+def _rotate(values: list[Any], offset: int) -> list[Any]:
+    """Return a new list rotated left by ``offset`` places, modulo its length.
+
+    An empty input yields a fresh empty list.
+    """
+    if values:
+        pivot = offset % len(values)
+        head, tail = values[:pivot], values[pivot:]
+        return tail + head
+    return []
+
+
+def _seeded_qualification_order(
+    shard_id: str,
+    cases: list[dict[str, Any]],
+    qualification_index: int,
+) -> list[dict[str, Any]]:
+    """Return a digest-seeded, profile-interleaved ordering of ``cases``.
+
+    Cases are grouped by precision profile. Profiles are ordered by a digest
+    that is independent of the repeat and then rotated by
+    ``qualification_index - 1``; cases inside each profile are ordered by a
+    digest that includes the repeat and rotated by the same amount. The result
+    takes one case per profile in turn until every group is exhausted.
+
+    An empty ``cases`` list yields an empty list.
+    """
+    if not cases:
+        # Without this guard max() over zero groups below raises ValueError.
+        return []
+    by_profile: dict[str, list[dict[str, Any]]] = {}
+    for case in cases:
+        by_profile.setdefault(_case_precision_profile(case), []).append(case)
+    # The profile order is seeded with a constant index of 1, so only the
+    # rotation below varies it between repeats.
+    profiles = sorted(
+        by_profile,
+        key=lambda profile: _qualification_digest(
+            shard_id, "profile-order", profile, 1
+        ),
+    )
+    profiles = _rotate(profiles, qualification_index - 1)
+    groups: dict[str, list[dict[str, Any]]] = {}
+    for profile in profiles:
+        group = sorted(
+            by_profile[profile],
+            key=lambda case: _qualification_digest(
+                shard_id, case["case_id"], profile, qualification_index
+            ),
+        )
+        groups[profile] = _rotate(group, qualification_index - 1)
+    # Round-robin across profiles: position 0 of each group, then position 1, ...
+    interleaved = [
+        groups[profile][position]
+        for position in range(max(map(len, groups.values())))
+        for profile in profiles
+        if position < len(groups[profile])
+    ]
+    return interleaved
+
+
+def qualification_execution_order(
+    shard_id: str,
+    cases: list[dict[str, Any]],
+    qualification_index: int,
+) -> list[dict[str, Any]]:
+    """Return one deterministic, repeat-specific permutation of a shard's cases.
+
+    The seeded order is recomputed for every repeat from 1 up to the requested
+    one. When a repeat reproduces an order already used by an earlier repeat
+    (and the shard has at least that many cases), the first rotation that has
+    not been used yet replaces it. The order of the requested repeat is returned.
+
+    Raises MatrixError for an invalid index, shard ID, or empty/non-list cases.
+    """
+    index = _qualification_index(qualification_index)
+    if not isinstance(shard_id, str) or not IDENTIFIER.fullmatch(shard_id):
+        raise MatrixError("qualification shard ID is invalid")
+    if not isinstance(cases, list) or not cases:
+        raise MatrixError("qualification planning requires at least one case")
+
+    def plan_signature(plan: list[dict[str, Any]]) -> tuple[str, ...]:
+        # An order is identified by its sequence of case IDs.
+        return tuple(case["case_id"] for case in plan)
+
+    emitted: set[tuple[str, ...]] = set()
+    chosen: list[dict[str, Any]] = []
+    for repeat in range(1, index + 1):
+        chosen = _seeded_qualification_order(shard_id, cases, repeat)
+        key = plan_signature(chosen)
+        if key in emitted and len(cases) >= repeat:
+            # Collision with an earlier repeat: take the first unused rotation.
+            for shift in range(1, len(chosen)):
+                alternative = _rotate(chosen, shift)
+                alternative_key = plan_signature(alternative)
+                if alternative_key not in emitted:
+                    chosen, key = alternative, alternative_key
+                    break
+        emitted.add(key)
+    return chosen
+
+
+def execution_plan_sha256(cases: list[dict[str, Any]]) -> str:
+    """Bind an execution plan to only its ordered case and precision-profile IDs.
+
+    The ``[case_id, precision_profile]`` pairs are serialized as compact ASCII
+    JSON in list order, and the hex SHA-256 of that text is returned.
+    """
+    pairs = []
+    for entry in cases:
+        pairs.append([entry["case_id"], _case_precision_profile(entry)])
+    encoded = json.dumps(pairs, ensure_ascii=True, separators=(",", ":")).encode()
+    return hashlib.sha256(encoded).hexdigest()
+
+
+def execution_weight(cases: list[dict[str, Any]]) -> int:
+    """Return deterministic GPU-point work used to bound workflow parallelism.
+
+    The weight is the sum over all cases of ``ep`` times the number of
+    whitespace-separated tokens in the case's ``ladder`` string. Raises
+    MatrixError for an empty/non-list input, a non-positive ``ep``, or a
+    ladder that is missing, not a string, or has no tokens.
+    """
+    if not (isinstance(cases, list) and cases):
+        raise MatrixError("execution weight requires at least one case")
+    total = 0
+    for entry in cases:
+        ranks = _positive_int(entry.get("ep"), "execution-weight.ep")
+        raw_ladder = entry.get("ladder")
+        tokens = raw_ladder.split() if isinstance(raw_ladder, str) else []
+        if not tokens:
+            raise MatrixError("execution weight requires a token ladder")
+        total += ranks * len(tokens)
+    return total
+
+
+def qualification_execution_plan_sha256(
+    matrix: dict[str, Any], qualification_index: int
+) -> str:
+    """Bind one qualification repeat to every shard's ordered case plan.
+
+    The matrix is validated, then each shard (taken in ``id`` order) contributes
+    its id, its execution weight and the plan hash of its cases in the order
+    chosen for this repeat. The hex SHA-256 of the compact, key-sorted ASCII
+    JSON of that summary is returned.
+    """
+    index = _qualification_index(qualification_index)
+    document = validate_matrix_document(matrix)
+    cases_by_id = {}
+    for wrapper in document["requested_cases"]:
+        cases_by_id[wrapper["case"]["case_id"]] = wrapper["case"]
+    shard_rows = []
+    for shard in sorted(document["include"], key=lambda item: item["id"]):
+        shard_cases = [cases_by_id[case_id] for case_id in shard["case_ids"]]
+        ordered = qualification_execution_order(shard["id"], shard_cases, index)
+        shard_rows.append(
+            [shard["id"], shard["execution_weight"], execution_plan_sha256(ordered)]
+        )
+    body = {"qualification_index": index, "shards": shard_rows}
+    encoded = json.dumps(
+        body, ensure_ascii=True, sort_keys=True, separators=(",", ":")
+    )
+    return hashlib.sha256(encoded.encode()).hexdigest()
+
+
+def validate_shard_control(
+    shard: dict[str, Any],
+    *,
+    sku: str,
+    backend: str,
+    nodes: int,
+    require_runnable: bool = True,
+    qualification_index: int | None = None,
+) -> None:
+    """Validate one shard against the workflow cell that requested it.
+
+    ``sku``, ``backend`` and ``nodes`` are the coordinates the caller expects;
+    the shard and each of its cases must agree with them. When
+    ``qualification_index`` is given, the shard's own index must equal it.
+    With ``require_runnable`` every case must resolve to the "runnable"
+    disposition; otherwise ``_v1_requested_ladder`` is called on the case.
+
+    Returns None on success and raises MatrixError on the first violation.
+    """
+    if not isinstance(shard, dict):
+        raise MatrixError("shard must be a JSON object")
+    if sku not in cap.PLATFORMS or backend not in cap.SWEEP_BACKENDS:
+        raise MatrixError("shard platform/backend is not registered")
+    # The shard must carry exactly these keys: no more and no fewer.
+    top_fields = {
+        "schema_version", "id", "sku", "backend", "nodes", "n", "cases",
+        "qualification_index", "execution_plan_sha256", "execution_weight",
+    }
+    if (
+        set(shard) != top_fields
+        or type(shard.get("schema_version")) is not int
+        or shard["schema_version"] != 1
+    ):
+        raise MatrixError("shard fields or schema version differ from v1 contract")
+    if not isinstance(shard.get("id"), str) or not IDENTIFIER.fullmatch(shard["id"]):
+        raise MatrixError("shard has invalid id")
+    observed_qualification = _qualification_index(
+        shard.get("qualification_index"), "shard.qualification_index"
+    )
+    if (
+        qualification_index is not None
+        and observed_qualification != _qualification_index(qualification_index)
+    ):
+        raise MatrixError("shard qualification_index differs from the requested repeat")
+    # Only the shape (64 lowercase hex digits) is checked here; the value is
+    # compared with the recomputed hash at the end of the function.
+    if (
+        not isinstance(shard.get("execution_plan_sha256"), str)
+        or re.fullmatch(r"[0-9a-f]{64}", shard["execution_plan_sha256"]) is None
+    ):
+        raise MatrixError("shard execution_plan_sha256 is invalid")
+    for field, expected in (("sku", sku), ("backend", backend)):
+        if shard.get(field) != expected:
+            raise MatrixError(
+                f"shard {field} mismatch: expected {expected!r}, got {shard.get(field)!r}"
+            )
+    if _positive_int(shard.get("nodes"), "shard.nodes") != nodes:
+        raise MatrixError(
+            f"shard nodes mismatch: expected {nodes}, got {shard.get('nodes')!r}"
+        )
+    cases = shard.get("cases")
+    if not isinstance(cases, list) or not cases:
+        raise MatrixError("shard must contain at least one case")
+    if _positive_int(shard.get("n"), "shard.n") != len(cases):
+        raise MatrixError("shard.n does not match the number of cases")
+    seen: set[str] = set()
+    # Keys every case must have; "precision_profile" is added per case below.
+    base_required = {
+        "case_id", "suite", "workload", "required_publication", "backend", "routing",
+        "mode", "phase", "ep", "eplb", "hidden", "topk", "experts",
+        "samples_per_point",
+        "warmup_semantics", "ladder", "timing", "canonical",
+    } | set(TOPOLOGY_FIELDS)
+    for index, case in enumerate(cases):
+        if not isinstance(case, dict):
+            raise MatrixError(f"case {index} must be a JSON object")
+        # A case needs "precision_profile" only when its suite's contract
+        # declares "precision_profiles".
+        suite_contract = V1_SUITE_CONTRACTS.get(case.get("suite"))
+        required = base_required | (
+            {"precision_profile"}
+            if suite_contract is not None and "precision_profiles" in suite_contract
+            else set()
+        )
+        fields = set(case)
+        if fields != required:
+            raise MatrixError(
+                f"case {index} fields differ from v1 contract: "
+                f"missing={sorted(required - fields)}, extra={sorted(fields - required)}"
+            )
+        case_id = case["case_id"]
+        if not identity.is_typed_id(case_id, "case"):
+            raise MatrixError(f"case {index} has invalid case_id")
+        if case_id in seen:
+            raise MatrixError(f"duplicate case_id {case_id}")
+        seen.add(case_id)
+        string_fields = [
+            "suite", "workload", "required_publication", "backend", "mode", "routing",
+            "phase", "warmup_semantics", "ladder", "timing",
+        ]
+        if "precision_profile" in required:
+            string_fields.append("precision_profile")
+        for field in string_fields:
+            if not isinstance(case[field], str) or not case[field]:
+                raise MatrixError(f"case {index}.{field} must be a non-empty string")
+        # A subset of the string fields must additionally match IDENTIFIER.
+        identifier_fields = [
+            "suite", "workload", "required_publication", "backend", "routing", "phase",
+        ]
+        if "precision_profile" in required:
+            identifier_fields.append("precision_profile")
+        for field in identifier_fields:
+            if not IDENTIFIER.fullmatch(case[field]):
+                raise MatrixError(f"case {index}.{field} is not a safe identifier")
+        if case["required_publication"] not in {"official", "comparable-experimental"}:
+            raise MatrixError(f"case {index} has invalid publication requirement")
+        # The stored case_id must equal the ID recomputed from every other field.
+        case_identity = {key: value for key, value in case.items() if key != "case_id"}
+        if case_id != _case_id(sku, case_identity):
+            raise MatrixError(f"case {index} case_id does not match its contents")
+        if case["backend"] != backend:
+            raise MatrixError(f"case {index} backend does not match shard")
+        if case["mode"] not in identity.V1_CASE_PROFILES:
+            raise MatrixError(f"case {index} mode is invalid")
+        if _positive_int(case["nodes"], f"case {index}.nodes") != nodes:
+            raise MatrixError(f"case {index} nodes does not match shard")
+        ep = _positive_int(case["ep"], f"case {index}.ep")
+        gpus_per_node = _positive_int(
+            case["gpus_per_node"], f"case {index}.gpus_per_node"
+        )
+        # Every topology field must equal what the registry reports for (sku, ep).
+        topology = cap.topology_for(sku, ep)
+        if topology is None or any(case[field] != topology[field] for field in TOPOLOGY_FIELDS):
+            raise MatrixError(f"case {index} differs from the platform registry")
+        if ep != nodes * gpus_per_node:
+            raise MatrixError(f"case {index} ep does not equal nodes * gpus_per_node")
+        if case["samples_per_point"] != ep_harness.TIMED_SAMPLES_PER_POINT:
+            raise MatrixError(f"case {index} violates fixed-512-v1")
+        if case["timing"] != EP_TIMING_PROFILE:
+            raise MatrixError(f"case {index} has invalid timing profile")
+        if case["warmup_semantics"] != ep_harness.WARMUP_SEMANTICS:
+            raise MatrixError(f"case {index} has invalid warmup semantics")
+        if case["phase"] not in {"decode", "prefill"}:
+            raise MatrixError(f"case {index} has invalid phase")
+        if case["routing"] not in {"uniform", "zipf"}:
+            raise MatrixError(f"case {index} has invalid routing")
+        # EPLB may only be enabled together with zipf routing.
+        if not isinstance(case["eplb"], bool) or (case["eplb"] and case["routing"] != "zipf"):
+            raise MatrixError(f"case {index} has invalid EPLB setting")
+        if not isinstance(case["canonical"], bool) or not case["canonical"]:
+            raise MatrixError(f"case {index} must use a canonical workload")
+        for field in ("ep", "nodes", "gpus_per_node", "hidden", "topk", "experts",
+                      "samples_per_point", "scale_up_domain"):
+            if isinstance(case[field], bool) or not isinstance(case[field], int):
+                raise MatrixError(f"case {index}.{field} must be an integer")
+            _positive_int(case[field], f"case {index}.{field}")
+        scale_up_domain = _positive_int(
+            case["scale_up_domain"], f"case {index}.scale_up_domain"
+        )
+        # Scope is "scale-up" while ep fits in one scale-up domain; a scale-out
+        # case must span a whole number of domains.
+        expected_scope = "scale-up" if ep <= scale_up_domain else "scale-out"
+        if case["scope"] != expected_scope or (
+            expected_scope == "scale-out" and ep % scale_up_domain
+        ):
+            raise MatrixError(f"case {index} has invalid scale-up/scale-out geometry")
+        try:
+            ladder = [int(value) for value in case["ladder"].split()]
+        except (AttributeError, ValueError) as exc:
+            raise MatrixError(f"case {index} has invalid token ladder") from exc
+        # The ladder must be strictly increasing positive integers, written
+        # exactly as single-space-separated decimal numbers.
+        if (not ladder or any(value <= 0 for value in ladder)
+                or ladder != sorted(set(ladder))
+                or case["ladder"] != " ".join(map(str, ladder))):
+            raise MatrixError(f"case {index} has invalid token ladder")
+        if require_runnable:
+            disposition, reason, _ = _expected_disposition(sku, case)
+            if disposition != "runnable":
+                raise MatrixError(f"case {index} violates capability registry: {reason}")
+        else:
+            # NOTE(review): presumably raises when the ladder is not the one
+            # the v1 catalog requests for this case; confirm in the helper.
+            _v1_requested_ladder(case)
+    # Shard-level values derived from the cases are recomputed and compared.
+    if _positive_int(
+        shard.get("execution_weight"), "shard.execution_weight"
+    ) != execution_weight(cases):
+        raise MatrixError("shard execution_weight differs from its cases")
+    # The cases must already be listed in the order planned for this repeat.
+    expected_order = qualification_execution_order(
+        shard["id"], cases, observed_qualification
+    )
+    if [case["case_id"] for case in cases] != [
+        case["case_id"] for case in expected_order
+    ]:
+        raise MatrixError("shard cases differ from the qualification execution order")
+    if shard["execution_plan_sha256"] != execution_plan_sha256(cases):
+        raise MatrixError("shard execution_plan_sha256 differs from its ordered cases")
+
+
+def validate_matrix_document(document: Any) -> dict[str, Any]:
+    """Validate the complete requested grid and its runnable shard partition.
+
+    Every requested case is checked on its own through
+    ``validate_shard_control`` (wrapped as a one-case shard) and against
+    ``_expected_disposition``. Every ``include`` shard must reference only
+    runnable requested cases that share its coordinates, and each runnable
+    case must be assigned to exactly one shard.
+
+    Returns ``document`` unchanged on success. Raises MatrixError on any
+    contract violation, including wrongly typed ``sku``, ``disposition``,
+    ``case_ids`` entries or ``execution_weight`` values.
+    """
+    if not isinstance(document, dict) or set(document) != {
+        "format", "schema_version", "requested_cases", "include"
+    }:
+        raise MatrixError("matrix fields differ from the v1 contract")
+    if (
+        document["format"] != "collectivex.matrix.v1"
+        or type(document["schema_version"]) is not int
+        or document["schema_version"] != 1
+    ):
+        raise MatrixError("matrix format/schema differs from v1")
+    requested = document["requested_cases"]
+    include = document["include"]
+    if not isinstance(requested, list) or not requested:
+        raise MatrixError("matrix.requested_cases must be non-empty")
+    if not isinstance(include, list):
+        raise MatrixError("matrix.include must be an array")
+
+    cases_by_id: dict[str, dict[str, Any]] = {}
+    runnable_ids: set[str] = set()
+    semantic_points: set[str] = set()
+    for index, value in enumerate(requested):
+        path = f"matrix.requested_cases[{index}]"
+        if not isinstance(value, dict) or set(value) != {
+            "sku", "case", "disposition", "reason", "detail"
+        }:
+            raise MatrixError(f"{path} fields differ from the v1 contract")
+        sku = value["sku"]
+        case = value["case"]
+        disposition = value["disposition"]
+        # The str checks keep unhashable JSON values (arrays, objects) from
+        # surfacing as TypeError in the membership tests.
+        if not isinstance(sku, str) or sku not in cap.PLATFORMS:
+            raise MatrixError(f"{path}.sku is unknown")
+        if not isinstance(disposition, str) or disposition not in {
+            "runnable", "unsupported"
+        }:
+            raise MatrixError(f"{path}.disposition is invalid")
+        if disposition == "runnable":
+            if value["reason"] is not None or value["detail"] is not None:
+                raise MatrixError(f"{path} runnable cases cannot have a reason")
+        else:
+            if (
+                not isinstance(value["reason"], str)
+                or not IDENTIFIER.fullmatch(value["reason"])
+                or not isinstance(value["detail"], str)
+                or not value["detail"]
+            ):
+                raise MatrixError(f"{path} unsupported cases need a public reason and detail")
+        if not isinstance(case, dict):
+            raise MatrixError(f"{path}.case must be an object")
+        backend = case.get("backend")
+        nodes = case.get("nodes")
+        if not isinstance(backend, str) or type(nodes) is not int:
+            raise MatrixError(f"{path}.case backend/nodes are invalid")
+        # Reuse the shard validator by wrapping the case as a one-case shard
+        # at qualification index 1.
+        requested_case_plan = [case]
+        validate_shard_control(
+            {
+                "schema_version": 1,
+                "id": "requested-case",
+                "sku": sku,
+                "backend": backend,
+                "nodes": nodes,
+                "n": 1,
+                "execution_weight": execution_weight(requested_case_plan),
+                "qualification_index": 1,
+                "execution_plan_sha256": execution_plan_sha256(
+                    requested_case_plan
+                ),
+                "cases": requested_case_plan,
+            },
+            sku=sku,
+            backend=backend,
+            nodes=nodes,
+            require_runnable=disposition == "runnable",
+        )
+        case_id = case["case_id"]
+        if case_id in cases_by_id:
+            raise MatrixError(f"duplicate requested case_id {case_id}")
+        for signature in _semantic_points(sku, case):
+            if signature in semantic_points:
+                raise MatrixError(f"{path} duplicates a semantic token point")
+            semantic_points.add(signature)
+        cases_by_id[case_id] = value
+        expected = _expected_disposition(sku, case)
+        if (disposition, value["reason"], value["detail"]) != expected:
+            raise MatrixError(f"{path} disposition differs from the frozen v1 catalog")
+        if disposition == "runnable":
+            runnable_ids.add(case_id)
+
+    shard_ids: set[str] = set()
+    assigned: list[str] = []
+    for index, shard in enumerate(include):
+        path = f"matrix.include[{index}]"
+        expected = {
+            "id", "sku", "backend", "launcher", "n", "execution_weight", "case_ids",
+        } | set(TOPOLOGY_FIELDS)
+        if not isinstance(shard, dict) or set(shard) != expected:
+            raise MatrixError(f"{path} fields differ from the v1 contract")
+        shard_id = shard["id"]
+        if not isinstance(shard_id, str) or not IDENTIFIER.fullmatch(shard_id):
+            raise MatrixError(f"{path}.id is invalid")
+        if shard_id in shard_ids:
+            raise MatrixError(f"duplicate shard id {shard_id}")
+        shard_ids.add(shard_id)
+        sku = shard["sku"]
+        if not isinstance(sku, str) or sku not in cap.PLATFORMS:
+            raise MatrixError(f"{path}.sku is unknown")
+        platform = cap.PLATFORMS[sku]
+        if shard["launcher"] != platform["launcher"]:
+            raise MatrixError(f"{path}.launcher differs from the platform registry")
+        case_ids = shard["case_ids"]
+        # Entries must be strings before set() is built, so a nested array or
+        # object is reported as a MatrixError rather than a TypeError.
+        if (
+            not isinstance(case_ids, list)
+            or not case_ids
+            or not all(isinstance(case_id, str) for case_id in case_ids)
+            or len(case_ids) != len(set(case_ids))
+        ):
+            raise MatrixError(f"{path}.case_ids must be a non-empty unique array")
+        if _positive_int(shard["n"], f"{path}.n") != len(case_ids):
+            raise MatrixError(f"{path}.n differs from case_ids")
+        nodes = _positive_int(shard["nodes"], f"{path}.nodes")
+        for case_id in case_ids:
+            wrapper = cases_by_id.get(case_id)
+            if wrapper is None or wrapper["disposition"] != "runnable":
+                raise MatrixError(f"{path} references a missing or unsupported case")
+            case = wrapper["case"]
+            if (
+                wrapper["sku"] != sku
+                or case["backend"] != shard["backend"]
+                or case["nodes"] != nodes
+                or any(shard[field] != case[field] for field in TOPOLOGY_FIELDS)
+            ):
+                raise MatrixError(f"{path} case does not match shard coordinates")
+            assigned.append(case_id)
+        # Require a plain positive int: a bare != would let true or 1.0 stand
+        # in for 1 and then serialize differently in downstream plan hashes.
+        if _positive_int(
+            shard["execution_weight"], f"{path}.execution_weight"
+        ) != execution_weight(
+            [cases_by_id[case_id]["case"] for case_id in case_ids]
+        ):
+            raise MatrixError(f"{path}.execution_weight differs from its cases")
+    if len(assigned) != len(set(assigned)):
+        raise MatrixError("a runnable case is assigned to more than one shard")
+    if set(assigned) != runnable_ids:
+        raise MatrixError("runnable requested cases and shard assignments differ")
+    return document
+
+
+def extract_shard(
+    matrix_path: str | os.PathLike[str],
+    shard_id: str,
+    output_path: str | os.PathLike[str],
+    *,
+    sku: str,
+    backend: str,
+    nodes: int,
+    qualification_index: int | None = None,
+) -> dict[str, Any]:
+    """Extract one strictly matched shard control file, writing it atomically.
+
+    The matrix at ``matrix_path`` is loaded strictly and validated, the shard
+    named ``shard_id`` is looked up, and its cases are put in the execution
+    order for the requested qualification repeat (taken from
+    ``qualification_index`` or, when None, from ``CX_QUALIFICATION_INDEX``).
+    The resulting control document is validated against ``sku``, ``backend``
+    and ``nodes`` before being written to ``output_path``.
+
+    Returns the control document. Raises MatrixError when the matrix or the
+    control document is invalid or the shard is not found exactly once.
+    """
+    qualification = _requested_qualification_index(qualification_index)
+    document = validate_matrix_document(_strict_json_load(Path(matrix_path)))
+    include = document["include"]
+    matches = [item for item in include if isinstance(item, dict) and item.get("id") == shard_id]
+    if len(matches) != 1:
+        raise MatrixError(f"expected exactly one shard {shard_id!r}, found {len(matches)}")
+    source = matches[0]
+    requested = {
+        item["case"]["case_id"]: item
+        for item in document["requested_cases"]
+    }
+    cases = qualification_execution_order(
+        source["id"],
+        [requested[case_id]["case"] for case_id in source["case_ids"]],
+        qualification,
+    )
+    control = {
+        "schema_version": 1,
+        "id": source.get("id"),
+        "sku": source.get("sku"),
+        "backend": source.get("backend"),
+        "nodes": source.get("nodes"),
+        "n": source.get("n"),
+        "execution_weight": source.get("execution_weight"),
+        "qualification_index": qualification,
+        "execution_plan_sha256": execution_plan_sha256(cases),
+        "cases": cases,
+    }
+    validate_shard_control(
+        control,
+        sku=sku,
+        backend=backend,
+        nodes=nodes,
+        qualification_index=qualification,
+    )
+    output = Path(output_path)
+    output.parent.mkdir(parents=True, exist_ok=True)
+    # Write to a sibling temporary file, then rename it over the target.
+    temporary = output.with_name(f".{output.name}.tmp-{os.getpid()}")
+    try:
+        with temporary.open("w") as fh:
+            json.dump(control, fh, allow_nan=False, sort_keys=True, separators=(",", ":"))
+            fh.write("\n")
+            # Push the bytes to stable storage before the rename so a crash
+            # cannot leave a truncated control file under the final name.
+            fh.flush()
+            os.fsync(fh.fileno())
+        os.replace(temporary, output)
+    finally:
+        # A no-op after a successful replace; removes the leftover on failure.
+        temporary.unlink(missing_ok=True)
+    return control
+
+
+def emit_unsupported(
+    matrix_path: str | os.PathLike[str], output_dir: str | os.PathLike[str]
+) -> list[Path]:
+    """Materialize one strict terminal outcome for each unsupported requested case.
+
+    The matrix is loaded strictly and validated. For every requested case whose
+    disposition is "unsupported", a terminal document is built with
+    ``contracts.make_terminal_document`` and written to
+    ``output_dir/unsupported_<case_id>.json`` through a temporary file that is
+    fsynced and then renamed into place. Run metadata comes from the
+    ``GITHUB_*``, ``COLLECTIVEX_*`` and ``CX_QUALIFICATION_INDEX`` environment
+    variables.
+
+    Returns the written paths in requested-case order. Raises MatrixError for an
+    invalid matrix, an invalid ``CX_QUALIFICATION_INDEX``, or a case whose
+    recomputed identity differs from its stored ``case_id``.
+    """
+    source = Path(matrix_path)
+    document = validate_matrix_document(_strict_json_load(source))
+    control_sha256 = hashlib.sha256(source.read_bytes()).hexdigest()
+    generated_at = dt.datetime.now(dt.timezone.utc).isoformat()
+    # Use the shared strict parser so this entry point accepts exactly the
+    # same CX_QUALIFICATION_INDEX values ("1", "2", "3") as extract_shard;
+    # int() alone would also accept forms such as " 2", "+2" or "02".
+    qualification_index = _requested_qualification_index()
+    git_run = {
+        "run_id": os.environ.get("GITHUB_RUN_ID"),
+        "run_attempt": os.environ.get("GITHUB_RUN_ATTEMPT"),
+        "qualification_index": qualification_index,
+        "ref": os.environ.get("GITHUB_REF_NAME") or os.environ.get("GITHUB_REF"),
+        "source_sha": os.environ.get("COLLECTIVEX_SOURCE_SHA") or os.environ.get("GITHUB_SHA"),
+        "repo": os.environ.get("GITHUB_REPOSITORY"),
+        "job": os.environ.get("GITHUB_JOB"),
+        "artifact": os.environ.get("COLLECTIVEX_ARTIFACT_NAME"),
+    }
+    allocation_factors = {
+        "artifact": git_run["artifact"],
+        "execution_id": os.environ.get("COLLECTIVEX_EXECUTION_ID"),
+        "job": git_run["job"],
+        "qualification_index": qualification_index,
+        "repo": git_run["repo"],
+        "run_attempt": git_run["run_attempt"],
+        "run_id": git_run["run_id"],
+        "runner": "capability-resolver",
+        "source_sha": git_run["source_sha"],
+    }
+    destination = Path(output_dir)
+    destination.mkdir(parents=True, exist_ok=True)
+    written: list[Path] = []
+    for wrapper in document["requested_cases"]:
+        if wrapper["disposition"] != "unsupported":
+            continue
+        scheduled = wrapper["case"]
+        # The identity digest covers the case without its own case_id field.
+        case = {key: value for key, value in scheduled.items() if key != "case_id"}
+        case_factors = {
+            "case": case,
+            "profile": identity.profile_for_case(case),
+            "sku": wrapper["sku"],
+        }
+        case_id = identity.digest("case", case_factors)
+        if case_id != scheduled["case_id"]:
+            raise MatrixError(f"unsupported case identity differs for {scheduled['case_id']}")
+        attempt_ordinal = 1
+        record = contracts.make_terminal_document(
+            allocation_factors=allocation_factors,
+            attempt_ordinal=attempt_ordinal,
+            case=case,
+            case_factors=case_factors,
+            control_sha256=control_sha256,
+            failure_mode="capability",
+            generated_at=generated_at,
+            git_run=git_run,
+            reason=wrapper["reason"],
+            return_code=5,
+            source="matrix-capability-resolver",
+            status="unsupported",
+            expected_case_id=case_id,
+        )
+        path = destination / f"unsupported_{case_id}.json"
+        temporary = path.with_name(f".{path.name}.tmp-{os.getpid()}")
+        try:
+            # Mode "x" refuses to reuse an already existing temporary file.
+            with temporary.open("x") as handle:
+                json.dump(record, handle, allow_nan=False, sort_keys=True, separators=(",", ":"))
+                handle.write("\n")
+                handle.flush()
+                os.fsync(handle.fileno())
+            os.replace(temporary, path)
+        finally:
+            temporary.unlink(missing_ok=True)
+        written.append(path)
+    return written
+
+
+def frontend_catalog(matrix: dict[str, Any]) -> dict[str, Any]:
+    """Project the validated requested graph into a compact frontend test fixture.
+
+    Each requested case becomes one entry holding its identifying fields, its
+    topology and one point per ladder token (tokens per rank and that value
+    times ``ep``). The result also records the case and point counts and the
+    SHA-256 of the canonical matrix JSON followed by a newline.
+    """
+    document = validate_matrix_document(matrix)
+    matrix_bytes = contracts.canonical_json_bytes(document) + b"\n"
+
+    def project(wrapper: dict[str, Any]) -> dict[str, Any]:
+        # Flatten one requested-case wrapper into its catalog entry.
+        case = wrapper["case"]
+        profile = case.get(
+            "precision_profile", identity.V1_CONTROL_PRECISION_PROFILE
+        )
+        topology = {"ep_size": case["ep"]}
+        for field in TOPOLOGY_FIELDS:
+            topology[field] = case[field]
+        return {
+            "backend": case["backend"],
+            "case_id": case["case_id"],
+            "disposition": wrapper["disposition"],
+            "eplb": case["eplb"],
+            "mode": case["mode"],
+            "phase": case["phase"],
+            "precision_profile": profile,
+            "publication_tier": case["required_publication"],
+            "reason": wrapper["reason"],
+            "routing": case["routing"],
+            "sku": wrapper["sku"],
+            "suite": case["suite"],
+            "topology": topology,
+            "workload": case["workload"],
+            "points": [
+                {
+                    "global_tokens": int(token) * case["ep"],
+                    "tokens_per_rank": int(token),
+                }
+                for token in case["ladder"].split()
+            ],
+        }
+
+    entries = [project(wrapper) for wrapper in document["requested_cases"]]
+    total_points = 0
+    for entry in entries:
+        total_points += len(entry["points"])
+    return {
+        "case_count": len(entries),
+        "format": "collectivex.frontend-catalog.v1",
+        "matrix_sha256": hashlib.sha256(matrix_bytes).hexdigest(),
+        "point_count": total_points,
+        "schema_version": 1,
+        "cases": entries,
+    }
+
+
+def main() -> int:
+    """Command-line entry point for the CollectiveX v1 matrix resolver.
+
+    Exactly one operation runs per invocation, selected in this order:
+    ``--emit-unsupported-from``, ``--validate-control``, ``--extract-from``. When none
+    of them is given, the matrix is resolved from the filter options and printed to
+    stdout (as the frontend catalog instead when ``--frontend-catalog`` is set).
+
+    Returns:
+        0 on success. Missing option combinations and ``MatrixError`` failures are
+        reported through ``parser.error``, which prints the message and exits.
+    """
+    parser = argparse.ArgumentParser(description="CollectiveX v1 matrix resolver")
+    parser.add_argument("--suites", default="all", help="'all' or comma-list of suites")
+    parser.add_argument("--backend", default="", help="select one EP backend")
+    parser.add_argument("--backends", default="", help="'all' or comma-list of EP backends")
+    parser.add_argument("--only-sku", default="")
+    parser.add_argument("--min-nodes", type=int, default=0)
+    parser.add_argument("--max-nodes", type=int, default=0)
+    parser.add_argument("--max-cases", type=int, default=128)
+    parser.add_argument("--extract-from", default="", metavar="MATRIX")
+    parser.add_argument("--validate-control", default="", metavar="SHARD")
+    parser.add_argument("--emit-unsupported-from", default="", metavar="MATRIX")
+    parser.add_argument("--out-dir", default="")
+    parser.add_argument("--frontend-catalog", action="store_true")
+    parser.add_argument("--shard-id", default="")
+    parser.add_argument("--expect-sku", default="")
+    parser.add_argument("--expect-backend", default="")
+    parser.add_argument("--expect-nodes", type=int, default=0)
+    parser.add_argument("--qualification-index", type=int, default=None)
+    parser.add_argument("--out", default="")
+    args = parser.parse_args()
+
+    # Operation 1: write terminal outcomes for the unsupported cases of a matrix.
+    if args.emit_unsupported_from:
+        if not args.out_dir:
+            parser.error("unsupported outcome emission requires --out-dir")
+        try:
+            written = emit_unsupported(args.emit_unsupported_from, args.out_dir)
+        except MatrixError as exc:
+            parser.error(str(exc))
+        print(f"emitted {len(written)} unsupported terminal outcomes", file=sys.stderr)
+        return 0
+
+    # Operation 2: validate an already-extracted shard control document.
+    if args.validate_control:
+        # --expect-nodes defaults to 0, so a zero node count is treated as "not given".
+        if not all((args.expect_sku, args.expect_backend, args.expect_nodes)):
+            parser.error(
+                "control validation requires --expect-sku, --expect-backend, and --expect-nodes"
+            )
+        try:
+            control = _strict_json_load(Path(args.validate_control))
+            qualification = _requested_qualification_index(
+                args.qualification_index
+            )
+            validate_shard_control(
+                control,
+                sku=args.expect_sku,
+                backend=args.expect_backend,
+                nodes=args.expect_nodes,
+                qualification_index=qualification,
+            )
+        except MatrixError as exc:
+            parser.error(str(exc))
+        print(f"validated {control.get('id')}: {control['n']} cases", file=sys.stderr)
+        return 0
+
+    # Operation 3: extract one shard from a matrix and print its control document.
+    if args.extract_from:
+        if not all((args.shard_id, args.expect_sku, args.expect_backend, args.expect_nodes, args.out)):
+            parser.error(
+                "shard extraction requires --shard-id, --expect-sku, --expect-backend, "
+                "--expect-nodes, and --out"
+            )
+        try:
+            control = extract_shard(
+                args.extract_from,
+                args.shard_id,
+                args.out,
+                sku=args.expect_sku,
+                backend=args.expect_backend,
+                nodes=args.expect_nodes,
+                qualification_index=args.qualification_index,
+            )
+        except MatrixError as exc:
+            parser.error(str(exc))
+        print(f"extracted {control['id']}: {control['n']} cases", file=sys.stderr)
+        print(json.dumps(control, separators=(",", ":")))
+        return 0
+
+    # Default operation: resolve the matrix. Resolution sits inside the same guard as
+    # validation so that a MatrixError raised by either step is reported as a usage
+    # error, matching the other operations, rather than escaping as a traceback.
+    try:
+        matrix = resolve_matrix(
+            suites=args.suites,
+            backend=args.backend,
+            backends=args.backends,
+            only_sku=args.only_sku,
+            min_nodes=args.min_nodes,
+            max_nodes=args.max_nodes,
+            max_cases=args.max_cases,
+        )
+        validate_matrix_document(matrix)
+    except MatrixError as exc:
+        parser.error(str(exc))
+    output_document = frontend_catalog(matrix) if args.frontend_catalog else matrix
+    if args.out:
+        # Explicit encoding keeps the written file independent of the locale.
+        with open(args.out, "w", encoding="utf-8") as fh:
+            json.dump(output_document, fh, sort_keys=True, separators=(",", ":"))
+            fh.write("\n")
+    runnable = sum(
+        item["disposition"] == "runnable" for item in matrix["requested_cases"]
+    )
+    unsupported = len(matrix["requested_cases"]) - runnable
+    # The summary goes to stderr so stdout carries only the JSON document.
+    print(
+        f"resolved {len(matrix['include'])} shard-cells, "
+        f"{runnable} runnable and {unsupported} unsupported cases",
+        file=sys.stderr,
+    )
+    print(json.dumps(output_document))
+    return 0
+
+
+# Script entry point: the integer returned by main() becomes the process exit status.
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/experimental/CollectiveX/tests/ep_deepep.py b/experimental/CollectiveX/tests/ep_deepep.py
new file mode 100644
index 000000000..5ca5f47ed
--- /dev/null
+++ b/experimental/CollectiveX/tests/ep_deepep.py
@@ -0,0 +1,424 @@
+#!/usr/bin/env python3
+"""CollectiveX DeepEP adapter for native V1 dispatch/combine precision profiles."""
+from __future__ import annotations
+
+import inspect
+import os
+import sys
+import types
+
+import torch
+import torch.distributed as dist
+import contracts
+import ep_precision
+
+try:
+    import deep_ep
+    from deep_ep import Buffer  # type: ignore
+except Exception as exc:  # pragma: no cover - requires the benchmark image
+    print(f"ERROR: deep_ep import failed: {exc!r}", file=sys.stderr)
+    raise
+
+
+def _deepep_version() -> str:
+    """Return the DeepEP version string recorded in provenance.
+
+    The installed distribution metadata for ``deep_ep`` is preferred. If that lookup
+    fails for any reason, the module's ``__version__`` attribute is used, and
+    ``"unknown"`` when the attribute is absent too.
+    """
+    try:
+        from importlib import metadata
+
+        installed = metadata.version("deep_ep")
+    except Exception:
+        installed = getattr(deep_ep, "__version__", "unknown")
+    return installed
+
+
+def _mnnvl_buffer_configuration() -> tuple[dict[str, bool], str]:
+    """Resolve the MNNVL settings for the DeepEP ``Buffer`` from the environment.
+
+    ``CX_ALLOW_MNNVL`` must be unset, empty, ``0`` or ``1``; only ``1`` requests MNNVL.
+    The decision itself is delegated to ``contracts.resolve_deepep_mnnvl`` together
+    with the ``DEEPEP_COMMIT`` environment value. When MNNVL is requested, the
+    parameters of ``Buffer.__init__`` are inspected and passed along as well.
+
+    Raises:
+        RuntimeError: the variable holds another value, the constructor signature
+            cannot be inspected, or the contract rejects the MNNVL request.
+    """
+    raw_flag = os.environ.get("CX_ALLOW_MNNVL")
+    if raw_flag not in {None, "", "0", "1"}:
+        raise RuntimeError("CX_ALLOW_MNNVL must be unset, 0, or 1")
+    pinned_commit = os.environ.get("DEEPEP_COMMIT")
+
+    if raw_flag != "1":
+        # MNNVL not requested: no signature inspection is needed.
+        return contracts.resolve_deepep_mnnvl(
+            requested=False, signature_parameters=(),
+            deepep_commit=pinned_commit,
+        )
+
+    try:
+        constructor_parameters = inspect.signature(Buffer.__init__).parameters
+    except (TypeError, ValueError) as exc:
+        raise RuntimeError("cannot inspect DeepEP Buffer MNNVL API") from exc
+
+    try:
+        resolved = contracts.resolve_deepep_mnnvl(
+            requested=True, signature_parameters=constructor_parameters,
+            deepep_commit=pinned_commit,
+        )
+    except contracts.ContractError as exc:
+        raise RuntimeError(str(exc)) from exc
+    return resolved
+
+
+def _normal_buffer_sizes(hidden: int, world_size: int) -> tuple[int, int]:
+    """Size the normal-mode NVL and RDMA buffers for an EP world of ``world_size``.
+
+    Each size is the larger of the hints reported by DeepEP's dispatch and combine
+    configs, computed for a row of ``hidden`` BF16 elements.
+
+    Returns:
+        ``(num_nvl_bytes, num_rdma_bytes)``.
+
+    Raises:
+        RuntimeError: the NVL hint is not positive or the RDMA hint is negative.
+    """
+    bf16_width = torch.tensor([], dtype=torch.bfloat16).element_size()
+    row_bytes = hidden * bf16_width
+    dispatch_config = Buffer.get_dispatch_config(world_size)
+    combine_config = Buffer.get_combine_config(world_size)
+    both_configs = (dispatch_config, combine_config)
+
+    nvl_hints = [
+        int(cfg.get_nvl_buffer_size_hint(row_bytes, world_size)) for cfg in both_configs
+    ]
+    rdma_hints = [
+        int(cfg.get_rdma_buffer_size_hint(row_bytes, world_size)) for cfg in both_configs
+    ]
+    nvl_bytes, rdma_bytes = max(nvl_hints), max(rdma_hints)
+
+    # A zero RDMA hint is accepted here; only the NVL buffer must be non-empty.
+    if not (nvl_bytes > 0 and rdma_bytes >= 0):
+        raise RuntimeError("DeepEP returned invalid normal-mode buffer size hints")
+    return nvl_bytes, rdma_bytes
+
+
+class DeepEPBackend:
+    """CollectiveX adapter around ``deep_ep.Buffer`` for normal and low-latency modes.
+
+    The class attributes below are the normal-mode defaults. ``__init__`` overrides
+    several of them on the instance when low-latency mode is selected, and sets
+    ``stage_device_work`` from the resolved precision profile.
+    """
+
+    name = "deepep"
+    stage_device_work = False
+    combine_needs_redispatch = False
+    # DeepEP reduces activations and top-k weights independently. The activation
+    # tensor must therefore carry the complete local weighted expert sum.
+    combine_weight_semantics = "unweighted-rank-sum"
+    oracle_layout = "token-rank"
+    payload_unit = "token-rank"
+
+    def __init__(self, args, rank, world_size, local_rank, device):
+        """Resolve the precision profile and construct the DeepEP buffer.
+
+        Args:
+            args: parsed benchmark arguments; this method reads ``mode``, ``phase``,
+                ``experts``, ``hidden``, ``scale_up_domain`` and ``num_sms``.
+            rank: this process's rank in the EP group.
+            world_size: number of ranks in the EP group.
+            local_rank: accepted for the adapter interface; not used here.
+            device: CUDA device whose SM count is recorded in provenance.
+
+        Raises:
+            ValueError: unsupported mode, low-latency with a non-decode phase, or
+                low-latency with an expert count not divisible by ``world_size``.
+            RuntimeError: normal mode spanning more ranks than the scale-up domain
+                without an RDMA buffer, or ``num_sms`` not applied by DeepEP.
+        """
+        self.args = args
+        self.rank = rank
+        self.world_size = world_size
+        self.device = device
+        self.mode = getattr(args, "mode", "normal")
+        if self.mode not in {"normal", "low-latency"}:
+            raise ValueError(f"unsupported DeepEP mode {self.mode!r}")
+        # Precision profiles this adapter accepts, per mode.
+        supported_profiles = {
+            "normal": {
+                "d-bf16.c-bf16",
+                "d-fp8-e4m3fn-b128-f32-prequantized.c-bf16",
+            },
+            "low-latency": {
+                "d-bf16.c-bf16",
+                "d-fp8-e4m3fn-b128-f32-fused.c-bf16",
+                "d-bf16.c-logfmt10-dynamic64",
+                "d-fp8-e4m3fn-b128-f32-fused.c-logfmt10-dynamic64",
+            },
+        }
+        self.precision_profile_id, self.communication_precision = (
+            ep_precision.resolve_precision(
+                args,
+                backend=self.name,
+                mode=self.mode,
+                supported_profiles=supported_profiles[self.mode],
+            )
+        )
+        self._fp8_dispatch = ep_precision.is_low_precision_dispatch(
+            self.communication_precision
+        )
+        self._use_logfmt = ep_precision.uses_logfmt_combine(
+            self.communication_precision
+        )
+        # stage() only dequantizes (see _semantic_recv) when dispatch is low precision.
+        self.stage_device_work = self._fp8_dispatch
+
+        self.group = dist.group.WORLD
+        device_sms = torch.cuda.get_device_properties(device).multi_processor_count
+        mnnvl_kwargs, mnnvl_comm = _mnnvl_buffer_configuration()
+        if self.mode == "low-latency":
+            # Fail early if the installed DeepEP lacks the keywords used below.
+            ep_precision.require_keyword(
+                Buffer.low_latency_dispatch,
+                "use_fp8",
+                api="deep_ep.Buffer.low_latency_dispatch",
+            )
+            ep_precision.require_keyword(
+                Buffer.low_latency_combine,
+                "use_logfmt",
+                api="deep_ep.Buffer.low_latency_combine",
+            )
+            if args.phase != "decode":
+                raise ValueError("DeepEP low-latency mode only supports the decode ladder")
+            if args.experts % world_size:
+                raise ValueError("DeepEP low-latency experts must divide the EP group")
+            # Low-latency mode replaces the normal-mode class defaults on this instance.
+            self.combine_needs_redispatch = True
+            self.combine_weight_semantics = "gate-weighted-sum"
+            self.oracle_layout = "expert-packed"
+            self.payload_unit = "token-expert"
+            # Fixed per-rank token capacity; buffer_cap() reports it in this mode.
+            self.max_tokens_per_rank = 128
+            num_qps_per_rank = args.experts // world_size
+            num_rdma_bytes = Buffer.get_low_latency_rdma_size_hint(
+                self.max_tokens_per_rank, args.hidden, world_size, args.experts
+            )
+            self.buffer = Buffer(
+                self.group,
+                num_nvl_bytes=0,
+                num_rdma_bytes=num_rdma_bytes,
+                low_latency_mode=True,
+                num_qps_per_rank=num_qps_per_rank,
+                allow_nvlink_for_low_latency_mode=True,
+                explicitly_destroy=True,
+                **mnnvl_kwargs,
+            )
+            self.buffer.clean_low_latency_buffer(
+                self.max_tokens_per_rank, args.hidden, args.experts
+            )
+            resource_provenance = {
+                "requested_num_sms": None,
+                "num_sms": None,
+                "sm_fraction": None,
+                "tuned_source": "deepep-low-latency-fixed-kernel",
+                "num_max_tokens_per_rank": self.max_tokens_per_rank,
+                "num_nvl_bytes": 0,
+                "num_rdma_bytes": num_rdma_bytes,
+                "num_qps_per_rank": num_qps_per_rank,
+            }
+        else:
+            # Fail early if the installed DeepEP lacks the keyword used below.
+            ep_precision.require_keyword(
+                Buffer.dispatch,
+                "async_finish",
+                api="deep_ep.Buffer.dispatch",
+            )
+            ep_precision.require_keyword(
+                Buffer.combine,
+                "async_finish",
+                api="deep_ep.Buffer.combine",
+            )
+            num_nvl_bytes, num_rdma_bytes = _normal_buffer_sizes(args.hidden, world_size)
+            if world_size > args.scale_up_domain and num_rdma_bytes == 0:
+                raise RuntimeError("DeepEP scale-out configuration returned no RDMA buffer")
+            self.buffer = Buffer(
+                self.group, num_nvl_bytes, num_rdma_bytes, **mnnvl_kwargs
+            )
+            # The value DeepEP already exposes on Buffer wins; args.num_sms is only the
+            # fallback when that attribute is absent.
+            num_sms = int(getattr(Buffer, "num_sms", args.num_sms))
+            try:
+                Buffer.set_num_sms(num_sms)
+            except Exception as exc:  # pragma: no cover - version dependent
+                raise RuntimeError(
+                    f"DeepEP did not apply requested num_sms={num_sms}: {exc!r}"
+                ) from exc
+            # Read the attribute back so provenance records what DeepEP reports.
+            applied_num_sms = int(getattr(Buffer, "num_sms", num_sms))
+            if applied_num_sms != num_sms:
+                raise RuntimeError(
+                    f"DeepEP num_sms mismatch: requested={num_sms} applied={applied_num_sms}"
+                )
+            resource_provenance = {
+                "requested_num_sms": num_sms,
+                "num_sms": applied_num_sms,
+                "sm_fraction": applied_num_sms / device_sms,
+                "tuned_source": "deepep-default-num_sms",
+                "num_nvl_bytes": num_nvl_bytes,
+                "num_rdma_bytes": num_rdma_bytes,
+            }
+        version = _deepep_version()
+        self.backend_provenance = {
+            "deepep_version": version,
+            # An unset or empty DEEPEP_COMMIT falls back to the package version.
+            "deepep_commit": os.environ.get("DEEPEP_COMMIT") or f"pkg-{version}",
+            "backend_lineage": "deepep-v1",
+            "mode": self.mode,
+            "dispatch_dtype": ep_precision.communication_format(
+                self.communication_precision, "dispatch"
+            ),
+            "combine_dtype": ep_precision.communication_format(
+                self.communication_precision, "combine"
+            ),
+            "resource_mode": "fixed-profile",
+            "device_sms": device_sms,
+            "allow_mnnvl": bool(mnnvl_kwargs),
+            "mnnvl_comm": mnnvl_comm,
+            **resource_provenance,
+        }
+
+    def buffer_cap(self, args):
+        """Return the per-rank token cap in low-latency mode, otherwise ``None``."""
+        return self.max_tokens_per_rank if self.mode == "low-latency" else None
+
+    def make_problem(self, T, idx, weights, x):
+        """Bundle one workload point, encoding ``x`` for the dispatch precision.
+
+        The namespace keeps the original ``x``, the encoder's native input and
+        semantic payload, its evidence, and the routing tensors cast to int64
+        (indices) and float32 (weights).
+        """
+        encoding = ep_precision.encode_dispatch(
+            torch, x, self.communication_precision
+        )
+        return types.SimpleNamespace(
+            T=T,
+            x=x,
+            dispatch_x=encoding.native_input,
+            oracle_x=encoding.semantic,
+            dispatch_precision_evidence=encoding.evidence,
+            topk_idx=idx.to(torch.int64),
+            topk_weights=weights.to(torch.float32),
+        )
+
+    def dispatch(self, p):
+        """Run one synchronous dispatch for problem ``p`` and return its handle bundle.
+
+        Low-latency mode returns ``recv_x``, ``recv_counts`` and ``handle``. Normal mode
+        additionally returns ``recv_topk_idx`` and ``recv_topk_weights``.
+        """
+        if self.mode == "low-latency":
+            # This path passes the unencoded p.x; the wire format is selected by use_fp8.
+            recv_x, recv_counts, handle, _, _ = self.buffer.low_latency_dispatch(
+                p.x,
+                p.topk_idx,
+                self.max_tokens_per_rank,
+                self.args.experts,
+                use_fp8=self._fp8_dispatch,  # BF16 control realizes use_fp8=False.
+                async_finish=False,
+                return_recv_hook=False,
+            )
+            return types.SimpleNamespace(
+                recv_x=recv_x,
+                recv_counts=recv_counts,
+                handle=handle,
+            )
+        # Normal mode: compute the layout first, then dispatch the encoded input.
+        (
+            num_tokens_per_rank,
+            num_tokens_per_rdma_rank,
+            num_tokens_per_expert,
+            is_token_in_rank,
+            _,
+        ) = self.buffer.get_dispatch_layout(p.topk_idx, self.args.experts)
+        recv_x, recv_topk_idx, recv_topk_weights, recv_counts, handle, _ = self.buffer.dispatch(
+            p.dispatch_x,
+            topk_idx=p.topk_idx,
+            topk_weights=p.topk_weights,
+            num_tokens_per_rank=num_tokens_per_rank,
+            num_tokens_per_rdma_rank=num_tokens_per_rdma_rank,
+            is_token_in_rank=is_token_in_rank,
+            num_tokens_per_expert=num_tokens_per_expert,
+            async_finish=False,
+        )
+        return types.SimpleNamespace(
+            recv_x=recv_x,
+            recv_topk_idx=recv_topk_idx,
+            recv_topk_weights=recv_topk_weights,
+            recv_counts=recv_counts,
+            handle=handle,
+        )
+
+    def stage(self, p, h):
+        """Store the semantic received payload on ``h`` as the combine input."""
+        h.combine_input = self._semantic_recv(h, p)
+
+    def combine(self, p, h):
+        """Run one synchronous combine over ``h.combine_input`` and return the result.
+
+        ``stage`` must have populated ``h.combine_input`` beforehand.
+        """
+        if self.mode == "low-latency":
+            combined_x, _, _ = self.buffer.low_latency_combine(
+                h.combine_input,
+                p.topk_idx,
+                p.topk_weights,
+                h.handle,
+                use_logfmt=self._use_logfmt,
+                async_finish=False,
+                return_recv_hook=False,
+            )
+            return combined_x
+        combined_x, _, _ = self.buffer.combine(
+            h.combine_input, h.handle, async_finish=False
+        )
+        return combined_x
+
+    def inspect_dispatch(self, p, h):
+        """Expose a normal-mode dispatch result for checking.
+
+        Reads ``h.recv_topk_idx`` and ``h.recv_topk_weights``, which only the normal
+        dispatch path sets.
+        """
+        # Negative indices mark invalid slots. Valid ones are offset by this rank's
+        # expert base, presumably converting rank-local indices to global expert ids
+        # (TODO confirm against DeepEP).
+        valid = h.recv_topk_idx >= 0
+        expert_ids = torch.where(
+            valid,
+            h.recv_topk_idx + self.rank * (self.args.experts // self.world_size),
+            h.recv_topk_idx,
+        )
+        return types.SimpleNamespace(
+            payload=self._semantic_recv(h, p),
+            encoded_payload=self._encoded_recv(h),
+            scales=self._recv_scales(h),
+            expert_ids=expert_ids,
+            # Weights of invalid slots are zeroed.
+            weights=h.recv_topk_weights.masked_fill(~valid, 0),
+            local_expert_counts=torch.tensor(h.recv_counts, device=self.device, dtype=torch.int64),
+            ordering_contract="source-rank-major-stable-v1",
+        )
+
+    def inspect_expert_dispatch(self, p, h):
+        """Expose a low-latency (expert-packed) dispatch result for checking.
+
+        Raises:
+            RuntimeError: the backend is not in low-latency mode.
+        """
+        if self.mode != "low-latency":
+            raise RuntimeError("expert-packed inspection requires low-latency mode")
+        # Cache the counts on the problem as Python ints; _semantic_recv reuses them.
+        p.recv_counts = tuple(int(value) for value in h.recv_counts.tolist())
+        return types.SimpleNamespace(
+            payload=self._semantic_recv(h, p),
+            encoded_payload=self._encoded_recv(h),
+            scales=self._recv_scales(h),
+            local_expert_counts=h.recv_counts,
+            source_info=h.handle[0],
+            layout_range=h.handle[1],
+        )
+
+    def combine_transformed(self, p, h, transformed):
+        """Combine a caller-supplied ``transformed`` payload instead of the received one.
+
+        In low-latency mode the payload is scattered into a zeroed BF16 buffer shaped
+        like the encoded receive, at ``h.oracle_local_expert_slots`` and
+        ``h.oracle_packed_positions``. Those attributes are not set in this class, so
+        the caller presumably provides them (TODO confirm).
+        """
+        if self.mode == "low-latency":
+            packed = torch.zeros(
+                self._encoded_recv(h).shape,
+                dtype=torch.bfloat16,
+                device=self._encoded_recv(h).device,
+            )
+            packed[h.oracle_local_expert_slots, h.oracle_packed_positions] = transformed.to(
+                packed.dtype
+            )
+            combined, _, _ = self.buffer.low_latency_combine(
+                packed,
+                p.topk_idx,
+                p.topk_weights,
+                h.handle,
+                use_logfmt=self._use_logfmt,
+                async_finish=False,
+                return_recv_hook=False,
+            )
+            return combined
+        # Normal mode: cast to the dtype of the semantic payload before combining.
+        semantic = self._semantic_recv(h, p)
+        combined, _, _ = self.buffer.combine(
+            transformed.to(semantic.dtype), h.handle, async_finish=False
+        )
+        return combined
+
+    def recv_tokens(self, h):
+        """Return the received token count for handle bundle ``h`` as an ``int``.
+
+        Low-latency mode sums ``h.recv_counts``; normal mode uses the first dimension
+        of the encoded received tensor.
+        """
+        if self.mode == "low-latency":
+            return int(h.recv_counts.to(torch.int64).sum().item())
+        return int(self._encoded_recv(h).shape[0])
+
+    def _encoded_recv(self, h):
+        """Return the payload tensor of ``h.recv_x`` (element 0 when it is a tuple)."""
+        return h.recv_x[0] if isinstance(h.recv_x, tuple) else h.recv_x
+
+    def _recv_scales(self, h):
+        """Return element 1 of ``h.recv_x`` when it is a tuple, otherwise ``None``."""
+        return h.recv_x[1] if isinstance(h.recv_x, tuple) else None
+
+    def _semantic_recv(self, h, problem=None):
+        """Return the received payload in semantic (dequantized) form.
+
+        Without low-precision dispatch this is ``h.recv_x`` itself. Otherwise the
+        dequantized tensor is computed once and cached on ``h.recv_semantic``.
+        """
+        if not self._fp8_dispatch:
+            return h.recv_x
+        if not hasattr(h, "recv_semantic"):
+            if self.mode == "low-latency":
+                # Reuse counts cached on the problem; otherwise read them from the
+                # handle and cache them for later calls.
+                counts = getattr(problem, "recv_counts", None)
+                if counts is None:
+                    counts = tuple(int(value) for value in h.recv_counts.tolist())
+                    if problem is not None:
+                        problem.recv_counts = counts
+                # The BF16 workspace is allocated once per backend and shared by every
+                # later low-latency dequantization.
+                workspace = getattr(self, "_ll_semantic_workspace", None)
+                if workspace is None:
+                    encoded = self._encoded_recv(h)
+                    workspace = torch.empty(
+                        encoded.shape, dtype=torch.bfloat16, device=encoded.device
+                    )
+                    self._ll_semantic_workspace = workspace
+                h.recv_semantic = ep_precision.dequantize_expert_prefixes(
+                    torch,
+                    self._encoded_recv(h),
+                    self._recv_scales(h),
+                    self.communication_precision["dispatch"],
+                    counts,
+                    workspace,
+                )
+            else:
+                h.recv_semantic = ep_precision.dequantize_dispatch(
+                    torch,
+                    self._encoded_recv(h),
+                    self._recv_scales(h),
+                    self.communication_precision["dispatch"],
+                )
+        return h.recv_semantic
+
+    def oracle_dispatch_payload(self, payload):
+        """Return the semantic form of ``payload`` under the dispatch precision."""
+        return ep_precision.encode_dispatch(
+            torch, payload, self.communication_precision
+        ).semantic
+
+    def precision_evidence(self, problem, view=None):
+        """Delegate to ``ep_precision.precision_evidence`` for the resolved profile."""
+        return ep_precision.precision_evidence(
+            torch,
+            profile_id=self.precision_profile_id,
+            profile=self.communication_precision,
+            problem=problem,
+            view=view,
+        )
+
+    def finalize(self, rc):
+        """Tear down the buffer and process group; always return ``rc`` unchanged.
+
+        Teardown is best-effort: the first failure stops the remaining steps and is
+        reported on stderr rather than raised, so it cannot replace the caller's
+        return code.
+        """
+        try:
+            dist.barrier()
+            # Only the low-latency buffer was built with explicitly_destroy=True.
+            if self.mode == "low-latency":
+                self.buffer.destroy()
+            dist.destroy_process_group()
+        except Exception as exc:
+            # Report the failure instead of dropping it, so a broken teardown is
+            # visible in the job log.
+            print(f"WARNING: DeepEP teardown failed: {exc!r}", file=sys.stderr)
+        return rc
diff --git a/experimental/CollectiveX/tests/ep_deepep_hybrid.py b/experimental/CollectiveX/tests/ep_deepep_hybrid.py
new file mode 100644
index 000000000..f48005e4c
--- /dev/null
+++ b/experimental/CollectiveX/tests/ep_deepep_hybrid.py
@@ -0,0 +1,551 @@
+#!/usr/bin/env python3
+"""CollectiveX EP backend adapter — DeepEP `hybrid-ep` branch (NVIDIA TMA-based HybridEPBuffer).
+
+The hybrid-ep branch (https://github.com/deepseek-ai/DeepEP/tree/hybrid-ep) is NVIDIA's TMA +
+warp-pipeline implementation of expert-parallel all-to-all, exposing `deep_ep.HybridEPBuffer`
+(distinct from the mainline `deep_ep.Buffer`). HybridEP is NVIDIA's MoE backend built for NVL72
+rack-scale (Megatron `moe_flex_dispatcher_backend="hybridep"`). This adapter binds the API's
+"ranks per node" field to active ranks per NVLink/MNNVL communication domain, not physical host
+GPUs: x86 EP16 is two 8-rank domains, while GB EP8/EP16 is one 8/16-rank MNNVL domain across hosts.
+The container build is done by runtime/run_in_container.sh `cx_build_deepep_hybrid` (CUDA-13 CCCL
+include path, without the V2 NVSHMEM overlay).
+
+API (pinned on B300, branch e0a5b1d):
+  HybridEPBuffer(group, hidden_dim, max_num_of_tokens_per_rank, num_local_experts, use_fp8=False, ...)
+  .dispatch(hidden, topk_idx=, topk_weights=, num_of_experts=) -> (recv_hidden, recv_x2, None, handle)
+  .combine(hidden, handle=) -> [T, hidden]
+
+CORRECTNESS: identity expert (no expert compute), combine WITHOUT probs -> each source token is
+reconstructed as x * (distinct ranks among its top_k experts) — verified: an 8-rank uniform top_k=8
+round trip gives relerr(combined, x) = 4.28, matching E[distinct ranks] ~ 5.26 exactly. So this uses
+the same per-rank-sum combine contract (no gate re-weight). BF16 tolerance is 5e-2.
+
+STATUS: BF16 or native block-scaled FP8 dispatch, BF16 combine, normal mode. The v1 scope covers
+one MNNVL domain or x86 scale-out between two eight-GPU NVLink domains.
+"""
+from __future__ import annotations
+
+import hashlib
+import importlib
+import json
+import os
+from pathlib import Path
+import re
+import shutil
+import sys
+import tempfile
+import types
+
+import torch
+import torch.distributed as dist
+import contracts
+import ep_precision
+
+try:
+    import deep_ep
+    HybridEPBuffer = deep_ep.HybridEPBuffer
+except Exception as exc:  # pragma: no cover - needs the hybrid-ep build
+    print("ERROR: deep_ep.HybridEPBuffer import failed — the hybrid-ep branch must be built at job "
+          "setup (cx_build_deepep_hybrid). "
+          f"{exc!r}", file=sys.stderr)
+    raise
+
+
+def _deepep_hybrid_version() -> str:
+    """Return the HybridEP version label.
+
+    ``DEEPEP_COMMIT`` is used when it is set to a non-empty value. Otherwise the
+    module's ``__version__`` is used, and ``"hybrid-ep"`` when that is absent too.
+    """
+    # `or` rather than a .get() default: a variable that is set but empty must not
+    # yield an empty version string.
+    return os.environ.get("DEEPEP_COMMIT") or getattr(deep_ep, "__version__", "hybrid-ep")
+
+
+def _hybrid_build_evidence() -> list[dict[str, str]]:
+    """Collect content-manifest evidence for the two loaded HybridEP extensions.
+
+    Each extension module is imported and its ``__file__`` is handed to
+    ``contracts.content_manifest_evidence``. The records are returned sorted by
+    ``(role, name)``.
+
+    Raises:
+        RuntimeError: an extension module exposes no file path.
+    """
+    extension_roles = {
+        "deep_ep_cpp": "deepep-extension",
+        "hybrid_ep_cpp": "deepep-hybrid-extension",
+    }
+    evidence = []
+    for module_name, role in extension_roles.items():
+        loaded = importlib.import_module(module_name)
+        location = getattr(loaded, "__file__", None)
+        if not location:
+            raise RuntimeError(f"{module_name} has no loaded extension path")
+        manifest = contracts.content_manifest_evidence(
+            role=role,
+            name=module_name,
+            files=[(os.path.basename(location), location)],
+        )
+        evidence.append(manifest)
+    evidence.sort(key=lambda item: (item["role"], item["name"]))
+    return evidence
+
+
+# Attribute names that _hybrid_realized_config reads from the realized HybridEP config.
+# Every name must be present on the config object; "token_data_type" is projected to a
+# type label, all other fields to bool or int.
+HYBRID_CONFIG_FIELDS = (
+    "hidden_dim", "max_num_of_tokens_per_rank", "num_of_experts_per_rank",
+    "num_of_ranks_per_node", "num_of_nodes", "pad_multiple",
+    "num_of_tokens_per_chunk_preprocessing_api",
+    "num_of_threads_per_block_preprocessing_api", "num_of_blocks_preprocessing_api",
+    "num_of_blocks_permute", "num_of_blocks_unpermute", "token_data_type",
+    "num_of_stages_dispatch_api", "num_of_stages_permute_block_dispatch_api",
+    "num_of_in_flight_s2g_dispatch_api",
+    "num_of_in_flight_s2g_permute_block_dispatch_api",
+    "num_of_additional_in_flight_s2g_dispatch_api",
+    "num_of_tokens_per_chunk_dispatch_api", "num_of_blocks_dispatch_api",
+    "forward_dispatch_api", "device_side_sync_dispatch_api",
+    "num_of_stages_g2s_combine_api", "num_of_stages_s2g_combine_api",
+    "num_of_tokens_per_chunk_combine_api", "num_of_tokens_per_group_combine_api",
+    "num_of_blocks_combine_api", "num_of_additional_in_flight_s2g_combine_api",
+    "backward_combine_api", "device_side_sync_combine_api",
+)
+
+
+def _hybrid_realized_config(config) -> dict[str, str | int | bool]:
+    """Flatten the realized HybridEP config into JSON-friendly scalars.
+
+    Every name in ``HYBRID_CONFIG_FIELDS`` is read from ``config``. The token data
+    type becomes ``"UINT8"`` or ``"UINT16"``, values whose type is exactly ``bool``
+    are kept as-is, and everything else is converted with ``int``.
+
+    Raises:
+        RuntimeError: a field is missing, the token data type is unrecognized, or a
+            value cannot be converted to ``int``.
+    """
+    token_type_aliases = {"uint8_t": "UINT8", "uint16_t": "UINT16"}
+    projected = {}
+    for name in HYBRID_CONFIG_FIELDS:
+        try:
+            raw = getattr(config, name)
+        except AttributeError as exc:
+            raise RuntimeError(f"HybridEP realized config omits {name}") from exc
+
+        if name == "token_data_type":
+            # Prefer an enum-style ``.name``; otherwise fall back to the string form.
+            label = getattr(raw, "name", None)
+            if label not in {"UINT8", "UINT16"}:
+                label = token_type_aliases.get(str(raw))
+            if label is None:
+                raise RuntimeError("HybridEP realized token_data_type is invalid")
+            projected[name] = label
+        elif type(raw) is bool:
+            # Exact bool check so flags are not turned into 0/1 by int().
+            projected[name] = raw
+        else:
+            try:
+                projected[name] = int(raw)
+            except (TypeError, ValueError) as exc:
+                raise RuntimeError(
+                    f"HybridEP realized config {name} is not integral"
+                ) from exc
+    return projected
+
+
+def _sha256_with_size(path: Path) -> tuple[str, int]:
+    """Return ``(hex SHA-256 digest, byte count)`` for the file at ``path``.
+
+    The file is read in 1 MiB chunks so it is never held in memory whole.
+    """
+    chunk_bytes = 1024 * 1024
+    hasher = hashlib.sha256()
+    total = 0
+    with path.open("rb") as stream:
+        while True:
+            block = stream.read(chunk_bytes)
+            if block == b"":
+                break
+            hasher.update(block)
+            total += len(block)
+    return hasher.hexdigest(), total
+
+
+def _hybrid_jit_evidence(root: Path) -> list[dict[str, str | int]]:
+    """Describe the ``.so`` files directly inside ``root`` by key, size and digest.
+
+    Entries are visited in file-name order and only the file stem is recorded, so
+    the cache path itself never appears in the evidence.
+
+    Raises:
+        RuntimeError: ``root`` is not a directory, a ``.so`` entry is a symlink or not
+            a regular file, its stem is not a valid kernel key, it is empty, or the
+            number of libraries found is not exactly three.
+    """
+    if not root.is_dir():
+        raise RuntimeError("DeepEP Hybrid produced no JIT cache directory")
+
+    key_pattern = re.compile(r"[A-Za-z0-9][A-Za-z0-9_.+-]{0,511}")
+    libraries = [
+        entry
+        for entry in sorted(root.iterdir(), key=lambda item: item.name)
+        if entry.suffix == ".so"
+    ]
+
+    records = []
+    for library in libraries:
+        if library.is_symlink() or not library.is_file():
+            raise RuntimeError("DeepEP Hybrid JIT artifact is not a regular file")
+        stem = library.stem
+        if key_pattern.fullmatch(stem) is None:
+            raise RuntimeError("DeepEP Hybrid JIT kernel key is invalid")
+        checksum, byte_count = _sha256_with_size(library)
+        if byte_count <= 0:
+            raise RuntimeError("DeepEP Hybrid JIT artifact is empty")
+        records.append({
+            "bytes": byte_count,
+            "kernel_key": stem,
+            "sha256": checksum,
+        })
+
+    if len(records) != 3:
+        raise RuntimeError(
+            f"DeepEP Hybrid expected 3 final JIT libraries, found {len(records)}"
+        )
+    return records
+
+
+def _require_cross_rank_equal(value, label: str) -> None:
+    """Fail unless every rank supplied an identical ``value``.
+
+    Values are gathered from all ranks with ``all_gather_object`` and compared by
+    their key-sorted compact JSON encoding.
+
+    Raises:
+        RuntimeError: the gathered values do not reduce to exactly one encoding.
+    """
+    per_rank = [None for _ in range(dist.get_world_size())]
+    dist.all_gather_object(per_rank, value)
+    encodings = set()
+    for item in per_rank:
+        encodings.add(json.dumps(item, sort_keys=True, separators=(",", ":")))
+    if len(encodings) != 1:
+        raise RuntimeError(f"DeepEP Hybrid {label} differs across ranks")
+
+
+def _hybrid_topology(args, world_size: int) -> dict[str, int | str]:
+    """Map the requested placement onto HybridEP's communication-domain geometry.
+
+    Only the placements listed in the table below are accepted. A missing
+    ``gpus_per_node`` falls back to ``world_size``, and a missing ``scale_up_domain``
+    falls back to the resolved GPUs per node.
+
+    Raises:
+        RuntimeError: the placement is not one of the supported combinations.
+    """
+    node_gpus = int(args.gpus_per_node or world_size)
+    domain_size = int(args.scale_up_domain or node_gpus)
+
+    # (world, gpus/node, scale-up domain, scope, scale-up transport, scale-out
+    #  transport, transport) -> (ranks per domain, number of domains)
+    supported = {
+        (8, 8, 8, "scale-up", "nvlink", None, "nvlink"): (8, 1),
+        (16, 8, 8, "scale-out", "nvlink", "rdma", "nvlink-rdma"): (8, 2),
+        (8, 4, 72, "scale-up", "mnnvl", None, "mnnvl"): (8, 1),
+        (16, 4, 72, "scale-up", "mnnvl", None, "mnnvl"): (16, 1),
+    }
+    placement = (
+        world_size,
+        node_gpus,
+        domain_size,
+        args.scope,
+        args.scale_up_transport,
+        # An empty scale-out transport is normalized to None for the lookup.
+        args.scale_out_transport or None,
+        args.transport,
+    )
+    geometry = supported.get(placement)
+    if geometry is None:
+        raise RuntimeError("DeepEP Hybrid topology is outside the fixed v1 matrix")
+    ranks_per_domain, domain_count = geometry
+
+    return {
+        "communication_domains": domain_count,
+        "domain_ranks": ranks_per_domain,
+        "physical_nodes": world_size // node_gpus,
+        "transport": str(args.transport),
+    }
+
+
+class DeepEPHybridBackend:
+    """Harness adapter that drives ``deep_ep.HybridEPBuffer`` dispatch and combine.
+
+    Construction resolves the precision profile, checks the build-mode and MNNVL
+    environment, points HybridEP's JIT cache at a fresh per-rank directory, and
+    verifies the realized communication-domain geometry. ``finalize`` removes that
+    directory and restores the environment variables the constructor overrode.
+    """
+
+    name = "deepep-hybrid"
+    stage_device_work = False
+    # HybridEPBuffer.combine consumes the recv payload + the dispatch handle (no re-dispatch needed
+    # before a timed combine); the harness times dispatch and combine separately (like ep_deepep).
+    combine_needs_redispatch = False
+    combine_weight_semantics = "unweighted-rank-sum"
+
+    def __init__(self, args, rank, world_size, local_rank, device):
+        """Build the HybridEPBuffer for one case and validate its realized geometry.
+
+        Raises:
+            RuntimeError: when the build-mode or MNNVL environment does not match the
+                requested topology, or when buffer construction or the geometry check
+                fails (the private JIT cache and environment overrides are rolled back
+                before the error is re-raised).
+        """
+        self.args = args
+        self.rank = rank
+        self.world_size = world_size
+        self.device = device
+        self.mode = "normal"
+        self.precision_profile_id, self.communication_precision = (
+            ep_precision.resolve_precision(
+                args,
+                backend=self.name,
+                mode=self.mode,
+                supported_profiles={
+                    "d-bf16.c-bf16",
+                    "d-fp8-e4m3fn-b128-f32-prequantized.c-bf16",
+                },
+            )
+        )
+        self._fp8_dispatch = ep_precision.is_low_precision_dispatch(
+            self.communication_precision
+        )
+        # Instance-level override of the class default: staging does device work only for FP8.
+        self.stage_device_work = self._fp8_dispatch
+        ep_precision.require_keyword(
+            HybridEPBuffer.__init__,
+            "use_fp8",
+            api="deep_ep.HybridEPBuffer.__init__",
+        )
+        ep_precision.require_keyword(
+            HybridEPBuffer.dispatch,
+            "scaling_factor",
+            api="deep_ep.HybridEPBuffer.dispatch",
+        )
+        self.group = dist.group.WORLD
+        self.tolerance = 5e-2
+        self.top_k = int(args.topk)
+        self.num_experts = int(args.experts)
+        self.hidden = int(args.hidden)
+        self.local_experts = max(1, self.num_experts // world_size)
+        topology = _hybrid_topology(args, world_size)
+        self.domain_ranks = int(topology["domain_ranks"])
+        self.communication_domains = int(topology["communication_domains"])
+        build_mode = os.environ.get("DEEPEP_HYBRID_BUILD_MODE", "")
+        if self.communication_domains > 1:
+            if (
+                os.environ.get("HYBRID_EP_MULTINODE") != "1"
+                or build_mode != "multinode-doca"
+                or os.environ.get("USE_NIXL", "0") != "0"
+            ):
+                raise RuntimeError("DeepEP Hybrid scale-out build mode is not realized")
+        elif build_mode != "intradomain":
+            raise RuntimeError("DeepEP Hybrid scale-up requires the intradomain build")
+        if args.scale_up_transport == "mnnvl" and any(
+            os.environ.get(name) != "1"
+            for name in ("NCCL_CUMEM_ENABLE", "NCCL_MNNVL_ENABLE", "MC_FORCE_MNNVL")
+        ):
+            raise RuntimeError("DeepEP Hybrid MNNVL runtime enablement is incomplete")
+        # Token cap (per rank) for the symmetric buffer; the sweep is capped here (buffer_cap).
+        self.max_tokens = 4096
+        dev_sms = torch.cuda.get_device_properties(device).multi_processor_count
+        ver = _deepep_hybrid_version()
+        loaded_libraries = _hybrid_build_evidence()
+        _require_cross_rank_equal(loaded_libraries, "loaded extension identities")
+
+        # HybridEP's compiler uses a process-specific child of HYBRID_EP_CACHE_DIR. Give every
+        # rank a fresh private base so stale kernels cannot enter this attempt's evidence.
+        self._previous_jit_cache_dir = os.environ.get("HYBRID_EP_CACHE_DIR")
+        self._previous_domain_ranks = os.environ.get(
+            "NUM_OF_HYBRID_EP_RANKS_PER_NVLINK_DOMAIN"
+        )
+        self._jit_cache_dir = tempfile.mkdtemp(prefix=f"collectivex-hybrid-r{rank}-")
+        os.environ["HYBRID_EP_CACHE_DIR"] = self._jit_cache_dir
+        os.environ["NUM_OF_HYBRID_EP_RANKS_PER_NVLINK_DOMAIN"] = str(self.domain_ranks)
+        self._jit_root = (
+            Path(self._jit_cache_dir) / ".deepep" / "hybrid_ep" / "jit"
+            / f"proc-{os.getpid()}"
+        )
+        self._realized_config = None
+        self._deferred_semantic_snapshot = None
+        self._deferred_jit_diagnostics = None
+
+        try:
+            self.buffer = HybridEPBuffer(
+                self.group, hidden_dim=self.hidden,
+                max_num_of_tokens_per_rank=self.max_tokens,
+                num_local_experts=self.local_experts,
+                use_fp8=self._fp8_dispatch,
+            )
+            realized_geometry = (
+                int(self.buffer.num_of_hybrid_ep_ranks_per_nvlink_domain),
+                int(self.buffer.num_of_nodes),
+                int(self.buffer.local_rank),
+                int(self.buffer.node_rank),
+            )
+            expected_geometry = (
+                self.domain_ranks,
+                self.communication_domains,
+                rank % self.domain_ranks,
+                rank // self.domain_ranks,
+            )
+            buffer_config = self.buffer.configurer.buffer_config
+            if realized_geometry != expected_geometry or (
+                int(buffer_config.num_of_ranks_per_node) != self.domain_ranks
+                or int(buffer_config.num_of_nodes) != self.communication_domains
+            ):
+                raise RuntimeError(
+                    "HybridEPBuffer communication-domain geometry differs from the case"
+                )
+        except Exception as exc:
+            # Roll back the private cache directory and environment overrides before reporting.
+            self._restore_jit_environment()
+            raise RuntimeError(
+                f"HybridEPBuffer construction failed (hidden={self.hidden} max_tokens={self.max_tokens} "
+                f"local_experts={self.local_experts} world={world_size}): {exc!r}") from exc
+        update_template_config = self.buffer.update_template_config
+
+        def tracked_update_template_config(*call_args, **call_kwargs):
+            # Wrap the buffer's own method so every realized config is checked against the
+            # case topology, the precision profile and the first config seen in this case.
+            config = update_template_config(*call_args, **call_kwargs)
+            realized = _hybrid_realized_config(config)
+            if (
+                realized["num_of_ranks_per_node"] != self.domain_ranks
+                or realized["num_of_nodes"] != self.communication_domains
+            ):
+                raise RuntimeError("DeepEP Hybrid realized topology changed within one case")
+            expected_token_type = "UINT8" if self._fp8_dispatch else "UINT16"
+            if realized["token_data_type"] != expected_token_type:
+                raise RuntimeError(
+                    "DeepEP Hybrid realized token dtype differs from the precision profile"
+                )
+            if self._realized_config is not None and realized != self._realized_config:
+                raise RuntimeError("DeepEP Hybrid realized autotune config changed within one case")
+            self._realized_config = realized
+            return config
+
+        self.buffer.update_template_config = tracked_update_template_config
+        self.domain_rank = int(self.buffer.local_rank)
+        if rank == 0:
+            print(
+                "[deepep-hybrid] HybridEPBuffer constructed "
+                f"(domains={self.communication_domains} ranks_per_domain={self.domain_ranks} "
+                f"world={world_size} local_experts={self.local_experts} hidden={self.hidden})",
+                file=sys.stderr,
+            )
+
+        # realized_config / jit_* start empty; capture_deferred_provenance() fills them in.
+        self.backend_provenance = {
+            "deepep_commit": ver, "branch": "hybrid-ep",
+            "deepep_tree": os.environ.get("DEEPEP_TREE"),
+            "backend_lineage": "deepep-hybrid",
+            "loaded_libraries": loaded_libraries,
+            "impl": "deep_ep.HybridEPBuffer (NVIDIA TMA + warp-pipeline)",
+            "mode": "normal", "transport": topology["transport"],
+            "dispatch_dtype": ep_precision.communication_format(
+                self.communication_precision, "dispatch"
+            ),
+            "combine_dtype": ep_precision.communication_format(
+                self.communication_precision, "combine"
+            ),
+            "resource_mode": "fixed-profile",
+            "num_sms": None, "device_sms": dev_sms,
+            "tuned_source": "deepep-hybrid-configurer-autotune-v1",
+            "realized_config": None, "jit_kernel_keys": [], "jit_shared_objects": [],
+            "max_num_tokens": self.max_tokens, "top_k": self.top_k,
+            "num_experts": self.num_experts, "local_experts": self.local_experts,
+            "routing_factor": "ranks",
+        }
+
+    def _restore_jit_environment(self):
+        """Delete this rank's private JIT cache and restore the overridden environment.
+
+        Shared by the constructor's failure path and ``finalize`` so both undo exactly
+        the same state: the temporary cache directory plus the two environment variables
+        captured before they were overwritten.
+        """
+        shutil.rmtree(self._jit_cache_dir, ignore_errors=True)
+        for variable, previous in (
+            ("HYBRID_EP_CACHE_DIR", self._previous_jit_cache_dir),
+            ("NUM_OF_HYBRID_EP_RANKS_PER_NVLINK_DOMAIN", self._previous_domain_ranks),
+        ):
+            if previous is None:
+                # The variable was unset before construction; unset it again.
+                os.environ.pop(variable, None)
+            else:
+                os.environ[variable] = previous
+
+    def buffer_cap(self, args):
+        """Return the per-rank token cap the buffer was sized for (``args`` is unused)."""
+        return self.max_tokens
+
+    def make_problem(self, T, idx, weights, x):
+        """Encode ``x`` for the precision profile and bundle the dispatch inputs.
+
+        Returns a namespace holding the original ``x``, the payload handed to dispatch
+        (the encoded payload viewed as uint8 for FP8, the native input otherwise), the
+        scales, the oracle payload, precision evidence, and int64/float32 routing tensors.
+        """
+        encoding = ep_precision.encode_dispatch(
+            torch, x, self.communication_precision
+        )
+        dispatch_x = (
+            encoding.encoded_payload.view(torch.uint8)
+            if self._fp8_dispatch
+            else encoding.native_input
+        )
+        return types.SimpleNamespace(
+            T=int(T),
+            x=x,
+            dispatch_x=dispatch_x,
+            dispatch_scales=encoding.scales,
+            oracle_x=encoding.semantic,
+            dispatch_precision_evidence=encoding.evidence,
+            topk_idx=idx.to(torch.int64),
+            topk_weights=weights.to(torch.float32),
+        )
+
+    def dispatch(self, p):
+        """Run ``HybridEPBuffer.dispatch`` for problem ``p`` and wrap its four results."""
+        recv, recv_probs, _scales, handle = self.buffer.dispatch(
+            p.dispatch_x,
+            scaling_factor=p.dispatch_scales,
+            topk_idx=p.topk_idx,
+            topk_weights=p.topk_weights,
+            num_of_experts=self.num_experts,
+        )
+        return types.SimpleNamespace(
+            recv=recv,
+            recv_payload=recv,
+            recv_scales=_scales,
+            recv_probs=recv_probs,
+            handle=handle,
+            combine_input=None,
+        )
+
+    def stage(self, p, h):
+        """Prepare ``h.combine_input`` from the received payload; returns ``None``."""
+        # Identity expert: the recv hidden IS the "expert output". combine reduces it per source token.
+        # NOTE(review): p.recv_tokens is not set by make_problem; presumably the harness
+        # attaches it before stage() runs — confirm against the caller.
+        h.combine_input = self._semantic_recv(h, p.recv_tokens)
+        return None
+
+    def combine(self, p, h):
+        """Combine the staged input through the dispatch handle and return the tensor."""
+        # combine(hidden, handle=) -> [T, H] per-source-token reduction (no gate re-weight: "ranks").
+        comb = self.buffer.combine(h.combine_input, handle=h.handle)
+        return comb[0] if isinstance(comb, (tuple, list)) else comb
+
+    def capture_deferred_provenance(self):
+        """Record the realized config and JIT artifacts into ``backend_provenance``.
+
+        Synchronizes the device and all ranks, requires the realized config and kernel
+        keys to match across ranks, gathers per-rank artifact sizes and hashes, and
+        raises ``RuntimeError`` if a later call observes a different snapshot.
+        """
+        torch.cuda.synchronize()
+        dist.barrier()
+        if self._realized_config is None:
+            raise RuntimeError("DeepEP Hybrid autotune config was not materialized")
+        local_artifacts = _hybrid_jit_evidence(self._jit_root)
+        semantic = {
+            "jit_kernel_keys": [item["kernel_key"] for item in local_artifacts],
+            "realized_config": dict(self._realized_config),
+        }
+        # NVCC may embed each rank's timestamped source basename in its ELF, so raw .so hashes are
+        # diagnostics rather than a cross-rank identity. Stable kernel keys encode every codegen
+        # input, including HybridEpConfigInstance fields that the Python binding does not expose.
+        _require_cross_rank_equal(semantic, "realized config/JIT kernel keys")
+        gathered_artifacts = [None] * dist.get_world_size()
+        dist.all_gather_object(gathered_artifacts, local_artifacts)
+        diagnostics = []
+        # Indexing every rank's list by artifact_index relies on the kernel-key lists having
+        # just been checked equal across ranks.
+        for artifact_index, kernel_key in enumerate(semantic["jit_kernel_keys"]):
+            diagnostics.append({
+                "kernel_key": kernel_key,
+                "rank_artifacts": [
+                    {
+                        "bytes": rank_artifacts[artifact_index]["bytes"],
+                        "rank": artifact_rank,
+                        "sha256": rank_artifacts[artifact_index]["sha256"],
+                    }
+                    for artifact_rank, rank_artifacts in enumerate(gathered_artifacts)
+                ],
+            })
+        if self._deferred_semantic_snapshot is not None and semantic != self._deferred_semantic_snapshot:
+            raise RuntimeError("DeepEP Hybrid config/JIT kernel set changed after measurement")
+        if self._deferred_jit_diagnostics is not None and diagnostics != self._deferred_jit_diagnostics:
+            raise RuntimeError("DeepEP Hybrid rank-local JIT artifacts changed after measurement")
+        self._deferred_semantic_snapshot = semantic
+        self._deferred_jit_diagnostics = diagnostics
+        self.backend_provenance.update(semantic)
+        self.backend_provenance["jit_shared_objects"] = diagnostics
+
+    def inspect_dispatch(self, p, h):
+        """Rebuild per-received-token expert ids and weights from the dispatch handle.
+
+        Raises:
+            RuntimeError: when the received probability tensor has too few columns to
+                cover this rank's slice of local experts.
+        """
+        count = self.recv_tokens(h)
+        # NOTE(review): h.handle[4] is used as a [token, local_expert] routing map —
+        # confirm the handle layout against HybridEPBuffer.
+        routing_map = h.handle[4][:count]
+        rows, local_expert_ids = routing_map.nonzero(as_tuple=True)
+        # Slot of each routed expert within its token: running count along the expert axis.
+        positions = routing_map.to(torch.int64).cumsum(dim=1)[rows, local_expert_ids] - 1
+        probability_columns = self.domain_rank * self.local_experts + local_expert_ids
+        if h.recv_probs.shape[1] < (self.domain_rank + 1) * self.local_experts:
+            raise RuntimeError("HybridEPBuffer probability tensor omits this NVLink-domain rank")
+        expert_ids = torch.full(
+            (count, self.top_k), -1, dtype=torch.int64, device=self.device
+        )
+        weights = torch.zeros(
+            (count, self.top_k), dtype=torch.float32, device=self.device
+        )
+        expert_ids[rows, positions] = local_expert_ids + self.rank * self.local_experts
+        weights[rows, positions] = h.recv_probs[:count][rows, probability_columns]
+        return types.SimpleNamespace(
+            payload=self._semantic_recv(h, count)[:count],
+            encoded_payload=h.recv_payload[:count],
+            scales=(h.recv_scales[:count] if h.recv_scales is not None else None),
+            expert_ids=expert_ids,
+            weights=weights,
+            local_expert_counts=routing_map.sum(dim=0, dtype=torch.int64),
+            ordering_contract="global-source-filter-stable-v1",
+        )
+
+    def combine_transformed(self, p, h, transformed):
+        """Combine ``transformed`` (cast to bfloat16) through the same dispatch handle."""
+        combined = self.buffer.combine(
+            transformed.to(torch.bfloat16), handle=h.handle
+        )
+        return combined[0] if isinstance(combined, (tuple, list)) else combined
+
+    def recv_tokens(self, h):
+        """Return the received-token count read from ``h.handle[3]`` as a Python int."""
+        return int(h.handle[3].item())
+
+    def _semantic_recv(self, h, rows):
+        """Return the received payload in semantic (bfloat16) form.
+
+        Without FP8 dispatch the raw payload is returned unchanged. With FP8 the first
+        ``rows`` rows are dequantized once and cached on ``h``; asking again with a
+        different ``rows`` raises ``RuntimeError``.
+        """
+        if not self._fp8_dispatch:
+            return h.recv_payload
+        if not hasattr(h, "recv_semantic"):
+            # Only the first `rows` rows are written; the remainder stays uninitialized.
+            semantic = torch.empty(
+                h.recv_payload.shape,
+                dtype=torch.bfloat16,
+                device=h.recv_payload.device,
+            )
+            semantic[:rows].copy_(ep_precision.dequantize_dispatch(
+                torch,
+                h.recv_payload[:rows],
+                h.recv_scales[:rows],
+                self.communication_precision["dispatch"],
+                uint8_storage=True,
+            ))
+            h.recv_semantic = semantic
+            h.recv_semantic_rows = rows
+        elif h.recv_semantic_rows != rows:
+            raise RuntimeError("DeepEP Hybrid receive count changed for one dispatch handle")
+        return h.recv_semantic
+
+    def oracle_dispatch_payload(self, payload):
+        """Return the ``semantic`` result of encoding ``payload`` with this profile."""
+        return ep_precision.encode_dispatch(
+            torch, payload, self.communication_precision
+        ).semantic
+
+    def precision_evidence(self, problem, view=None):
+        """Delegate to ``ep_precision.precision_evidence`` with uint8 storage enabled."""
+        return ep_precision.precision_evidence(
+            torch,
+            profile_id=self.precision_profile_id,
+            profile=self.communication_precision,
+            problem=problem,
+            view=view,
+            uint8_storage=True,
+        )
+
+    def finalize(self, rc):
+        """Tear down the process group, undo constructor side effects, and return ``rc``."""
+        try:
+            dist.barrier()
+            dist.destroy_process_group()
+        except Exception:
+            # Best-effort teardown: errors are ignored so the cleanup below still runs
+            # and ``rc`` is returned unchanged.
+            pass
+        self._restore_jit_environment()
+        return rc
diff --git a/experimental/CollectiveX/tests/ep_deepep_v2.py b/experimental/CollectiveX/tests/ep_deepep_v2.py
new file mode 100644
index 000000000..c6c30ad27
--- /dev/null
+++ b/experimental/CollectiveX/tests/ep_deepep_v2.py
@@ -0,0 +1,624 @@
+#!/usr/bin/env python3
+"""DeepEP PR #605 adapter with PR #630's pure scale-up initialization fix."""
+
+from __future__ import annotations
+
+import ctypes
+import hashlib
+import importlib.metadata
+import inspect
+import json
+import os
+import re
+import sys
+import types
+from pathlib import Path
+
+import torch
+import torch.distributed as dist
+import contracts
+import ep_harness
+import ep_precision
+
+try:
+    import deep_ep
+    from deep_ep import ElasticBuffer  # type: ignore
+except Exception as exc:  # pragma: no cover - requires the benchmark image
+    print(f"ERROR: DeepEP V2 import failed: {exc!r}", file=sys.stderr)
+    raise
+
+
+# Pinned identities for this adapter. _require_runtime() compares the DEEPEP_V2_* values
+# below with the same-named environment variables and the version strings with the
+# imported/installed packages; _jit_artifact_evidence() requires the JIT cache to hold
+# exactly the kernel names in DEEPEP_V2_JIT_KERNELS.
+DEEPEP_V2_PR = 605
+DEEPEP_V2_FIX_PR = 630
+DEEPEP_V2_COMMIT = "fa8a9b16898204afd347c663b89e65ef87dc6ce6"
+DEEPEP_V2_TREE = "29809e75c5874e6609dac4804e7b651d5226959f"
+DEEPEP_V2_FMT_COMMIT = "a4c7e17133ee9cb6a2f45545f6e974dd3c393efa"
+DEEPEP_V2_VERSION = "2.0.0"
+DEEPEP_V2_DISTRIBUTION = "2.0.0+fa8a9b1"
+DEEPEP_V2_JIT_RANDOM_SEED = "collectivex-deepep-v2-fa8a9b1"
+TORCH_VERSION = "2.10.0+cu130"
+NCCL_VERSION = "2.30.4"
+NVSHMEM_VERSION = "3.3.9"
+DEEPEP_V2_JIT_KERNELS = contracts.DEEPEP_V2_JIT_KERNELS
+
+
+def _sha256(path: str) -> str:
+    """Return the hex SHA-256 digest of the file at ``path``, read in 1 MiB pieces."""
+    hasher = hashlib.sha256()
+    piece_size = 1024 * 1024
+    with open(path, "rb") as stream:
+        while True:
+            piece = stream.read(piece_size)
+            if piece == b"":
+                # An empty read marks end of file.
+                break
+            hasher.update(piece)
+    return hasher.hexdigest()
+
+
+def _api_sha256() -> str:
+    """Return a SHA-256 over the signatures of ElasticBuffer's init/dispatch/combine.
+
+    The signatures are rendered with ``inspect.signature`` and serialized as compact,
+    key-sorted JSON before hashing.
+    """
+    signatures = {
+        f"ElasticBuffer.{method}": str(inspect.signature(getattr(ElasticBuffer, method)))
+        for method in ("__init__", "dispatch", "combine")
+    }
+    canonical = json.dumps(signatures, sort_keys=True, separators=(",", ":"))
+    return hashlib.sha256(canonical.encode()).hexdigest()
+
+
+def _loaded_library_paths() -> set[str]:
+    """Return real paths of the DeepEP extension plus loaded NCCL/NVSHMEM host libraries.
+
+    The extension path comes from ``deep_ep._C.__file__``; the communication libraries
+    are found by scanning ``/proc/self/maps`` for mapped files whose basename contains
+    ``libnccl.so`` or ``libnvshmem_host.so``.
+
+    Raises:
+        RuntimeError: if the extension is not a loaded file, or the maps file cannot
+            be read.
+    """
+    extension = getattr(getattr(deep_ep, "_C", None), "__file__", None)
+    if not extension or not os.path.isfile(extension):
+        raise RuntimeError("DeepEP V2 extension library is not loaded")
+    paths = {os.path.realpath(extension)}
+    try:
+        with open("/proc/self/maps", encoding="utf-8") as handle:
+            for line in handle:
+                # Each line is "address perms offset dev inode [pathname]". Splitting at
+                # most five times keeps a pathname that contains spaces intact, and
+                # anonymous mappings (no sixth field) or blank lines are skipped.
+                fields = line.rstrip("\n").split(None, 5)
+                if len(fields) < 6:
+                    continue
+                path = fields[5]
+                name = os.path.basename(path)
+                if ("libnccl.so" in name or "libnvshmem_host.so" in name) and os.path.isfile(path):
+                    paths.add(os.path.realpath(path))
+    except OSError as exc:  # pragma: no cover - benchmark runtime is Linux
+        raise RuntimeError("cannot inspect loaded communication libraries") from exc
+    return paths
+
+
+def _loaded_nccl_version() -> str:
+    """Query the single loaded NCCL library for its version and format it.
+
+    Raises:
+        RuntimeError: if zero or several NCCL libraries are loaded, or if
+            ``ncclGetVersion`` returns a non-zero status.
+    """
+    candidates = []
+    for library in _loaded_library_paths():
+        if "libnccl.so" in os.path.basename(library):
+            candidates.append(library)
+    if len(candidates) != 1:
+        raise RuntimeError("expected exactly one loaded NCCL library")
+    (library,) = candidates
+    encoded = ctypes.c_int()
+    status = ctypes.CDLL(library).ncclGetVersion(ctypes.byref(encoded))
+    if status != 0:
+        raise RuntimeError("loaded NCCL version query failed")
+    return ep_harness.format_collective_version(encoded.value)
+
+
+def _loaded_library_evidence() -> list[dict[str, str]]:
+    """Return content identities, never private library paths.
+
+    Each entry carries a role, a display name (``deep_ep._C`` for the extension, the
+    file basename otherwise) and the file's SHA-256; entries are sorted by those three
+    fields. Raises ``RuntimeError`` unless exactly one NCCL and one NVSHMEM host
+    library are loaded.
+    """
+    paths = _loaded_library_paths()
+    markers = {"nccl": "libnccl.so", "nvshmem": "libnvshmem_host.so"}
+
+    problems = []
+    for dependency, marker in markers.items():
+        found = sum(1 for path in paths if marker in os.path.basename(path))
+        if found != 1:
+            problems.append(f"{dependency}={found}")
+    if problems:
+        raise RuntimeError("expected one loaded library for each dependency: " + ", ".join(problems))
+
+    def classify(path: str) -> str:
+        # First matching marker wins; anything else is the DeepEP extension itself.
+        basename = os.path.basename(path)
+        for dependency, marker in markers.items():
+            if marker in basename:
+                return dependency
+        return "deepep-extension"
+
+    entries = []
+    for path in paths:
+        kind = classify(path)
+        shown = "deep_ep._C" if kind == "deepep-extension" else os.path.basename(path)
+        entries.append({"role": kind, "name": shown, "sha256": _sha256(path)})
+    entries.sort(key=lambda entry: (entry["role"], entry["name"], entry["sha256"]))
+    return entries
+
+
+def _jit_artifact_evidence() -> list[dict[str, str]]:
+    """Hash every kernel directory under ``$EP_JIT_CACHE_DIR/cache``.
+
+    Each directory must be a real directory named ``kernel.<name>.<32 hex>`` holding
+    exactly non-empty regular files ``kernel.cu``, ``kernel.cubin`` and ``kernel.sass``.
+    Returns one record per directory, sorted by cache key.
+
+    Raises:
+        RuntimeError: if the cache is missing, an entry is malformed, or the set of
+            kernel names differs from ``DEEPEP_V2_JIT_KERNELS``.
+    """
+    cache_root = Path(os.environ["EP_JIT_CACHE_DIR"]) / "cache"
+    if cache_root.is_symlink() or not cache_root.is_dir():
+        raise RuntimeError("DeepEP V2 produced no JIT cache evidence")
+    entry_pattern = re.compile(r"kernel\.([A-Za-z0-9_+-]+)\.([0-9a-f]{32})")
+    evidence = []
+    seen_kernels = set()
+    for entry in sorted(cache_root.iterdir(), key=lambda item: item.name):
+        parsed = entry_pattern.fullmatch(entry.name)
+        if entry.is_symlink() or not entry.is_dir() or parsed is None:
+            raise RuntimeError("DeepEP V2 JIT cache contains an invalid entry")
+        present = {child.name for child in entry.iterdir()}
+        if present != {"kernel.cu", "kernel.cubin", "kernel.sass"}:
+            raise RuntimeError("DeepEP V2 JIT kernel evidence is incomplete")
+        source = entry / "kernel.cu"
+        cubin = entry / "kernel.cubin"
+        sass = entry / "kernel.sass"
+        # Two separate passes: every file must be regular before any size is inspected.
+        for artifact in (source, cubin, sass):
+            if artifact.is_symlink() or not artifact.is_file():
+                raise RuntimeError("DeepEP V2 JIT evidence is not a regular file")
+        for artifact in (source, cubin, sass):
+            if artifact.stat().st_size <= 0:
+                raise RuntimeError("DeepEP V2 JIT evidence is empty")
+        seen_kernels.add(parsed.group(1))
+        record = {"cache_key": entry.name}
+        record["source_sha256"] = _sha256(str(source))
+        record["sass_sha256"] = _sha256(str(sass))
+        record["cubin_sha256"] = _sha256(str(cubin))
+        evidence.append(record)
+    matches_contract = (
+        len(evidence) == len(DEEPEP_V2_JIT_KERNELS)
+        and seen_kernels == DEEPEP_V2_JIT_KERNELS
+    )
+    if not matches_contract:
+        raise RuntimeError("DeepEP V2 JIT kernel set differs from the v1 contract")
+    evidence.sort(key=lambda item: item["cache_key"])
+    return evidence
+
+
+def _jit_cache_key(
+    args,
+    world_size: int,
+    max_tokens: int,
+    allow_hybrid_mode: bool,
+    realized: dict[str, int | bool],
+    precision_profile_id: str = "d-bf16.c-bf16",
+    communication_precision: dict[str, object] | None = None,
+) -> str:
+    """Key generated kernels by codegen inputs, not routing data or case identity.
+
+    The key is ``"jitcfg-v3-"`` followed by the SHA-256 of a compact, key-sorted JSON
+    document built from ``args`` (runner, hidden, topk, experts), the explicit
+    arguments, fixed buffer flags, and finally ``realized``, whose entries override
+    any earlier field of the same name. A missing ``communication_precision`` is
+    treated as plain bf16 dispatch and combine.
+    """
+    precision = communication_precision
+    if precision is None:
+        precision = {
+            "dispatch": {
+                "communication_format": "bf16",
+                "api_input_dtype": "bf16",
+            },
+            "combine": {"communication_format": "bf16"},
+        }
+    dispatch_profile = precision["dispatch"]
+    fields = dict(
+        contract="deepep-v2-jit-config-v3",
+        runner=args.runner,
+        world_size=world_size,
+        hidden=args.hidden,
+        topk=args.topk,
+        physical_experts=args.experts,
+        tuning_experts=getattr(args, "num_logical_experts", args.experts),
+        max_tokens=max_tokens,
+        precision_profile=precision_profile_id,
+        dispatch_dtype=dispatch_profile["communication_format"],
+        combine_dtype=precision["combine"]["communication_format"],
+        input_layout=dispatch_profile["api_input_dtype"],
+        expert_alignment=1,
+        do_cpu_sync=True,
+        cached_mode=False,
+        do_expand=False,
+        use_expanded_layout=False,
+        allow_hybrid_mode=allow_hybrid_mode,
+        allow_multiple_reduction=True,
+        prefer_overlap_with_compute=True,
+        deterministic=False,
+    )
+    # Realized values are applied last so they take precedence on a name clash.
+    fields.update(realized)
+    canonical = json.dumps(fields, sort_keys=True, separators=(",", ":"))
+    return "jitcfg-v3-" + hashlib.sha256(canonical.encode()).hexdigest()
+
+
+def _require_cross_rank_equal(value, label: str) -> None:
+    """Raise ``RuntimeError`` unless every rank contributed an identical ``value``.
+
+    Values are gathered from all ranks and compared by their compact, key-sorted
+    JSON encoding; ``label`` only appears in the error message.
+    """
+    per_rank = [None for _ in range(dist.get_world_size())]
+    dist.all_gather_object(per_rank, value)
+    encodings = set()
+    for item in per_rank:
+        encodings.add(json.dumps(item, sort_keys=True, separators=(",", ":")))
+    if len(encodings) != 1:
+        raise RuntimeError(f"DeepEP V2 {label} differs across ranks")
+
+
+def _configure_gin_mode(args, world_size: int) -> bool:
+    """Set or clear ``EP_DISABLE_GIN`` and report whether hybrid mode is allowed.
+
+    The scale-up domain is the first truthy value among ``args.scale_up_domain``,
+    ``args.gpus_per_node`` and ``world_size``. Hybrid mode is allowed only when the
+    world is larger than that domain; in that case ``EP_DISABLE_GIN`` is removed from
+    the environment, otherwise it is set to ``"1"``.
+    """
+    domain = getattr(args, "scale_up_domain", None)
+    if not domain:
+        domain = getattr(args, "gpus_per_node", None)
+    if not domain:
+        domain = world_size
+    spans_domains = world_size > int(domain)
+    if not spans_domains:
+        os.environ["EP_DISABLE_GIN"] = "1"
+    else:
+        os.environ.pop("EP_DISABLE_GIN", None)
+    return spans_domains
+
+
+def _lsa_topology_is_valid(
+    gin_enabled: bool,
+    world_size: int,
+    scale_up_domain: int,
+    config: dict[str, int | bool],
+) -> bool:
+    """Check that the realized rank counts in ``config`` match the expected topology.
+
+    With GIN enabled the world must split evenly into more than one scale-up domain,
+    with one RDMA/scale-out rank per domain and ``scale_up_domain`` NVLink/scale-up
+    ranks. Without GIN everything must sit in a single domain spanning the whole
+    world. In both cases ``config["is_scaleup_nvlink"]`` must be exactly ``True``.
+    """
+    if gin_enabled:
+        domains = world_size // scale_up_domain
+        if not world_size % scale_up_domain == 0:
+            return False
+        if not domains > 1:
+            return False
+        outer_ranks, inner_ranks = domains, scale_up_domain
+    else:
+        outer_ranks, inner_ranks = 1, world_size
+    expectations = (
+        ("physical_rdma_ranks", outer_ranks),
+        ("physical_nvlink_ranks", inner_ranks),
+        ("logical_scaleout_ranks", outer_ranks),
+        ("logical_scaleup_ranks", inner_ranks),
+    )
+    for field, wanted in expectations:
+        if not config[field] == wanted:
+            return False
+    # Identity check on purpose: a truthy non-bool value must not pass.
+    return config["is_scaleup_nvlink"] is True
+
+
+def _require_runtime() -> tuple[str, str]:
+    """Verify the pinned DeepEP V2 runtime and return ``(torch, loaded NCCL)`` versions.
+
+    Collects every mismatch between the pinned module constants and the environment
+    variables, package versions, ``ElasticBuffer`` symbol, ``EP_SUPPRESS_NCCL_CHECK``
+    setting and loaded NCCL library, then raises one ``RuntimeError`` listing them all.
+    """
+    problems = []
+    pinned_environment = (
+        ("DEEPEP_V2_PR", str(DEEPEP_V2_PR)),
+        ("DEEPEP_V2_FIX_PR", str(DEEPEP_V2_FIX_PR)),
+        ("DEEPEP_V2_COMMIT", DEEPEP_V2_COMMIT),
+        ("DEEPEP_V2_TREE", DEEPEP_V2_TREE),
+        ("DEEPEP_V2_FMT_COMMIT", DEEPEP_V2_FMT_COMMIT),
+        ("DEEPEP_V2_JIT_RANDOM_SEED", DEEPEP_V2_JIT_RANDOM_SEED),
+        ("EP_JIT_DUMP_SASS", "1"),
+    )
+    for variable, wanted in pinned_environment:
+        if os.environ.get(variable) != wanted:
+            problems.append(
+                f"{variable}={os.environ.get(variable)!r}, expected {wanted!r}"
+            )
+    torch_version = str(torch.__version__)
+    nccl_package_version = importlib.metadata.version("nvidia-nccl-cu13")
+    nvshmem_package_version = importlib.metadata.version("nvidia-nvshmem-cu12")
+    pinned_versions = (
+        ("deep_ep", str(getattr(deep_ep, "__version__", "")), DEEPEP_V2_VERSION),
+        (
+            "deep_ep distribution",
+            importlib.metadata.version("deep_ep"),
+            DEEPEP_V2_DISTRIBUTION,
+        ),
+        ("torch", torch_version, TORCH_VERSION),
+        ("nvidia-nccl-cu13", nccl_package_version, NCCL_VERSION),
+        ("nvidia-nvshmem-cu12", nvshmem_package_version, NVSHMEM_VERSION),
+    )
+    for component, found, wanted in pinned_versions:
+        if found != wanted:
+            problems.append(f"{component}={found!r}, expected {wanted!r}")
+    if not (inspect.isclass(ElasticBuffer) and ElasticBuffer.__name__ == "ElasticBuffer"):
+        problems.append("deep_ep.ElasticBuffer is absent")
+    if os.environ.get("EP_SUPPRESS_NCCL_CHECK"):
+        problems.append("EP_SUPPRESS_NCCL_CHECK must be unset")
+    # The library actually mapped into the process is checked in addition to the package.
+    nccl_runtime_version = _loaded_nccl_version()
+    if nccl_runtime_version != NCCL_VERSION:
+        problems.append(
+            f"loaded NCCL={nccl_runtime_version!r}, expected {NCCL_VERSION!r}"
+        )
+    if problems:
+        raise RuntimeError("invalid DeepEP V2 runtime: " + "; ".join(problems))
+    return torch_version, nccl_runtime_version
+
+
+class DeepEPV2Backend:
+    name = "deepep-v2"
+    stage_device_work = False
+    combine_needs_redispatch = False
+    combine_weight_semantics = "unweighted-rank-sum"
+
+    def __init__(self, args, rank, world_size, local_rank, device):
+        self.args = args
+        self.rank = rank
+        self.world_size = world_size
+        self.device = device
+        self.mode = "normal"
+        self.precision_profile_id, self.communication_precision = (
+            ep_precision.resolve_precision(
+                args,
+                backend=self.name,
+                mode=self.mode,
+                supported_profiles={
+                    "d-bf16.c-bf16",
+                    "d-fp8-e4m3fn-b128-f32-prequantized.c-bf16",
+                },
+            )
+        )
+        self._fp8_dispatch = ep_precision.is_low_precision_dispatch(
+            self.communication_precision
+        )
+        self.stage_device_work = self._fp8_dispatch
+        ep_precision.require_keyword(
+            ElasticBuffer.__init__,
+            "use_fp8_dispatch",
+            api="deep_ep.ElasticBuffer.__init__",
+        )
+        self.group = dist.group.WORLD
+        torch_version, nccl_runtime_version = _require_runtime()
+        ladder, _ = ep_harness.token_ladder(args.tokens_ladder, args.phase, None)
+        conditioning = ep_harness.CONDITIONING_LADDERS[args.phase]
+        self.max_tokens = max([*ladder, *conditioning])
+        jit_root = Path(os.environ["EP_JIT_CACHE_DIR"])
+        scale_up_domain = int(
+            getattr(args, "scale_up_domain", None)
+            or getattr(args, "gpus_per_node", None)
+            or world_size
+        )
+        allow_hybrid_mode = _configure_gin_mode(args, world_size)
+        gin_enabled = allow_hybrid_mode
+        communication_backend = "nccl-gin" if gin_enabled else "nccl-device-lsa"
+        self._deferred_jit_snapshot = None
+        self.buffer = ElasticBuffer(
+            self.group,
+            num_max_tokens_per_rank=self.max_tokens,
+            hidden=args.hidden,
+            num_topk=args.topk,
+            use_fp8_dispatch=self._fp8_dispatch,
+            deterministic=False,
+            allow_hybrid_mode=allow_hybrid_mode,
+            allow_multiple_reduction=True,
+            prefer_overlap_with_compute=True,
+            num_gpu_timeout_secs=100,
+            explicitly_destroy=True,
+        )
+        tuning_num_experts = int(getattr(args, "num_logical_experts", args.experts))
+        self.num_sms = int(
+            self.buffer.get_theoretical_num_sms(tuning_num_experts, args.topk)
+        )
+        self.num_qps = int(self.buffer.get_theoretical_num_qps(self.num_sms))
+        properties = torch.cuda.get_device_properties(device)
+        device_sms = int(properties.multi_processor_count)
+        jit_config = {
+            "num_sms": self.num_sms,
+            "num_qps": self.num_qps,
+            "allocated_qps": int(self.buffer.num_allocated_qps),
+            "logical_scaleout_ranks": int(self.buffer.num_scaleout_ranks),
+            "logical_scaleup_ranks": int(self.buffer.num_scaleup_ranks),
+            "physical_rdma_ranks": int(self.buffer.num_rdma_ranks),
+            "physical_nvlink_ranks": int(self.buffer.num_nvlink_ranks),
+            "is_scaleup_nvlink": self.buffer.num_scaleup_ranks == self.buffer.num_nvlink_ranks,
+            "device_arch_major": int(properties.major),
+            "device_arch_minor": int(properties.minor),
+            "device_sms": device_sms,
+            "device_smem_bytes": int(properties.shared_memory_per_block_optin),
+            "gpu_timeout_cycles": 100 * int(properties.clock_rate) * 1000,
+        }
+        _require_cross_rank_equal(jit_config, "JIT configuration")
+        if not _lsa_topology_is_valid(
+            gin_enabled, world_size, scale_up_domain, jit_config
+        ):
+            raise RuntimeError("DeepEP V2 realized communication domains differ from topology")
+        self.jit_cache_key = _jit_cache_key(
+            args,
+            world_size,
+            self.max_tokens,
+            allow_hybrid_mode,
+            jit_config,
+            self.precision_profile_id,
+            self.communication_precision,
+        )
+        os.environ["EP_JIT_CACHE_DIR"] = str(jit_root / self.jit_cache_key)
+        realized_config = {
+            "jit_cache_key": self.jit_cache_key,
+            "num_max_tokens_per_rank": self.max_tokens,
+            **jit_config,
+        }
+        _require_cross_rank_equal(realized_config, "realized tuning/topology")
+        comm = getattr(self.buffer, "nccl_comm_handle", None)
+        communicator = (
+            "deepep-managed" if getattr(comm, "managed", True) else "pytorch-reused"
+        )
+
+        loaded_libraries = _loaded_library_evidence()
+        _require_cross_rank_equal(loaded_libraries, "loaded libraries")
+        self.backend_provenance = {
+            "deepep_version": DEEPEP_V2_VERSION,
+            "deepep_distribution_version": importlib.metadata.version("deep_ep"),
+            "deepep_commit": DEEPEP_V2_COMMIT,
+            "deepep_tree": DEEPEP_V2_TREE,
+            "deepep_pr": DEEPEP_V2_PR,
+            "deepep_fix_pr": DEEPEP_V2_FIX_PR,
+            "fmt_commit": DEEPEP_V2_FMT_COMMIT,
+            "api": "deep_ep.ElasticBuffer",
+            "api_signature_sha256": _api_sha256(),
+            "communication_backend": communication_backend,
+            "gin_enabled": gin_enabled,
+            "nccl_communicator": communicator,
+            "torch_version": torch_version,
+            "torch_git_version": str(torch.version.git_version),
+            "cuda_version": str(torch.version.cuda),
+            "nccl_package_version": importlib.metadata.version("nvidia-nccl-cu13"),
+            "nccl_version": nccl_runtime_version,
+            "nvshmem_package_version": importlib.metadata.version("nvidia-nvshmem-cu12"),
+            "loaded_libraries": loaded_libraries,
+            "jit_cache_key": self.jit_cache_key,
+            "jit_cubins": [],
+            "jit_random_seed": DEEPEP_V2_JIT_RANDOM_SEED,
+            "num_experts": int(args.experts),
+            "mode": "normal",
+            "dispatch_dtype": ep_precision.communication_format(
+                self.communication_precision, "dispatch"
+            ),
+            "combine_dtype": ep_precision.communication_format(
+                self.communication_precision, "combine"
+            ),
+            "deterministic": False,
+            "resource_mode": "fixed-profile",
+            "requested_num_sms": self.num_sms,
+            "tuning_num_experts": tuning_num_experts,
+            "num_sms": self.num_sms,
+            "num_qps": self.num_qps,
+            "allocated_qps": int(self.buffer.num_allocated_qps),
+            "device_sms": device_sms,
+            "sm_fraction": self.num_sms / device_sms,
+            "tuned_source": "deepep-v2-analytical-sm-qp-logical-experts-v1",
+            "num_max_tokens_per_rank": self.max_tokens,
+            "allow_hybrid_mode": bool(self.buffer.allow_hybrid_mode),
+            "allow_multiple_reduction": bool(self.buffer.allow_multiple_reduction),
+            "prefer_overlap_with_compute": bool(
+                self.buffer.prefer_overlap_with_compute
+            ),
+            "logical_scaleout_ranks": int(self.buffer.num_scaleout_ranks),
+            "logical_scaleup_ranks": int(self.buffer.num_scaleup_ranks),
+            "physical_rdma_ranks": int(self.buffer.num_rdma_ranks),
+            "physical_nvlink_ranks": int(self.buffer.num_nvlink_ranks),
+        }
+
+    def buffer_cap(self, args):
+        """Return the per-rank token ceiling; ``args`` is accepted but not consulted."""
+        ceiling = self.max_tokens
+        return ceiling
+
+    def make_problem(self, T, idx, weights, x):
+        """Bundle one rank's inputs for a dispatch/combine round.
+
+        The activations are encoded once under ``self.communication_precision``. The
+        returned namespace keeps the raw ``x`` next to the encoder's native input, its
+        semantic view and its evidence, plus the routing tensors converted to
+        ``deep_ep.topk_idx_t`` and float32.
+        """
+        encoded = ep_precision.encode_dispatch(torch, x, self.communication_precision)
+        problem = types.SimpleNamespace(T=T, x=x)
+        problem.dispatch_x = encoded.native_input
+        problem.oracle_x = encoded.semantic
+        problem.dispatch_precision_evidence = encoded.evidence
+        problem.topk_idx = idx.to(deep_ep.topk_idx_t)
+        problem.topk_weights = weights.to(torch.float32)
+        return problem
+
+    def dispatch(self, p):
+        """Issue one ``buffer.dispatch`` for problem ``p`` and package its outputs.
+
+        Returns a namespace with ``recv_x``, ``recv_topk_idx``, ``recv_topk_weights`` and
+        ``handle``; the fifth value returned by the buffer is dropped.
+        """
+        outputs = self.buffer.dispatch(
+            p.dispatch_x,
+            topk_idx=p.topk_idx,
+            topk_weights=p.topk_weights,
+            num_experts=self.args.experts,
+            num_max_tokens_per_rank=self.max_tokens,
+            expert_alignment=1,
+            num_sms=self.num_sms,
+            num_qps=self.num_qps,
+            async_with_compute_stream=False,
+            do_handle_copy=True,
+            do_cpu_sync=True,
+            do_expand=False,
+        )
+        received, received_idx, received_weights, dispatch_handle, _ = outputs
+        result = types.SimpleNamespace()
+        result.recv_x = received
+        result.recv_topk_idx = received_idx
+        result.recv_topk_weights = received_weights
+        result.handle = dispatch_handle
+        return result
+
+    def stage(self, p, h):
+        """Place the received tokens on ``h.combine_input`` ahead of ``combine``.
+
+        The receive count comes from ``p.recv_tokens`` when the caller attached one.
+        ``make_problem`` in this adapter does not set that attribute, so when it is
+        absent (or None) the count is read from the dispatch handle through
+        ``self.recv_tokens(h)`` instead of failing with AttributeError.
+        """
+        rows = getattr(p, "recv_tokens", None)
+        if rows is None:
+            # Same source of truth inspect_dispatch and combine_transformed use.
+            rows = self.recv_tokens(h)
+        h.combine_input = self._semantic_recv(h, rows)
+
+    def combine(self, p, h):
+        """Run one ``buffer.combine`` on the staged input and return its first output.
+
+        ``p`` is not used; the buffer must return exactly three values.
+        """
+        outputs = self.buffer.combine(
+            h.combine_input,
+            handle=h.handle,
+            num_sms=self.num_sms,
+            num_qps=self.num_qps,
+            async_with_compute_stream=False,
+        )
+        merged, _second, _third = outputs
+        return merged
+
+    def capture_deferred_provenance(self):
+        """Record the JIT artifact evidence into ``backend_provenance["jit_cubins"]``.
+
+        Runs the buffer barrier and a CUDA synchronize first, then collects the evidence,
+        requires it to be equal across ranks, and compares it with the snapshot kept from
+        any earlier call.
+
+        Raises:
+            RuntimeError: if an earlier snapshot exists and the new evidence differs.
+        """
+        # destroy() uses this same barrier. Materialize its JIT kernel before hashing the
+        # implementation so the first and later routing cases see identical evidence.
+        self.buffer.barrier(use_comm_stream=True, with_cpu_sync=True)
+        torch.cuda.synchronize()
+        jit_cubins = _jit_artifact_evidence()
+        _require_cross_rank_equal(jit_cubins, "JIT CUBINs")
+        # The first call only records the snapshot; later calls must reproduce it exactly.
+        if (
+            self._deferred_jit_snapshot is not None
+            and jit_cubins != self._deferred_jit_snapshot
+        ):
+            raise RuntimeError("DeepEP V2 JIT CUBIN set changed after measurement")
+        self._deferred_jit_snapshot = jit_cubins
+        self.backend_provenance["jit_cubins"] = jit_cubins
+
+    def inspect_dispatch(self, p, h):
+        """Build the normalized view of what this rank received for handle ``h``.
+
+        Only the first ``recv_tokens(h)`` rows are exposed. Non-negative local expert
+        indices are shifted by ``rank * (experts // world_size)``; negative entries are
+        kept as-is in ``expert_ids`` and get a zero weight. ``p`` is not used.
+        """
+        count = self.recv_tokens(h)
+        experts_per_rank = self.args.experts // self.world_size
+        local_idx = h.recv_topk_idx[:count]
+        present = local_idx >= 0
+        global_ids = torch.where(
+            present, local_idx + self.rank * experts_per_rank, local_idx
+        )
+        present_local = local_idx[present].to(torch.int64)
+
+        payload = self._semantic_recv(h, count)[:count]
+        encoded_payload = self._encoded_recv(h)[:count]
+        raw_scales = self._recv_scales(h)
+        scales = None if raw_scales is None else raw_scales[:count]
+        weights = h.recv_topk_weights[:count].masked_fill(~present, 0)
+        per_expert = torch.bincount(present_local, minlength=experts_per_rank)
+        return types.SimpleNamespace(
+            payload=payload,
+            encoded_payload=encoded_payload,
+            scales=scales,
+            expert_ids=global_ids,
+            weights=weights,
+            local_expert_counts=per_expert,
+            ordering_contract="elastic-source-metadata-v1",
+        )
+
+    def combine_transformed(self, p, h, transformed):
+        """Combine caller-transformed rows through the handle's existing layout.
+
+        A zero buffer shaped like the semantic receive buffer is filled from the top with
+        ``transformed`` (cast to the buffer dtype) and handed to ``buffer.combine``; only
+        the first of its three outputs is returned. ``p`` is not used.
+        """
+        template = self._semantic_recv(h, self.recv_tokens(h))
+        staged = torch.zeros_like(template)
+        filled_rows = transformed.shape[0]
+        staged[:filled_rows].copy_(transformed.to(staged.dtype))
+        outputs = self.buffer.combine(
+            staged,
+            handle=h.handle,
+            num_sms=self.num_sms,
+            num_qps=self.num_qps,
+            async_with_compute_stream=False,
+        )
+        merged, _second, _third = outputs
+        return merged
+
+    def recv_tokens(self, h):
+        """Return, as an int, the last entry of the handle's
+        ``psum_num_recv_tokens_per_scaleup_rank``."""
+        running_totals = h.handle.psum_num_recv_tokens_per_scaleup_rank
+        last = running_totals[-1]
+        return int(last.item())
+
+    def _encoded_recv(self, h):
+        """Return the encoded payload: element 0 when ``h.recv_x`` is a tuple, else ``h.recv_x``."""
+        received = h.recv_x
+        if isinstance(received, tuple):
+            return received[0]
+        return received
+
+    def _recv_scales(self, h):
+        """Return element 1 of ``h.recv_x`` when it is a tuple, otherwise None."""
+        received = h.recv_x
+        if not isinstance(received, tuple):
+            return None
+        return received[1]
+
+    def _semantic_recv(self, h, rows):
+        """Return the receive buffer in semantic form, decoding FP8 at most once per handle.
+
+        Without FP8 dispatch ``h.recv_x`` is returned untouched. With it, the first
+        ``rows`` encoded rows are dequantized into a bfloat16 buffer of the encoded shape
+        (rows past ``rows`` are left as ``torch.empty`` produced them) and the buffer is
+        cached on ``h`` together with ``rows``.
+
+        Raises:
+            RuntimeError: if a cached buffer exists for ``h`` but was built for a
+                different ``rows``.
+        """
+        if not self._fp8_dispatch:
+            return h.recv_x
+        if hasattr(h, "recv_semantic"):
+            if h.recv_semantic_rows != rows:
+                raise RuntimeError("DeepEP V2 receive count changed for one dispatch handle")
+            return h.recv_semantic
+        encoded = self._encoded_recv(h)
+        decoded = torch.empty(encoded.shape, dtype=torch.bfloat16, device=encoded.device)
+        dequantized = ep_precision.dequantize_dispatch(
+            torch,
+            encoded[:rows],
+            self._recv_scales(h)[:rows],
+            self.communication_precision["dispatch"],
+        )
+        decoded[:rows].copy_(dequantized)
+        h.recv_semantic = decoded
+        h.recv_semantic_rows = rows
+        return decoded
+
+    def oracle_dispatch_payload(self, payload):
+        """Return the ``semantic`` field of ``payload`` encoded under this adapter's profile."""
+        encoded = ep_precision.encode_dispatch(torch, payload, self.communication_precision)
+        return encoded.semantic
+
+    def precision_evidence(self, problem, view=None):
+        """Forward to ``ep_precision.precision_evidence`` with this adapter's profile id
+        and communication profile, plus the given ``problem`` and optional ``view``."""
+        request = {
+            "profile_id": self.precision_profile_id,
+            "profile": self.communication_precision,
+            "problem": problem,
+            "view": view,
+        }
+        return ep_precision.precision_evidence(torch, **request)
+
+    def finalize(self, rc):
+        """Tear down the buffer and the process group, then report the exit code.
+
+        Order: barrier, ``buffer.destroy()``, barrier, ``destroy_process_group()``.
+
+        Returns:
+            ``rc`` unchanged when teardown succeeds, or 1 when any teardown step raises.
+        """
+        try:
+            dist.barrier()
+            self.buffer.destroy()
+            dist.barrier()
+            dist.destroy_process_group()
+        except Exception:
+            # Exit code 1 alone does not say which step broke; surface the traceback on
+            # stderr before converting the failure into the return value.
+            import traceback
+
+            traceback.print_exc()
+            return 1
+        return rc
diff --git a/experimental/CollectiveX/tests/ep_harness.py b/experimental/CollectiveX/tests/ep_harness.py
new file mode 100644
index 000000000..af2ec98ca
--- /dev/null
+++ b/experimental/CollectiveX/tests/ep_harness.py
@@ -0,0 +1,2167 @@
+#!/usr/bin/env python3
+"""CollectiveX — shared EP (expert-parallel) dispatch/combine benchmark harness.
+
+Backend-agnostic core. The per-backend adapters (`ep_deepep.py`, `ep_mori.py`)
+implement a small duck-typed protocol; this module owns the source-tokens-per-rank
+sweep, the timing, the correctness gate, and the provenance-tagged JSON doc.
+
+Fair-comparison contract (see docs/methodology.md):
+  * **Deterministic shared routing trace** (`routing.py`): the per-token expert IDs +
+    gate weights are generated once from a fixed seed over the *global* batch and are
+    identical on every SKU; each rank materializes its slice. So every platform runs
+    the *same* problem (no per-rank/per-platform RNG in the adapters).
+  * **Explicit measurement contract**: layout-and-dispatch-v1 includes routing-layout
+    generation in dispatch timing. Combine excludes staging.
+    Isolated sum is derived independently at each percentile and is not a measured chained op.
+  * **Correct collective percentile**: each iteration's latency is reduced MAX across
+    ranks first (a collective finishes with its slowest rank), THEN percentiled —
+    `median_i(max_r)`, not `max_r(median_i)`.
+  * **One line = one fixed config**; only T varies. Both `tokens_per_rank` and
+    `global_tokens = T * ep_size` are recorded as explicit chart coordinates.
+
+stdlib-only at module top (torch is passed in by the entrypoint; `routing` is imported
+lazily inside run_sweep) so this file `py_compile`s without torch.
+
+Backend protocol:
+    name, mode, combine_needs_redispatch, backend_provenance(dict)
+    buffer_cap(args) -> int|None
+    make_problem(T, idx, weights, x) -> problem   # materialize this rank's trace slice
+    dispatch(problem) -> handle                   # pure dispatch comm (timed)
+    stage(problem, handle)                        # expert-output placement
+    stage_device_work                             # true only when stage launches device work
+    combine(problem, handle) -> tensor            # pure combine comm (timed)
+    inspect_dispatch(problem, handle) -> view     # normalized payload/expert/weight metadata
+    combine_transformed(problem, handle, tensor) -> tensor
+    recv_tokens(handle) -> int                    # realized tokens received this rank
+    finalize(rc) -> int|NoReturn
+"""
+from __future__ import annotations
+
+import argparse
+import datetime as _dt
+import hashlib
+import json
+import math
+import os
+import types
+
+import contracts
+import identity
+import workload as workload_contract
+
+# Raw v1 result emitted by one benchmark case. Publication uses a separate contract.
+SCHEMA_VERSION = 1
+
+# Every comparison-grade EP point uses the same literal timing profile on every SKU/backend.
+# Eight timed iterations keep each MoRI burst well below its sustained-iteration wedge, 64 trials
+# provide 512 observations per operation, and 32 warmups meet Blackwell's measured clock-ramp floor.
+SAMPLING_CONTRACT = identity.V1_CASE_PROFILE["sampling_contract"]
+# 512 = TIMED_ITERS_PER_TRIAL (8) * TRIALS_PER_POINT (64); the three literals are written
+# out independently, so change them together.
+TIMED_SAMPLES_PER_POINT = 512
+TIMED_ITERS_PER_TRIAL = 8
+TRIALS_PER_POINT = 64
+WARMUP_ITERS_PER_TRIAL = 32
+WARMUP_SEMANTICS = "full-roundtrip-before-each-component-trial-point-v1"
+QUALIFICATION_RUNS = 3
+ROUTING_SEED = 67
+ROUTING_GENERATOR = workload_contract.GENERATOR_VERSION
+ACTIVATION_PROFILE = "canonical-counter-source-v4"
+ACTIVATION_GENERATOR = workload_contract.ACTIVATION_GENERATOR
+PLACEMENT = "packed"
+COMPONENT_ORDER_CONTRACT = "qualification-hash-rotated-components-v1"
+LOW_LATENCY_MODE = "low-latency"
+LOW_LATENCY_MAX_TOKENS_PER_RANK = 128
+LOW_LATENCY_MEASUREMENT_CONTRACT = "expert-packed-weighted-combine-v1"
+# Same literal as COMPONENT_ORDER_CONTRACT above.
+LOW_LATENCY_COMPONENT_ORDER_CONTRACT = "qualification-hash-rotated-components-v1"
+LOW_LATENCY_ORACLE_CONTRACT = "expert-assignment-transform-v1"
+LOW_LATENCY_CORRECTNESS_SCOPE = "expert-assignment-and-weighted-combine"
+
+# Phase-default sweeps — token-size regimes, NOT distinct kernels (both run normal
+# mode; "decode"/"prefill" name the small/large-token regime). Powers of two for a
+# clean log x-axis; clamped to the backend buffer ceiling (MoRI's registerable heap).
+DECODE_LADDER = [1, 2, 4, 8, 16, 32, 64, 128]
+PREFILL_LADDER = [128, 256, 512, 1024, 2048, 4096]
+# Per-phase copies of the contract ladders, so edits here cannot alias the lists in `contracts`.
+CONDITIONING_LADDERS = {
+    phase: list(ladder) for phase, ladder in contracts.V1_CONDITIONING_LADDERS.items()
+}
+CONDITIONING_ROUNDS_PER_SHAPE = contracts.V1_CONDITIONING_ROUNDS_PER_SHAPE
+CONDITIONING_CONTRACT = identity.V1_CASE_PROFILE["conditioning_contract"]
+ORACLE_CONTRACT = identity.V1_CASE_PROFILE["oracle_contract"]
+# NOTE(review): presumably the relative/absolute tolerances of the oracle comparison; the
+# consumer is outside this excerpt — confirm.
+ORACLE_RTOL = 5e-2
+ORACLE_ATOL = 2e-2
+
+EPLB_REDUNDANT_EXPERTS = 32
+EPLB_REFERENCE_TOKENS_PER_RANK = 2048
+EPLB_PLANNER = "greedy-rank-major-v1"
+# Fixed profile values; add_common_args installs them as parser-level defaults.
+V1_PROFILE = {
+    "dispatch_dtype": "bf16",
+    "combine_dtype": "bf16",
+    "combine_quant_mode": "none",
+    "mode": "normal",
+    "measurement_contract": "layout-and-dispatch-v1",
+    "resource_mode": "fixed-profile",
+    "placement": PLACEMENT,
+    "activation_profile": ACTIVATION_PROFILE,
+    "activation_generator": ACTIVATION_GENERATOR,
+    "routing_generator": ROUTING_GENERATOR,
+    "component_order_contract": COMPONENT_ORDER_CONTRACT,
+    "conditioning_contract": CONDITIONING_CONTRACT,
+    "eplb_reference_tokens_per_rank": EPLB_REFERENCE_TOKENS_PER_RANK,
+    "eplb_redundant_experts": EPLB_REDUNDANT_EXPERTS,
+    "eplb_planner": EPLB_PLANNER,
+    # DeepEP/UCCL use this only as the fallback when their tuned default is not exported.
+    "num_sms": 24,
+}
+
+
+def precision_byte_provenance(
+    axis: dict, logical_copies: int, hidden: int
+) -> dict[str, int | str]:
+    """Account the logical bytes one communication direction moves.
+
+    Activation bytes are ``logical_copies`` rows of ``hidden`` values at the format's
+    bit width, each row rounded up to whole bytes. Scale bytes are one scale per
+    ``scale_group_size`` values per row (none when the group size is None).
+
+    Raises:
+        ValueError: for negative dimensions, an unknown communication format, or an
+            unknown scale dtype.
+    """
+    if hidden < 0 or logical_copies < 0:
+        raise ValueError("logical precision byte dimensions must be non-negative")
+
+    format_bits = {"bf16": 16, "fp8-e4m3fn": 8, "fp8-e4m3fnuz": 8, "logfmt10": 10}
+    width = format_bits.get(axis["communication_format"])
+    if width is None:
+        raise ValueError(f"unknown communication format {axis['communication_format']!r}")
+    row_bytes = math.ceil(hidden * width / 8)
+    activation_data_bytes = logical_copies * row_bytes
+
+    scale_widths = {None: 0, "f32": 4, "implicit-logfmt10": 0}
+    scale_width = scale_widths.get(axis["scale_dtype"])
+    if scale_width is None:
+        raise ValueError(f"unknown communication scale dtype {axis['scale_dtype']!r}")
+    group_size = axis["scale_group_size"]
+    if group_size is None:
+        groups_per_row = 0
+    else:
+        groups_per_row = math.ceil(hidden / group_size)
+    scale_bytes = logical_copies * groups_per_row * scale_width
+
+    report = {"accounting_contract": "activation-data-plus-scales-v1"}
+    report["activation_data_bytes"] = activation_data_bytes
+    report["scale_bytes"] = scale_bytes
+    report["total_logical_bytes"] = activation_data_bytes + scale_bytes
+    return report
+
+def format_collective_version(raw) -> str:
+    """Render a collective-library version as ``major.minor.patch`` text.
+
+    Integers are treated as packed codes: below 10000 the layout is
+    ``major*1000 + minor*100 + patch``, otherwise ``major*10000 + minor*100 + patch``.
+    Tuples and lists are joined with dots; None and the empty string become "unknown";
+    anything else is passed through ``str``.
+    """
+    if isinstance(raw, int):
+        patch = raw % 100
+        if raw < 10_000:
+            major, minor = raw // 1000, raw // 100 % 10
+        else:
+            major, minor = raw // 10_000, raw // 100 % 100
+        return f"{major}.{minor}.{patch}"
+    if isinstance(raw, (tuple, list)):
+        return ".".join(str(part) for part in raw)
+    if raw in (None, ""):
+        return "unknown"
+    return str(raw)
+
+
+def add_common_args(ap: argparse.ArgumentParser) -> None:
+    """Add the varying v1 inputs; fixed profile values are not CLI axes.
+
+    ``V1_PROFILE`` is installed as parser-level defaults first, then the workload,
+    sampling and provenance/output options are registered on ``ap`` in place.
+    ``--runner``, ``--topology-class``, ``--scope``, ``--scale-up-transport`` and
+    ``--out`` are required.
+    """
+    ap.set_defaults(**V1_PROFILE)
+    ap.add_argument("--mode", default="normal", choices=["normal", LOW_LATENCY_MODE])
+    ap.add_argument(
+        "--precision-profile",
+        default="",
+        choices=("", *identity.V1_PRECISION_PROFILES),
+        help="exact native dispatch/combine communication profile; blank selects BF16 control",
+    )
+    ap.add_argument("--phase", default="decode", choices=["decode", "prefill"],
+                    help="token-size regime: decode (small T) / prefill (large T) — picks the default ladder")
+    ap.add_argument("--tokens-ladder", default="",
+                    help="space/comma-separated source-tokens-per-rank sweep; blank = phase default")
+    ap.add_argument("--hidden", type=int, default=7168)
+    ap.add_argument("--topk", type=int, default=8)
+    ap.add_argument("--experts", type=int, default=256, help="TOTAL experts (fixed across EP degrees)")
+    ap.add_argument("--routing", default="uniform", choices=["uniform", "zipf"])
+    # EPLB (Expert-Parallel Load Balancer): replicate hot experts onto redundant physical
+    # slots + balanced-place so per-rank load equalizes. A pure routing-trace transform
+    # (tests/eplb.py); experts becomes num_logical+redundant. The remedy for `zipf` skew.
+    ap.add_argument("--eplb", action="store_true",
+                    help="apply EPLB expert replication/placement to the routing trace")
+    # Canonical workloads consume pre-generated trace bytes instead of the
+    # seeded runtime generator, so a result is provably the SAME workload as another machine's
+    # (checksum match). Points at a dir of <workload_id>.npz/.manifest.json (make_workloads.py).
+    ap.add_argument("--workload-dir", default="",
+                    help="dir of canonical workload traces; empty = seeded runtime generation (dev)")
+    ap.add_argument("--case-id", default="")
+    ap.add_argument("--suite", default="")
+    ap.add_argument("--workload-name", default="")
+    ap.add_argument("--required-publication", default="")
+    ap.add_argument("--seed", type=int, default=ROUTING_SEED)
+    # The default is the raw environment string (or "1").
+    # NOTE(review): argparse type-converts a string default but does not appear to apply
+    # `choices` to it — confirm an out-of-range COLLECTIVEX_QUALIFICATION_INDEX is
+    # rejected downstream.
+    ap.add_argument(
+        "--qualification-index",
+        type=int,
+        choices=range(1, QUALIFICATION_RUNS + 1),
+        default=os.environ.get("COLLECTIVEX_QUALIFICATION_INDEX", "1"),
+        help="one-based qualification repeat used for deterministic measurement ordering",
+    )
+    # 32: B300/Blackwell needs ~30 untimed iters to reach steady-state GPU clocks +
+    # establish NVLink/NVSHMEM connections — at warmup=8 its dispatch read ~1787us
+    # (cold), at warmup>=30 it settles to ~85us (faster than H100, reproducible within
+    # ~2.5%). H100/MI355X reach steady state much sooner; the extra iters are harmless.
+    ap.add_argument("--warmup", type=int, default=WARMUP_ITERS_PER_TRIAL,
+                    help=f"untimed full roundtrips before each trial/point; fixed by "
+                         f"{SAMPLING_CONTRACT} to {WARMUP_ITERS_PER_TRIAL}")
+    ap.add_argument("--iters", type=int, default=TIMED_ITERS_PER_TRIAL,
+                    help=f"timed iterations per trial; fixed by {SAMPLING_CONTRACT} to "
+                         f"{TIMED_ITERS_PER_TRIAL}")
+    ap.add_argument("--trials", type=int, default=TRIALS_PER_POINT,
+                    help=f"timed trials; fixed by {SAMPLING_CONTRACT} to {TRIALS_PER_POINT}")
+    # provenance / output
+    ap.add_argument("--runner", required=True)
+    ap.add_argument("--topology-class", required=True)
+    ap.add_argument("--transport", default="")
+    ap.add_argument("--scope", required=True, choices=["scale-up", "scale-out"])
+    ap.add_argument("--scale-up-transport", required=True)
+    ap.add_argument("--scale-out-transport", default="")
+    # gpus-per-node=0 means one node containing the whole EP group.
+    ap.add_argument("--gpus-per-node", type=int, default=0)
+    ap.add_argument("--scale-up-domain", type=int, default=0, help="0 = gpus_per_node*ep (one domain)")
+    ap.add_argument("--timestamp")
+    ap.add_argument("--out", required=True)
+
+
+def token_ladder(spec: str, phase: str, cap: int | None) -> tuple[list[int], list[int]]:
+    """Resolve the token sweep and report what the cap removed.
+
+    A non-blank ``spec`` (commas or whitespace as separators) wins over the phase
+    default. Non-positive entries are discarded and the rest is de-duplicated and
+    sorted. Returns ``(kept, dropped)`` where ``dropped`` holds every point above
+    ``cap``; with ``cap`` None nothing is dropped.
+    """
+    if spec and spec.strip():
+        requested = [int(token) for token in spec.replace(",", " ").split() if token]
+    elif phase == "decode":
+        requested = DECODE_LADDER
+    else:
+        requested = PREFILL_LADDER
+    ladder = sorted(set(filter(lambda tokens: tokens > 0, requested)))
+    if cap is None:
+        return ladder, []
+    kept: list[int] = []
+    dropped: list[int] = []
+    for tokens in ladder:
+        (kept if tokens <= cap else dropped).append(tokens)
+    return kept, dropped
+
+
+def sampling_contract_error(iters: int, trials: int, warmup: int) -> str | None:
+    """Return None for the exact contract timing profile, else a user-facing message.
+
+    The message names the required and observed ``iters:trials:warmup`` triples and the
+    implied timed-sample count ("invalid" when iters or trials is not positive).
+    """
+    required = (TIMED_ITERS_PER_TRIAL, TRIALS_PER_POINT, WARMUP_ITERS_PER_TRIAL)
+    if (iters, trials, warmup) == required:
+        return None
+    required_text = ":".join(str(part) for part in required)
+    observed_text = f"{iters}:{trials}:{warmup}"
+    if iters > 0 and trials > 0:
+        samples = iters * trials
+    else:
+        samples = "invalid"
+    return (
+        f"{SAMPLING_CONTRACT} requires exactly iters:trials:warmup={required_text} "
+        f"on every SKU/backend; got {observed_text} ({samples} timed samples)"
+    )
+
+
+def qualification_order(
+    values: list, qualification_index: int, trial_index: int, *, identity_key: str = ""
+) -> list:
+    """Pick the deterministic ordering of ``values`` for one trial of one qualification run.
+
+    With a non-empty ``identity_key`` the base order is ``values`` sorted by the SHA-256
+    of ``identity_key``, ``qualification_index`` and the value; with an empty key the
+    given order is the base. The base is then rotated by
+    ``(trial_index + qualification_index - 1) % len(values)`` and reversed beforehand on
+    every odd pass through the cycle.
+
+    Raises:
+        ValueError: for empty or duplicated ``values``, a ``qualification_index``
+            outside ``1..QUALIFICATION_RUNS``, a ``trial_index`` that is not a
+            non-negative int, or a non-string ``identity_key``.
+    """
+    if not values or len(set(values)) != len(values):
+        raise ValueError("qualification order requires non-empty unique values")
+    if qualification_index not in range(1, QUALIFICATION_RUNS + 1):
+        raise ValueError(f"qualification_index must be in 1..{QUALIFICATION_RUNS}")
+    if type(trial_index) is not int or trial_index < 0:
+        raise ValueError("trial_index must be a non-negative integer")
+    if not isinstance(identity_key, str):
+        raise ValueError("qualification identity_key must be a string")
+
+    ordered = list(values)
+    if identity_key:
+        def digest_of(value):
+            text = f"{identity_key}\0{qualification_index}\0{value}"
+            return hashlib.sha256(text.encode("utf-8")).digest()
+
+        ordered = sorted(ordered, key=digest_of)
+    cycle, offset = divmod(trial_index + qualification_index - 1, len(values))
+    if cycle % 2:
+        ordered.reverse()
+    return ordered[offset:] + ordered[:offset]
+
+
+def sampled_component_evidence(trials: list[list[float]]) -> dict:
+    """Check the per-trial sample blocks and return them copied as floats, still grouped.
+
+    An empty ``trials`` means the component was not measured and yields the
+    "unavailable" record. Otherwise there must be ``TRIALS_PER_POINT`` blocks of
+    ``TIMED_ITERS_PER_TRIAL`` finite, non-negative, non-bool numbers each.
+
+    Raises:
+        ValueError: on a wrong block count, a wrong block length, a non-numeric sample,
+            a non-finite or negative sample, or a wrong total sample count.
+    """
+    if not trials:
+        return {"availability": "unavailable", "sample_count": 0, "trials": None}
+    if len(trials) != TRIALS_PER_POINT:
+        raise ValueError(
+            f"measured component needs {TRIALS_PER_POINT} trial blocks; got {len(trials)}"
+        )
+
+    def checked(sample) -> float:
+        # bool is an int subclass, so it has to be refused explicitly.
+        if isinstance(sample, bool) or not isinstance(sample, (int, float)):
+            raise ValueError("measured samples must be numeric")
+        value = float(sample)
+        if value < 0 or not math.isfinite(value):
+            raise ValueError("measured samples must be finite and non-negative")
+        return value
+
+    blocks: list[list[float]] = []
+    for trial in trials:
+        if len(trial) != TIMED_ITERS_PER_TRIAL:
+            raise ValueError(
+                f"measured trial needs {TIMED_ITERS_PER_TRIAL} samples; got {len(trial)}"
+            )
+        blocks.append([checked(sample) for sample in trial])
+
+    total = sum(len(block) for block in blocks)
+    if total != TIMED_SAMPLES_PER_POINT:
+        raise ValueError(
+            f"measured component needs {TIMED_SAMPLES_PER_POINT} samples; got {total}"
+        )
+    return {"availability": "measured", "sample_count": total, "trials": blocks}
+
+
+def _stats_vec(xs: list[int]) -> dict:
+    """Summarize a per-rank count vector without dumping the full vector.
+
+    Returns min, mean (3 decimals), max, coefficient of variation from the population
+    variance (4 decimals; 0.0 when the mean is not positive), the number of zero
+    entries, the total, and the number of ranks. An empty vector yields all-zero
+    statistics and ``ranks == 0``.
+    """
+    ranks = len(xs)
+    # Only the divisor is clamped, so an empty vector is not reported as one rank.
+    divisor = ranks or 1
+    total = sum(xs)
+    mean = total / divisor
+    variance = sum((x - mean) ** 2 for x in xs) / divisor
+    cv = (variance ** 0.5 / mean) if mean > 0 else 0.0
+    return {"min": min(xs) if xs else 0, "mean": round(mean, 3),
+            "max": max(xs) if xs else 0, "cv": round(cv, 4),
+            "empty_ranks": sum(1 for x in xs if x == 0), "total": total, "ranks": ranks}
+
+
+def percentile(xs: list[float], q: float) -> float:
+    """Nearest-rank percentile: the value at rank ``ceil(q/100 * n)`` of the sorted data.
+
+    Returns NaN for an empty input. The rank is clamped into ``1..n``, so ``q <= 0``
+    yields the minimum and ``q >= 100`` the maximum.
+    """
+    if not xs:
+        return float("nan")
+    ordered = sorted(xs)
+    # Multiply before dividing: q / 100.0 is usually inexact in binary floating point, and
+    # (q / 100.0) * n can land just above an integer rank (0.07 * 100 ->
+    # 7.000000000000001), which ceil() would turn into the next rank.
+    rank = math.ceil(q * len(ordered) / 100.0)
+    index = max(0, min(len(ordered) - 1, rank - 1))
+    return ordered[index]
+
+
+def _sha256_json(value) -> str:
+    """Return the SHA-256 hex digest of ``value`` serialized as compact, key-sorted JSON.
+
+    NaN/Infinity are rejected by the serializer and non-ASCII text is kept verbatim.
+    """
+    canonical = json.dumps(
+        value, allow_nan=False, ensure_ascii=False, sort_keys=True, separators=(",", ":")
+    )
+    hasher = hashlib.sha256()
+    hasher.update(canonical.encode())
+    return hasher.hexdigest()
+
+
+def _series_provenance(provenance: dict) -> dict:
+    """Delegate to ``contracts.series_provenance``, which keeps the stable semantic build
+    identity while raw binaries stay diagnostic."""
+    stable_identity = contracts.series_provenance(provenance)
+    return stable_identity
+
+
+def _write_bytes_atomic(path: str, payload: bytes) -> tuple[str, int]:
+    """Write ``payload`` to ``path`` through a temporary sibling file and a rename.
+
+    Parent directories are created as needed. The temporary file is named after the
+    process id, fsynced before ``os.replace``, and removed again if anything fails.
+
+    Returns:
+        ``(sha256 hex digest of payload, payload length in bytes)``.
+    """
+    parent = os.path.dirname(os.path.abspath(path))
+    os.makedirs(parent, exist_ok=True)
+    scratch = f"{path}.tmp-{os.getpid()}"
+    try:
+        with open(scratch, "wb") as stream:
+            stream.write(payload)
+            stream.flush()
+            os.fsync(stream.fileno())
+        os.replace(scratch, path)
+    finally:
+        # After a successful replace the scratch name is already gone; that is expected.
+        try:
+            os.unlink(scratch)
+        except FileNotFoundError:
+            pass
+    digest = hashlib.sha256(payload).hexdigest()
+    return digest, len(payload)
+
+
+def _write_json_atomic(path: str, value) -> tuple[str, int]:
+    """Serialize ``value`` as 2-space-indented JSON plus a trailing newline and write it
+    atomically; returns what ``_write_bytes_atomic`` returns for those bytes."""
+    text = json.dumps(value, allow_nan=False, ensure_ascii=False, indent=2)
+    encoded = f"{text}\n".encode()
+    return _write_bytes_atomic(path, encoded)
+
+
+def time_us(torch, fn, warmup: int, iters: int, pre=None) -> list[float]:
+    """Per-iteration CUDA-event latencies (µs) for THIS rank.
+
+    Without `pre`: times `fn()`. With `pre`: runs `pre()` UNTIMED each iteration (sync
+    before the start event so its GPU work can't bleed in), then times `fn(pre_result)`
+    — how combine is isolated when it consumes the dispatch state and needs a fresh
+    untimed dispatch+stage before every sample. Returns the raw per-iteration series;
+    the caller reduces across ranks per iteration before percentiling.
+
+    `warmup` untimed iterations run first (a negative value is treated as 0); the
+    returned list has one entry per timed iteration.
+    """
+    def sample():
+        # One timed iteration: the events bracket only `fn`; the trailing synchronize
+        # runs before elapsed_time is read.
+        arg = pre() if pre is not None else None
+        if pre is not None:
+            torch.cuda.synchronize()
+        s = torch.cuda.Event(enable_timing=True)
+        e = torch.cuda.Event(enable_timing=True)
+        s.record()
+        fn(arg) if pre is not None else fn()
+        e.record()
+        torch.cuda.synchronize()
+        return s.elapsed_time(e) * 1000.0  # ms -> us
+
+    for _ in range(max(0, warmup)):
+        if pre is not None:
+            a = pre()
+            torch.cuda.synchronize()
+            fn(a)
+        else:
+            fn()
+        # sync EACH warmup iteration, not just once after the loop: the measured-roundtrip fn
+        # interleaves dispatch+combine on a backend's persistent comm buffer, so back-to-back
+        # un-synced warmup iterations let iter N+1's dispatch race iter N's combine (CUDA abort
+        # on a rank -> NCCL-watchdog SIGABRT). Cheap (warmup is small); timed samples already sync.
+        torch.cuda.synchronize()
+    # range() of a non-positive count is empty, so iters <= 0 returns [].
+    return [sample() for _ in range(iters)]
+
+
+def kernel_generation(backend) -> str:
+    """Name the kernel family of ``backend``.
+
+    A truthy ``backend.kernel_generation`` wins; otherwise the family is looked up from
+    ``backend.name``, with "n-a" for names that have no entry.
+    """
+    explicit = getattr(backend, "kernel_generation", None)
+    if not explicit:
+        families_by_name = {
+            "deepep": "v1",
+            "deepep-v2": "v2-elastic-buffer",
+            "deepep-hybrid": "hybrid",
+        }
+        return families_by_name.get(backend.name, "n-a")
+    return explicit
+
+
+def _reduce_vec(torch, dist, device, vals, op):
+    """All-reduce ``vals`` as a float64 tensor on ``device`` with ``op`` and return the
+    reduced values as a list of Python floats."""
+    buffer = torch.tensor(vals, device=device, dtype=torch.float64)
+    dist.all_reduce(buffer, op=op)
+    return list(map(float, buffer.tolist()))
+
+
+def _reduce_int(torch, dist, device, v: int, op) -> int:
+    """All-reduce the single integer ``v`` (as int64 on ``device``) with ``op`` and return
+    the reduced value as a Python int."""
+    cell = torch.tensor([int(v)], device=device, dtype=torch.int64)
+    dist.all_reduce(cell, op=op)
+    reduced = cell.item()
+    return int(reduced)
+
+
+def _same_hash_across_ranks(torch, dist, device, digest: str) -> bool:
+    """Report whether every rank holds the same 64-hex-character ``digest``.
+
+    The digest is split into eight 32-bit words; the words are MIN- and MAX-reduced
+    across ranks and the two results are compared for equality.
+    """
+    words = [int(digest[8 * k: 8 * k + 8], 16) for k in range(8)]
+    floor = torch.tensor(words, device=device, dtype=torch.int64)
+    ceiling = floor.clone()
+    dist.all_reduce(floor, op=dist.ReduceOp.MIN)
+    dist.all_reduce(ceiling, op=dist.ReduceOp.MAX)
+    agree = torch.equal(floor, ceiling)
+    return bool(agree)
+
+
+def _tensor_sha256(*tensors) -> str:
+    """Return the SHA-256 hex digest of the raw bytes of ``tensors``, in argument order.
+
+    Each tensor is detached, made contiguous, moved to the CPU and hashed through a
+    uint8 reinterpretation of its memory. That yields the same bytes as
+    ``tensor.numpy().tobytes()`` for dtypes NumPy can represent, and also covers dtypes
+    it cannot (bfloat16, float8), for which ``.numpy()`` raises.
+    """
+    # Function-scope import: the module itself stays importable without torch.
+    import torch
+
+    digest = hashlib.sha256()
+    for tensor in tensors:
+        host = tensor.detach().contiguous().cpu()
+        # reshape(-1) first so 0-dim tensors can be reinterpreted byte-wise too.
+        digest.update(host.reshape(-1).view(torch.uint8).numpy().tobytes())
+    return digest.hexdigest()
+
+
+def _normalized_expert_metadata(torch, expert_ids, weights):
+    """Return ``(ids, weights)`` with each row ordered by ascending expert ID.
+
+    Negative IDs sort after every valid ID (stable among themselves), come back as -1,
+    and have their weight zeroed. IDs are returned as int64, weights as float32.
+    """
+    ids64 = expert_ids.to(torch.int64)
+    present = expert_ids >= 0
+    # Negative entries get a key above the valid IDs so they land at the row's end.
+    sort_keys = torch.where(present, ids64, torch.full_like(expert_ids, 1 << 30))
+    order = torch.argsort(sort_keys, dim=1, stable=True)
+    ordered_ids = torch.gather(ids64, 1, order)
+    ordered_weights = torch.gather(weights.to(torch.float32), 1, order)
+    missing = ~(ordered_ids >= 0)
+    return ordered_ids.masked_fill(missing, -1), ordered_weights.masked_fill(missing, 0)
+
+
+def expert_packed_slot_map(
+    counts,
+    src_info,
+    layout_range,
+    *,
+    tokens_per_rank: int,
+    experts_per_rank: int,
+    world_size: int,
+) -> list[tuple[int, int, int]]:
+    """Decode and validate DeepEP's expert-packed receive metadata.
+
+    ``src_info`` stores a source-local token index. The source rank is carried by
+    the corresponding packed ``layout_range`` interval, so neither field is
+    independently sufficient to identify a source token.
+
+    Args:
+        counts: per-local-expert receive counts; each must be a plain non-negative int.
+        src_info: per-local-expert sequence of source-local token indices, at least
+            ``counts[e]`` long.
+        layout_range: per-local-expert sequence with one packed int per source rank;
+            the bits above 32 hold the first packed position and the low 32 bits the
+            number of positions.
+        tokens_per_rank, experts_per_rank, world_size: positive dimensions.
+
+    Returns:
+        ``(local_expert, packed_position, source_id)`` triples, where
+        ``source_id = source_rank * tokens_per_rank + local_source``, emitted per expert
+        in source-rank order.
+
+    Raises:
+        ValueError: for any shape, range, coverage or duplication violation.
+    """
+    if tokens_per_rank <= 0 or experts_per_rank <= 0 or world_size <= 0:
+        raise ValueError("expert-packed dimensions must be positive")
+    if len(counts) != experts_per_rank:
+        raise ValueError("expert-packed count shape differs from local experts")
+    if len(src_info) != experts_per_rank or len(layout_range) != experts_per_rank:
+        raise ValueError("expert-packed metadata shape differs from local experts")
+
+    mask = (1 << 32) - 1
+    slots: list[tuple[int, int, int]] = []
+    # (source_id, local_expert) pairs seen so far, across all experts.
+    pairs: set[tuple[int, int]] = set()
+    for local_expert in range(experts_per_rank):
+        count = counts[local_expert]
+        if type(count) is not int or count < 0:
+            raise ValueError("expert-packed receive count is invalid")
+        if len(layout_range[local_expert]) != world_size:
+            raise ValueError("expert-packed layout rank dimension is invalid")
+        if len(src_info[local_expert]) < count:
+            raise ValueError("expert-packed source metadata is truncated")
+
+        # One flag per valid packed position: every slot must be claimed exactly once.
+        covered = [False] * count
+        for source_rank, encoded in enumerate(layout_range[local_expert]):
+            if type(encoded) is not int or encoded < 0:
+                raise ValueError("expert-packed layout range is invalid")
+            begin, span = encoded >> 32, encoded & mask
+            if begin > count or begin + span > count:
+                raise ValueError("expert-packed layout range exceeds valid slots")
+            for packed_position in range(begin, begin + span):
+                if covered[packed_position]:
+                    raise ValueError("expert-packed layout ranges overlap")
+                covered[packed_position] = True
+                local_source = src_info[local_expert][packed_position]
+                if (
+                    type(local_source) is not int
+                    or local_source < 0
+                    or local_source >= tokens_per_rank
+                ):
+                    raise ValueError("expert-packed source token index is invalid")
+                source_id = source_rank * tokens_per_rank + local_source
+                pair = (source_id, local_expert)
+                if pair in pairs:
+                    raise ValueError("expert-packed source/expert assignment is duplicated")
+                pairs.add(pair)
+                slots.append((local_expert, packed_position, source_id))
+        if not all(covered):
+            raise ValueError("expert-packed layout ranges omit valid receive slots")
+    return slots
+
+
+def expert_packed_dispatch_view(
+    torch,
+    packed_payload,
+    packed_counts,
+    packed_src_info,
+    packed_layout_range,
+    *,
+    rank: int,
+    tokens_per_rank: int,
+    experts_per_rank: int,
+    world_size: int,
+):
+    """Return the valid expert-packed rows with exact global source identities."""
+    if packed_payload.ndim != 3:
+        raise ValueError("expert-packed payload must have shape [experts, slots, hidden]")
+    if packed_payload.shape[0] != experts_per_rank:
+        raise ValueError("expert-packed payload expert dimension is invalid")
+    if tuple(packed_counts.shape) != (experts_per_rank,):
+        raise ValueError("expert-packed count tensor shape is invalid")
+    if tuple(packed_src_info.shape[:1]) != (experts_per_rank,):
+        raise ValueError("expert-packed source tensor shape is invalid")
+    if tuple(packed_layout_range.shape) != (experts_per_rank, world_size):
+        raise ValueError("expert-packed layout tensor shape is invalid")
+    if packed_src_info.ndim != 2 or packed_src_info.shape[1] < packed_payload.shape[1]:
+        raise ValueError("expert-packed source tensor capacity is invalid")
+
+    counts = [int(value) for value in packed_counts.detach().cpu().tolist()]
+    if any(count > packed_payload.shape[1] for count in counts):
+        raise ValueError("expert-packed receive count exceeds payload capacity")
+    slots = expert_packed_slot_map(
+        counts,
+        packed_src_info.detach().cpu().tolist(),
+        packed_layout_range.detach().cpu().tolist(),
+        tokens_per_rank=tokens_per_rank,
+        experts_per_rank=experts_per_rank,
+        world_size=world_size,
+    )
+    device = packed_payload.device
+    local_expert_slots = torch.tensor(
+        [slot[0] for slot in slots], device=device, dtype=torch.int64
+    )
+    packed_positions = torch.tensor(
+        [slot[1] for slot in slots], device=device, dtype=torch.int64
+    )
+    source_ids = torch.tensor(
+        [slot[2] for slot in slots], device=device, dtype=torch.int64
+    )
+    expert_ids = local_expert_slots + rank * experts_per_rank
+    payload = packed_payload[local_expert_slots, packed_positions]
+    return types.SimpleNamespace(
+        payload=payload,
+        source_ids=source_ids,
+        expert_ids=expert_ids,
+        local_expert_counts=packed_counts.to(torch.int64),
+        local_expert_slots=local_expert_slots,
+        packed_positions=packed_positions,
+        ordering_contract="expert-major/layout-addressed-packed-slot-v1",
+    )
+
+
+def _expert_transform(torch, payload, expert_ids, weights, combine_weight_semantics):
+    """Build one local expert aggregate for the v1 unweighted combine contract."""
+    if combine_weight_semantics != "unweighted-rank-sum":
+        raise ValueError("v1 requires unweighted rank-sum combine")
+    valid = expert_ids >= 0
+    expert = expert_ids.clamp(min=0).to(torch.int64)
+    gate = weights.to(torch.float32).masked_fill(~valid, 0)
+    scale = ((expert * 17 + 5) % 31 + 1).to(torch.float32) / 32
+    offset_a = (((expert * 29 + 7) % 37) - 18).to(torch.float32) / 64
+    offset_b = (((expert * 43 + 11) % 41) - 20).to(torch.float32) / 128
+    scale_sum = (gate * scale).sum(dim=1, keepdim=True)
+    offset_a_sum = (gate * offset_a).sum(dim=1, keepdim=True)
+    offset_b_sum = (gate * offset_b).sum(dim=1, keepdim=True)
+    columns = torch.arange(payload.shape[1], device=payload.device, dtype=torch.int64)
+    pattern = (((columns * 13) % 17) - 8).to(torch.float32) / 8
+    transformed = (
+        payload.float() * scale_sum + offset_a_sum + offset_b_sum * pattern.unsqueeze(0)
+    )
+    return transformed.to(payload.dtype)
+
+
+def _expert_transform_expanded(torch, payload, expert_ids):
+    """Apply the oracle transform to one row per token/expert assignment."""
+    expert = expert_ids.to(torch.int64)
+    scale = (((expert * 17 + 5) % 31 + 1).to(torch.float32) / 32).unsqueeze(1)
+    offset_a = ((((expert * 29 + 7) % 37) - 18).to(torch.float32) / 64).unsqueeze(1)
+    offset_b = ((((expert * 43 + 11) % 41) - 20).to(torch.float32) / 128).unsqueeze(1)
+    columns = torch.arange(payload.shape[1], device=payload.device, dtype=torch.int64)
+    pattern = (((columns * 13) % 17) - 8).to(torch.float32) / 8
+    transformed = payload.float() * scale + offset_a + offset_b * pattern.unsqueeze(0)
+    return transformed.to(payload.dtype)
+
+
+def _expected_transformed_combine(torch, problem):
+    """Independently derive sum_i gate_i * expert_i(x) for each source token."""
+    semantic_x = getattr(problem, "oracle_x", problem.x)
+    expected = torch.zeros_like(semantic_x, dtype=torch.float32)
+    expert_ids = problem.topk_idx.to(torch.int64)
+    weights = problem.topk_weights.to(torch.float32)
+    columns = torch.arange(semantic_x.shape[1], device=semantic_x.device, dtype=torch.int64)
+    pattern = (((columns * 13) % 17) - 8).to(torch.float32) / 8
+    for slot in range(expert_ids.shape[1]):
+        expert = expert_ids[:, slot]
+        gate = weights[:, slot].unsqueeze(1)
+        scale = (((expert * 17 + 5) % 31 + 1).to(torch.float32) / 32).unsqueeze(1)
+        offset_a = ((((expert * 29 + 7) % 37) - 18).to(torch.float32) / 64).unsqueeze(1)
+        offset_b = ((((expert * 43 + 11) % 41) - 20).to(torch.float32) / 128).unsqueeze(1)
+        expert_output = semantic_x.float() * scale + offset_a + offset_b * pattern.unsqueeze(0)
+        expected.add_(gate * expert_output)
+    return expected
+
+
+def _baseline_precision_axis() -> dict:
+    return {
+        "encoded_payload_valid": True,
+        "scales_finite": None,
+        "scales_positive": None,
+        "dequantized_semantics": True,
+        "saturation_count": 0,
+        "saturation_rate": 0.0,
+        "max_abs_error": 0.0,
+        "max_rel_error": 0.0,
+        "passed": True,
+    }
+
+
+def _precision_evidence(backend, problem, view, combined, expected_combined) -> dict:
+    method = getattr(backend, "precision_evidence", None)
+    if method is not None:
+        evidence = method(problem, view)
+        combine_axis = backend.communication_precision["combine"]
+        if combine_axis["communication_format"] != "bf16":
+            if combined.shape == expected_combined.shape and combined.numel():
+                absolute = (combined.float() - expected_combined.float()).abs()
+                max_abs_error = float(absolute.max().item())
+                max_rel_error = max_abs_error / (
+                    float(expected_combined.float().abs().max().item()) + 1e-6
+                )
+                tolerance = ORACLE_ATOL + float(
+                    getattr(backend, "tolerance", ORACLE_RTOL)
+                ) * expected_combined.float().abs()
+                semantics = bool((absolute <= tolerance).all().item())
+            elif combined.shape == expected_combined.shape:
+                max_abs_error = max_rel_error = 0.0
+                semantics = True
+            else:
+                max_abs_error = max_rel_error = 1e30
+                semantics = False
+            direction = evidence["combine"]
+            direction.update({
+                "dequantized_semantics": semantics,
+                "max_abs_error": max_abs_error,
+                "max_rel_error": max_rel_error,
+            })
+            scale_ok = (
+                direction["scales_finite"] is not False
+                and direction["scales_positive"] is not False
+            )
+            direction["passed"] = bool(
+                direction["encoded_payload_valid"] and semantics and scale_ok
+            )
+            evidence["passed"] = bool(
+                evidence["dispatch"]["passed"] and direction["passed"]
+            )
+        return evidence
+    profile_id = getattr(backend, "precision_profile_id", None)
+    if profile_id != identity.V1_CONTROL_PRECISION_PROFILE:
+        failed = _baseline_precision_axis()
+        failed.update({"encoded_payload_valid": False, "dequantized_semantics": False,
+                       "passed": False})
+        return {"profile_id": profile_id, "dispatch": failed, "combine": dict(failed),
+                "passed": False}
+    return {
+        "profile_id": profile_id,
+        "dispatch": _baseline_precision_axis(),
+        "combine": _baseline_precision_axis(),
+        "passed": True,
+    }
+
+
+def _failed_precision_evidence(backend) -> dict:
+    failed = _baseline_precision_axis()
+    failed.update({"encoded_payload_valid": False, "dequantized_semantics": False,
+                   "passed": False})
+    return {
+        "profile_id": getattr(backend, "precision_profile_id", None),
+        "dispatch": failed,
+        "combine": dict(failed),
+        "passed": False,
+    }
+
+
+def aggregate_precision_evidence(evidence_by_rank: list[dict]) -> dict:
+    """Collapse pre/post rank evidence without hiding any direction's worst observation."""
+    records = [record[phase] for record in evidence_by_rank for phase in ("pre", "post")]
+    profile_ids = {record["profile_id"] for record in records}
+    if len(profile_ids) != 1:
+        raise ValueError("precision evidence profiles differ across ranks or oracle passes")
+    result = {"profile_id": profile_ids.pop()}
+    for direction in ("dispatch", "combine"):
+        axes = [record[direction] for record in records]
+        rank_counts = []
+        for rank_index in range(len(evidence_by_rank)):
+            rank_counts.append(max(
+                evidence_by_rank[rank_index][phase][direction]["saturation_count"]
+                for phase in ("pre", "post")
+            ))
+        scale_finite = [axis["scales_finite"] for axis in axes]
+        scale_positive = [axis["scales_positive"] for axis in axes]
+        result[direction] = {
+            "encoded_payload_valid": all(axis["encoded_payload_valid"] for axis in axes),
+            "scales_finite": (
+                None if all(value is None for value in scale_finite)
+                else all(value is True for value in scale_finite)
+            ),
+            "scales_positive": (
+                None if all(value is None for value in scale_positive)
+                else all(value is True for value in scale_positive)
+            ),
+            "dequantized_semantics": all(axis["dequantized_semantics"] for axis in axes),
+            "saturation_count": sum(rank_counts),
+            "saturation_rate": max(axis["saturation_rate"] for axis in axes),
+            "max_abs_error": max(axis["max_abs_error"] for axis in axes),
+            "max_rel_error": max(axis["max_rel_error"] for axis in axes),
+            "passed": all(axis["passed"] for axis in axes),
+        }
+    result["passed"] = all(result[direction]["passed"] for direction in ("dispatch", "combine"))
+    return result
+
+
+def _run_expert_packed_oracle(
+    torch,
+    routing,
+    backend,
+    problem,
+    global_idx,
+    global_weights,
+    rank: int,
+    experts_per_rank: int,
+    seed: int,
+):
+    """Verify an expert-packed dispatch and native gate-weighted combine.
+
+    Runs one ``backend.dispatch``, decodes the packed receive buffers through
+    ``expert_packed_dispatch_view``, and compares what this rank received
+    against the routing implied by ``global_idx`` / ``global_weights``. The
+    received rows are then transformed per expert, combined through
+    ``backend.combine_transformed``, and compared with the independently
+    derived expectation.
+
+    Returns a result dict holding the per-check booleans under ``"checks"``,
+    the precision evidence under ``"_precision"``, the error metrics, and an
+    overall ``"passed"`` flag. If inspecting the dispatch raises, the
+    dispatch/combine cycle is completed on a best-effort basis and a failing
+    record (every check ``False``) is returned; if that cleanup raises as well,
+    the inspection error is re-raised, chained from the cleanup error.
+    """
+    contract = LOW_LATENCY_ORACLE_CONTRACT
+    handle = backend.dispatch(problem)
+    # Wait for the dispatch to finish before its outputs are inspected.
+    torch.cuda.synchronize()
+    try:
+        packed = backend.inspect_expert_dispatch(problem, handle)
+        view = expert_packed_dispatch_view(
+            torch,
+            packed.payload,
+            packed.local_expert_counts,
+            packed.source_info,
+            packed.layout_range,
+            rank=rank,
+            tokens_per_rank=problem.T,
+            experts_per_rank=experts_per_rank,
+            world_size=backend.world_size,
+        )
+        # Second, payload-derived source identity; compared with the metadata-derived
+        # view.source_ids in the payload check below.
+        decoded_source_ids = routing.decode_source_ids(view.payload, seed)
+    except Exception as inspection_error:
+        # Inspection failed: still drive the stage/combine half of the cycle.
+        # NOTE(review): presumably so peer ranks are not left waiting in the
+        # collective — confirm against the adapters.
+        try:
+            problem.recv_tokens = backend.recv_tokens(handle)
+            backend.stage(problem, handle)
+            backend.combine(problem, handle)
+            torch.cuda.synchronize()
+        except Exception as cleanup_error:
+            # Surface the original inspection failure, with the cleanup failure as its cause.
+            raise inspection_error from cleanup_error
+        return {
+            "_precision": _failed_precision_evidence(backend),
+            "contract": contract,
+            "passed": False,
+            "ordering_contract": "adapter-inspection-failed",
+            "order_sha256": None,
+            "dispatch_sha256": None,
+            "combine_weight_semantics": getattr(
+                backend, "combine_weight_semantics", "undeclared"
+            ),
+            "receive_count": 0,
+            "atol": ORACLE_ATOL,
+            "max_absolute_error": None,
+            "max_elementwise_relative_error": None,
+            "max_relative_error": None,
+            "max_weight_error": None,
+            "rtol": ORACLE_RTOL,
+            "checks": {
+                "combine_values": False,
+                "counts": False,
+                "metadata": False,
+                "multiplicity": False,
+                "payload": False,
+                "source_set": False,
+                "weights": False,
+            },
+        }
+
+    device = problem.x.device
+    world_size = backend.world_size
+    total_experts = experts_per_rank * world_size
+    global_idx_device = global_idx.to(device=device, dtype=torch.int64)
+    global_weights_device = global_weights.to(device=device, dtype=torch.float32)
+    # Row index of every (token, top-k slot) entry, so masking yields source token ids.
+    source_grid = torch.arange(
+        global_idx_device.shape[0], device=device, dtype=torch.int64
+    ).unsqueeze(1).expand_as(global_idx_device)
+    # Assignments whose expert falls in this rank's block of experts_per_rank experts.
+    local_mask = (global_idx_device // experts_per_rank) == rank
+    expected_sources = source_grid[local_mask]
+    expected_experts = global_idx_device[local_mask]
+    expected_pair_weights = global_weights_device[local_mask]
+
+    receive_count = int(view.payload.shape[0])
+    shape_ok = (
+        view.payload.ndim == 2
+        and view.source_ids.shape == (receive_count,)
+        and view.expert_ids.shape == (receive_count,)
+        and view.local_expert_counts.shape == (experts_per_rank,)
+    )
+    # True when every received source id indexes a row of the global routing table.
+    source_range = bool(
+        receive_count == 0
+        or (
+            (view.source_ids >= 0)
+            & (view.source_ids < global_idx_device.shape[0])
+        ).all().item()
+    )
+    # Regenerate the activations for the received source ids; when the ids are out of
+    # range an uninitialized placeholder is used and payload_ok is forced False below.
+    expected_payload = (
+        routing.activations_for_source_ids(
+            view.source_ids, problem.x.shape[1], seed, problem.x.dtype
+        )
+        if source_range
+        else torch.empty_like(view.payload)
+    )
+    # Optional backend hook applied to the expected payload before the exact comparison.
+    normalize_payload = getattr(backend, "oracle_dispatch_payload", None)
+    if source_range and normalize_payload is not None:
+        expected_payload = normalize_payload(expected_payload)
+    payload_ok = bool(
+        source_range
+        and torch.equal(decoded_source_ids.to(torch.int64), view.source_ids)
+        and torch.equal(view.payload, expected_payload)
+    )
+
+    # Fold each (source, expert) pair into one integer key so both sides can be
+    # sorted into the same canonical order before comparison.
+    actual_keys = view.source_ids * total_experts + view.expert_ids
+    expected_keys = expected_sources * total_experts + expected_experts
+    actual_order = torch.argsort(actual_keys, stable=True)
+    expected_order = torch.argsort(expected_keys, stable=True)
+    canonical_sources = view.source_ids.index_select(0, actual_order)
+    canonical_experts = view.expert_ids.index_select(0, actual_order)
+    canonical_expected_weights = expected_pair_weights.index_select(0, expected_order)
+    # This rank's own rows of the global routing table.
+    expected_local_idx = global_idx_device[
+        rank * problem.T:(rank + 1) * problem.T
+    ]
+    metadata_ok = bool(
+        shape_ok
+        and torch.equal(problem.topk_idx.to(torch.int64), expected_local_idx)
+        and torch.equal(
+            actual_keys.index_select(0, actual_order),
+            expected_keys.index_select(0, expected_order),
+        )
+    )
+    expected_counts = torch.bincount(
+        expected_experts - rank * experts_per_rank, minlength=experts_per_rank
+    )
+    counts_ok = torch.equal(
+        view.local_expert_counts.to(torch.int64), expected_counts.to(torch.int64)
+    )
+    # How many times each source token was received versus how many times it should be.
+    actual_multiplicity = torch.bincount(
+        view.source_ids, minlength=global_idx_device.shape[0]
+    )
+    expected_multiplicity = torch.bincount(
+        expected_sources, minlength=global_idx_device.shape[0]
+    )
+    multiplicity_ok = torch.equal(actual_multiplicity, expected_multiplicity)
+    source_set_ok = torch.equal(
+        torch.sort(torch.unique(view.source_ids)).values,
+        torch.sort(torch.unique(expected_sources)).values,
+    )
+
+    expected_local_weights = global_weights_device[
+        rank * problem.T:(rank + 1) * problem.T
+    ]
+    if problem.topk_weights.shape == expected_local_weights.shape:
+        max_weight_error = (
+            float((problem.topk_weights.float() - expected_local_weights).abs().max().item())
+            if expected_local_weights.numel()
+            else 0.0
+        )
+    else:
+        max_weight_error = None
+    # Weights must match exactly; a shape mismatch (None) also fails this check.
+    weights_ok = max_weight_error == 0.0
+    ordering_contract = f"canonical-source-expert-v1/{view.ordering_contract}"
+    order_sha256 = _tensor_sha256(canonical_sources, canonical_experts)
+    dispatch_sha256 = _tensor_sha256(
+        canonical_sources, canonical_experts, canonical_expected_weights
+    )
+
+    # Stash the slot addressing on the handle before combining.
+    # NOTE(review): presumably read by combine_transformed to place rows back into
+    # the packed buffer — confirm against the adapter.
+    handle.oracle_local_expert_slots = view.local_expert_slots
+    handle.oracle_packed_positions = view.packed_positions
+    problem.recv_tokens = receive_count
+    transformed = _expert_transform_expanded(torch, view.payload, view.expert_ids)
+    combined = backend.combine_transformed(problem, handle, transformed)
+    torch.cuda.synchronize()
+    expected_combined = _expected_transformed_combine(torch, problem)
+    if combined.shape == expected_combined.shape and combined.numel():
+        absolute_error = (combined.float() - expected_combined).abs()
+        max_absolute_error = float(absolute_error.max().item())
+        # Normalized by the largest expected magnitude, not element by element.
+        max_relative_error = max_absolute_error / (
+            float(expected_combined.abs().max().item()) + 1e-6
+        )
+        max_elementwise_relative_error = float(
+            (absolute_error / expected_combined.abs().clamp_min(ORACLE_ATOL)).max().item()
+        )
+        combine_values_ok = bool(torch.allclose(
+            combined.float(), expected_combined, rtol=ORACLE_RTOL, atol=ORACLE_ATOL
+        ))
+    elif combined.shape == expected_combined.shape:
+        # Matching but empty tensors: nothing to compare.
+        max_absolute_error = 0.0
+        max_elementwise_relative_error = 0.0
+        max_relative_error = 0.0
+        combine_values_ok = True
+    else:
+        # Shape mismatch: no metrics can be computed and the combine check fails.
+        max_absolute_error = None
+        max_elementwise_relative_error = None
+        max_relative_error = None
+        combine_values_ok = False
+    tolerance = float(getattr(backend, "tolerance", ORACLE_RTOL))
+    precision = _precision_evidence(backend, problem, view, combined, expected_combined)
+    checks = {
+        "combine_values": combine_values_ok,
+        "counts": counts_ok,
+        "metadata": metadata_ok,
+        "multiplicity": multiplicity_ok,
+        "payload": payload_ok,
+        "source_set": source_set_ok,
+        "weights": weights_ok,
+    }
+    return {
+        "_precision": precision,
+        "contract": contract,
+        # ordering_contract is a non-empty f-string here, so that term is always truthy.
+        "passed": bool(
+            all(checks.values())
+            and precision["passed"]
+            and ordering_contract
+            and max_relative_error is not None
+            and max_relative_error < tolerance
+        ),
+        "atol": ORACLE_ATOL,
+        "combine_weight_semantics": backend.combine_weight_semantics,
+        "ordering_contract": ordering_contract,
+        "order_sha256": order_sha256,
+        "dispatch_sha256": dispatch_sha256,
+        "receive_count": receive_count,
+        "max_absolute_error": max_absolute_error,
+        "max_elementwise_relative_error": max_elementwise_relative_error,
+        "max_relative_error": max_relative_error,
+        "max_weight_error": max_weight_error,
+        "rtol": ORACLE_RTOL,
+        "checks": checks,
+    }
+
+
+def _run_expert_oracle(
+    torch,
+    routing,
+    backend,
+    problem,
+    global_idx,
+    global_weights,
+    rank: int,
+    experts_per_rank: int,
+    seed: int,
+):
+    """Verify one real dispatch/transform/combine without entering a timed region.
+
+    Backends whose ``oracle_layout`` is ``"expert-packed"`` are delegated to
+    ``_run_expert_packed_oracle``. Otherwise the dispatch is inspected through
+    ``backend.inspect_dispatch``, source token ids are decoded from the received
+    payload, and the received payload, expert ids, weights, and per-expert
+    counts are compared with the routing implied by ``global_idx`` /
+    ``global_weights``. The rows are then transformed, combined through
+    ``backend.combine_transformed``, and compared with the independently
+    derived expectation.
+
+    Returns a result dict holding the per-check booleans under ``"checks"``,
+    the precision evidence under ``"_precision"``, the error metrics, and an
+    overall ``"passed"`` flag. If inspecting the dispatch raises, the
+    dispatch/combine cycle is completed on a best-effort basis and a failing
+    record is returned; if that cleanup raises as well, the inspection error is
+    re-raised, chained from the cleanup error.
+    """
+    if getattr(backend, "oracle_layout", "token-rank") == "expert-packed":
+        return _run_expert_packed_oracle(
+            torch,
+            routing,
+            backend,
+            problem,
+            global_idx,
+            global_weights,
+            rank,
+            experts_per_rank,
+            seed,
+        )
+    handle = backend.dispatch(problem)
+    # Wait for the dispatch to finish before its outputs are inspected.
+    torch.cuda.synchronize()
+    try:
+        view = backend.inspect_dispatch(problem, handle)
+        source_ids = routing.decode_source_ids(view.payload, seed)
+    except Exception as inspection_error:
+        # Inspection failed: still drive the stage/combine half of the cycle.
+        # NOTE(review): presumably so peer ranks are not left waiting in the
+        # collective — confirm against the adapters.
+        try:
+            problem.recv_tokens = backend.recv_tokens(handle)
+            backend.stage(problem, handle)
+            backend.combine(problem, handle)
+            torch.cuda.synchronize()
+        except Exception as cleanup_error:
+            # Surface the original inspection failure, with the cleanup failure as its cause.
+            raise inspection_error from cleanup_error
+        return {
+            "_precision": _failed_precision_evidence(backend),
+            "contract": ORACLE_CONTRACT,
+            "passed": False,
+            "ordering_contract": "adapter-inspection-failed",
+            "order_sha256": None,
+            "dispatch_sha256": None,
+            "combine_weight_semantics": getattr(
+                backend, "combine_weight_semantics", "undeclared"
+            ),
+            "receive_count": 0,
+            "atol": ORACLE_ATOL,
+            "max_absolute_error": None,
+            "max_elementwise_relative_error": None,
+            "max_relative_error": None,
+            "max_weight_error": None,
+            "rtol": ORACLE_RTOL,
+            "checks": {
+                "combine_values": False,
+                "counts": False,
+                "metadata": False,
+                "multiplicity": False,
+                "payload": False,
+                "source_set": False,
+                "weights": False,
+            },
+        }
+
+    receive_count = int(view.payload.shape[0])
+    shape_ok = (
+        view.payload.ndim == 2
+        and view.expert_ids.shape == (receive_count, problem.topk_idx.shape[1])
+        and view.weights.shape == view.expert_ids.shape
+    )
+    # True when every decoded source id indexes a row of the global routing table.
+    source_range = bool(
+        receive_count == 0
+        or ((source_ids >= 0) & (source_ids < global_idx.shape[0])).all().item()
+    )
+    if source_range:
+        expected_idx = global_idx.to(problem.x.device).index_select(0, source_ids)
+        expected_weights = global_weights.to(problem.x.device).index_select(0, source_ids)
+        # Keep only the assignments hosted on this rank; the rest become -1 / weight 0.
+        local = (expected_idx // experts_per_rank) == rank
+        expected_ids = torch.where(local, expected_idx, torch.full_like(expected_idx, -1))
+        expected_weights = expected_weights.masked_fill(~local, 0)
+        expected_payload = routing.activations_for_source_ids(
+            source_ids, problem.x.shape[1], seed, problem.x.dtype
+        )
+        # Optional backend hook applied to the expected payload before the exact comparison.
+        normalize_payload = getattr(backend, "oracle_dispatch_payload", None)
+        if normalize_payload is not None:
+            expected_payload = normalize_payload(expected_payload)
+    else:
+        # Out-of-range ids: use placeholders; payload_ok and source_set_ok are forced
+        # False below because both require source_range.
+        expected_ids = torch.full_like(view.expert_ids, -1)
+        expected_weights = torch.zeros_like(view.weights)
+        expected_payload = torch.empty_like(view.payload)
+    actual_ids, actual_weights = _normalized_expert_metadata(
+        torch, view.expert_ids, view.weights
+    )
+    expected_ids, expected_weights = _normalized_expert_metadata(
+        torch, expected_ids, expected_weights
+    )
+    # Source tokens with at least one top-k expert hosted on this rank, in ascending order.
+    expected_sources = (
+        ((global_idx // experts_per_rank) == rank).any(dim=1).nonzero(as_tuple=True)[0]
+    ).to(problem.x.device)
+    # Each expected source must be received exactly once.
+    source_set_ok = (
+        source_range
+        and source_ids.numel() == torch.unique(source_ids).numel()
+        and torch.equal(torch.sort(source_ids).values, expected_sources)
+    )
+    payload_ok = source_range and torch.equal(view.payload, expected_payload)
+    metadata_ok = shape_ok and torch.equal(actual_ids, expected_ids)
+    max_weight_error = (
+        float((actual_weights - expected_weights).abs().max().item())
+        if actual_weights.numel()
+        else 0.0
+    )
+    # Weights must match exactly.
+    weights_ok = max_weight_error == 0.0
+    valid_expected = expected_ids >= 0
+    expected_local = expected_ids[valid_expected] - rank * experts_per_rank
+    expected_counts = torch.bincount(expected_local, minlength=experts_per_rank)
+    counts_ok = torch.equal(
+        view.local_expert_counts.to(torch.int64), expected_counts.to(torch.int64)
+    )
+    # Number of local experts per received row, actual versus expected.
+    multiplicity_ok = torch.equal(
+        (actual_ids >= 0).sum(dim=1), (expected_ids >= 0).sum(dim=1)
+    )
+    # Receive-slot assignment may use atomics and is not a semantic EP guarantee. Compare
+    # pre/post dispatch evidence in canonical source-token order without changing the native path.
+    canonical_order = torch.argsort(source_ids.to(torch.int64), stable=True)
+    canonical_sources = source_ids.to(torch.int64).index_select(0, canonical_order)
+    canonical_ids = actual_ids.to(torch.int64).index_select(0, canonical_order)
+    canonical_weights = actual_weights.index_select(0, canonical_order)
+    ordering_contract = f"canonical-source-id-v1/{view.ordering_contract}"
+    order_sha256 = _tensor_sha256(canonical_sources)
+    dispatch_sha256 = _tensor_sha256(
+        canonical_sources, canonical_ids, canonical_weights
+    )
+
+    problem.recv_tokens = receive_count
+    combine_weight_semantics = backend.combine_weight_semantics
+    transformed = _expert_transform(
+        torch, view.payload, actual_ids, actual_weights, combine_weight_semantics
+    )
+    combined = backend.combine_transformed(problem, handle, transformed)
+    torch.cuda.synchronize()
+    expected_combined = _expected_transformed_combine(torch, problem)
+    if combined.shape == expected_combined.shape and combined.numel():
+        absolute_error = (combined.float() - expected_combined).abs()
+        max_absolute_error = float(absolute_error.max().item())
+        # Normalized by the largest expected magnitude, not element by element.
+        max_relative_error = max_absolute_error / (
+            float(expected_combined.abs().max().item()) + 1e-6
+        )
+        max_elementwise_relative_error = float(
+            (absolute_error / expected_combined.abs().clamp_min(ORACLE_ATOL)).max().item()
+        )
+        combine_values_ok = bool(torch.allclose(
+            combined.float(), expected_combined, rtol=ORACLE_RTOL, atol=ORACLE_ATOL
+        ))
+    elif combined.shape == expected_combined.shape:
+        # Matching but empty tensors: nothing to compare.
+        max_absolute_error = 0.0
+        max_elementwise_relative_error = 0.0
+        max_relative_error = 0.0
+        combine_values_ok = True
+    else:
+        # Shape mismatch: no metrics can be computed and the combine check fails.
+        max_absolute_error = None
+        max_elementwise_relative_error = None
+        max_relative_error = None
+        combine_values_ok = False
+    # NOTE(review): the expert-packed oracle defaults this to ORACLE_RTOL; confirm
+    # the literal 5e-2 fallback here is intentional.
+    tolerance = float(getattr(backend, "tolerance", 5e-2))
+    precision = _precision_evidence(backend, problem, view, combined, expected_combined)
+    checks = {
+        "combine_values": combine_values_ok,
+        "counts": counts_ok,
+        "metadata": metadata_ok,
+        "multiplicity": multiplicity_ok,
+        "payload": payload_ok,
+        "source_set": source_set_ok,
+        "weights": weights_ok,
+    }
+    return {
+        "_precision": precision,
+        "contract": ORACLE_CONTRACT,
+        # ordering_contract is a non-empty f-string here, so that term is always truthy.
+        "passed": bool(
+            all(checks.values())
+            and precision["passed"]
+            and ordering_contract
+            and max_relative_error is not None
+            and max_relative_error < tolerance
+        ),
+        "atol": ORACLE_ATOL,
+        "combine_weight_semantics": combine_weight_semantics,
+        "ordering_contract": ordering_contract,
+        "order_sha256": order_sha256,
+        "dispatch_sha256": dispatch_sha256,
+        "receive_count": receive_count,
+        "max_absolute_error": max_absolute_error,
+        "max_elementwise_relative_error": max_elementwise_relative_error,
+        "max_relative_error": max_relative_error,
+        "max_weight_error": max_weight_error,
+        "rtol": ORACLE_RTOL,
+        "checks": checks,
+    }
+
+
+def _histogram(xs: list[float], nbins: int = 40) -> dict:
+    """Compact equal-width summary of the exact private cross-rank-max samples."""
+    if not xs:
+        return {"n": 0}
+    lo, hi = min(xs), max(xs)
+    if hi <= lo:
+        return {"n": len(xs), "min": lo, "max": hi, "bins": nbins, "counts": [len(xs)]}
+    counts = [0] * nbins
+    span = hi - lo
+    for x in xs:
+        b = min(nbins - 1, int((x - lo) / span * nbins))
+        counts[b] += 1
+    return {"n": len(xs), "min": round(lo, 3), "max": round(hi, 3), "bins": nbins, "counts": counts}
+
+
+def _derive_publication_status(v: dict) -> str:
+    """Classify raw attempts; only the isolated coverage publisher may promote evidence."""
+    if v["execution_status"] != "complete":
+        return "failed"
+    if v["semantic_correctness"] != "pass" or v["measurement_conformance"] != "conformant" \
+       or v["workload_identity"] == "inconsistent":
+        return "invalid"
+    # Per-case producers cannot prove exact matrix coverage, repeat stability, or controlled
+    # cohorts. Keep even sound attempts diagnostic until the isolated publisher validates them.
+    return "diagnostic"
+
+
+def run_sweep(args, backend, torch, dist, device, rank: int, world_size: int) -> int:
+    """Drive the source-tokens-per-rank sweep for one fully-specified line."""
+    mode = getattr(args, "mode", "normal")
+    requested_precision = getattr(args, "precision_profile", "") or None
+    resolved_precision_id = requested_precision or identity.V1_CONTROL_PRECISION_PROFILE
+    try:
+        profile_case = {"mode": mode}
+        if requested_precision is not None:
+            profile_case["precision_profile"] = requested_precision
+        case_profile = identity.profile_for_case(profile_case)
+        communication_precision = identity.precision_profile(resolved_precision_id)
+    except identity.IdentityError as exc:
+        if rank == 0:
+            print(f"ERROR: {exc}")
+        return 2
+    sampling_error = sampling_contract_error(args.iters, args.trials, args.warmup)
+    if sampling_error:
+        if rank == 0:
+            print(f"ERROR: {sampling_error}")
+        return 2
+    import routing  # torch-based; imported lazily so the module byte-compiles without torch
+    import eplb     # stdlib planner + torch remap (the EPLB transform)
+
+    ep_size = world_size
+    # EPLB (if on): run_ep.py already bumped args.experts to the PHYSICAL count and stashed the
+    # logical count, so experts_per_rank below is physical. The trace is built over LOGICAL
+    # experts then remapped to physical (build_trace), so the whole sweep runs over the
+    # balanced physical placement with no adapter change.
+    eplb_on = getattr(args, "eplb", False)
+    num_logical = getattr(args, "num_logical_experts", args.experts)
+    if args.experts % ep_size != 0:
+        if rank == 0:
+            print(f"ERROR: experts ({args.experts}) must divide ep_size ({ep_size})")
+        return 2
+    experts_per_rank = args.experts // ep_size
+    if getattr(backend, "mode", None) != mode:
+        if rank == 0:
+            print(f"ERROR: backend mode {getattr(backend, 'mode', None)!r} != {mode!r}")
+        return 2
+    if (
+        getattr(backend, "precision_profile_id", None) != resolved_precision_id
+        or getattr(backend, "communication_precision", None) != communication_precision
+    ):
+        if rank == 0:
+            print("ERROR: backend did not realize the requested communication precision")
+        return 2
+    expected_weight_semantics = (
+        "gate-weighted-sum"
+        if case_profile["combine_semantics"] == "gate-weighted"
+        else "unweighted-rank-sum"
+    )
+    if getattr(backend, "combine_weight_semantics", None) != expected_weight_semantics:
+        if rank == 0:
+            print(
+                f"ERROR: {mode} requires combine semantics {expected_weight_semantics}"
+            )
+        return 2
+    if mode == LOW_LATENCY_MODE and (
+        args.phase != "decode"
+        or getattr(backend, "oracle_layout", None) != "expert-packed"
+        or getattr(backend, "payload_unit", None) != "token-expert"
+    ):
+        if rank == 0:
+            print("ERROR: low-latency requires decode expert-packed token-expert execution")
+        return 2
+
+    cap = backend.buffer_cap(args)
+    conditioning_ladder = CONDITIONING_LADDERS[args.phase]
+    if cap is not None and cap < conditioning_ladder[-1]:
+        if rank == 0:
+            print(f"ERROR: {backend.name} buffer cap {cap} cannot run the v1 conditioning ladder")
+        return 2
+    ladder, dropped = token_ladder(args.tokens_ladder, args.phase, cap)
+    if rank == 0 and dropped:
+        print(f"NOTE: dropped tokens/rank {dropped} — exceed {backend.name} buffer cap {cap} "
+              f"(hidden={args.hidden}); not silently truncated.")
+    if not ladder:
+        if rank == 0:
+            print(f"ERROR: empty token ladder (phase={args.phase}, cap={cap})")
+        return 2
+    MAX, MIN, SUM = dist.ReduceOp.MAX, dist.ReduceOp.MIN, dist.ReduceOp.SUM
+
+    # EPLB plan (once): estimate logical load from the global logical trace at the largest
+    # ladder T (most samples), then replicate+place. Held fixed across all T (as real EPLB
+    # plans from an observed load estimate). build_trace builds the LOGICAL trace and remaps
+    # to physical when the plan is present; otherwise it's the identity (logical == physical).
+    eplb_plan = None
+    eplb_calibration = None
+    if eplb_on:
+        calibration_id, calibration_checksums, calibration_rows, _ = (
+            workload_contract.canonical_eplb_calibration_member(
+                args.routing,
+                args.hidden,
+                args.topk,
+                num_logical,
+                ep_size,
+                EPLB_REFERENCE_TOKENS_PER_RANK,
+                args.seed,
+            )
+        )
+        ref_idx = torch.tensor(
+            calibration_rows,
+            dtype=torch.int64,
+        )
+        eplb_calibration = {
+            "token_offset": workload_contract.EPLB_CALIBRATION_TOKEN_OFFSET,
+            "trace_sha256": calibration_checksums["trace"],
+            "window": workload_contract.EPLB_CALIBRATION_WINDOW,
+            "workload_id": calibration_id,
+        }
+        if ref_idx.shape != (
+            EPLB_REFERENCE_TOKENS_PER_RANK * ep_size,
+            args.topk,
+        ):
+            raise RuntimeError("EPLB calibration trace dimensions differ from the contract")
+        load = torch.bincount(ref_idx.reshape(-1), minlength=num_logical).float().tolist()
+        eplb_plan = eplb.build_plan(load, args.experts, ep_size)
+        if rank == 0:
+            print(f"NOTE: EPLB {num_logical}->{args.experts} experts ({ep_size}x{experts_per_rank}); "
+                  f"per-rank load imbalance {eplb_plan['imbalance_before']:.2f}x -> "
+                  f"{eplb_plan['imbalance_after']:.2f}x; {eplb_plan['replicated_experts']} experts "
+                  f"replicated (hottest {eplb_plan['max_replicas']}x)")
+
+    canonical = bool(getattr(args, "workload_dir", ""))
+    loaded_workload_ids, loaded_checksums = [], {}
+    if canonical:
+        import workload as _wl
+
+    def build_trace(gt):
+        # canonical: load pre-serialized trace bytes (verified by checksum) so this run is
+        # provably the SAME workload as any other consuming the same files. else: seeded gen.
+        if canonical:
+            wid = _wl.compute_workload_id(
+                args.routing, args.hidden, args.topk, num_logical, ep_size, gt, args.seed
+            )
+            idx_np, w_np, man = _wl.load_workload(os.path.join(args.workload_dir, f"{wid}.npz"), verify=True)
+            idx_l = torch.from_numpy(idx_np).to(torch.int64)
+            w = torch.from_numpy(w_np).to(torch.float32)
+            if wid not in loaded_workload_ids:
+                loaded_workload_ids.append(wid)
+                loaded_checksums[wid] = man.get("checksums")
+        else:
+            idx_l, w = routing.build_global_routing(
+                gt, num_logical, args.topk, args.routing, args.seed
+            )
+        return (eplb.remap_idx(idx_l, eplb_plan) if eplb_plan is not None else idx_l), w
+
+    # Fabric/clock warm-up BEFORE any timed point (review: H200 had an anomalous cold
+    # first point and a 40% decode-vs-prefill mismatch at the shared T=128). Gradually
+    # ramp through the small ladder shapes untimed — warms clocks/fabric for everyone
+    # and is also cold-jump-safe for MoRI.
+    def warm_roundtrips(problem, count):
+        for _ in range(count):
+            handle = backend.dispatch(problem)
+            if not hasattr(problem, "recv_tokens"):
+                # Dynamic receive cardinality is stable for this fixed routing trace. Cache it
+                # during untimed conditioning so adapters never read a device scalar in timing.
+                problem.recv_tokens = backend.recv_tokens(handle)
+            backend.stage(problem, handle)
+            backend.combine(problem, handle)
+            torch.cuda.synchronize()
+
+    for wt in conditioning_ladder:
+        # Warm-only shapes need not have canonical manifests: they are never measured or emitted.
+        wi, ww = routing.build_global_routing(
+            wt * ep_size, num_logical, args.topk, args.routing, args.seed,
+        )
+        if eplb_plan is not None:
+            wi = eplb.remap_idx(wi, eplb_plan)
+        wsi, wsw = routing.rank_slice(wi, ww, rank, wt)
+        wx = routing.rank_activations(wt, args.hidden, args.seed, rank, device, torch.bfloat16)
+        wp = backend.make_problem(wt, wsi.to(device), wsw.to(device), wx)
+        warm_roundtrips(wp, CONDITIONING_ROUNDS_PER_SHAPE)
+    torch.cuda.synchronize()
+    dist.barrier()
+    # Setup may materialize deferred provenance such as DeepEP V2 JIT CUBINs.
+    # Resolve it after conditioning but before correctness or timed measurements.
+    capture_deferred_provenance = getattr(backend, "capture_deferred_provenance", None)
+    if capture_deferred_provenance is not None:
+        capture_deferred_provenance()
+    provenance_issues = contracts.backend_provenance_issues(
+        backend.name, backend.backend_provenance
+    )
+    if provenance_issues:
+        if rank == 0:
+            print(
+                f"ERROR: unpinned provenance {provenance_issues} "
+                f"in {backend.backend_provenance}"
+            )
+        return 4
+    # ---- Pass 1: build each deterministic problem and run the expert oracle. ----
+    problems, gate, gts, global_traces, input_snapshots = {}, {}, {}, {}, {}
+    routing_hashes = set()
+    for T in ladder:
+        counts = [T] * ep_size
+        gt = T * ep_size
+        gts[T] = gt
+        idx_g, w_g = build_trace(gt)
+        rstats = routing.routing_stats(idx_g, args.experts, experts_per_rank, weights=w_g)
+        gpn = args.gpus_per_node or ep_size
+        rstats["locality"] = routing.routing_locality(idx_g, experts_per_rank, ep_size, max(1, T),
+                                                      gpn, args.scale_up_domain or None)
+        rstats["source_token_stats"] = _stats_vec(counts)
+        routing_hashes.add(rstats["routing_hash"])
+        my_off, my_cnt = rank * T, T
+        idx_s = idx_g[my_off:my_off + my_cnt].contiguous()
+        w_s = w_g[my_off:my_off + my_cnt].contiguous()
+        x = routing.rank_activations(my_cnt, args.hidden, args.seed, rank, device, torch.bfloat16)
+        problem = backend.make_problem(my_cnt, idx_s.to(device), w_s.to(device), x)
+        input_snapshots[T] = (
+            problem.x.clone(), problem.topk_idx.clone(), problem.topk_weights.clone()
+        )
+        oracle = _run_expert_oracle(
+            torch, routing, backend, problem, idx_g, w_g, rank, experts_per_rank,
+            args.seed,
+        )
+        precision_pre = oracle.pop("_precision")
+        before_x, before_idx, before_weights = input_snapshots[T]
+        pre_input_unchanged = (
+            torch.equal(problem.x, before_x)
+            and torch.equal(problem.topk_idx, before_idx)
+            and torch.equal(problem.topk_weights, before_weights)
+        )
+        problems[T] = problem
+        global_traces[T] = (idx_g, w_g)
+        gate[T] = {
+            "rstats": rstats,
+            "recv_local": oracle["receive_count"],
+            "max_rel": oracle["max_relative_error"] or 0.0,
+            "local_ok": int(oracle["passed"]),
+            "oracle_pre": oracle,
+            "precision_pre": precision_pre,
+            "pre_input_unchanged": pre_input_unchanged,
+        }
+
+    # ---- Pass 2: every backend uses the same ascending point order and conditioning ramp.
+    # Per-iteration cross-rank MAX samples are pooled across trials. ----
+    disp_pool = {T: [] for T in ladder}     # pooled per-iteration cross-rank MAX (dispatch)
+    stage_pool = {T: [] for T in ladder}    # measured only when stage launches device work
+    comb_pool = {T: [] for T in ladder}     # ... combine
+    rt_pool = {T: [] for T in ladder}       # independently measured round trip
+    disp_trials = {T: [] for T in ladder}
+    stage_trials = {T: [] for T in ladder}
+    comb_trials = {T: [] for T in ladder}
+    rt_trials = {T: [] for T in ladder}
+    stage_device_work = getattr(backend, "stage_device_work", False)
+    if type(stage_device_work) is not bool:
+        raise ValueError("backend.stage_device_work must be a boolean")
+    order_identity = args.case_id or _sha256_json({
+        "backend": backend.name,
+        "ep_size": ep_size,
+        "mode": mode,
+        "phase": args.phase,
+        "precision_profile": resolved_precision_id,
+        "runner": args.runner,
+        "suite": args.suite,
+    })
+    observed_component_orders = []
+    for trial_index in range(args.trials):
+        order = qualification_order(
+            list(ladder), args.qualification_index, trial_index,
+            identity_key=f"{order_identity}:tokens",
+        )
+        for T in order:
+            problem = problems[T]
+            # Stateful paired APIs may expose only a measured round trip.
+            # Do not synthesize component latency from that measurement.
+            roundtrip_only = getattr(backend, "roundtrip_only", False)
+
+            def rt_once(p=problem):
+                hh = backend.dispatch(p)
+                backend.stage(p, hh)
+                return backend.combine(p, hh)
+
+            available_components = ["roundtrip"]
+            if not roundtrip_only:
+                available_components.extend(["dispatch", "combine"])
+                if stage_device_work:
+                    available_components.append("stage")
+            component_order = qualification_order(
+                available_components,
+                args.qualification_index,
+                trial_index,
+                identity_key=f"{order_identity}:components:{T}",
+            )
+            observed_component_orders.append({
+                "components": component_order,
+                "tokens_per_rank": T,
+                "trial_index": trial_index,
+            })
+            measured = {name: [] for name in ("dispatch", "stage", "combine", "roundtrip")}
+
+            def prep_stage(p=problem):
+                return backend.dispatch(p)
+
+            def prep_combine(p=problem):
+                hh = backend.dispatch(p)
+                backend.stage(p, hh)
+                return hh
+
+            for component_name in component_order:
+                # Every measured component receives the same 32 synchronized full-roundtrip
+                # warmups immediately before its timed trial.
+                warm_roundtrips(problem, args.warmup)
+                if component_name == "roundtrip":
+                    measured[component_name] = time_us(
+                        torch, lambda p=problem: rt_once(p), 0, args.iters
+                    )
+                elif component_name == "dispatch":
+                    measured[component_name] = time_us(
+                        torch, lambda p=problem: backend.dispatch(p), 0, args.iters
+                    )
+                elif component_name == "stage":
+                    measured[component_name] = time_us(
+                        torch,
+                        lambda hh, p=problem: backend.stage(p, hh),
+                        0,
+                        args.iters,
+                        pre=prep_stage,
+                    )
+                elif component_name == "combine":
+                    if backend.combine_needs_redispatch:
+                        measured[component_name] = time_us(
+                            torch,
+                            lambda hh, p=problem: backend.combine(p, hh),
+                            0,
+                            args.iters,
+                            pre=prep_combine,
+                        )
+                    else:
+                        hh = prep_combine()
+                        torch.cuda.synchronize()
+                        measured[component_name] = time_us(
+                            torch,
+                            lambda p=problem, hx=hh: backend.combine(p, hx),
+                            0,
+                            args.iters,
+                        )
+                else:  # pragma: no cover - generated from the fixed list above
+                    raise RuntimeError(f"unknown timed component {component_name!r}")
+            disp_iters = measured["dispatch"]
+            stage_iters = measured["stage"]
+            comb_iters = measured["combine"]
+            rt_iters = measured["roundtrip"]
+            # per-iteration cross-rank MAX (the distributed-op latency per iter), pooled.
+            if disp_iters:
+                reduced_dispatch = _reduce_vec(torch, dist, device, disp_iters, MAX)
+                reduced_combine = _reduce_vec(torch, dist, device, comb_iters, MAX)
+                disp_trials[T].append(reduced_dispatch)
+                comb_trials[T].append(reduced_combine)
+                disp_pool[T] += reduced_dispatch
+                comb_pool[T] += reduced_combine
+            if stage_iters:
+                reduced_stage = _reduce_vec(torch, dist, device, stage_iters, MAX)
+                stage_trials[T].append(reduced_stage)
+                stage_pool[T] += reduced_stage
+            reduced_roundtrip = _reduce_vec(torch, dist, device, rt_iters, MAX)
+            rt_trials[T].append(reduced_roundtrip)
+            rt_pool[T] += reduced_roundtrip
+
+    # ---- Pass 3: prove timed inputs were immutable and repeat the full oracle. ----
+    for T in ladder:
+        problem = problems[T]
+        before_x, before_idx, before_weights = input_snapshots[T]
+        input_unchanged = gate[T]["pre_input_unchanged"] and (
+            torch.equal(problem.x, before_x)
+            and torch.equal(problem.topk_idx, before_idx)
+            and torch.equal(problem.topk_weights, before_weights)
+        )
+        idx_g, w_g = global_traces[T]
+        post = _run_expert_oracle(
+            torch, routing, backend, problem, idx_g, w_g, rank, experts_per_rank,
+            args.seed,
+        )
+        precision_post = post.pop("_precision")
+        pre = gate[T]["oracle_pre"]
+        order_stable = (
+            pre["ordering_contract"] == post["ordering_contract"]
+            and pre["order_sha256"] == post["order_sha256"]
+            and pre["dispatch_sha256"] == post["dispatch_sha256"]
+        )
+        gate[T].update({
+            "input_unchanged": input_unchanged,
+            "local_ok": int(pre["passed"] and post["passed"] and input_unchanged and order_stable),
+            "max_rel": max(pre["max_relative_error"] or 0.0, post["max_relative_error"] or 0.0),
+            "oracle_post": post,
+            "precision_post": precision_post,
+            "order_stable": order_stable,
+        })
+
+    # ---- Pass 4: percentiles (p50/p90/p95/p99, nearest-rank) from pooled samples + bytes + row ----
+    def pcts(xs):
+        return ({"p50": percentile(xs, 50), "p90": percentile(xs, 90),
+                 "p95": percentile(xs, 95), "p99": percentile(xs, 99)} if xs else None)
+
+    def component(percentiles, count, *, derived=False):
+        if percentiles is None:
+            return {"availability": "unavailable", "origin": None,
+                    "percentiles_us": None, "sample_count": 0}
+        return {
+            "availability": "derived" if derived else "measured",
+            "origin": "derived-percentile-sum" if derived else "measured",
+            "percentiles_us": percentiles,
+            "sample_count": 0 if derived else count,
+        }
+    rows = []
+    all_anomalies = []
+    thr_rt = 3.0
+    for T in ladder:
+        gt = gts[T]
+        g = gate[T]
+        rstats = g["rstats"]
+        d, s, c, rt = disp_pool[T], stage_pool[T], comb_pool[T], rt_pool[T]
+        dp, sp, cp, rtp = pcts(d), pcts(s), pcts(c), pcts(rt)
+        # isolated_sum = SUM of the isolated dispatch+stage+combine percentiles. Stage contributes
+        # zero when it is explicitly not applicable. This is NOT a measured chained operation
+        # (can't reveal shared sync / launch amortization / overlap) — do NOT use for throughput
+        # or SLO capacity. The MEASURED round trip (rtp) is the real chained latency.
+        isum = (
+            {key: dp[key] + (sp[key] if sp is not None else 0.0) + cp[key] for key in dp}
+            if dp and cp else None
+        )
+        recv_total = _reduce_int(torch, dist, device, g["recv_local"], SUM)
+        recv_max = _reduce_int(torch, dist, device, g["recv_local"], MAX)
+        recv_min = _reduce_int(torch, dist, device, g["recv_local"], MIN)
+        global_ok = _reduce_int(torch, dist, device, g["local_ok"], MIN)
+        max_rel = _reduce_vec(torch, dist, device, [g["max_rel"]], MAX)[0]
+        point_ok = bool(global_ok) and recv_total > 0
+        rank_evidence = [None] * world_size
+        dist.all_gather_object(
+            rank_evidence,
+            {
+                "input_unchanged": g["input_unchanged"],
+                "order_stable": g["order_stable"],
+                "post_timing": g["oracle_post"],
+                "pre_timing": g["oracle_pre"],
+                "rank": rank,
+            },
+        )
+        precision_rank_evidence = [None] * world_size
+        dist.all_gather_object(
+            precision_rank_evidence,
+            {"pre": g["precision_pre"], "post": g["precision_post"]},
+        )
+        precision_evidence = aggregate_precision_evidence(precision_rank_evidence)
+        # Canonical LOGICAL payload byte contracts (from the routing trace, NOT backend recv
+        # tensors): token-rank = one copy per unique (token,dest-rank); token-expert = one copy
+        # per routed (token,expert). routed_copies = token-rank copies; gt*topk = token-expert.
+        token_rank_copies = rstats["routed_copies"]
+        logical_copies = (
+            sum(rstats["expert_assignments_per_rank"])
+            if case_profile["payload_unit"] == "token-expert"
+            else token_rank_copies
+        )
+        H = args.hidden
+        throughput = {
+            percentile_name: gt / (latency_us * 1e-6)
+            for percentile_name, latency_us in rtp.items()
+        }
+        dispatch_bytes = precision_byte_provenance(
+            communication_precision["dispatch"], logical_copies, H
+        )
+        combine_bytes = precision_byte_provenance(
+            communication_precision["combine"], logical_copies, H
+        )
+        stage_bytes = {
+            "accounting_contract": "activation-data-plus-scales-v1",
+            "activation_data_bytes": 0,
+            "scale_bytes": 0,
+            "total_logical_bytes": 0,
+        }
+        roundtrip_bytes = {
+            "accounting_contract": "activation-data-plus-scales-v1",
+            **{
+                field: dispatch_bytes[field] + combine_bytes[field]
+                for field in (
+                    "activation_data_bytes", "scale_bytes", "total_logical_bytes"
+                )
+            },
+        }
+        # Contract-level anomalies are attached to the row and rolled into validity.
+        #   roundtrip_gt_isolated_sum: measured RT p99 >> Σ(isolated dispatch+combine) p99.
+        #   roundtrip_lt_component_floor: measured RT p50 < max(dispatch,combine) p50 — a chained
+        #     op can't finish faster than its slowest required component (sync semantics violated).
+        row_anoms = []
+        if isum and isum["p99"] > 0 and rtp["p99"] > thr_rt * isum["p99"]:
+            row_anoms.append({"type": "roundtrip_gt_isolated_sum", "T": T,
+                              "roundtrip_p99": round(rtp["p99"], 2), "isolated_sum_p99": round(isum["p99"], 2),
+                              "ratio": round(rtp["p99"] / isum["p99"], 2), "threshold": thr_rt})
+        floor = (
+            max(dp["p50"], cp["p50"], sp["p50"] if sp is not None else 0.0)
+            if dp and cp else None
+        )
+        if floor and rtp["p50"] > 0 and rtp["p50"] < 0.95 * floor:
+            row_anoms.append({"type": "roundtrip_lt_component_floor", "T": T,
+                              "roundtrip_p50": round(rtp["p50"], 2), "component_floor_p50": round(floor, 2)})
+        all_anomalies.extend(row_anoms)
+        rows.append({
+            "anomalies": row_anoms,
+            "components": {
+                "combine": component(cp, len(c)),
+                "dispatch": component(dp, len(d)),
+                "isolated_sum": component(isum, 0, derived=True),
+                "roundtrip": component(rtp, len(rt)),
+                "stage": component(sp, len(s)),
+            },
+            "correctness": {
+                "contract": case_profile["oracle_contract"],
+                "max_relative_error": max_rel,
+                "passed": point_ok,
+                "precision": precision_evidence,
+                "rank_evidence": rank_evidence,
+                "scope": case_profile["correctness_scope"],
+            },
+            "global_tokens": gt,
+            "byte_provenance": {
+                "combine": combine_bytes,
+                "dispatch": dispatch_bytes,
+                "roundtrip": roundtrip_bytes,
+                "stage": stage_bytes,
+            },
+            "receive": {
+                "max": recv_max,
+                "mean": recv_total / world_size,
+                "min": recv_min,
+                "total": recv_total,
+            },
+            "routing": {
+                "empty_expert_count": rstats["empty_expert_count"],
+                "empty_rank_count": rstats["empty_rank_count"],
+                "expert_assignment_rank_cv": rstats["expert_assignment_rank_cv"],
+                "expert_assignments_per_rank": rstats["expert_assignments_per_rank"],
+                "expert_load_cv": rstats["expert_load_cv"],
+                "expert_load_max": rstats["expert_load_max"],
+                "expert_load_mean": rstats["expert_load_mean"],
+                "expert_load_min": rstats["expert_load_min"],
+                "fanout_histogram": rstats["fanout_hist"],
+                "fanout_max": rstats["fanout_max"],
+                "fanout_mean": rstats["fanout_mean"],
+                "fanout_min": rstats["fanout_min"],
+                "hash": rstats["routing_hash"],
+                "hotspot_ratio": rstats["hotspot_ratio"],
+                "locality": rstats.get("locality"),
+                "payload_copies_per_rank": rstats["payload_copies_per_rank"],
+                "payload_rank_cv": rstats["payload_rank_cv"],
+                "routed_copies": rstats["routed_copies"],
+                "source_token_stats": rstats.get("source_token_stats"),
+            },
+            "sample_histograms": {
+                "dispatch": _histogram(d) if d else None,
+                "stage": _histogram(s) if s else None,
+                "combine": _histogram(c) if c else None,
+                "roundtrip": _histogram(rt),
+            },
+            "token_rate_at_latency_percentile": throughput,
+            "tokens_per_rank": T,
+        })
+        if rank == 0:
+            component_log = (f"disp p50/p99={dp['p50']:7.1f}/{dp['p99']:7.1f} "
+                             f"comb {cp['p50']:6.1f}/{cp['p99']:6.1f} " if dp and cp
+                             else "components=unavailable ")
+            print(f"  T={T:<5} {component_log}"
+                  f"RT p50/p99={rtp['p50']:7.1f}/{rtp['p99']:7.1f}us n={len(rt)} fanout={rstats['fanout_mean']:.2f} "
+                  f"recv[min/mean/max]={recv_min}/{recv_total // world_size}/{recv_max} "
+                  f"correct={point_ok}")
+
+    # Cross-rank workload-identity proof: every rank must have built the SAME global routing
+    # (one hash per T here); confirm all ranks agree by hashing the per-T hash set and
+    # MIN/MAX-reducing it — a mismatch means NVIDIA and AMD did NOT run identical routing.
+    trace_sig = hashlib.sha256("|".join(sorted(routing_hashes)).encode()).hexdigest()
+    routing_consistent = _same_hash_across_ranks(torch, dist, device, trace_sig)
+
+    # Capture again after correctness and timing so no lazily generated kernel can escape
+    # the implementation identity recorded in the artifact.
+    if capture_deferred_provenance is not None:
+        capture_deferred_provenance()
+
+    if rank != 0:
+        return 0
+
+    # status=valid requires correctness AND a proven-identical routing trace across ranks.
+    all_ok = bool(rows) and all(r["correctness"]["passed"] for r in rows) and routing_consistent
+
+    # Adapters never self-label official; status is derived from these gates.
+    prov = backend.backend_provenance
+    allocation_stratum_sha256 = getattr(args, "allocation_stratum_sha256", None)
+    provenance_complete = contracts.provenance_complete(
+        prov,
+        backend.name,
+        getattr(args, "git_run", None),
+        allocation_stratum_sha256=allocation_stratum_sha256,
+        image_digest=getattr(args, "image_digest", None),
+        image_verified=getattr(args, "image_digest_verified", False),
+        squash_sha256=getattr(args, "squash_sha256", None),
+    )
+    resource_profile = contracts.project_resource_profile(prov)
+    resource_conformance = resource_profile["conformance_class"]
+    # record the canonical workload identity consumed (one trace per T -> set of ids/checksums).
+    if canonical and loaded_workload_ids:
+        args.workload_id = identity.workload_id(
+            {
+                "members": [
+                    {"checksums": loaded_checksums[member], "workload_id": member}
+                    for member in sorted(loaded_workload_ids)
+                ]
+            }
+        )
+        args.workload_members = sorted(loaded_workload_ids)
+        args.workload_checksums = loaded_checksums
+    canonical_workload = bool(getattr(args, "workload_id", None))
+    activation_identity = workload_contract.compute_activation_identity(args.seed, args.hidden)
+    # EPLB identity covers replica placement, not only counts.
+    eplb_mapping_hash = None
+    if eplb_plan is not None:
+        eplb_mapping_hash = eplb.mapping_hash(eplb_plan)
+    anomaly_free = len(all_anomalies) == 0
+    validity = {
+        "execution_status": "complete" if rows else "failed",
+        "semantic_correctness": (
+            "pass" if rows and all(r["correctness"]["passed"] for r in rows) else "fail"
+        ),
+        "workload_identity": "consistent-across-ranks" if routing_consistent else "inconsistent",
+        "workload_source": "canonical-serialized" if canonical_workload else "seeded-runtime",
+        "measurement_conformance": "conformant",   # run_ep gate rejects nonconformant pre-run
+        "sampling_conformance": "conformant",      # fixed-512-v1 gate rejects any other profile
+        "resource_conformance": resource_conformance,
+        "provenance_complete": provenance_complete,
+        # anomaly-free unless a contract-level timing anomaly fired (then diagnostic, see above).
+        "anomaly_free": anomaly_free,
+    }
+    publication_status = _derive_publication_status(validity)
+
+    shape = {  # FIXED line identity (no T, no per-backend resource knobs)
+        "hidden": args.hidden, "topk": args.topk, "experts": args.experts,
+        "experts_per_rank": experts_per_rank,
+        "precision_profile": resolved_precision_id,
+        "dispatch_precision": communication_precision["dispatch"],
+        "combine_precision": communication_precision["combine"],
+        "routing": args.routing, "eplb": bool(eplb_plan), "num_logical_experts": num_logical,
+        # V2 is reserved for the PR #605 ElasticBuffer adapter; package versions never imply it.
+        "kernel_gen": kernel_generation(backend),
+        "activation_profile": ACTIVATION_PROFILE,
+    }
+    generated_at = args.timestamp or _dt.datetime.now().astimezone().isoformat()
+    realized_placement = getattr(args, "realized_placement", None)
+    nodes = (
+        realized_placement["nodes"]
+        if realized_placement is not None
+        else int(os.environ.get("SLURM_NNODES", "1"))
+    )
+    scheduled_case = {
+            "backend": backend.name,
+            "canonical": canonical,
+            "eplb": bool(eplb_plan),
+            "ep": ep_size,
+            "experts": num_logical,
+            "gpus_per_node": args.gpus_per_node or ep_size,
+            "hidden": args.hidden,
+            "ladder": " ".join(map(str, ladder)),
+            "mode": mode,
+            "nodes": nodes,
+            "phase": args.phase,
+            "required_publication": args.required_publication or "diagnostic",
+            "routing": args.routing,
+            "samples_per_point": TIMED_SAMPLES_PER_POINT,
+            "scale_up_domain": args.scale_up_domain or (args.gpus_per_node or ep_size),
+            "scale_up_transport": args.scale_up_transport,
+            "scale_out_transport": args.scale_out_transport or None,
+            "scope": args.scope,
+            "suite": args.suite or "manual",
+            "timing": f"{args.iters}:{args.trials}:{args.warmup}",
+            "topk": args.topk,
+            "topology_class": args.topology_class,
+            "transport": args.transport,
+            "warmup_semantics": WARMUP_SEMANTICS,
+            "workload": args.workload_name or "manual",
+    }
+    if requested_precision is not None:
+        scheduled_case["precision_profile"] = requested_precision
+    case_factors = {
+        "case": scheduled_case,
+        "profile": case_profile,
+        "sku": args.runner,
+    }
+    computed_case_id = identity.digest("case", case_factors)
+    if args.case_id and args.case_id != computed_case_id:
+        raise ValueError(
+            f"scheduled case ID does not match realized factors: {args.case_id} != {computed_case_id}"
+        )
+    case_identifier = args.case_id or computed_case_id
+    git_run = getattr(args, "git_run", None) or {}
+    allocation_factors = {
+        "artifact": git_run.get("artifact"),
+        "execution_id": getattr(args, "allocation_execution_id", None),
+        "job": git_run.get("job"),
+        "qualification_index": args.qualification_index,
+        "repo": git_run.get("repo"),
+        "run_attempt": git_run.get("run_attempt"),
+        "run_id": git_run.get("run_id"),
+        "runner": args.runner,
+        "source_sha": git_run.get("source_sha"),
+    }
+    allocation_identifier = identity.allocation_id(allocation_factors)
+    try:
+        attempt_ordinal = int(os.environ.get("CX_ATTEMPT_ID", "1"))
+    except ValueError:
+        attempt_ordinal = 0
+    if attempt_ordinal <= 0:
+        raise ValueError("CX_ATTEMPT_ID must be a positive integer")
+    attempt_identifier = identity.attempt_id(
+        allocation=allocation_identifier, case=case_identifier, ordinal=attempt_ordinal
+    )
+    runtime_fingerprint = getattr(args, "runtime_fingerprint", None) or {}
+    implementation_contract = {
+        "kernel_generation": kernel_generation(backend),
+        "name": backend.name,
+        "provenance": _series_provenance(backend.backend_provenance),
+        "resource_profile": resource_profile,
+    }
+    public_config = contracts.public_series_config(
+        kernel_generation=implementation_contract["kernel_generation"],
+        provenance=backend.backend_provenance,
+        resource_profile=resource_profile,
+        resource_mode=args.resource_mode,
+        device_product=getattr(args, "runtime_device_product", None),
+    )
+    series_factors = {
+        "backend": backend.name,
+        "implementation_contract_sha256": _sha256_json(implementation_contract),
+        "public_config_sha256": contracts.public_series_config_sha256(public_config),
+        "routing_control_sha256": contracts.routing_implementation_control_sha256(
+            implementation_contract
+        ),
+        "case_id": case_identifier,
+        "image_digest": getattr(args, "image_digest", None),
+        "runtime_fingerprint_sha256": _sha256_json(runtime_fingerprint),
+        "source_sha": git_run.get("source_sha"),
+        "squash_sha256": getattr(args, "squash_sha256", None),
+        "workload_id": getattr(args, "workload_id", None) or trace_sig,
+    }
+    series_identifier = identity.series_id(series_factors)
+
+    sample_points = []
+    for row in rows:
+        token_count = row["tokens_per_rank"]
+
+        sample_point = {
+            "components": {
+                "combine": sampled_component_evidence(comb_trials[token_count]),
+                "dispatch": sampled_component_evidence(disp_trials[token_count]),
+                "roundtrip": sampled_component_evidence(rt_trials[token_count]),
+                "stage": sampled_component_evidence(stage_trials[token_count]),
+            },
+            "tokens_per_rank": token_count,
+        }
+        sample_sha256 = _sha256_json(sample_point)
+        point_identifier = identity.point_id(
+            series=series_identifier, tokens_per_rank=token_count
+        )
+        evidence_identifier = identity.evidence_id(
+            point=point_identifier,
+            allocation=allocation_identifier,
+            attempt=attempt_identifier,
+            sample_sha256=sample_sha256,
+        )
+        sample_point.update(
+            {
+                "evidence_id": evidence_identifier,
+                "point_id": point_identifier,
+                "sample_sha256": sample_sha256,
+            }
+        )
+        sample_points.append(sample_point)
+        row.update({
+            "evidence_id": evidence_identifier,
+            "point_id": point_identifier,
+            "sample_sha256": sample_sha256,
+        })
+
+    samples_path = args.out[:-5] + ".samples.json" if args.out.endswith(".json") else args.out + ".samples.json"
+    samples_document = {
+        "allocation_id": allocation_identifier,
+        "attempt_id": attempt_identifier,
+        "case_id": case_identifier,
+        "format": "collectivex.samples.v1",
+        "points": sample_points,
+        "qualification_index": args.qualification_index,
+        "sampling": {
+            "iterations_per_trial": args.iters,
+            "reduction": case_profile["rank_reduction"],
+            "trials": args.trials,
+        },
+        "schema_version": 1,
+        "series_id": series_identifier,
+    }
+    samples_payload = contracts.canonical_json_bytes(samples_document)
+    samples_sha256 = hashlib.sha256(samples_payload).hexdigest()
+    samples_bytes = len(samples_payload)
+    sample_artifact = {
+        "bytes": samples_bytes,
+        "format": "collectivex.samples.v1",
+        "path": os.path.basename(samples_path),
+        "sha256": samples_sha256,
+    }
+    headline = next((r for r in rows if r["tokens_per_rank"] == 64), rows[len(rows) // 2])
+    eplb_record = (
+        {
+            "calibration_token_offset": eplb_calibration["token_offset"],
+            "calibration_trace_sha256": eplb_calibration["trace_sha256"],
+            "calibration_window": eplb_calibration["window"],
+            "calibration_workload_id": eplb_calibration["workload_id"],
+            "enabled": True,
+            "imbalance_after": eplb_plan["imbalance_after"],
+            "imbalance_before": eplb_plan["imbalance_before"],
+            "mapping_hash": eplb_mapping_hash,
+            "max_replicas": eplb_plan["max_replicas"],
+            "num_logical_experts": num_logical,
+            "num_physical_experts": args.experts,
+            "num_redundant": args.experts - num_logical,
+            "planner": EPLB_PLANNER,
+            "reference_tokens_per_rank": EPLB_REFERENCE_TOKENS_PER_RANK,
+            "replicated_experts": eplb_plan["replicated_experts"],
+        }
+        if eplb_plan
+        else {
+            "calibration_token_offset": None,
+            "calibration_trace_sha256": None,
+            "calibration_window": None,
+            "calibration_workload_id": None,
+            "enabled": False,
+            "imbalance_after": None,
+            "imbalance_before": None,
+            "mapping_hash": None,
+            "max_replicas": None,
+            "num_logical_experts": num_logical,
+            "num_physical_experts": args.experts,
+            "num_redundant": 0,
+            "planner": None,
+            "reference_tokens_per_rank": None,
+            "replicated_experts": 0,
+        }
+    )
+    doc = {
+        "format": "collectivex.ep.v1",
+        "schema_version": SCHEMA_VERSION,
+        "record_type": "case-attempt",
+        "generated_at": generated_at,
+        "identity": {
+            "allocation_factors": allocation_factors,
+            "allocation_id": allocation_identifier,
+            "attempt_id": attempt_identifier,
+            "attempt_ordinal": attempt_ordinal,
+            "case_factors": case_factors,
+            "case_id": case_identifier,
+            "series_factors": series_factors,
+            "series_id": series_identifier,
+        },
+        "case": {
+            "attempt_ordinal": attempt_ordinal,
+            "backend": backend.name,
+            "eplb": eplb_record,
+            "ep_size": ep_size,
+            "mode": mode,
+            "phase": args.phase,
+            "required_publication": args.required_publication or "diagnostic",
+            "resource_mode": "fixed-profile",
+            "runner": args.runner,
+            "shape": shape,
+            "suite": args.suite or "manual",
+            "workload_name": args.workload_name or "manual",
+        },
+        "workload": {
+            "activation_generator": ACTIVATION_GENERATOR,
+            "activation_identity": activation_identity,
+            "activation_profile": ACTIVATION_PROFILE,
+            "cross_rank_consistent": routing_consistent,
+            "manifest_checksums": getattr(args, "workload_checksums", None),
+            "members": getattr(args, "workload_members", None),
+            "routing_generator": ROUTING_GENERATOR,
+            "source": validity["workload_source"],
+            "trace_hashes": sorted(routing_hashes),
+            "trace_signature": trace_sig,
+            "workload_id": getattr(args, "workload_id", None),
+        },
+        "measurement": {
+            "component_order_contract": case_profile["component_order_contract"],
+            "conditioning": {
+                "contract": CONDITIONING_CONTRACT,
+                "ladder": conditioning_ladder,
+                "roundtrips_per_shape": CONDITIONING_ROUNDS_PER_SHAPE,
+            },
+            "contract": case_profile["contract"],
+            "execution_order_sha256": _sha256_json(observed_component_orders),
+            "qualification_index": args.qualification_index,
+            "rows": rows,
+            "sampling": {
+                "contract": SAMPLING_CONTRACT,
+                "iterations_per_trial": args.iters,
+                "percentile_method": case_profile["percentile_method"],
+                "reduction": case_profile["rank_reduction"],
+                "samples_per_component": TIMED_SAMPLES_PER_POINT,
+                "trials": args.trials,
+                "warmup_iterations": args.warmup,
+                "warmup_semantics": WARMUP_SEMANTICS,
+            },
+            "source_allocation": "even",
+        },
+        "implementation": {
+            "kernel_generation": kernel_generation(backend),
+            "name": backend.name,
+            "provenance": backend.backend_provenance,
+            "resource_profile": resource_profile,
+        },
+        "topology": {
+            "device_count": getattr(args, "runtime_device_count", None),
+            "device_product": getattr(args, "runtime_device_product", None),
+            "gpus_per_node": args.gpus_per_node or ep_size,
+            "nodes": nodes,
+            "placement": "packed",
+            "realized_placement": realized_placement,
+            "scale_up_domain": args.scale_up_domain or (args.gpus_per_node or ep_size),
+            "scale_up_transport": args.scale_up_transport,
+            "scale_out_transport": args.scale_out_transport or None,
+            "scope": args.scope,
+            "topology_class": args.topology_class,
+            "transport": args.transport,
+            "world_size": world_size,
+        },
+        "runtime_fingerprint": runtime_fingerprint,
+        "provenance": {
+            "allocation_stratum_sha256": allocation_stratum_sha256,
+            "command": getattr(args, "reproduction_command", ""),
+            "distributed_launcher": getattr(args, "distributed_launcher", None),
+            "git_run": getattr(args, "git_run", None),
+            "image": {
+                "arch": getattr(args, "image_arch", None),
+                "digest": getattr(args, "image_digest", "") or None,
+                "digest_verified": getattr(args, "image_digest_verified", False),
+                "reference": getattr(args, "image", "") or None,
+                "squash_sha256": getattr(args, "squash_sha256", None),
+            },
+            "redaction": "sanitized-v1",
+        },
+        "sample_artifact": sample_artifact,
+        "outcome": {
+            "publication_status": publication_status,
+            "reasons": [] if all_ok else ["semantic correctness or routing identity failed"],
+            "status": "success" if all_ok else "invalid",
+            "validity": validity,
+        },
+    }
+    contracts.validate_raw_document(doc, samples_document)
+    _write_bytes_atomic(samples_path, samples_payload)
+    _write_json_atomic(args.out, doc)
+    dispatch_percentiles = headline["components"]["dispatch"]["percentiles_us"]
+    dispatch_p99 = dispatch_percentiles["p99"] if dispatch_percentiles else None
+    component_summary = (f"disp_p99={dispatch_p99:.1f}us "
+                         if dispatch_p99 is not None
+                         else "components=unavailable ")
+    print(f"{backend.name} ep-dispatch-combine [{args.phase}/{mode}/{case_profile['contract']}]: "
+          f"status={doc['outcome']['status']} {len(rows)} pts, routing_consistent={routing_consistent}, "
+          f"headline T={headline['tokens_per_rank']} {component_summary}"
+          f"-> {args.out}")
+    # A complete invalid document is still a successfully captured terminal outcome. Launchers
+    # inspect its status to fail the case without conflating it with an execution failure.
+    return 0
diff --git a/experimental/CollectiveX/tests/ep_mori.py b/experimental/CollectiveX/tests/ep_mori.py
new file mode 100644
index 000000000..832a69ea2
--- /dev/null
+++ b/experimental/CollectiveX/tests/ep_mori.py
@@ -0,0 +1,524 @@
+#!/usr/bin/env python3
+"""CollectiveX MoRI adapter for native V1 dispatch/combine precision profiles."""
+from __future__ import annotations
+
+import os
+from pathlib import Path
+import re
+import sys
+import types
+
+# MoRI registers the whole symmetric heap at import time. The pinned upstream
+# inter-node benchmark uses 6 GiB for its InterNodeV1 staging and signal buffers.
+os.environ["MORI_SHMEM_HEAP_SIZE"] = "6G"
+
+import torch
+import torch.distributed as dist
+import ep_precision
+
+try:
+    import mori  # type: ignore
+except Exception as exc:  # pragma: no cover - requires the benchmark image
+    print(f"ERROR: mori import failed: {exc!r}", file=sys.stderr)
+    raise
+
+
+def _project_local_metadata(torch_module, raw_expert_ids, raw_weights, rank, experts_per_rank):
+    """Mask routing metadata down to the experts owned by ``rank``.
+
+    The rank owns the contiguous expert-id range
+    ``[rank * experts_per_rank, (rank + 1) * experts_per_rank)``.
+
+    Returns a 3-tuple:
+      * expert ids with every non-local entry replaced by ``-1``,
+      * weights with every non-local entry replaced by zero,
+      * the locally owned ids (selected with a boolean mask) rebased so the
+        first local expert is ``0``.
+    """
+    first_owned = rank * experts_per_rank
+    end_owned = first_owned + experts_per_rank
+    owned = (raw_expert_ids >= first_owned) & (raw_expert_ids < end_owned)
+    not_local_marker = torch_module.full_like(raw_expert_ids, -1)
+    masked_ids = torch_module.where(owned, raw_expert_ids, not_local_marker)
+    masked_weights = torch_module.where(
+        owned, raw_weights, torch_module.zeros_like(raw_weights)
+    )
+    rebased_local_ids = raw_expert_ids[owned] - first_owned
+    return masked_ids, masked_weights, rebased_local_ids
+
+
+def _mori_source_commit() -> str:
+    """Return the 40-hex commit recorded in the ``.git/HEAD`` above the ``mori`` package.
+
+    Walks the ancestors of the resolved ``mori.__file__``. The first ancestor whose
+    ``.git/HEAD`` is a regular (non-symlink) file of at most 128 bytes decides the
+    result: its stripped content must be exactly 40 lowercase hex digits.
+
+    Raises:
+        RuntimeError: the first qualifying HEAD does not hold a bare commit hash,
+            or no ancestor has a qualifying HEAD file.
+    """
+    for ancestor in Path(mori.__file__).resolve().parents:
+        head_file = ancestor / ".git" / "HEAD"
+        # Skip candidates that are symlinks, missing, or larger than 128 bytes.
+        if head_file.is_symlink() or not head_file.is_file():
+            continue
+        if head_file.stat().st_size > 128:
+            continue
+        revision = head_file.read_text(encoding="ascii").strip()
+        if re.fullmatch(r"[0-9a-f]{40}", revision) is None:
+            raise RuntimeError("MoRI image source is not pinned to a detached commit")
+        return revision
+    raise RuntimeError("MoRI image source revision is unavailable")
+
+
+class MoRIBackend:
+    """CollectiveX backend adapter around ``mori.ops.EpDispatchCombineOp``.
+
+    The constructor resolves the precision profile, validates the realized GPU
+    architecture and placement, selects the MoRI kernel type, and records the
+    resulting settings in ``backend_provenance``. The remaining methods expose
+    dispatch / stage / combine steps plus inspection helpers.
+    """
+    name = "mori"
+    # Class-level default only; ``__init__`` reassigns this per instance based on
+    # the precision profile and on whether external-input kernels are used.
+    stage_device_work = False
+    # NOTE(review): the two flags below are not read inside this class; presumably
+    # they are consumed by the benchmark harness — confirm against the caller.
+    combine_needs_redispatch = True
+    combine_weight_semantics = "unweighted-rank-sum"
+
+    def __init__(self, args, rank, world_size, local_rank, device):
+        """Validate the requested case and build the MoRI dispatch/combine operator.
+
+        Steps, in order:
+          1. Pick the FP8 format and supported precision profiles from the
+             ``args.runner`` prefix (``mi355x`` or ``mi325x``) and resolve the
+             requested profile through ``ep_precision.resolve_precision``.
+          2. Check the realized GPU architecture and the placement/transport
+             arguments against the pinned EP8 (scale-up) or EP16 (scale-out) shapes.
+          3. Select the kernel type from ``CX_MORI_KERNEL_TYPE`` and its launch
+             parameters (block counts, warps, tuned-source label).
+          4. Initialize MoRI shmem on the world process group and check the
+             realized queue-pair count.
+          5. Build ``EpDispatchCombineConfig``, read back the requested values,
+             force ``MORI_EP_LAUNCH_CONFIG_MODE=MANUAL`` and create the operator.
+          6. Verify the MoRI source commit and record ``backend_provenance``.
+
+        Args:
+            args: parsed benchmark arguments (runner, experts, hidden, topk,
+                placement and transport fields are read here).
+            rank: global rank of this process.
+            world_size: number of ranks; also used as the EP size.
+            local_rank: accepted for interface compatibility; not read here.
+            device: device passed to ``torch.cuda.get_device_properties``.
+
+        Raises:
+            ep_precision.PrecisionError: unknown runner, FP8 format or architecture
+                mismatch, missing ``Fp8DirectCast``/torch FP8 dtype, or ``hidden``
+                not divisible by 128 for FP8 dispatch.
+            RuntimeError: placement, scope, kernel-type, QP, realized-config,
+                launch-mode or source-commit validation failed.
+        """
+        self.args = args
+        self.rank = rank
+        self.world_size = world_size
+        self.device = device
+        self.mode = "normal"
+        runner = str(getattr(args, "runner", ""))
+        # The runner prefix pins both the FP8 wire format and the set of profiles
+        # this adapter accepts; direct-cast FP8 combine is only offered at EP8.
+        if runner.startswith("mi355x"):
+            fp8_format = "fp8-e4m3fn"
+            supported_profiles = {
+                "d-bf16.c-bf16",
+                "d-fp8-e4m3fn-b128-f32-prequantized.c-bf16",
+            }
+            if world_size == 8:
+                supported_profiles.update({
+                    "d-bf16.c-fp8-e4m3fn-direct-cast-noscale",
+                    "d-fp8-e4m3fn-b128-f32-prequantized.c-fp8-e4m3fn-direct-cast-noscale",
+                })
+        elif runner.startswith("mi325x"):
+            fp8_format = "fp8-e4m3fnuz"
+            supported_profiles = {
+                "d-bf16.c-bf16",
+                "d-fp8-e4m3fnuz-b128-f32-prequantized.c-bf16",
+            }
+            if world_size == 8:
+                supported_profiles.update({
+                    "d-bf16.c-fp8-e4m3fnuz-direct-cast-noscale",
+                    "d-fp8-e4m3fnuz-b128-f32-prequantized.c-fp8-e4m3fnuz-direct-cast-noscale",
+                })
+        else:
+            raise ep_precision.PrecisionError(
+                f"MoRI precision contract has no pinned FP8 format for runner {runner!r}"
+            )
+        self.precision_profile_id, self.communication_precision = (
+            ep_precision.resolve_precision(
+                args,
+                backend=self.name,
+                mode=self.mode,
+                supported_profiles=supported_profiles,
+            )
+        )
+        self._fp8_dispatch = ep_precision.is_low_precision_dispatch(
+            self.communication_precision
+        )
+        self._direct_cast_combine = ep_precision.uses_direct_cast_combine(
+            self.communication_precision
+        )
+        if self._fp8_dispatch and ep_precision.communication_format(
+            self.communication_precision, "dispatch"
+        ) != fp8_format:
+            raise ep_precision.PrecisionError(
+                "MoRI dispatch FP8 format differs from the pinned GPU architecture"
+            )
+        if self._direct_cast_combine:
+            # Direct-cast combine needs the Fp8DirectCast member in the installed MoRI API.
+            quant_enum = getattr(mori.ops, "EpDispatchCombineQuantType", None)
+            if quant_enum is None or not hasattr(quant_enum, "Fp8DirectCast"):
+                raise ep_precision.PrecisionError(
+                    "pinned MoRI API omits EpDispatchCombineQuantType.Fp8DirectCast"
+                )
+
+        self.ep_size = world_size
+        self.experts_per_rank = args.experts // self.ep_size
+        device_properties = torch.cuda.get_device_properties(device)
+        device_cus = device_properties.multi_processor_count
+        # Keep only the part of gcnArchName before the first ':'.
+        realized_arch = str(getattr(device_properties, "gcnArchName", "")).split(":", 1)[0]
+        expected_arch = "gfx950" if runner.startswith("mi355x") else "gfx942"
+        if realized_arch != expected_arch:
+            raise ep_precision.PrecisionError(
+                f"MoRI runner {runner!r} realized architecture {realized_arch!r}, "
+                f"expected {expected_arch!r}"
+            )
+        gpus_per_node = int(args.gpus_per_node or world_size)
+        scale_up_domain = int(args.scale_up_domain or gpus_per_node)
+        scale_out = world_size > scale_up_domain
+        if (
+            gpus_per_node <= 0
+            or scale_up_domain <= 0
+            or world_size % gpus_per_node
+            or world_size % scale_up_domain
+        ):
+            raise RuntimeError("MoRI placement is not divisible into complete domains")
+        if scale_out != (args.scope == "scale-out"):
+            raise RuntimeError("MoRI requested scope differs from the EP topology")
+        # Only two shapes are accepted: EP8 in one XGMI domain, or EP16 across
+        # two 8-GPU nodes over XGMI + RDMA.
+        if not scale_out and (
+            world_size != 8
+            or gpus_per_node != 8
+            or scale_up_domain != 8
+            or args.scale_up_transport != "xgmi"
+            or args.scale_out_transport
+            or args.transport != "xgmi"
+        ):
+            raise RuntimeError("MoRI scale-up is pinned to EP8 over one XGMI domain")
+        if scale_out and (
+            world_size != 16
+            or gpus_per_node != 8
+            or scale_up_domain != 8
+            or args.scale_up_transport != "xgmi"
+            or args.scale_out_transport != "rdma"
+            or args.transport != "xgmi-rdma"
+        ):
+            raise RuntimeError(
+                "MoRI InterNodeV1 is pinned to EP16 over two 8-GPU XGMI/RDMA nodes"
+            )
+        # Defaults for the intranode kernel; the AsyncLL / InterNodeV1 branches
+        # below overwrite the block and warp settings.
+        self.block_num = self._block_target = 80
+        self.rdma_block_num = 0
+        self.num_qps = 1
+        self._block_floored = False
+        self._tuned_source = "default-80"
+        self.dispatch_warps = 16
+        self.combine_warps = 8
+
+        # MI355X uses the direct intranode kernel. MI325X uses MoRI's split
+        # AsyncLL send/receive kernel as its normal-mode XGMI transport.
+        kernel_request = os.environ.get("CX_MORI_KERNEL_TYPE", "intranode").strip().lower()
+        self._kernel_type = None
+        self._kernel_type_label = "IntraNode"
+        self._async_ll = False
+        self._inter_node = False
+        if kernel_request in ("asyncll", "async_ll", "async-ll"):
+            if scale_out:
+                raise RuntimeError("MoRI EP16 must use InterNodeV1, not AsyncLL")
+            kernel_enum = getattr(mori.ops, "EpDispatchCombineKernelType", None)
+            if kernel_enum is None or not hasattr(kernel_enum, "AsyncLL"):
+                raise RuntimeError(
+                    "CX_MORI_KERNEL_TYPE=asyncll requires "
+                    "EpDispatchCombineKernelType.AsyncLL"
+                )
+            self._kernel_type = kernel_enum.AsyncLL
+            self._kernel_type_label = "AsyncLL"
+            self._async_ll = True
+            self.block_num = self._block_target = 64
+            self.dispatch_warps = self.combine_warps = 8
+            self._tuned_source = "upstream-asyncll-64x8-external-input"
+        elif kernel_request in ("internode-v1", "internode_v1", "internodev1"):
+            if not scale_out:
+                raise RuntimeError("MoRI InterNodeV1 is valid only for scale-out EP16")
+            kernel_enum = getattr(mori.ops, "EpDispatchCombineKernelType", None)
+            if kernel_enum is None or not hasattr(kernel_enum, "InterNodeV1"):
+                raise RuntimeError(
+                    "CX_MORI_KERNEL_TYPE=internode-v1 requires "
+                    "EpDispatchCombineKernelType.InterNodeV1"
+                )
+            self._kernel_type = kernel_enum.InterNodeV1
+            self._kernel_type_label = "InterNodeV1"
+            self._inter_node = True
+            self.block_num = self._block_target = 96
+            self.rdma_block_num = 64
+            self.dispatch_warps = self.combine_warps = 8
+            self._tuned_source = "upstream-internode-v1-96-64x8-qps1"
+        elif kernel_request not in ("intranode", "intra_node", "intra-node", ""):
+            raise RuntimeError(
+                f"unknown CX_MORI_KERNEL_TYPE={kernel_request!r} "
+                "(expected intranode|asyncll|internode-v1)"
+            )
+        elif scale_out:
+            # Reached only for an intranode (or empty) request on a scale-out topology.
+            raise RuntimeError("MoRI scale-out EP16 requires CX_MORI_KERNEL_TYPE=internode-v1")
+        self.kernel_generation = (
+            "inter-node-v1" if self._inter_node
+            else "async-ll" if self._async_ll
+            else "intranode"
+        )
+        self._external_input = (
+            self._async_ll or self._inter_node or self._direct_cast_combine
+        )
+        # Registered-input MoRI copies expert output into a device-side symmetric buffer. External
+        # input kernels consume the dispatch output directly, so their stage is not applicable.
+        self.stage_device_work = self._fp8_dispatch or not self._external_input
+
+        # Hand the default (WORLD) process group to MoRI's shmem layer under the name "default".
+        world_group = torch.distributed.group.WORLD
+        torch._C._distributed_c10d._register_process_group("default", world_group)
+        mori.shmem.shmem_torch_process_group_init("default")
+        realized_qps = int(mori.shmem.shmem_num_qp_per_pe())
+        if realized_qps < self.num_qps:
+            raise RuntimeError(
+                f"MoRI realized {realized_qps} QPs per PE; {self.num_qps} required"
+            )
+
+        self._cap = self.buffer_cap(args)
+        # Resolve the torch dtype for the dispatch payload; getattr yields None when
+        # the torch build lacks the FP8 dtype, which is rejected just below.
+        dispatch_dtype = (
+            getattr(
+                torch,
+                "float8_e4m3fn"
+                if fp8_format == "fp8-e4m3fn"
+                else "float8_e4m3fnuz",
+                None,
+            )
+            if self._fp8_dispatch
+            else torch.bfloat16
+        )
+        if dispatch_dtype is None:
+            raise ep_precision.PrecisionError(
+                f"active torch build does not expose {fp8_format}"
+            )
+        # One scale per 128 hidden elements for FP8 dispatch; no scales otherwise.
+        scale_dim = args.hidden // 128 if self._fp8_dispatch else 0
+        if self._fp8_dispatch and args.hidden % 128:
+            raise ep_precision.PrecisionError(
+                "MoRI native FP8 dispatch requires hidden divisible by 128"
+            )
+        config_kwargs = {
+            "data_type": dispatch_dtype,
+            "rank": rank,
+            "world_size": world_size,
+            "hidden_dim": args.hidden,
+            "scale_dim": scale_dim,
+            "scale_type_size": 4 if self._fp8_dispatch else 1,
+            "max_token_type_size": (
+                torch.tensor([], dtype=torch.bfloat16).element_size()
+                if self._inter_node
+                else torch.tensor([], dtype=torch.float32).element_size()
+            ),
+            "max_num_inp_token_per_rank": max(512, self._cap),
+            "num_experts_per_rank": self.experts_per_rank,
+            "num_experts_per_token": args.topk,
+            "use_external_inp_buf": self._external_input,
+            "quant_type": (
+                "fp8_direct_cast" if self._direct_cast_combine else "none"
+            ),
+        }
+        # Kernel-specific keys are only passed when the matching kernel was selected.
+        if self._kernel_type is not None:
+            config_kwargs["kernel_type"] = self._kernel_type
+        if self._async_ll:
+            config_kwargs["max_total_recv_tokens"] = 0
+        if self._async_ll or self._inter_node:
+            config_kwargs["block_num"] = self.block_num
+            config_kwargs["warp_num_per_block"] = self.dispatch_warps
+        if self._inter_node:
+            config_kwargs.update({
+                "gpu_per_node": gpus_per_node,
+                "rdma_block_num": self.rdma_block_num,
+                "num_qp_per_pe": self.num_qps,
+            })
+        self.config = mori.ops.EpDispatchCombineConfig(**config_kwargs)
+        # Read the values back from the constructed config; any attribute that is
+        # missing (None) or different from the request fails the case.
+        expected_config = {
+            "data_type": dispatch_dtype,
+            "scale_dim": scale_dim,
+            "scale_type_size": 4 if self._fp8_dispatch else 1,
+            "use_external_inp_buf": self._external_input,
+            "quant_type": config_kwargs["quant_type"],
+        }
+        if self._async_ll or self._inter_node:
+            expected_config.update({
+                "block_num": self.block_num,
+                "warp_num_per_block": self.dispatch_warps,
+            })
+        if self._inter_node:
+            expected_config.update({
+                "gpu_per_node": 8,
+                "rdma_block_num": 64,
+                "num_qp_per_pe": 1,
+            })
+        if any(getattr(self.config, key, None) != value for key, value in expected_config.items()):
+            raise RuntimeError("MoRI requested launch/topology configuration was not realized")
+        # The newer pinned MoRI revision can otherwise replace explicit values
+        # with token-dependent tuning rules from the image.
+        os.environ["MORI_EP_LAUNCH_CONFIG_MODE"] = "MANUAL"
+        self.op = mori.ops.EpDispatchCombineOp(self.config)
+        if getattr(self.op, "launch_config_mode", None) != "MANUAL":
+            raise RuntimeError("MoRI explicit launch configuration was not applied")
+
+        # MORI_COMMIT is optional: the commit is compared only when the variable is set and non-empty.
+        expected_mori_commit = os.environ.get("MORI_COMMIT")
+        mori_commit = _mori_source_commit()
+        if expected_mori_commit and mori_commit != expected_mori_commit:
+            raise RuntimeError("MoRI image source revision differs from canonical provenance")
+        self.backend_provenance = {
+            "mori_commit": mori_commit,
+            "api": (
+                "mori.ops.EpDispatchCombineOp/external-input"
+                if self._external_input
+                else "mori.ops.EpDispatchCombineOp/registered-input"
+            ),
+            "mode": "normal",
+            "dispatch_dtype": ep_precision.communication_format(
+                self.communication_precision, "dispatch"
+            ),
+            "combine_dtype": ep_precision.communication_format(
+                self.communication_precision, "combine"
+            ),
+            "kernel_type": self._kernel_type_label,
+            "enable_sdma": os.environ.get("MORI_ENABLE_SDMA"),
+            "heap_size": os.environ.get("MORI_SHMEM_HEAP_SIZE"),
+            "max_num_inp_token_per_rank": max(512, self._cap),
+            "max_total_recv_tokens": config_kwargs.get("max_total_recv_tokens"),
+            "gpus_per_node": gpus_per_node,
+            "rdma_block_num": self.rdma_block_num,
+            "use_external_inp_buf": self._external_input,
+            "num_qps": self.num_qps,
+            "resource_mode": "fixed-profile",
+            "block_num": self.block_num,
+            "block_num_target": self._block_target,
+            "block_num_floored": self._block_floored,
+            "dispatch_warps": self.dispatch_warps,
+            "combine_warps": self.combine_warps,
+            "device_cus": device_cus,
+            # Ratio of launched blocks to the device's multiprocessor count; not reported for AsyncLL.
+            "sm_fraction": None if self._async_ll else self.block_num / device_cus,
+            "tuned_source": self._tuned_source,
+        }
+
+    def buffer_cap(self, args):
+        """Return the fixed per-rank token cap (512); ``args`` is not consulted."""
+        fixed_token_cap = 512
+        return fixed_token_cap
+
+    def make_problem(self, T, idx, weights, x):
+        """Bundle one benchmark problem into a ``SimpleNamespace``.
+
+        The activations ``x`` are encoded with ``ep_precision.encode_dispatch``.
+        With FP8 dispatch the first element of the encoding's ``native_input`` is
+        the tensor sent on the wire; otherwise ``x`` itself is sent. Routing
+        indices are cast to int32 and gate weights to float32, each exposed under
+        two attribute names. When the encoding has no scales, an empty ``(T, 0)``
+        uint8 tensor on ``self.device`` stands in for them.
+        """
+        encoded = ep_precision.encode_dispatch(
+            torch, x, self.communication_precision
+        )
+        routed_experts = idx.to(torch.int32)
+        routed_weights = weights.to(torch.float32)
+
+        if self._fp8_dispatch:
+            wire_input = encoded.native_input[0]
+        else:
+            wire_input = x
+
+        problem = types.SimpleNamespace(
+            T=T,
+            x=x,
+            dispatch_x=wire_input,
+            oracle_x=encoded.semantic,
+            dispatch_precision_evidence=encoded.evidence,
+            topk_idx=routed_experts,
+            topk_weights=routed_weights,
+            indices=routed_experts,
+            weights=routed_weights,
+        )
+        if encoded.scales is not None:
+            problem.scales = encoded.scales
+        else:
+            problem.scales = torch.empty((T, 0), dtype=torch.uint8, device=self.device)
+        return problem
+
+    def dispatch(self, p):
+        """Run the MoRI dispatch for problem ``p`` and return a handle namespace.
+
+        The operator is called with the problem's wire input, weights, scales and
+        indices plus this backend's block/warp settings. With the AsyncLL kernel
+        the separate ``dispatch_recv`` step is issued afterwards. The handle holds
+        the five dispatch results (``recv_num`` is element 0 of the returned
+        count) and an empty ``combine_input`` slot.
+        """
+        launch = {
+            "block_num": self.block_num,
+            "rdma_block_num": self.rdma_block_num,
+            "warp_per_block": self.dispatch_warps,
+        }
+        results = self.op.dispatch(p.dispatch_x, p.weights, p.scales, p.indices, **launch)
+        recv_payload, recv_weights, recv_scales, recv_indices, recv_count = results
+        if self._async_ll:
+            self.op.dispatch_recv(warp_per_block=self.dispatch_warps)
+        handle = types.SimpleNamespace(
+            dispatch_output=recv_payload,
+            dispatch_weights=recv_weights,
+            dispatch_scales=recv_scales,
+            dispatch_indices=recv_indices,
+            recv_num=recv_count[0],
+            combine_input=None,
+        )
+        return handle
+
+    def stage(self, p, h):
+        """Prepare ``h.combine_input`` from the received dispatch payload.
+
+        ``p.recv_tokens`` must already be an int within ``[0, rows of
+        h.dispatch_output]``; otherwise a ``RuntimeError`` is raised. The semantic
+        (BF16 or pass-through) view of the received rows becomes the combine
+        input. For registered-input kernels the first ``recv_tokens`` rows are
+        additionally copied into MoRI's registered combine buffer, which then
+        replaces ``h.combine_input``. Always returns ``None``.
+        """
+        rows = getattr(p, "recv_tokens", None)
+        count_is_valid = isinstance(rows, int) and 0 <= rows <= h.dispatch_output.size(0)
+        if not count_is_valid:
+            raise RuntimeError("MoRI receive count was not validated before staging")
+        h.combine_input = self._semantic_recv(h, rows)
+        if not self._external_input:
+            staged = h.combine_input
+            registered = self.op.get_registered_combine_input_buffer(
+                torch.bfloat16, hidden_dim=staged.size(1)
+            )
+            registered[:rows, :].copy_(staged[:rows, :])
+            h.combine_input = registered
+        return None
+
+    def combine(self, p, h):
+        """Run the MoRI combine and return the first ``p.T`` combined rows.
+
+        AsyncLL combines with the problem's original routing indices; the other
+        kernels use the indices returned by dispatch. No weights are passed to the
+        operator. With AsyncLL the separate ``combine_recv`` step follows.
+        """
+        if self._async_ll:
+            routing = p.indices
+        else:
+            routing = h.dispatch_indices
+        launch = {
+            "block_num": self.block_num,
+            "rdma_block_num": self.rdma_block_num,
+            "warp_per_block": self.combine_warps,
+        }
+        merged, _unused_weights = self.op.combine(h.combine_input, None, routing, **launch)
+        if self._async_ll:
+            self.op.combine_recv(warp_per_block=self.combine_warps)
+        return merged[:p.T]
+
+    def inspect_dispatch(self, p, h):
+        """Expose the received dispatch payload and routing metadata for checking.
+
+        Reads the receive count from the handle, verifies it fits every dispatch
+        tensor, and projects the received expert ids / weights onto this rank's
+        experts via ``_project_local_metadata``.
+
+        Raises:
+            RuntimeError: dispatch returned no gate weights, or the receive count
+                is negative or exceeds the first dimension of a dispatch tensor.
+        """
+        count = self.recv_tokens(h)
+        if h.dispatch_weights is None:
+            raise RuntimeError("MoRI dispatch did not expose gate weights")
+        if count < 0:
+            raise RuntimeError("MoRI receive count exceeds dispatch metadata")
+        for tensor in (h.dispatch_output, h.dispatch_indices, h.dispatch_weights):
+            if tensor.ndim == 0 or count > tensor.size(0):
+                raise RuntimeError("MoRI receive count exceeds dispatch metadata")
+
+        received_ids = h.dispatch_indices[:count].to(torch.int64)
+        received_weights = h.dispatch_weights[:count].to(torch.float32)
+        expert_ids, weights, local_expert_ids = _project_local_metadata(
+            torch, received_ids, received_weights, self.rank, self.experts_per_rank
+        )
+
+        semantic_payload = self._semantic_recv(h, count)[:count]
+        wire_payload = h.dispatch_output[:count]
+        if h.dispatch_scales is not None:
+            received_scales = h.dispatch_scales[:count]
+        else:
+            received_scales = None
+        per_expert_counts = torch.bincount(
+            local_expert_ids, minlength=self.experts_per_rank
+        )
+        return types.SimpleNamespace(
+            payload=semantic_payload,
+            encoded_payload=wire_payload,
+            scales=received_scales,
+            expert_ids=expert_ids,
+            weights=weights,
+            local_expert_counts=per_expert_counts,
+            ordering_contract="mori-global-topk-masked-v1",
+        )
+
+    def combine_transformed(self, p, h, transformed):
+        """Combine a caller-supplied tensor in place of the staged dispatch output.
+
+        ``transformed`` is cast to BF16 and stored as ``h.combine_input`` before
+        the receive count is checked. ``p.recv_tokens`` must be an int within
+        ``[0, rows of the cast tensor]``. Registered-input kernels get the first
+        ``recv_tokens`` rows copied into MoRI's registered combine buffer. The
+        result of ``combine`` is returned.
+        """
+        candidate = transformed.to(torch.bfloat16)
+        h.combine_input = candidate
+        rows = getattr(p, "recv_tokens", None)
+        count_is_valid = isinstance(rows, int) and 0 <= rows <= candidate.size(0)
+        if not count_is_valid:
+            raise RuntimeError("MoRI receive count was not validated before transformed combine")
+        if self._external_input:
+            return self.combine(p, h)
+        registered = self.op.get_registered_combine_input_buffer(
+            torch.bfloat16, hidden_dim=candidate.size(1)
+        )
+        registered[:rows, :].copy_(candidate[:rows, :])
+        h.combine_input = registered
+        return self.combine(p, h)
+
+    def recv_tokens(self, h):
+        """Return the handle's receive count as a Python ``int``."""
+        scalar_count = h.recv_num.item()
+        return int(scalar_count)
+
+    def _semantic_recv(self, h, rows):
+        """Return the received payload in its semantic (pre-wire) representation.
+
+        Without FP8 dispatch the raw dispatch output is returned unchanged. With
+        FP8 dispatch the first ``rows`` rows are dequantized into a BF16 tensor of
+        the same shape as the dispatch output (rows past ``rows`` are left
+        uninitialized) and cached on the handle; later calls must pass the same
+        ``rows``.
+
+        Raises:
+            ep_precision.PrecisionError: FP8 dispatch returned no scales.
+            RuntimeError: a cached handle is queried with a different ``rows``.
+        """
+        if not self._fp8_dispatch:
+            return h.dispatch_output
+        if hasattr(h, "recv_semantic"):
+            # Already decoded for this handle: the row count must not change.
+            if h.recv_semantic_rows != rows:
+                raise RuntimeError("MoRI receive count changed for one dispatch handle")
+            return h.recv_semantic
+        if h.dispatch_scales is None:
+            raise ep_precision.PrecisionError(
+                "MoRI FP8 dispatch did not return scaling factors"
+            )
+        received = h.dispatch_output
+        decoded = torch.empty(
+            received.shape, dtype=torch.bfloat16, device=received.device
+        )
+        decoded[:rows].copy_(
+            ep_precision.dequantize_dispatch(
+                torch,
+                received[:rows],
+                h.dispatch_scales[:rows],
+                self.communication_precision["dispatch"],
+            )
+        )
+        h.recv_semantic = decoded
+        h.recv_semantic_rows = rows
+        return decoded
+
+    def oracle_dispatch_payload(self, payload):
+        """Return the ``semantic`` form of ``payload`` after dispatch encoding."""
+        encoded = ep_precision.encode_dispatch(torch, payload, self.communication_precision)
+        return encoded.semantic
+
+    def precision_evidence(self, problem, view=None):
+        """Delegate to ``ep_precision.precision_evidence`` with this backend's profile."""
+        evidence = ep_precision.precision_evidence(
+            torch,
+            profile_id=self.precision_profile_id,
+            profile=self.communication_precision,
+            problem=problem,
+            view=view,
+        )
+        return evidence
+
+    def finalize(self, rc):
+        """Flush output and terminate the process immediately with ``rc``.
+
+        A final barrier is attempted first on a best-effort basis; any failure is
+        ignored. Exit codes outside ``0..255`` are replaced by ``1``. The process
+        ends through ``os._exit``, so this method never returns.
+        """
+        try:
+            dist.barrier()
+        except Exception:
+            # Best effort only: a failed barrier must not prevent the exit below.
+            pass
+        for stream in (sys.stdout, sys.stderr):
+            stream.flush()
+        exit_code = rc if 0 <= rc <= 255 else 1
+        os._exit(exit_code)
diff --git a/experimental/CollectiveX/tests/ep_nccl.py b/experimental/CollectiveX/tests/ep_nccl.py
new file mode 100644
index 000000000..d440f8ecf
--- /dev/null
+++ b/experimental/CollectiveX/tests/ep_nccl.py
@@ -0,0 +1,223 @@
+"""CollectiveX NCCL all-to-all expert-parallel reference backend.
+
+The canonical "token-shuffle" EP built on torch.distributed's NCCL ``all_to_all_single``. Like the
+DeepEP-family APIs, dispatch sends one hidden-state copy to each distinct destination rank, even when
+multiple selected experts live on that rank. Combine reverses the shuffle and sums those rank copies.
+
+Why this exists alongside DeepEP/UCCL/MoRI: it is the portable collective reference baseline for the
+same rank-deduplicated payload and routing metadata. It keeps the library comparison anchored to the
+platform collective stack without claiming the custom fused kernels use the same transport algorithm.
+
+Scope: BF16, normal mode, layout-and-dispatch-v1. The timed dispatch includes layout, count exchange,
+payload, rank-masked expert indices, gate weights, and source-token metadata; combine returns only
+the activation payload. RCCL exposes the same API. The v1 AMD matrix uses this backend at EP8 and EP16.
+"""
+
+import os
+import re
+import types
+
+import torch
+import torch.distributed as dist
+import contracts
+import ep_precision
+
+
+def _runtime_collective(args, torch_module) -> tuple[str, str]:
+    expected = "rccl" if torch_module.version.hip else "nccl"
+    fingerprint = getattr(args, "runtime_fingerprint", None)
+    collective = fingerprint.get("collective_library") if isinstance(fingerprint, dict) else None
+    if (
+        not isinstance(collective, dict)
+        or collective.get("kind") != expected
+        or not isinstance(collective.get("version"), str)
+        or not re.fullmatch(r"[0-9]+\.[0-9]+\.[0-9]+", collective["version"])
+    ):
+        raise RuntimeError("loaded collective runtime identity is unavailable")
+    return expected, collective["version"]
+
+
+class NCCLBackend:
+    name = "nccl-ep"
+    stage_device_work = False
+    combine_needs_redispatch = False  # dispatch saves the permutation + splits
+    combine_weight_semantics = "unweighted-rank-sum"
+
+    def __init__(self, args, rank, world_size, local_rank, device):
+        self.args = args
+        self.rank = rank
+        self.world_size = world_size
+        self.device = device
+        self.experts = args.experts
+        self.mode = getattr(args, "mode", "normal")
+        if self.mode != "normal":
+            raise ep_precision.PrecisionError("NCCL/RCCL EP supports normal mode only")
+        self.precision_profile_id, self.communication_precision = (
+            ep_precision.resolve_precision(
+                args,
+                backend=self.name,
+                mode=self.mode,
+                supported_profiles={"d-bf16.c-bf16"},
+            )
+        )
+        if args.experts % world_size:
+            raise ValueError(f"experts({args.experts}) must divide world_size({world_size})")
+        self.experts_per_rank = args.experts // world_size
+        self.tolerance = 5e-2  # bf16 round-trip
+        _library, _version = _runtime_collective(args, torch)
+        if args.scale_out_transport:
+            hcas = os.environ.get("NCCL_IB_HCA", "")
+            if os.environ.get("NCCL_NET") != "IB" or not re.fullmatch(
+                r"=[A-Za-z][A-Za-z0-9_.-]{0,31}(?::[1-9][0-9]*)?"
+                r"(?:,[A-Za-z][A-Za-z0-9_.-]{0,31}(?::[1-9][0-9]*)?)*",
+                hcas,
+            ):
+                raise RuntimeError("scale-out collective transport is not pinned to RDMA")
+        self.kernel_generation = contracts.collective_kernel_generation(_library)
+        self.backend_provenance = {
+            "backend": f"{_library}-all2all",
+            "backend_lineage": _library,
+            "collective_library": _library,
+            "nccl_version": _version,
+            "transport": f"{_library}-all_to_all_single",
+            "resource_mode": "fixed-profile",
+            "num_sms": None,
+            "device_sms": torch.cuda.get_device_properties(device).multi_processor_count,
+            "tuned_source": "nccl-collective",
+            "reference_semantics": "rank-deduplicated-payload-plus-routing-metadata-v2",
+            "routing_metadata": "expert-index-gate-weight-source-token",
+            "dispatch_dtype": "bf16",
+            "combine_dtype": "bf16",
+        }
+
+    def buffer_cap(self, args):
+        return None  # no fixed pre-allocated buffer; all-to-all sizes itself per step
+
+    def make_problem(self, T, idx, weights, x):
+        encoding = ep_precision.encode_dispatch(
+            torch, x, self.communication_precision
+        )
+        # idx[T,topk] int64, weights[T,topk] f32, x[T,hidden] bf16 — the shared routing-trace slice.
+        return types.SimpleNamespace(
+            T=T,
+            x=x,
+            oracle_x=encoding.semantic,
+            dispatch_precision_evidence=encoding.evidence,
+            topk_idx=idx.to(torch.int64),
+            topk_weights=weights.to(torch.float32),
+            layout=None,
+        )
+
+    def dispatch(self, p):
+        ws = self.world_size
+        x = p.x  # [T, H] bf16
+        idx = p.topk_idx  # [T, topk]
+        T, H = int(x.shape[0]), int(x.shape[1])
+        dev = x.device
+        # DeepEP dispatches one token per destination rank, not one copy per expert. Build the same
+        # rank-deduplicated routing map so NCCL traffic and combine semantics are comparable.
+        destinations = (idx // self.experts_per_rank).clamp_(0, ws - 1)
+        present = torch.zeros((T, ws), dtype=torch.bool, device=dev)
+        present.scatter_(1, destinations, True)
+        flat_token, flat_dest = present.nonzero(as_tuple=True)
+        # Group rank copies by destination (stable -> deterministic, invertible permutation).
+        order = torch.argsort(flat_dest, stable=True)
+        ordered_token = flat_token.index_select(0, order)
+        ordered_dest = flat_dest.index_select(0, order)
+        send_counts = torch.bincount(flat_dest, minlength=ws)  # [ws]
+        send_x = x.index_select(0, ordered_token).contiguous()
+        send_topk_idx = idx.index_select(0, ordered_token).contiguous()
+        expert_start = ordered_dest.unsqueeze(1) * self.experts_per_rank
+        local_mask = ((send_topk_idx >= expert_start)
+                      & (send_topk_idx < expert_start + self.experts_per_rank))
+        send_topk_idx = torch.where(
+            local_mask, send_topk_idx - expert_start, torch.full_like(send_topk_idx, -1)
+        )
+        send_topk_weights = p.topk_weights.index_select(0, ordered_token).contiguous()
+        send_topk_weights.masked_fill_(~local_mask, 0)
+        send_src_metadata = (ordered_token.to(torch.int64) | (self.rank << 32)).contiguous()
+        # Exchange per-rank counts so every rank can size its receive buffer.
+        recv_counts = torch.empty_like(send_counts)
+        dist.all_to_all_single(recv_counts, send_counts)
+        sc = send_counts.tolist()
+        rc = recv_counts.tolist()
+        total_recv = int(sum(rc))
+        recv_x = torch.empty((total_recv, H), dtype=x.dtype, device=dev)
+        recv_topk_idx = torch.empty((total_recv, int(idx.shape[1])), dtype=idx.dtype, device=dev)
+        recv_topk_weights = torch.empty((total_recv, int(idx.shape[1])),
+                                        dtype=p.topk_weights.dtype, device=dev)
+        recv_src_metadata = torch.empty((total_recv,), dtype=torch.int64, device=dev)
+        # Dispatch the uneven per-rank splits over the configured collective transport.
+        dist.all_to_all_single(recv_x, send_x, rc, sc)
+        dist.all_to_all_single(recv_topk_idx, send_topk_idx, rc, sc)
+        dist.all_to_all_single(recv_topk_weights, send_topk_weights, rc, sc)
+        dist.all_to_all_single(recv_src_metadata, send_src_metadata, rc, sc)
+        return types.SimpleNamespace(
+            recv_x=recv_x, combine_input=None, order=order, flat_token=flat_token,
+            recv_topk_idx=recv_topk_idx,
+            recv_topk_weights=recv_topk_weights, recv_src_rank=recv_src_metadata >> 32,
+            recv_src_token=recv_src_metadata & ((1 << 32) - 1), send_counts=sc, recv_counts=rc,
+            T=T, H=H, topk=int(idx.shape[1]), total_recv=total_recv)
+
+    def stage(self, p, h):
+        # No expert compute: the expert "output" is the received tokens as-is (the round-trip identity).
+        h.combine_input = h.recv_x
+        return None
+
+    def combine(self, p, h):
+        # Reverse all-to-all: ship expert outputs back to their origin ranks (swap the split lists).
+        send_back = torch.empty((int(h.order.shape[0]), h.H), dtype=h.combine_input.dtype,
+                                device=h.combine_input.device)
+        dist.all_to_all_single(send_back, h.combine_input.contiguous(),
+                               h.send_counts, h.recv_counts)
+        # send_back is in send (sorted) order; invert the argsort to token-copy order.
+        copies = torch.empty_like(send_back)
+        copies[h.order] = send_back
+        # Sum one copy per destination rank under this reference's explicit unweighted contract.
+        out = torch.zeros((h.T, h.H), dtype=torch.float32, device=send_back.device)
+        out.index_add_(0, h.flat_token, copies.float())
+        return out.to(p.x.dtype)
+
+    def inspect_dispatch(self, p, h):
+        valid = h.recv_topk_idx >= 0
+        expert_ids = torch.where(
+            valid,
+            h.recv_topk_idx + self.rank * self.experts_per_rank,
+            h.recv_topk_idx,
+        )
+        return types.SimpleNamespace(
+            payload=h.recv_x,
+            expert_ids=expert_ids,
+            weights=h.recv_topk_weights.masked_fill(~valid, 0),
+            local_expert_counts=torch.bincount(
+                h.recv_topk_idx[valid], minlength=self.experts_per_rank
+            ),
+            ordering_contract="source-rank-major-stable-v1",
+        )
+
+    def combine_transformed(self, p, h, transformed):
+        h.combine_input = transformed.to(h.recv_x.dtype)
+        return self.combine(p, h)
+
+    def recv_tokens(self, h):
+        return int(h.total_recv)
+
+    def oracle_dispatch_payload(self, payload):
+        return payload
+
+    def precision_evidence(self, problem, view=None):
+        return ep_precision.precision_evidence(
+            torch,
+            profile_id=self.precision_profile_id,
+            profile=self.communication_precision,
+            problem=problem,
+            view=view,
+        )
+
+    def finalize(self, rc):
+        try:
+            dist.barrier()
+            dist.destroy_process_group()
+        except Exception:
+            pass
+        return rc
diff --git a/experimental/CollectiveX/tests/ep_precision.py b/experimental/CollectiveX/tests/ep_precision.py
new file mode 100644
index 000000000..c4f19f54f
--- /dev/null
+++ b/experimental/CollectiveX/tests/ep_precision.py
@@ -0,0 +1,371 @@
+#!/usr/bin/env python3
+"""Native communication-precision helpers for CollectiveX EP adapters."""
+from __future__ import annotations
+
+import inspect
+from copy import deepcopy
+from dataclasses import dataclass
+from typing import Any, Iterable
+
+import identity
+
+
+class PrecisionError(RuntimeError):
+    """Raised when a requested precision profile cannot be realized by the pinned API."""
+
+
+@dataclass(frozen=True)
+class DispatchEncoding:
+    """One dispatch input plus its post-codec semantic representation."""
+
+    native_input: Any  # input handed to the backend: the raw tensor, or (encoded, scales)
+    encoded_payload: Any | None  # FP8 payload; None when dispatch is unquantized
+    scales: Any | None  # per-block scales; None when dispatch is unquantized
+    semantic: Any  # the values expected after the codec round trip
+    evidence: dict[str, Any]  # codec evidence dict in the _axis_evidence layout
+
+
+def resolve_precision(
+    args,
+    *,
+    backend: str,
+    mode: str,
+    supported_profiles: Iterable[str],
+) -> tuple[str, dict[str, Any]]:
+    """Resolve and validate the exact profile requested for one adapter."""
+    profile_id = (
+        getattr(args, "precision_profile", "")
+        or identity.V1_CONTROL_PRECISION_PROFILE
+    )
+    try:
+        profile = identity.precision_profile(profile_id)
+    except identity.IdentityError as exc:
+        raise PrecisionError(str(exc)) from exc
+    if mode not in profile["modes"]:
+        raise PrecisionError(
+            f"precision profile {profile_id!r} is not valid in mode {mode!r}"
+        )
+    supported = frozenset(supported_profiles)
+    if profile_id not in supported:
+        raise PrecisionError(
+            f"{backend} does not realize precision profile {profile_id!r} in mode {mode!r}"
+        )
+    return profile_id, profile
+
+
+def require_keyword(callable_object, keyword: str, *, api: str) -> None:
+    """Fail closed when a pinned Python API does not expose a required control."""
+    try:
+        parameters = inspect.signature(callable_object).parameters
+    except (TypeError, ValueError) as exc:
+        raise PrecisionError(f"cannot inspect required precision API {api}") from exc
+    if keyword not in parameters:
+        raise PrecisionError(f"required precision API {api} omits {keyword!r}")
+
+
+def communication_format(profile: dict[str, Any], component: str) -> str:
+    """Return ``profile[component]["communication_format"]`` as a str (component: dispatch/combine)."""
+    return str(profile[component]["communication_format"])
+
+
+def is_low_precision_dispatch(profile: dict[str, Any]) -> bool:
+    return communication_format(profile, "dispatch").startswith("fp8-")
+
+
+def is_caller_prequantized(profile: dict[str, Any]) -> bool:  # dispatch origin is the caller
+    return profile["dispatch"]["quantization_origin"] == "caller-prequantized"
+
+
+def uses_logfmt_combine(profile: dict[str, Any]) -> bool:
+    return communication_format(profile, "combine") == "logfmt10"
+
+
+def uses_direct_cast_combine(profile: dict[str, Any]) -> bool:  # combine origin is the direct cast
+    return profile["combine"]["quantization_origin"] == "backend-internal-direct-cast"
+
+
+def _fp8_dtype(torch_module, axis: dict[str, Any]):
+    fmt = axis["communication_format"]
+    attribute = {
+        "fp8-e4m3fn": "float8_e4m3fn",
+        "fp8-e4m3fnuz": "float8_e4m3fnuz",
+    }.get(fmt)
+    if attribute is None:
+        raise PrecisionError(f"unsupported FP8 communication format {fmt!r}")
+    dtype = getattr(torch_module, attribute, None)
+    if dtype is None:
+        raise PrecisionError(f"active torch build does not expose torch.{attribute}")
+    return dtype
+
+
+def _axis_evidence(
+    *,
+    dequantized_semantics: bool,
+    encoded_payload_valid: bool,
+    max_abs_error: float,
+    max_rel_error: float,
+    saturation_count: int,
+    saturation_rate: float,
+    scales_finite: bool | None,
+    scales_positive: bool | None,
+    passed: bool,
+) -> dict[str, Any]:
+    return {
+        "encoded_payload_valid": bool(encoded_payload_valid),
+        "scales_finite": scales_finite,
+        "scales_positive": scales_positive,
+        "dequantized_semantics": bool(dequantized_semantics),
+        "saturation_count": int(saturation_count),
+        "saturation_rate": float(saturation_rate),
+        "max_abs_error": float(max_abs_error),
+        "max_rel_error": float(max_rel_error),
+        "passed": bool(passed),
+    }
+
+
+def exact_axis_evidence() -> dict[str, Any]:
+    """Evidence for an unquantized BF16 communication axis."""
+    return _axis_evidence(
+        encoded_payload_valid=True,
+        scales_finite=None,
+        scales_positive=None,
+        dequantized_semantics=True,
+        saturation_count=0,
+        saturation_rate=0.0,
+        max_abs_error=0.0,
+        max_rel_error=0.0,
+        passed=True,
+    )
+
+
+def _quantize_fp8(torch_module, x, axis: dict[str, Any]) -> DispatchEncoding:  # FP8 codec + evidence
+    group_size = axis["scale_group_size"]
+    if group_size != 128 or axis["scale_dtype"] != "f32":
+        raise PrecisionError("v1 FP8 dispatch requires block-128 FP32 scales")
+    if x.ndim != 2 or x.shape[1] % group_size:
+        raise PrecisionError(
+            "v1 native FP8 dispatch requires a 2D hidden dimension divisible by 128"
+        )
+    dtype = _fp8_dtype(torch_module, axis)
+    fp8_max = float(torch_module.finfo(dtype).max)
+    blocks = x.float().reshape(x.shape[0], x.shape[1] // group_size, group_size)  # [rows, blocks, 128]
+    amax = blocks.abs().amax(dim=-1)  # largest magnitude within each block
+    # Match the pinned DeepEP/HybridEP block codec, including its nonzero scale floor.
+    scales = (amax.clamp_min(1e-4) / fp8_max).to(torch_module.float32)
+    normalized = blocks / scales.unsqueeze(-1)
+    saturation_mask = normalized.abs() > fp8_max  # values the clamp on the next line clips
+    encoded = normalized.clamp(min=-fp8_max, max=fp8_max).to(dtype).reshape_as(x).contiguous()
+    semantic = dequantize_dispatch(
+        torch_module, encoded, scales, axis
+    ).to(x.dtype).contiguous()
+    absolute = (semantic.float() - x.float()).abs()  # round-trip error per element
+    max_abs = float(absolute.max().item()) if absolute.numel() else 0.0  # empty input: zero error
+    reference_max = float(x.float().abs().max().item()) if x.numel() else 0.0
+    max_rel = max_abs / (reference_max + 1e-6)  # relative to the input's peak magnitude
+    saturation_count = int(saturation_mask.sum().item())
+    saturation_rate = saturation_count / max(1, int(x.numel()))  # max(1, ...) guards empty input
+    finite = bool(torch_module.isfinite(scales).all().item())
+    positive = bool((scales > 0).all().item())
+    semantic_ok = bool(  # decoded values are finite and within tolerance of the input
+        torch_module.isfinite(semantic.float()).all().item()
+        and torch_module.allclose(
+            semantic.float(), x.float(), rtol=0.05, atol=0.02
+        )
+    )
+    valid = encoded.dtype == dtype and encoded.shape == x.shape
+    evidence = _axis_evidence(
+        encoded_payload_valid=valid,
+        scales_finite=finite,
+        scales_positive=positive,
+        dequantized_semantics=semantic_ok,
+        saturation_count=saturation_count,
+        saturation_rate=saturation_rate,
+        max_abs_error=max_abs,
+        max_rel_error=max_rel,
+        passed=valid and finite and positive and semantic_ok,
+    )
+    return DispatchEncoding(
+        native_input=(encoded, scales),  # the caller-prequantized input form
+        encoded_payload=encoded,
+        scales=scales,
+        semantic=semantic,
+        evidence=evidence,
+    )
+
+
+def encode_dispatch(torch_module, x, profile: dict[str, Any]) -> DispatchEncoding:
+    """Build caller-prequantized input or a fused-codec oracle outside timing."""
+    axis = profile["dispatch"]
+    origin = axis["quantization_origin"]
+    if origin == "none":
+        return DispatchEncoding(
+            native_input=x,
+            encoded_payload=None,
+            scales=None,
+            semantic=x,
+            evidence=exact_axis_evidence(),
+        )
+    if origin not in {"caller-prequantized", "backend-fused"}:
+        raise PrecisionError(f"unsupported dispatch quantization origin {origin!r}")
+    encoded = _quantize_fp8(torch_module, x, axis)
+    if origin == "backend-fused":
+        return DispatchEncoding(
+            native_input=x,
+            encoded_payload=encoded.encoded_payload,
+            scales=encoded.scales,
+            semantic=encoded.semantic,
+            evidence=encoded.evidence,
+        )
+    return encoded
+
+
+def dequantize_dispatch(
+    torch_module,
+    encoded_payload,
+    scales,
+    axis: dict[str, Any],
+    *,
+    uint8_storage: bool = False,
+):
+    """Decode one native block-scaled FP8 payload to BF16 semantics."""
+    group_size = axis["scale_group_size"]
+    if group_size != 128 or scales is None:
+        raise PrecisionError("FP8 dispatch payload is missing block-128 scales")
+    dtype = _fp8_dtype(torch_module, axis)
+    payload = encoded_payload.view(dtype) if uint8_storage else encoded_payload
+    if payload.dtype != dtype or payload.ndim < 2 or payload.shape[-1] % group_size:
+        raise PrecisionError("native FP8 dispatch payload has an invalid dtype or shape")
+    expected_scale_shape = (*payload.shape[:-1], payload.shape[-1] // group_size)
+    if tuple(scales.shape) != expected_scale_shape or scales.dtype != torch_module.float32:
+        raise PrecisionError("native FP8 dispatch scales have an invalid dtype or shape")
+    values = payload.float().reshape(
+        *payload.shape[:-1], payload.shape[-1] // group_size, group_size
+    )
+    return (values * scales.float().reshape(*expected_scale_shape, 1)).reshape(
+        payload.shape
+    ).to(torch_module.bfloat16).contiguous()
+
+
+def validate_received_encoding(
+    torch_module,
+    *,
+    encoded_payload,
+    scales,
+    semantic,
+    axis: dict[str, Any],
+    uint8_storage: bool = False,
+) -> bool:
+    """Validate that received bytes/scales exactly decode to the semantic view."""
+    try:
+        decoded = dequantize_dispatch(
+            torch_module,
+            encoded_payload,
+            scales,
+            axis,
+            uint8_storage=uint8_storage,
+        )
+    except PrecisionError:
+        return False
+    return bool(torch_module.equal(decoded, semantic))
+
+
+def dequantize_expert_prefixes(
+    torch_module,
+    encoded_payload,
+    scales,
+    axis: dict[str, Any],
+    counts: tuple[int, ...],
+    output,
+):
+    """Decode only valid expert-packed prefixes into a reusable BF16 workspace."""
+    if encoded_payload.ndim != 3 or len(counts) != encoded_payload.shape[0]:
+        raise PrecisionError("expert-packed FP8 receive counts have an invalid shape")
+    if output.shape != encoded_payload.shape or output.dtype != torch_module.bfloat16:
+        raise PrecisionError("expert-packed BF16 stage workspace has an invalid shape")
+    capacity = encoded_payload.shape[1]
+    for expert, count in enumerate(counts):
+        if count < 0 or count > capacity:
+            raise PrecisionError("expert-packed FP8 receive count exceeds capacity")
+        if count:
+            output[expert, :count].copy_(dequantize_dispatch(
+                torch_module,
+                encoded_payload[expert, :count],
+                scales[expert, :count],
+                axis,
+            ))
+    return output
+
+
+def _direct_cast_saturation(torch_module, profile: dict[str, Any], view) -> tuple[int, float]:
+    """Count values clipped by MoRI's unscaled BF16-to-FP8 combine cast."""
+    if not all(hasattr(view, field) for field in ("payload", "expert_ids", "weights")):
+        return 0, 0.0  # view lacks a field needed for the estimate
+    expert_ids = view.expert_ids
+    weights = view.weights
+    if expert_ids.ndim != 2 or weights.shape != expert_ids.shape:
+        return 0, 0.0  # routing metadata is not a matching pair of 2D tensors
+    valid = expert_ids >= 0  # negative ids are treated as invalid slots
+    expert = expert_ids.clamp(min=0).to(torch_module.int64)
+    gate = weights.to(torch_module.float32).masked_fill(~valid, 0)  # invalid slots contribute 0
+    # NOTE(review): the per-expert constants below presumably mirror the harness's transformed-combine oracle — confirm.
+    scale = ((expert * 17 + 5) % 31 + 1).to(torch_module.float32) / 32
+    offset_a = (((expert * 29 + 7) % 37) - 18).to(torch_module.float32) / 64
+    offset_b = (((expert * 43 + 11) % 41) - 20).to(torch_module.float32) / 128
+    columns = torch_module.arange(
+        view.payload.shape[1], device=view.payload.device, dtype=torch_module.int64
+    )
+    pattern = (((columns * 13) % 17) - 8).to(torch_module.float32) / 8  # per-column multiplier
+    transformed = (
+        view.payload.float() * (gate * scale).sum(dim=1, keepdim=True)
+        + (gate * offset_a).sum(dim=1, keepdim=True)
+        + (gate * offset_b).sum(dim=1, keepdim=True) * pattern.unsqueeze(0)
+    )
+    dtype = _fp8_dtype(torch_module, profile["combine"])
+    fp8_max = float(torch_module.finfo(dtype).max)
+    count = int((transformed.abs() > fp8_max).sum().item())  # magnitudes above the FP8 max
+    return count, count / max(1, int(transformed.numel()))  # (count, rate); max(1, ...) guards empty
+
+
+def precision_evidence(
+    torch_module,
+    *,
+    profile_id: str,
+    profile: dict[str, Any],
+    problem,
+    view=None,
+    uint8_storage: bool = False,
+) -> dict[str, Any]:
+    """Return schema-shaped codec evidence; the harness adds combine-oracle errors."""
+    dispatch = deepcopy(problem.dispatch_precision_evidence)
+    if (
+        is_low_precision_dispatch(profile)
+        and view is not None
+        and all(hasattr(view, field) for field in ("encoded_payload", "scales", "payload"))
+    ):
+        valid = validate_received_encoding(
+            torch_module,
+            encoded_payload=view.encoded_payload,
+            scales=view.scales,
+            semantic=view.payload,
+            axis=profile["dispatch"],
+            uint8_storage=uint8_storage,
+        )
+        dispatch["encoded_payload_valid"] = (
+            dispatch["encoded_payload_valid"] and valid
+        )
+        dispatch["passed"] = dispatch["passed"] and valid
+    combine = exact_axis_evidence()
+    # Internal quantizers are validated by native configuration here. The harness
+    # replaces the error fields and pass bit with the transformed-combine oracle.
+    if communication_format(profile, "combine") != "bf16":
+        combine["scales_finite"] = None
+        combine["scales_positive"] = None
+    if uses_direct_cast_combine(profile) and view is not None:
+        count, rate = _direct_cast_saturation(torch_module, profile, view)
+        combine["saturation_count"] = count
+        combine["saturation_rate"] = rate
+    return {
+        "profile_id": profile_id,
+        "dispatch": dispatch,
+        "combine": combine,
+        "passed": bool(dispatch["passed"] and combine["passed"]),
+    }
diff --git a/experimental/CollectiveX/tests/ep_uccl.py b/experimental/CollectiveX/tests/ep_uccl.py
new file mode 100644
index 000000000..8eb514741
--- /dev/null
+++ b/experimental/CollectiveX/tests/ep_uccl.py
@@ -0,0 +1,535 @@
+#!/usr/bin/env python3
+"""CollectiveX UCCL adapter for native V1 dispatch/combine precision profiles."""
+from __future__ import annotations
+
+import importlib.metadata as metadata
+import json
+import os
+from pathlib import Path
+from pathlib import PurePosixPath
+import sys
+import types
+
+import torch
+import torch.distributed as dist
+import contracts
+import ep_precision
+
+try:
+    import uccl
+    import uccl_deepep
+    from uccl_deepep import Buffer  # type: ignore
+except Exception as exc:  # pragma: no cover - requires the benchmark image
+    print(f"ERROR: uccl.ep import failed: {exc!r}", file=sys.stderr)
+    raise
+
+
+def _uccl_version() -> str:  # Installed uccl version, looked up on a best-effort basis.
+    try:
+        return metadata.version("uccl")
+    except Exception:
+        return getattr(uccl, "__version__", "unknown")  # fall back to the module attribute
+
+
+def _uccl_dependency_versions() -> dict[str, str]:
+    versions = {
+        package: metadata.version(package)
+        for package in contracts.UCCL_DEPENDENCY_VERSIONS
+    }
+    if versions != contracts.UCCL_DEPENDENCY_VERSIONS:
+        raise RuntimeError(
+            "UCCL runtime dependency versions differ from the v1 contract"
+        )
+    return versions
+
+
+def _is_uccl_runtime_payload(name: str) -> bool:
+    path = PurePosixPath(name)
+    return (
+        bool(path.parts)
+        and path.parts[0] in {"uccl", "uccl.libs"}
+        and "__pycache__" not in path.parts
+        and path.suffix != ".pyc"
+    )
+
+
+def _python_dependency_evidence(package: str, version: str) -> dict[str, str]:
+    distribution = metadata.distribution(package)
+    runtime_files = []
+    for entry in distribution.files or ():
+        logical = PurePosixPath(entry.as_posix())
+        path = Path(distribution.locate_file(entry))
+        if (
+            logical.parts
+            and logical.parts[0] == package
+            and "__pycache__" not in logical.parts
+            and logical.suffix != ".pyc"
+            and path.is_file()
+        ):
+            runtime_files.append((entry.as_posix(), path))
+    return contracts.content_manifest_evidence(
+        role=f"{package}-distribution",
+        name=f"{package}-{version}",
+        files=runtime_files,
+    )
+
+
+def _loaded_libcudart_evidence(
+    version: str, maps_path: Path = Path("/proc/self/maps")
+) -> dict[str, str]:  # Content evidence for the single mapped libcudart from the pinned package.
+    distribution = metadata.distribution("nvidia-cuda-runtime-cu12")
+    candidates = {  # resolved paths of every libcudart.so* file the distribution owns
+        Path(distribution.locate_file(entry)).resolve()
+        for entry in distribution.files or ()
+        if PurePosixPath(entry.as_posix()).name.startswith("libcudart.so")
+        and Path(distribution.locate_file(entry)).is_file()
+    }
+    candidate_names = {path.name for path in candidates}
+    if not candidates or not candidate_names:
+        raise RuntimeError("pinned CUDA runtime distribution has no libcudart payload")
+
+    loaded: set[Path] = set()  # distinct package-owned libcudart files mapped into this process
+    try:
+        mappings = maps_path.read_text().splitlines()
+    except OSError as exc:
+        raise RuntimeError("cannot inspect mapped UCCL runtime libraries") from exc
+    for mapping in mappings:
+        columns = mapping.split(maxsplit=5)  # maxsplit keeps a pathname with spaces in one field
+        if len(columns) != 6:
+            continue  # fewer than six fields: no pathname on this mapping
+        raw_path = columns[5]
+        deleted = raw_path.endswith(" (deleted)")
+        if deleted:
+            raw_path = raw_path.removesuffix(" (deleted)")
+        mapped = Path(raw_path)
+        if mapped.name not in candidate_names:
+            continue  # filter by file name first; ownership is checked below
+        if deleted or not mapped.is_file():
+            raise RuntimeError(
+                "mapped libcudart is unavailable for content verification"
+            )
+        resolved = mapped.resolve()
+        if resolved not in candidates:  # same file name, but not the pinned package's file
+            raise RuntimeError(
+                "mapped libcudart is not owned by the pinned CUDA runtime package"
+            )
+        loaded.add(resolved)
+    if len(loaded) != 1:  # zero or several distinct files are both rejected
+        raise RuntimeError(
+            "expected exactly one mapped libcudart from the pinned CUDA runtime"
+        )
+    return contracts.content_manifest_evidence(
+        role="cuda-runtime",
+        name=f"nvidia-cuda-runtime-cu12-{version}",
+        files=[("libcudart.so", loaded.pop())],  # recorded under a fixed logical name
+    )
+
+
+def _uccl_build_evidence(
+    version: str, dependency_versions: dict[str, str]
+) -> list[dict[str, str]]:
+    distribution = metadata.distribution("uccl")
+    distribution_files = [
+        (entry.as_posix(), distribution.locate_file(entry))
+        for entry in distribution.files or ()
+        if _is_uccl_runtime_payload(entry.as_posix())
+        and Path(distribution.locate_file(entry)).is_file()
+    ]
+    wrapper_root = Path(uccl_deepep.__file__).resolve().parent
+    wrapper_files = [
+        (path.relative_to(wrapper_root).as_posix(), path)
+        for path in wrapper_root.rglob("*.py")
+        if path.is_file()
+    ]
+    return [
+        contracts.content_manifest_evidence(
+            role="uccl-distribution",
+            name=f"uccl-{version}",
+            files=distribution_files,
+        ),
+        contracts.content_manifest_evidence(
+            role="uccl-wrapper",
+            name="uccl-deepep-wrapper",
+            files=wrapper_files,
+        ),
+        _python_dependency_evidence("intervaltree", dependency_versions["intervaltree"]),
+        _python_dependency_evidence(
+            "sortedcontainers", dependency_versions["sortedcontainers"]
+        ),
+        _loaded_libcudart_evidence(dependency_versions["nvidia-cuda-runtime-cu12"]),
+    ]
+
+
+def _require_cross_rank_equal(value, label: str) -> None:
+    gathered = [None] * dist.get_world_size()
+    dist.all_gather_object(gathered, value)
+    canonical = {json.dumps(item, sort_keys=True, separators=(",", ":")) for item in gathered}
+    if len(canonical) != 1:
+        raise RuntimeError(f"UCCL {label} differs across ranks")
+
+
+def _normal_buffer_sizes(hidden: int, world_size: int) -> tuple[int, int]:
+    """Apply the wrapped DeepEP dispatch/combine sizing contract for this EP world."""
+    hidden_bytes = hidden * torch.tensor([], dtype=torch.bfloat16).element_size()
+    configs = (Buffer.get_dispatch_config(world_size), Buffer.get_combine_config(world_size))
+    num_nvl_bytes = max(
+        int(config.get_nvl_buffer_size_hint(hidden_bytes, world_size)) for config in configs
+    )
+    num_rdma_bytes = max(
+        int(config.get_rdma_buffer_size_hint(hidden_bytes, world_size)) for config in configs
+    )
+    if num_nvl_bytes <= 0 or num_rdma_bytes < 0:
+        raise RuntimeError("UCCL returned invalid normal-mode buffer size hints")
+    return num_nvl_bytes, num_rdma_bytes
+
+
+class UCCLBackend:
+    name = "uccl"
+    stage_device_work = False
+    combine_needs_redispatch = False
+    combine_weight_semantics = "unweighted-rank-sum"
+    oracle_layout = "token-rank"
+    payload_unit = "token-rank"
+
+    def __init__(self, args, rank, world_size, local_rank, device):
+        self.args = args
+        self.rank = rank
+        self.world_size = world_size
+        self.device = device
+        self.mode = getattr(args, "mode", "normal")  # "normal" when args has no mode
+        if self.mode not in {"normal", "low-latency"}:
+            raise ValueError(f"unsupported UCCL mode {self.mode!r}")
+        supported_profiles = {
+            "normal": {
+                "d-bf16.c-bf16",
+                "d-fp8-e4m3fn-b128-f32-prequantized.c-bf16",
+            },
+            "low-latency": {
+                "d-bf16.c-bf16",
+                "d-fp8-e4m3fn-b128-f32-fused.c-bf16",
+                "d-bf16.c-logfmt10-dynamic64",
+                "d-fp8-e4m3fn-b128-f32-fused.c-logfmt10-dynamic64",
+            },
+        }
+        self.precision_profile_id, self.communication_precision = (
+            ep_precision.resolve_precision(
+                args,
+                backend=self.name,
+                mode=self.mode,
+                supported_profiles=supported_profiles[self.mode],
+            )
+        )
+        self._fp8_dispatch = ep_precision.is_low_precision_dispatch(
+            self.communication_precision
+        )
+        self._use_logfmt = ep_precision.uses_logfmt_combine(
+            self.communication_precision
+        )
+        self.stage_device_work = self._fp8_dispatch  # replaces the class default (False)
+        # Build the mode-specific Buffer on the WORLD group and record how it was sized.
+        self.group = dist.group.WORLD
+        device_sms = torch.cuda.get_device_properties(device).multi_processor_count
+        if self.mode == "low-latency":
+            ep_precision.require_keyword(
+                Buffer.low_latency_dispatch,
+                "use_fp8",
+                api="uccl_deepep.Buffer.low_latency_dispatch",
+            )
+            ep_precision.require_keyword(
+                Buffer.low_latency_combine,
+                "use_logfmt",
+                api="uccl_deepep.Buffer.low_latency_combine",
+            )
+            if args.phase != "decode":
+                raise ValueError("UCCL low-latency mode only supports the decode ladder")
+            if args.experts % world_size:
+                raise ValueError("UCCL low-latency experts must divide the EP group")
+            self.combine_needs_redispatch = True
+            self.combine_weight_semantics = "gate-weighted-sum"
+            self.oracle_layout = "expert-packed"
+            self.payload_unit = "token-expert"
+            self.max_tokens_per_rank = 128  # fixed cap; buffer_cap() reports it
+            num_qps_per_rank = args.experts // world_size  # experts per rank (checked above)
+            num_rdma_bytes = Buffer.get_low_latency_rdma_size_hint(
+                self.max_tokens_per_rank, args.hidden, world_size, args.experts
+            )
+            self.buffer = Buffer(
+                self.group,
+                num_nvl_bytes=0,
+                num_rdma_bytes=num_rdma_bytes,
+                low_latency_mode=True,
+                num_qps_per_rank=num_qps_per_rank,
+                allow_nvlink_for_low_latency_mode=True,
+            )
+            self.buffer.clean_low_latency_buffer(
+                self.max_tokens_per_rank, args.hidden, args.experts
+            )
+            resource_provenance = {
+                "requested_num_sms": None,
+                "num_sms": None,
+                "sm_fraction": None,
+                "tuned_source": "uccl-low-latency-fixed-kernel",
+                "num_max_tokens_per_rank": self.max_tokens_per_rank,
+                "num_nvl_bytes": 0,
+                "num_rdma_bytes": num_rdma_bytes,
+            }
+        else:
+            ep_precision.require_keyword(
+                Buffer.dispatch,
+                "async_finish",
+                api="uccl_deepep.Buffer.dispatch",
+            )
+            ep_precision.require_keyword(
+                Buffer.combine,
+                "async_finish",
+                api="uccl_deepep.Buffer.combine",
+            )
+            num_nvl_bytes, num_rdma_bytes = _normal_buffer_sizes(args.hidden, world_size)
+            if world_size > args.scale_up_domain and num_rdma_bytes == 0:
+                raise RuntimeError("UCCL scale-out configuration returned no RDMA buffer")
+            self.buffer = Buffer(self.group, num_nvl_bytes, num_rdma_bytes)
+            num_sms = int(getattr(Buffer, "num_sms", args.num_sms))  # args.num_sms is only the fallback
+            try:
+                Buffer.set_num_sms(num_sms)
+            except Exception as exc:  # pragma: no cover - version dependent
+                raise RuntimeError(
+                    f"UCCL did not apply requested num_sms={num_sms}: {exc!r}"
+                ) from exc
+            applied_num_sms = int(getattr(Buffer, "num_sms", num_sms))  # read back after the set
+            if applied_num_sms != num_sms:
+                raise RuntimeError(
+                    f"UCCL num_sms mismatch: requested={num_sms} applied={applied_num_sms}"
+                )
+            resource_provenance = {
+                "requested_num_sms": num_sms,
+                "num_sms": applied_num_sms,
+                "sm_fraction": applied_num_sms / device_sms,
+                "tuned_source": "uccl-default-num_sms",
+                "num_nvl_bytes": num_nvl_bytes,
+                "num_rdma_bytes": num_rdma_bytes,
+            }
+        version = _uccl_version()
+        dependency_versions = _uccl_dependency_versions()
+        loaded_libraries = _uccl_build_evidence(version, dependency_versions)
+        _require_cross_rank_equal(loaded_libraries, "installed content identities")
+        self.backend_provenance = {
+            "uccl_version": version,
+            "uccl_commit": os.environ.get("UCCL_COMMIT") or f"pkg-{version}",  # unset/empty -> pkg tag
+            "uccl_wrapper_commit": os.environ.get("UCCL_WRAPPER_COMMIT"),
+            "backend_lineage": "uccl",
+            "uccl_dependency_versions": dependency_versions,
+            "loaded_libraries": loaded_libraries,
+            "mode": self.mode,
+            "dispatch_dtype": ep_precision.communication_format(
+                self.communication_precision, "dispatch"
+            ),
+            "combine_dtype": ep_precision.communication_format(
+                self.communication_precision, "combine"
+            ),
+            "resource_mode": "fixed-profile",
+            "device_sms": device_sms,
+            **resource_provenance,  # mode-specific keys built in the branch above
+        }
+
+    def buffer_cap(self, args):
+        return self.max_tokens_per_rank if self.mode == "low-latency" else None
+
+    def make_problem(self, T, idx, weights, x):
+        encoding = ep_precision.encode_dispatch(
+            torch, x, self.communication_precision
+        )
+        return types.SimpleNamespace(
+            T=T,
+            x=x,
+            dispatch_x=encoding.native_input,
+            oracle_x=encoding.semantic,
+            dispatch_precision_evidence=encoding.evidence,
+            topk_idx=idx.to(torch.int64),
+            topk_weights=weights.to(torch.float32),
+        )
+
+    def dispatch(self, p):
+        if self.mode == "low-latency":
+            recv_x, recv_counts, handle, _, _ = self.buffer.low_latency_dispatch(
+                p.x,
+                p.topk_idx,
+                self.max_tokens_per_rank,
+                self.args.experts,
+                use_fp8=self._fp8_dispatch,  # BF16 control realizes use_fp8=False.
+                async_finish=False,
+                return_recv_hook=False,
+            )
+            return types.SimpleNamespace(
+                recv_x=recv_x,
+                recv_counts=recv_counts,
+                handle=handle,
+            )
+        (
+            num_tokens_per_rank,
+            num_tokens_per_rdma_rank,
+            num_tokens_per_expert,
+            is_token_in_rank,
+            _,
+        ) = self.buffer.get_dispatch_layout(p.topk_idx, self.args.experts)
+        recv_x, recv_topk_idx, recv_topk_weights, recv_counts, handle, _ = self.buffer.dispatch(
+            p.dispatch_x,
+            topk_idx=p.topk_idx,
+            topk_weights=p.topk_weights,
+            num_tokens_per_rank=num_tokens_per_rank,
+            num_tokens_per_rdma_rank=num_tokens_per_rdma_rank,
+            is_token_in_rank=is_token_in_rank,
+            num_tokens_per_expert=num_tokens_per_expert,
+            async_finish=False,
+        )
+        return types.SimpleNamespace(
+            recv_x=recv_x,
+            recv_topk_idx=recv_topk_idx,
+            recv_topk_weights=recv_topk_weights,
+            recv_counts=recv_counts,
+            handle=handle,
+        )
+
+    def stage(self, p, h):
+        h.combine_input = self._semantic_recv(h, p)  # combine() reads h.combine_input
+
+    def combine(self, p, h):
+        if self.mode == "low-latency":
+            combined_x, _, _ = self.buffer.low_latency_combine(
+                h.combine_input,
+                p.topk_idx,
+                p.topk_weights,
+                h.handle,
+                use_logfmt=self._use_logfmt,
+                async_finish=False,
+                return_recv_hook=False,
+            )
+            return combined_x
+        combined_x, _, _ = self.buffer.combine(
+            h.combine_input, h.handle, async_finish=False
+        )
+        return combined_x
+
+    def inspect_dispatch(self, p, h):
+        valid = h.recv_topk_idx >= 0
+        expert_ids = torch.where(
+            valid,
+            h.recv_topk_idx + self.rank * (self.args.experts // self.world_size),
+            h.recv_topk_idx,
+        )
+        return types.SimpleNamespace(
+            payload=self._semantic_recv(h, p),
+            encoded_payload=self._encoded_recv(h),
+            scales=self._recv_scales(h),
+            expert_ids=expert_ids,
+            weights=h.recv_topk_weights.masked_fill(~valid, 0),
+            local_expert_counts=torch.tensor(h.recv_counts, device=self.device, dtype=torch.int64),
+            ordering_contract="source-rank-major-stable-v1",
+        )
+
+    def inspect_expert_dispatch(self, p, h):
+        if self.mode != "low-latency":
+            raise RuntimeError("expert-packed inspection requires low-latency mode")
+        p.recv_counts = tuple(int(value) for value in h.recv_counts.tolist())
+        return types.SimpleNamespace(
+            payload=self._semantic_recv(h, p),
+            encoded_payload=self._encoded_recv(h),
+            scales=self._recv_scales(h),
+            local_expert_counts=h.recv_counts,
+            source_info=h.handle[0],
+            layout_range=h.handle[1],
+        )
+
+    def combine_transformed(self, p, h, transformed):
+        if self.mode == "low-latency":
+            packed = torch.zeros(
+                self._encoded_recv(h).shape,
+                dtype=torch.bfloat16,
+                device=self._encoded_recv(h).device,
+            )
+            packed[h.oracle_local_expert_slots, h.oracle_packed_positions] = transformed.to(
+                packed.dtype
+            )
+            combined, _, _ = self.buffer.low_latency_combine(
+                packed,
+                p.topk_idx,
+                p.topk_weights,
+                h.handle,
+                use_logfmt=self._use_logfmt,
+                async_finish=False,
+                return_recv_hook=False,
+            )
+            return combined
+        semantic = self._semantic_recv(h, p)
+        combined, _, _ = self.buffer.combine(
+            transformed.to(semantic.dtype), h.handle, async_finish=False
+        )
+        return combined
+
+    def recv_tokens(self, h):
+        if self.mode == "low-latency":
+            return int(h.recv_counts.to(torch.int64).sum().item())
+        return int(self._encoded_recv(h).shape[0])
+
+    def _encoded_recv(self, h):
+        return h.recv_x[0] if isinstance(h.recv_x, tuple) else h.recv_x
+
+    def _recv_scales(self, h):
+        return h.recv_x[1] if isinstance(h.recv_x, tuple) else None
+
+    def _semantic_recv(self, h, problem=None):
+        if not self._fp8_dispatch:
+            return h.recv_x  # non-FP8 dispatch: the received value is returned unchanged
+        if not hasattr(h, "recv_semantic"):  # dequantize once per handle, then reuse
+            if self.mode == "low-latency":
+                counts = getattr(problem, "recv_counts", None)  # may already be cached on the problem
+                if counts is None:
+                    counts = tuple(int(value) for value in h.recv_counts.tolist())
+                    if problem is not None:
+                        problem.recv_counts = counts  # cache so later calls skip the conversion
+                workspace = getattr(self, "_ll_semantic_workspace", None)
+                if workspace is None:  # bf16 scratch, allocated once per backend and reused
+                    encoded = self._encoded_recv(h)
+                    workspace = torch.empty(
+                        encoded.shape, dtype=torch.bfloat16, device=encoded.device
+                    )
+                    self._ll_semantic_workspace = workspace
+                h.recv_semantic = ep_precision.dequantize_expert_prefixes(
+                    torch,
+                    self._encoded_recv(h),
+                    self._recv_scales(h),
+                    self.communication_precision["dispatch"],
+                    counts,
+                    workspace,
+                )
+            else:
+                h.recv_semantic = ep_precision.dequantize_dispatch(
+                    torch,
+                    self._encoded_recv(h),
+                    self._recv_scales(h),
+                    self.communication_precision["dispatch"],
+                )
+        return h.recv_semantic
+
+    def oracle_dispatch_payload(self, payload):
+        return ep_precision.encode_dispatch(
+            torch, payload, self.communication_precision
+        ).semantic
+
+    def precision_evidence(self, problem, view=None):
+        return ep_precision.precision_evidence(
+            torch,
+            profile_id=self.precision_profile_id,
+            profile=self.communication_precision,
+            problem=problem,
+            view=view,
+        )
+
+    def finalize(self, rc):
+        # UCCL's proxy teardown can crash after results are written; preserve the real rc.
+        try:
+            dist.barrier()
+        except Exception:
+            pass
+        sys.stdout.flush()
+        sys.stderr.flush()
+        os._exit(rc if 0 <= rc <= 255 else 1)
diff --git a/experimental/CollectiveX/tests/eplb.py b/experimental/CollectiveX/tests/eplb.py
new file mode 100644
index 000000000..b1479da9f
--- /dev/null
+++ b/experimental/CollectiveX/tests/eplb.py
@@ -0,0 +1,199 @@
+#!/usr/bin/env python3
+"""CollectiveX — EPLB (Expert-Parallel Load Balancer), the DeepSeek-style remedy for
+skewed (zipf) expert load.
+
+Under skewed routing, the ranks hosting hot logical experts receive far more token-copies
+than the rest; dispatch/combine latency is gated by that busiest rank (the cross-rank MAX
+the harness measures), so the whole collective stalls on it. EPLB REPLICATES hot experts
+onto extra physical slots and PLACES the slots so every rank carries ~equal load.
+
+This module is backend-agnostic: it is purely a transform of the deterministic routing
+trace. The trick that keeps every adapter unchanged — DeepEP/MoRI both route expert i to
+rank `i // experts_per_rank` (contiguous block placement) — is to number the physical slots
+RANK-MAJOR (rank r owns physical ids [r*spp, (r+1)*spp)), so the standard contiguous mapping
+reproduces EPLB's balanced placement. The harness then runs with `experts = num_physical`
+and the remapped (physical) trace; nothing else changes.
+
+  num_physical = num_logical + redundant   (redundant rounded up to a multiple of ep_size)
+  build_plan(): greedy replicate-by-load + equal-cardinality balanced packing onto ep_size ranks
+  remap_idx():  each token's logical targets -> physical replicas, spread by global token id
+
+Pure-Python planner (no torch) so it unit-tests on a login node; remap_idx needs torch.
+"""
+from __future__ import annotations
+
+import hashlib
+import json
+
+
+def physical_count(num_logical: int, num_redundant: int, ep_size: int) -> int:
+    """num_logical + redundant, with redundant rounded UP to a multiple of ep_size so the
+    physical experts divide evenly across ranks (symmetric dispatch)."""
+    r = ((max(0, num_redundant) + ep_size - 1) // ep_size) * ep_size
+    return num_logical + r
+
+
+def _contiguous_rank_load(logical_load, ep_size):
+    """Per-rank received load WITHOUT EPLB: logical experts placed contiguously
+    (experts_per_rank = num_logical/ep_size), so rank r carries its block's total."""
+    n = len(logical_load)
+    per = n // ep_size
+    return [sum(logical_load[r * per:(r + 1) * per]) for r in range(ep_size)]
+
+
+def build_plan(logical_load, num_physical: int, ep_size: int) -> dict:
+    """logical_load: list[float] length num_logical (token-copies per logical expert).
+    Returns the replication+placement plan (all pure-Python lists) + before/after balance."""
+    num_logical = len(logical_load)
+    assert num_physical >= num_logical, "num_physical must be >= num_logical"
+    assert num_physical % ep_size == 0, "num_physical must divide ep_size"
+    assert num_logical % ep_size == 0, "num_logical must divide ep_size"
+    spp = num_physical // ep_size                      # physical slots per rank (fixed)
+
+    # 1) Replica allocation — start one slot per logical expert, then hand each redundant
+    #    slot to the expert with the highest CURRENT per-replica load (greedy min-max).
+    replicas = [1] * num_logical
+    for _ in range(num_physical - num_logical):
+        best, best_lps = 0, -1.0
+        for e in range(num_logical):
+            lps = logical_load[e] / replicas[e]
+            if lps > best_lps:
+                best, best_lps = e, lps
+        replicas[best] += 1
+
+    # 2) Slots = (per-replica load, logical expert), one per replica.
+    slots = []
+    for e in range(num_logical):
+        lps = logical_load[e] / replicas[e]
+        slots.extend((lps, e) for _ in range(replicas[e]))
+
+    # 3) Balanced packing into ep_size bins of EQUAL cardinality (spp each), minimizing the
+    #    max per-rank load: heaviest slot first -> least-loaded rank that still has capacity.
+    slots.sort(reverse=True)
+    rank_slots = [[] for _ in range(ep_size)]
+    rank_load = [0.0] * ep_size
+    for lps, e in slots:
+        r = min((r for r in range(ep_size) if len(rank_slots[r]) < spp),
+                key=lambda r: rank_load[r])
+        rank_slots[r].append(e)
+        rank_load[r] += lps
+
+    # 4) Rank-major physical numbering -> contiguous placement == this balanced placement.
+    phys2log, rank_of_phys = [], []
+    for r in range(ep_size):
+        for e in rank_slots[r]:
+            phys2log.append(e)
+            rank_of_phys.append(r)
+    log2phys = [[] for _ in range(num_logical)]
+    for pid, e in enumerate(phys2log):
+        log2phys[e].append(pid)
+
+    before = _contiguous_rank_load(logical_load, ep_size)
+    total = sum(logical_load) or 1.0
+    mean = total / ep_size
+    return {
+        "num_logical": num_logical, "num_physical": num_physical, "ep_size": ep_size,
+        "slots_per_rank": spp, "replicas": replicas, "max_replicas": max(replicas),
+        "phys2log": phys2log, "rank_of_phys": rank_of_phys, "log2phys": log2phys,
+        "rank_load_after": rank_load, "rank_load_before": before,
+        # imbalance = busiest rank / mean (1.0 = perfect). This is the number EPLB cuts.
+        "imbalance_before": max(before) / mean, "imbalance_after": max(rank_load) / mean,
+        "replicated_experts": sum(1 for r in replicas if r > 1),
+    }
+
+
+def mapping_hash(plan: dict) -> str:
+    """Hash the placement fields that fully determine the logical-to-physical remap."""
+    payload = {
+        "phys2log": plan["phys2log"],
+        "rank_of_phys": plan["rank_of_phys"],
+        "replicas": plan["replicas"],
+    }
+    return hashlib.sha256(json.dumps(payload, sort_keys=True).encode()).hexdigest()
+
+
+def remap_rows(indices: list[list[int]], plan: dict) -> list[list[int]]:
+    """Pure-Python equivalent of remap_idx for contract verification."""
+    replicas = plan["log2phys"]
+    return [
+        [replicas[expert][token % len(replicas[expert])] for expert in row]
+        for token, row in enumerate(indices)
+    ]
+
+
+def remap_idx(idx_logical, plan):
+    """idx_logical: torch [gt, topk] int64 logical-expert ids (global trace).
+    Returns idx_physical [gt, topk]: each token's logical target -> one of that expert's
+    physical replicas, SPREAD by global token id (row) so a hot expert's tokens fan out
+    across its replicas (= across ranks). Replicas of distinct logical experts are disjoint,
+    so a token's top-k physical ids stay distinct (dispatch invariant preserved)."""
+    import torch
+    replicas = plan["replicas"]
+    num_logical = len(replicas)
+    max_rc = plan["max_replicas"]
+    rc = torch.tensor(replicas, dtype=torch.int64)
+    # padded [num_logical, max_rc] table of physical ids (pad with replica 0; never indexed
+    # past rc[e] because the replica index is taken mod rc[e]).
+    padded = torch.zeros(num_logical, max_rc, dtype=torch.int64)
+    for e, phys in enumerate(plan["log2phys"]):
+        for k in range(max_rc):
+            padded[e, k] = phys[k] if k < len(phys) else phys[0]
+    gt = idx_logical.shape[0]
+    rows = torch.arange(gt, dtype=torch.int64).unsqueeze(1)     # [gt,1] global token id
+    e = idx_logical.to(torch.int64)                             # [gt,topk]
+    ridx = rows % rc[e]                                         # [gt,topk] replica index
+    return padded[e, ridx]                                      # [gt,topk] physical ids
+
+
+# --------------------------------------------------------------------------- self-test
+if __name__ == "__main__":
+    # Synthetic zipf load (popularity ∝ 1/(e+1)) — the case EPLB targets. No torch needed.
+    import sys
+    NUM_LOGICAL, EP, REDUNDANT = 256, 8, 32  # 32 redundant -> 288 physical, 36 per rank
+    load = [1.0 / (e + 1) for e in range(NUM_LOGICAL)]
+    nphys = physical_count(NUM_LOGICAL, REDUNDANT, EP)
+    plan = build_plan(load, nphys, EP)
+    print(f"num_logical={NUM_LOGICAL} ep={EP} num_physical={nphys} slots/rank={plan['slots_per_rank']}")
+    print(f"replicated experts={plan['replicated_experts']} max_replicas={plan['max_replicas']} "
+          f"(hottest expert 0 replicas={plan['replicas'][0]})")
+    print(f"per-rank load BEFORE (contiguous): {[round(x,3) for x in plan['rank_load_before']]}")
+    print(f"per-rank load AFTER  (EPLB):       {[round(x,3) for x in plan['rank_load_after']]}")
+    print(f"imbalance (max/mean)  BEFORE={plan['imbalance_before']:.2f}x  AFTER={plan['imbalance_after']:.2f}x")
+    # Gates: equal slot cardinality, every logical expert placed, big imbalance cut.
+    assert all(plan["replicas"][e] >= 1 for e in range(NUM_LOGICAL))
+    assert sum(plan["replicas"]) == nphys
+    assert len(plan["phys2log"]) == nphys
+    assert all(len(plan["log2phys"][e]) == plan["replicas"][e] for e in range(NUM_LOGICAL))
+    # rank-major numbering => contiguous block per rank => rank_of_phys is non-decreasing
+    assert plan["rank_of_phys"] == sorted(plan["rank_of_phys"])
+    assert plan["imbalance_after"] < plan["imbalance_before"], "EPLB must reduce imbalance"
+    assert plan["imbalance_after"] < 1.30, f"EPLB should get within ~30% of perfect, got {plan['imbalance_after']:.2f}"
+    # remap (if torch present): distinctness + balanced receive on a sampled zipf trace.
+    try:
+        import torch
+        g = torch.Generator().manual_seed(0)
+        p = torch.tensor(load)
+        p = (p / p.sum()).expand(4096, NUM_LOGICAL)  # same distribution for each of 4096 tokens
+        idx_l = torch.multinomial(p, 8, replacement=False, generator=g).to(torch.int64)  # 8 distinct per token
+        idx_p = remap_idx(idx_l, plan)
+        assert idx_p.shape == idx_l.shape
+        # top-k physical ids distinct per token
+        assert all(len(set(row.tolist())) == 8 for row in idx_p), "physical top-k must stay distinct"
+        spp = plan["slots_per_rank"]
+        recv_before = [0] * EP
+        recv_after = [0] * EP
+        per_log = NUM_LOGICAL // EP
+        for row_l, row_p in zip(idx_l.tolist(), idx_p.tolist()):
+            for e in row_l:
+                recv_before[e // per_log] += 1
+            for pid in row_p:
+                recv_after[pid // spp] += 1
+        ib = max(recv_before) / (sum(recv_before) / EP)
+        ia = max(recv_after) / (sum(recv_after) / EP)
+        print(f"sampled-trace receive imbalance BEFORE={ib:.2f}x  AFTER={ia:.2f}x")
+        assert ia < ib and ia < 1.35, "remap must balance per-rank receive load"
+        print("remap self-test: OK")
+    except ImportError:  # only a missing torch is tolerated; failed asserts still propagate
+        print("(torch absent — skipped remap self-test; planner gates passed)")
+    print("EPLB self-test: PASS")
+    sys.exit(0)
diff --git a/experimental/CollectiveX/tests/make_workloads.py b/experimental/CollectiveX/tests/make_workloads.py
new file mode 100644
index 000000000..862c3d037
--- /dev/null
+++ b/experimental/CollectiveX/tests/make_workloads.py
@@ -0,0 +1,119 @@
+#!/usr/bin/env python3
+"""Generate canonical serialized workloads. Runs the stdlib counter generator for
+each (routing, global_tokens) in a ladder and writes <workload_id>.npz + .manifest.json into a
+dir that runs then consume via `run_ep.py --workload-dir`. One trace is emitted per global-token
+count because global token count is part of workload identity.
+
+  python3 tests/make_workloads.py --out-dir /path/to/cx_workloads \\
+      --routing uniform --ep 8 --hidden 7168 --topk 8 --experts 256 --seed 67 \\
+      --tokens-ladder "1 2 4 8 16 32 64 128 256 512"
+
+Or by the named v1 workload in configs/workloads.yaml. Explicit dimension flags still override it:
+
+  python3 tests/make_workloads.py --out-dir /path/to/cx_workloads --workload deepseek-v3-v1 --routing uniform --ep 8
+
+--id-only prints the content-bound workload_id per ladder point without torch/numpy:
+
+  python3 tests/make_workloads.py --workload deepseek-v3-v1 --ep 8 --id-only
+
+Generate every routing the suites need by running once per --routing. Idempotent (same id => same
+file). The dir is the cross-hardware artifact: copy it to each cluster so all consume identical bytes.
+"""
+from __future__ import annotations
+
+import argparse
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+import workload as wl   # noqa: E402
+
+# Repo root holds configs/ (this file is in tests/). Used only for --workload name resolution.
+_REPO = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+
+def resolve_manifest(name):
+    """Look a workload name up in configs/workloads.yaml and return (hidden, topk, experts).
+    Searches synthetic + model_derived; expert count = `experts` or (for model-derived) `routed_experts`.
+    Raises SystemExit with the known names if the manifest is absent. Pure PyYAML + stdlib."""
+    import yaml
+    path = os.path.join(_REPO, "configs", "workloads.yaml")
+    with open(path) as handle:
+        cfg = yaml.safe_load(handle)
+    known = []
+    for section in ("synthetic", "model_derived"):
+        sec = cfg.get(section) or {}
+        known += list(sec)
+        m = sec.get(name)
+        if m is None:
+            continue
+        experts = m.get("experts", m.get("routed_experts"))
+        if m.get("hidden") is None or m.get("topk") is None or experts is None:
+            raise SystemExit(f"workload '{name}' is missing hidden/topk/experts in {path}")
+        return int(m["hidden"]), int(m["topk"]), int(experts)
+    raise SystemExit(f"unknown --workload '{name}'; known: {sorted(known)}")
+
+
+def main() -> int:
+    ap = argparse.ArgumentParser(description="Generate canonical CollectiveX workloads")
+    ap.add_argument("--out-dir", help="required unless --id-only")
+    ap.add_argument("--workload", help="named manifest in configs/workloads.yaml (sets hidden/topk/experts)")
+    ap.add_argument("--routing", default="uniform", choices=["uniform", "zipf"])
+    ap.add_argument("--ep", type=int, required=True, help="ep_size (global_tokens = T * ep)")
+    ap.add_argument("--hidden", type=int, help="override (default 7168, or the --workload's hidden)")
+    ap.add_argument("--topk", type=int, help="override (default 8, or the --workload's topk)")
+    ap.add_argument("--experts", type=int, help="override (default 256, or the --workload's experts)")
+    ap.add_argument("--seed", type=int, default=67)
+    ap.add_argument("--tokens-ladder", default="1 2 4 8 16 32 64 128 256 512")
+    ap.add_argument("--id-only", action="store_true",
+                    help="print content-bound workload_id per point without torch/numpy")
+    a = ap.parse_args()
+
+    # Resolve dims: a named --workload supplies defaults; explicit --hidden/--topk/--experts override
+    # per field. With neither, fall back to the v1 DeepSeek dimensions (7168/8/256).
+    base_h, base_t, base_e = (7168, 8, 256)
+    if a.workload:
+        base_h, base_t, base_e = resolve_manifest(a.workload)
+    hidden = a.hidden if a.hidden is not None else base_h
+    topk = a.topk if a.topk is not None else base_t
+    experts = a.experts if a.experts is not None else base_e
+
+    if not a.id_only and not a.out_dir:
+        ap.error("--out-dir is required unless --id-only")
+
+    raw_ladder = [int(token) for token in a.tokens_ladder.replace(",", " ").split()]
+    if (a.ep <= 0 or min(hidden, topk, experts) <= 0 or topk > experts or experts % a.ep
+            or not raw_ladder or any(token <= 0 for token in raw_ladder)
+            or len(raw_ladder) != len(set(raw_ladder))):
+        ap.error("shape, EP, and token ladder must be positive, divisible, and unique")
+    ladder = sorted(raw_ladder)
+    epr = experts // a.ep
+    label = f"workload={a.workload} " if a.workload else ""
+
+    if a.id_only:
+        # The stdlib counter generator derives the same content-bound ID on every runtime.
+        made = []
+        for T in ladder:
+            gt = T * a.ep
+            wid = wl.compute_workload_id(a.routing, hidden, topk, experts, a.ep, gt, a.seed)
+            made.append((T, gt, wid))
+            print(f"  T={T:<5} gt={gt:<6} routing={a.routing} -> {wid}")
+        print(f"{label}id-only: {len(made)} workload_id(s) "
+              f"(hidden={hidden} topk={topk} experts={experts} ep={a.ep} routing={a.routing} seed={a.seed})")
+        return 0
+
+    os.makedirs(a.out_dir, exist_ok=True)
+    made = []
+    for T in ladder:
+        gt = T * a.ep
+        idx, w, man = wl.build_workload(hidden, topk, experts, a.routing, gt, a.seed, epr)
+        wid = wl.save_workload(a.out_dir, idx, w, man)
+        made.append((T, gt, wid))
+        print(f"  T={T:<5} gt={gt:<6} routing={a.routing} -> {wid}  "
+              f"(trace sha {man['checksums']['trace'][:12]})")
+    print(f"{label}wrote {len(made)} canonical workloads to {a.out_dir} (routing={a.routing}, ep={a.ep})")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/experimental/CollectiveX/tests/probe_precision.py b/experimental/CollectiveX/tests/probe_precision.py
new file mode 100644
index 000000000..2a92e7079
--- /dev/null
+++ b/experimental/CollectiveX/tests/probe_precision.py
@@ -0,0 +1,1162 @@
+#!/usr/bin/env python3
+"""Bounded real-hardware capability probe for provisional CollectiveX precision cells."""
+from __future__ import annotations
+
+import argparse
+import datetime as dt
+import hashlib
+import inspect
+import json
+import os
+import platform
+import re
+import socket
+import sys
+import tempfile
+from pathlib import Path
+from types import SimpleNamespace
+from typing import Any
+
+
+HERE = Path(__file__).resolve().parent  # directory that holds this script
+ROOT = HERE.parent  # its parent directory
+sys.path[:0] = [str(HERE), str(ROOT)]  # prepend both; presumably where the modules imported below live
+
+import artifact_safety  # noqa: E402
+import capability  # noqa: E402
+import ep_harness  # noqa: E402
+
+
+FORMAT = "collectivex.precision-probe.v1"  # manifest format tag checked by validate_manifest
+PLAN_FORMAT = "collectivex.precision-probe-plan.v1"  # format tag of workflow plans
+CONTROL_FORMAT = "collectivex.precision-probe-control.v1"  # format tag of per-cell control documents
+RECORD_TYPE = "precision-capability-probe"  # required manifest record_type
+PROBE_CONTRACT = "bounded-native-cell-v1"  # required manifest probe_contract
+FENCE_CONTRACT = "caller-event-cross-stream-v1"  # required evidence.completion.contract
+SUPPORTED_REASON = "native-probe-passed"  # the only reason accepted with disposition "supported"
+UNSUPPORTED_REASONS = frozenset({  # closed vocabulary: ProbeError reasons and "unsupported" manifest reasons
+    "backend-construction-failed",
+    "completion-fence-failed",
+    "cross-rank-evidence-mismatch",
+    "native-operation-failed",
+    "precision-contract-mismatch",
+    "runtime-identity-mismatch",
+    "target-not-provisional",
+    "transport-fallback-detected",
+    "unsupported-native-api",
+    "unverified-execution-identity",
+})
+BACKENDS = frozenset({  # backend names accepted as probe targets and as plan filters
+    "deepep", "deepep-v2", "deepep-hybrid", "mori", "uccl",
+})
+SHA40 = re.compile(r"[0-9a-f]{40}")  # 40 lowercase hex characters; applied with fullmatch
+SHA256 = re.compile(r"[0-9a-f]{64}")  # 64 lowercase hex characters; applied with fullmatch
+IMAGE_DIGEST = re.compile(r"sha256:[0-9a-f]{64}")  # "sha256:" followed by 64 lowercase hex characters
+
+
+class ProbeError(RuntimeError):
+    """Probe failure whose ``reason`` is one code from the closed ``UNSUPPORTED_REASONS`` set."""
+
+    def __init__(self, reason: str):
+        if reason not in UNSUPPORTED_REASONS:  # refuse codes outside the closed vocabulary
+            raise ValueError(f"unknown precision probe reason {reason!r}")
+        self.reason = reason
+        super().__init__(reason)
+
+
+def _canonical(value: Any) -> bytes:
+    """Serialize ``value`` as compact, key-sorted, ASCII-only JSON bytes; NaN and Infinity are rejected."""
+    text = json.dumps(value, sort_keys=True, separators=(",", ":"), ensure_ascii=True, allow_nan=False)
+    return text.encode("ascii")
+
+
+def _sha(value: Any) -> str:  # hex SHA-256 digest of the canonical JSON encoding of ``value``
+    return hashlib.new("sha256", _canonical(value)).hexdigest()
+
+
+def _exact_keys(value: Any, expected: set[str], path: str) -> dict[str, Any]:
+    if isinstance(value, dict) and set(value) == expected:  # closed schema: no missing or extra keys
+        return value
+    raise ValueError(f"{path} fields differ from {FORMAT}")
+
+
+def _text(value: Any, path: str, *, nullable: bool = False) -> str | None:
+    """Return ``value`` when it is 1-4096 characters of printable ASCII, else raise ValueError."""
+    # ``None`` passes through only when the caller opted in with ``nullable=True``.
+    if value is None and nullable:
+        return None
+    # Accept only non-empty strings within the length bound and the 0x20-0x7E character range.
+    if isinstance(value, str) and 0 < len(value) <= 4096 and all(
+        0x20 <= ord(character) <= 0x7E for character in value
+    ):
+        return value
+    raise ValueError(f"{path} is not bounded printable ASCII")
+
+
+def _boolean(value: Any, path: str) -> bool:
+    if type(value) is bool:  # exact type match: 0, 1, and other truthy or falsy values are rejected
+        return value
+    raise ValueError(f"{path} is not boolean")
+
+
+def _integer(value: Any, path: str, minimum: int = 0) -> int:
+    if type(value) is int and value >= minimum:  # exact int: bool and float values are rejected
+        return value
+    raise ValueError(f"{path} is not an integer >= {minimum}")
+
+
+def validate_manifest(document: Any) -> dict[str, Any]:
+    """Validate the closed probe format (without extending publication schemas) and return the document."""
+    doc = _exact_keys(document, {
+        "evidence", "format", "generated_at", "privacy", "probe_contract",
+        "record_type", "result", "schema_version", "target", "topology",
+    }, "probe")
+    if (
+        doc["format"] != FORMAT
+        or doc["record_type"] != RECORD_TYPE
+        or doc["schema_version"] != 1
+        or doc["probe_contract"] != PROBE_CONTRACT
+    ):
+        raise ValueError("probe format, record type, schema, or contract differs")
+    _text(doc["generated_at"], "probe.generated_at")  # checked as bounded ASCII text only, not parsed as a time
+    target = _exact_keys(doc["target"], {
+        "backend", "basis", "ep", "mode", "precision_profile", "registry_disposition",
+        "sku",
+    }, "probe.target")
+    if (
+        target["backend"] not in BACKENDS
+        or target["registry_disposition"] != "provisional"  # only provisional cells are valid probe targets
+        or target["mode"] not in {"normal", "low-latency"}
+    ):
+        raise ValueError("probe target is not a provisional native adapter cell")
+    for field in ("basis", "precision_profile", "sku"):
+        _text(target[field], f"probe.target.{field}")
+    _integer(target["ep"], "probe.target.ep", 1)
+    topology = _exact_keys(doc["topology"], {
+        "gpus_per_node", "nodes", "placement_valid", "scale_up_domain",
+        "scale_up_transport", "scale_out_transport", "scope", "topology_class",
+        "transport", "world_size",
+    }, "probe.topology")
+    for field in ("gpus_per_node", "nodes", "scale_up_domain", "world_size"):
+        _integer(topology[field], f"probe.topology.{field}", 1)  # each count must be at least 1
+    for field in ("scale_up_transport", "scope", "topology_class", "transport"):
+        _text(topology[field], f"probe.topology.{field}")
+    _text(topology["scale_out_transport"], "probe.topology.scale_out_transport", nullable=True)  # may be None
+    _boolean(topology["placement_valid"], "probe.topology.placement_valid")
+    result = _exact_keys(doc["result"], {
+        "disposition", "reason", "registry_mutation", "runtime_executed",
+        "static_inspection_sufficient",
+    }, "probe.result")
+    if result["disposition"] not in {"supported", "unsupported"}:
+        raise ValueError("probe result disposition is invalid")
+    expected_reason = (  # "supported" pins the reason; "unsupported" is checked against the closed set below
+        SUPPORTED_REASON if result["disposition"] == "supported" else result["reason"]
+    )
+    if result["reason"] != expected_reason or (
+        result["disposition"] == "unsupported"
+        and result["reason"] not in UNSUPPORTED_REASONS
+    ):
+        raise ValueError("probe result reason is invalid")
+    if result["registry_mutation"] is not False or result["static_inspection_sufficient"] is not False:
+        raise ValueError("probe must never mutate or statically promote the registry")
+    _boolean(result["runtime_executed"], "probe.result.runtime_executed")  # type-checked; either value is accepted
+    privacy = _exact_keys(doc["privacy"], {"contract", "sanitized"}, "probe.privacy")
+    if privacy != {"contract": "artifact-safety-v1", "sanitized": True}:
+        raise ValueError("probe privacy contract differs")
+    if result["disposition"] == "supported":  # evidence is mandatory for a supported result
+        _validate_evidence(doc["evidence"])
+    elif doc["evidence"] is not None:  # optional for an unsupported result, but validated in full when present
+        _validate_evidence(doc["evidence"])
+    artifact_safety.assert_publication_safe([doc])  # last gate: the artifact_safety publication check
+    return doc
+
+
+def _validate_evidence(value: Any) -> None:
+    """Validate the closed ``evidence`` record of a manifest; raise ValueError on the first violation."""
+    def matches(pattern: re.Pattern[str], candidate: Any) -> bool:
+        # Digests must be real strings: coercing with str() would let e.g. a 40-digit integer pass.
+        return isinstance(candidate, str) and pattern.fullmatch(candidate) is not None
+    evidence = _exact_keys(value, {"api", "completion", "identity", "precision", "transport"}, "probe.evidence")
+    api = _exact_keys(evidence["api"], {"calls", "signature_sha256"}, "probe.evidence.api")
+    if not isinstance(api["calls"], list) or not api["calls"]:
+        raise ValueError("probe API calls are empty")
+    for index, call in enumerate(api["calls"]):
+        item = _exact_keys(call, {"name", "signature"}, f"probe.evidence.api.calls[{index}]")
+        _text(item["name"], "probe API name")
+        _text(item["signature"], "probe API signature")
+    if not matches(SHA256, api["signature_sha256"]):
+        raise ValueError("probe API signature digest is invalid")
+    completion = _exact_keys(evidence["completion"], {
+        "caller_event_complete", "contract", "mode", "output_finite",
+        "verifier_stream_complete",
+    }, "probe.evidence.completion")
+    if completion["contract"] != FENCE_CONTRACT:
+        raise ValueError("probe completion contract differs")
+    _text(completion["mode"], "probe completion mode")
+    if not all(
+        _boolean(completion[field], f"probe completion {field}")
+        for field in ("caller_event_complete", "output_finite", "verifier_stream_complete")
+    ):
+        raise ValueError("probe completion evidence did not pass")
+    identity_record = _exact_keys(evidence["identity"], {
+        "backend_components", "backend_provenance_sha256", "image_digest",
+        "image_digest_verified", "image_reference", "source_sha",
+    }, "probe.evidence.identity")
+    if not matches(SHA40, identity_record["source_sha"]):
+        raise ValueError("probe source SHA is invalid")
+    if not matches(IMAGE_DIGEST, identity_record["image_digest"]):
+        raise ValueError("probe image digest is invalid")
+    _text(identity_record["image_reference"], "probe image reference")
+    if identity_record["image_digest_verified"] is not True:
+        raise ValueError("probe image digest is unverified")
+    if not matches(SHA256, identity_record["backend_provenance_sha256"]):
+        raise ValueError("probe backend provenance digest is invalid")
+    components = identity_record["backend_components"]
+    if not isinstance(components, list) or not components:
+        raise ValueError("probe backend component identity is empty")
+    for component in components:
+        item = _exact_keys(component, {"revision", "role", "version"}, "probe backend component")
+        _text(item["role"], "probe backend component role")
+        _text(item["revision"], "probe backend component revision", nullable=True)
+        _text(item["version"], "probe backend component version", nullable=True)
+        if item["revision"] is None and item["version"] is None:
+            raise ValueError("probe backend component has no identity")
+    precision = _exact_keys(evidence["precision"], {
+        "combine", "correctness", "dispatch", "profile_id",
+    }, "probe.evidence.precision")
+    _text(precision["profile_id"], "probe precision profile")
+    for direction in ("dispatch", "combine"):
+        axis = _exact_keys(precision[direction], {
+            "accumulator_dtype", "accumulator_evidence", "api_input_dtype",
+            "api_output_dtype", "communication_format", "runtime_input",
+            "runtime_output", "scale_contract", "semantic_output",
+        }, f"probe.evidence.precision.{direction}")
+        for field in ("accumulator_dtype", "accumulator_evidence", "api_input_dtype",
+                      "api_output_dtype", "communication_format"):
+            _text(axis[field], f"probe precision {direction} {field}")
+        _validate_tensor_summary(axis["runtime_input"], f"probe precision {direction} input")
+        _validate_tensor_summary(axis["runtime_output"], f"probe precision {direction} output")
+        _validate_tensor_summary(axis["semantic_output"], f"probe precision {direction} semantic")
+        scale = _exact_keys(axis["scale_contract"], {
+            "alignment", "dtype", "finite", "group_size", "layout", "padding",
+            "positive", "runtime_shapes", "runtime_storage_dtype",
+        }, f"probe precision {direction} scales")
+        for field in ("alignment", "layout", "padding"):
+            _text(scale[field], f"probe precision {direction} scale {field}")
+        _text(scale["dtype"], "probe scale dtype", nullable=True)
+        _text(scale["runtime_storage_dtype"], "probe scale storage", nullable=True)
+        if scale["group_size"] is not None:
+            _integer(scale["group_size"], "probe scale group", 1)
+        for field in ("finite", "positive"):
+            if scale[field] is not None:
+                _boolean(scale[field], f"probe scale {field}")
+        if not isinstance(scale["runtime_shapes"], list):
+            raise ValueError("probe scale shapes are invalid")
+    correctness = precision["correctness"]
+    if not isinstance(correctness, dict) or correctness.get("passed") is not True:
+        raise ValueError("probe precision correctness did not pass")
+    transport = _exact_keys(evidence["transport"], {
+        "evidence", "fallback_used", "native_backend", "requested", "runtime_route",
+    }, "probe.evidence.transport")
+    for field in ("native_backend", "requested", "runtime_route"):
+        _text(transport[field], f"probe transport {field}")
+    if transport["fallback_used"] is not False:
+        raise ValueError("probe transport fallback is present")
+    if not isinstance(transport["evidence"], list) or not transport["evidence"]:
+        raise ValueError("probe transport evidence is empty")
+    for item in transport["evidence"]:
+        _text(item, "probe transport evidence item")
+
+
+def _validate_tensor_summary(value: Any, path: str) -> None:
+    """Check one tensor summary: exact keys, finite is True, and a non-empty list of int-list shapes."""
+    summary = _exact_keys(value, {"finite", "rank", "shapes", "storage_dtype"}, path)
+    _text(summary["storage_dtype"], f"{path}.storage_dtype")
+    _integer(summary["rank"], f"{path}.rank", 0)
+    if summary["finite"] is not True:
+        raise ValueError(f"{path} is not finite")
+    if not (isinstance(summary["shapes"], list) and summary["shapes"]):
+        raise ValueError(f"{path} shapes are empty")
+    if not all(isinstance(s, list) and all(type(d) is int and d >= 0 for d in s) for s in summary["shapes"]):
+        raise ValueError(f"{path} shape is invalid")
+
+
+def provisional_targets() -> list[dict[str, Any]]:
+    """Return the registry's provisional precision cells in a deterministic order, dispositions unchanged."""
+    # Sort key: SKU, backend, EP size, mode, then precision profile.
+    sort_fields = ("sku", "backend", "ep", "mode", "precision_profile")
+    # Copy into a fresh list before sorting so the iterable handed back by the
+    # registry is never reordered in place.
+    cells = list(capability.provisional_precision_targets())
+    cells.sort(key=lambda cell: tuple(cell[field] for field in sort_fields))
+    return cells
+
+
+def _probe_id(target: dict[str, Any]) -> str:  # "probe-" + first 20 hex chars of the identity-field digest
+    return "probe-" + _sha({key: target[key] for key in ("backend", "sku", "ep", "mode", "precision_profile")})[:20]
+
+
+def _workflow_row(target: dict[str, Any]) -> dict[str, Any]:
+    """Build the workflow-matrix row for one probe target; ValueError if the registry has no topology."""
+    layout = capability.topology_for(target["sku"], target["ep"])
+    if layout is None:
+        raise ValueError("precision probe target has no registered topology")
+    return dict(
+        backend=target["backend"],
+        basis=target["basis"],
+        disposition=target["disposition"],
+        ep=target["ep"],
+        execution_weight=target["ep"],  # the weight is the EP size itself
+        gpus_per_node=layout["gpus_per_node"],
+        id=_probe_id(target),
+        launcher=capability.PLATFORMS[target["sku"]]["launcher"],
+        mode=target["mode"],
+        n=1, nodes=layout["nodes"],
+        precision_profile=target["precision_profile"],
+        scale_up_domain=layout["scale_up_domain"],
+        sku=target["sku"],
+    )
+
+
+def workflow_plan(*, backend: str = "all", only_sku: str = "") -> dict[str, Any]:
+    """Build the probe plan for the provisional cells matching the optional backend and SKU filters."""
+    def selected(cell: dict[str, Any]) -> bool:
+        backend_ok = backend == "all" or cell["backend"] == backend
+        return backend_ok and (not only_sku or cell["sku"] == only_sku)
+
+    # Filter values are validated before the empty-selection check, so an unregistered filter is reported as such.
+    targets = list(filter(selected, provisional_targets()))
+    if backend != "all" and backend not in BACKENDS:
+        raise ValueError("precision probe backend is not registered")
+    if only_sku and only_sku not in capability.PLATFORMS:
+        raise ValueError("precision probe SKU is not registered")
+    if not targets:
+        raise ValueError("precision probe filters select no provisional cells")
+    # One matrix row per selected cell, in the deterministic order of provisional_targets().
+    rows = [_workflow_row(target) for target in targets]
+    return {"format": PLAN_FORMAT, "include": rows, "schema_version": 1}
+
+
+def validate_workflow_plan(document: Any) -> dict[str, Any]:
+    """Return the plan if every row equals its registry row; malformed rows (e.g. non-string ids) raise ValueError."""
+    plan = _exact_keys(document, {"format", "include", "schema_version"}, "probe plan")
+    if plan["format"] != PLAN_FORMAT or plan["schema_version"] != 1:
+        raise ValueError("precision probe plan format differs")
+    if not isinstance(plan["include"], list):
+        raise ValueError("precision probe plan include is not a list")
+    expected = {_probe_id(target): _workflow_row(target) for target in provisional_targets()}
+    seen: set[str] = set()
+    for row in plan["include"]:
+        if not isinstance(row, dict) or not isinstance(row.get("id"), str) or row != expected.get(row["id"]):
+            raise ValueError("precision probe plan row differs from the capability registry")
+        if row["id"] in seen or seen.add(row["id"]):  # set.add returns None, so a new id is just recorded
+            raise ValueError("precision probe plan contains a duplicate row")
+    return plan
+
+
+def extract_control(
+    plan: Any, *, probe_id: str, sku: str, backend: str, nodes: int,
+) -> dict[str, Any]:
+    """Derive the control document for ``probe_id`` from a validated plan, cross-checked with the matrix."""
+    matching = [row for row in validate_workflow_plan(plan)["include"] if row["id"] == probe_id]
+    if len(matching) != 1:
+        raise ValueError("precision probe ID is not unique in the plan")
+    (row,) = matching
+    # The caller-supplied SKU, backend, and node count must agree with the plan row.
+    if row["sku"] != sku or row["backend"] != backend or row["nodes"] != nodes:
+        raise ValueError("precision probe control differs from the workflow matrix")
+    selectors = {key: row[key] for key in ("backend", "sku", "ep", "mode", "precision_profile")}
+    target = select_target(**selectors)
+    topology = capability.topology_for(row["sku"], row["ep"])
+    if topology is None:
+        raise ValueError("precision probe control has no topology")
+    return {
+        "format": CONTROL_FORMAT,
+        "id": row["id"],
+        "launcher": row["launcher"],
+        "schema_version": 1,
+        "target": target,
+        "topology": topology,
+    }
+
+
+def validate_control(
+    document: Any, *, sku: str, backend: str, nodes: int,
+) -> dict[str, Any]:
+    """Return ``document`` if it equals the control re-derived from the registry for its own target."""
+    fields = {"format", "id", "launcher", "schema_version", "target", "topology"}
+    control = _exact_keys(document, fields, "probe control")
+    if not (control["format"] == CONTROL_FORMAT and control["schema_version"] == 1):
+        raise ValueError("precision probe control format differs")
+    # Rebuild a single-row plan from the claimed target, then derive the control that plan implies.
+    single_row_plan = {
+        "format": PLAN_FORMAT, "include": [_workflow_row(control["target"])], "schema_version": 1,
+    }
+    rederived = extract_control(single_row_plan, probe_id=control["id"], sku=sku, backend=backend, nodes=nodes)
+    if control != rederived:
+        raise ValueError("precision probe control differs from the capability registry")
+    return control
+
+
+def validate_bundle(plan: Any, manifests: list[Any]) -> None:
+    """Require exactly one valid manifest per plan row: no duplicates, no extras, none missing."""
+    # A cell is identified by these five fields in both plan rows and manifest targets.
+    identity = ("backend", "sku", "ep", "mode", "precision_profile")
+
+    def cell_of(record: dict[str, Any]) -> tuple[Any, ...]:
+        return tuple(record[field] for field in identity)
+
+    # The plan is validated first, then every manifest in the order given.
+    expected = {cell_of(row) for row in validate_workflow_plan(plan)["include"]}
+    observed = [cell_of(validate_manifest(manifest)["target"]) for manifest in manifests]
+    # A repeated cell shrinks the set; a missing or extra cell changes it.
+    if len(set(observed)) != len(observed) or set(observed) != expected:
+        raise ValueError("precision probe bundle does not cover the exact workflow plan")
+    # Falling through means the bundle matches the plan exactly.
+
+
+def select_target(
+    *, backend: str, sku: str, ep: int, mode: str, precision_profile: str
+) -> dict[str, Any]:
+    """Return the single provisional cell with this identity; ProbeError if none or several match."""
+    wanted = (backend, sku, ep, mode, precision_profile)
+    fields = ("backend", "sku", "ep", "mode", "precision_profile")
+    found = [cell for cell in provisional_targets() if tuple(cell[f] for f in fields) == wanted]
+    # Exactly one match is required; zero or several both count as "not provisional".
+    if len(found) == 1:
+        return found[0]
+    raise ProbeError("target-not-provisional")
+
+
+def _target_record(target: dict[str, Any]) -> dict[str, Any]:
+    """Project a registry target onto the manifest's ``target`` fields."""
+    record: dict[str, Any] = {}
+    # Copied verbatim from the registry target, in the manifest's key order.
+    for field in ("backend", "basis", "ep", "mode", "precision_profile"):
+        record[field] = target[field]
+    # The disposition is always the literal "provisional"; it is not read from ``target``.
+    record["registry_disposition"] = "provisional"
+    record["sku"] = target["sku"]
+    return record
+
+
+def build_manifest(
+    *, target: dict[str, Any], topology: dict[str, Any], disposition: str,
+    reason: str, runtime_executed: bool, evidence: dict[str, Any] | None,
+) -> dict[str, Any]:
+    """Assemble a probe manifest stamped with the current UTC time and return it once validated."""
+    outcome = {
+        "disposition": disposition, "reason": reason,
+        "registry_mutation": False,
+        "runtime_executed": runtime_executed,
+        "static_inspection_sufficient": False,
+    }
+    return validate_manifest({
+        "evidence": evidence,
+        "format": FORMAT,
+        "generated_at": dt.datetime.now(dt.timezone.utc).isoformat(),
+        "privacy": {"contract": "artifact-safety-v1", "sanitized": True},
+        "probe_contract": PROBE_CONTRACT,
+        "record_type": RECORD_TYPE,
+        "result": outcome,
+        "schema_version": 1,
+        "target": _target_record(target),
+        "topology": topology,
+    })
+
+
+def _write_atomic(path: Path, document: dict[str, Any]) -> None:  # write a temp file, then rename it into place
+    path.parent.mkdir(parents=True, exist_ok=True)
+    descriptor, temporary = tempfile.mkstemp(prefix=f".{path.name}.", dir=path.parent)  # same directory as target
+    try:
+        os.fchmod(descriptor, 0o600)  # owner read/write only
+        with os.fdopen(descriptor, "wb") as handle:
+            handle.write(_canonical(document) + b"\n")  # canonical JSON plus one trailing newline
+            handle.flush()
+            os.fsync(handle.fileno())  # push the bytes to disk before the rename
+        os.replace(temporary, path)
+    finally:
+        if os.path.exists(temporary):  # still present only when the rename above did not happen
+            os.unlink(temporary)
+
+
+def _dtype_name(dtype: Any) -> str:  # render a dtype as text, dropping one leading "torch." if present
+    return str.removeprefix(str(dtype), "torch.")
+
+
+def _local_tensor_summary(torch_module, tensor) -> dict[str, Any]:
+    """Summarize this rank's tensor: finiteness (checked after ``.float()``), rank, shape, and dtype name."""
+    all_finite = bool(torch_module.isfinite(tensor.float()).all().item())
+    dims = list(map(int, tensor.shape))
+    summary = {"finite": all_finite, "rank": int(tensor.ndim), "shape": dims}
+    summary["storage_dtype"] = _dtype_name(tensor.dtype)
+    return summary
+
+
+def _aggregate_tensor_summaries(records: list[dict[str, Any]]) -> dict[str, Any]:
+    """Merge per-rank summaries; shapes are sorted, de-duplicated lists, as ``_validate_tensor_summary`` requires."""
+    dtypes = {record["storage_dtype"] for record in records}
+    ranks = {record["rank"] for record in records}
+    if len(dtypes) != 1 or len(ranks) != 1:
+        raise ProbeError("cross-rank-evidence-mismatch")
+    return {
+        "finite": all(record["finite"] for record in records), "rank": ranks.pop(),
+        "shapes": [list(shape) for shape in sorted({tuple(record["shape"]) for record in records})],
+        "storage_dtype": dtypes.pop(),
+    }
+
+
+def _scale_contract(torch_module, axis: dict[str, Any], scales) -> dict[str, Any]:
+    """Pair the axis's declared scale contract with what this rank's scale tensor shows (None = no scales)."""
+    if scales is None:
+        finite = positive = shape = storage = None
+    else:
+        finite = bool(torch_module.isfinite(scales.float()).all().item())
+        positive = bool((scales > 0).all().item())
+        shape = [int(item) for item in scales.shape]
+        storage = _dtype_name(scales.dtype)
+    return {
+        "alignment": axis["alignment_contract"], "dtype": axis["scale_dtype"], "finite": finite,
+        "group_size": axis["scale_group_size"], "layout": axis["scale_layout"],
+        "padding": axis["padding_contract"], "positive": positive,
+        "runtime_shape": shape, "runtime_storage_dtype": storage,
+    }
+
+
+def _aggregate_scale_contracts(records: list[dict[str, Any]]) -> dict[str, Any]:
+    """Fold per-rank scale contracts into one; the declared fields must be identical on every rank."""
+    merged: dict[str, Any] = {}
+    # Values are compared through their canonical JSON bytes.
+    for field in ("alignment", "dtype", "group_size", "layout", "padding", "runtime_storage_dtype"):
+        if len({_canonical(record[field]) for record in records}) != 1:
+            raise ProbeError("cross-rank-evidence-mismatch")
+        merged[field] = records[0][field]
+    for field in ("finite", "positive"):
+        flags = [record[field] for record in records]
+        # None when no rank reported the flag; otherwise True only if every rank reported exactly True.
+        merged[field] = all(flag is True for flag in flags) if any(flag is not None for flag in flags) else None
+    # Distinct shapes across ranks, sorted; a rank without a shape contributes the empty tuple.
+    shapes = [record["runtime_shape"] for record in records]
+    any_shape = any(shape is not None for shape in shapes)
+    merged["runtime_shapes"] = sorted({tuple(shape or ()) for shape in shapes}) if any_shape else []
+    return merged
+
+
+def _signature(callable_object: Any, name: str) -> dict[str, str]:  # record one native entry point's signature
+    try:
+        rendered = str(inspect.signature(callable_object))
+    except (TypeError, ValueError) as exc:  # inspect.signature could not describe the callable
+        raise ProbeError("unsupported-native-api") from exc
+    _text(rendered, f"native API {name} signature")  # must itself be bounded printable ASCII
+    return dict(name=name, signature=rendered)
+
+
+def _api_evidence(backend_name: str, backend) -> dict[str, Any]:
+    """Record the signatures of the native constructor, dispatch, and combine entry points."""
+    # Default method names; only DeepEP/UCCL in low-latency mode use differently named methods.
+    dispatch_name, combine_name = "dispatch", "combine"
+    if backend_name in {"deepep", "uccl"}:
+        native = type(backend.buffer)
+        if backend.mode == "low-latency":
+            dispatch_name, combine_name = "low_latency_dispatch", "low_latency_combine"
+        constructor = (native.__init__, f"{native.__name__}.__init__")
+    elif backend_name in {"deepep-v2", "deepep-hybrid"}:
+        native = type(backend.buffer)
+        constructor = (native.__init__, f"{native.__name__}.__init__")
+    elif backend_name == "mori":
+        # MoRI: the operator type supplies dispatch/combine, the config type supplies the constructor.
+        native = type(backend.op)
+        constructor = (type(backend.config).__init__, "EpDispatchCombineConfig.__init__")
+    else:  # pragma: no cover - guarded by target registry
+        raise ProbeError("unsupported-native-api")
+    # Constructor first, then dispatch, then combine: the digest below is taken
+    # over this list, so the order is part of the recorded evidence.
+    calls = [
+        _signature(*constructor),
+        _signature(getattr(native, dispatch_name), f"{native.__name__}.{dispatch_name}"),
+        _signature(getattr(native, combine_name), f"{native.__name__}.{combine_name}"),
+    ]
+    # The digest covers both the call names and their rendered signatures.
+    return {"calls": calls, "signature_sha256": _sha(calls)}
+
+
+def _completion_mode(backend_name: str, mode: str) -> str:
+    """Return the completion-settings label recorded for a backend; ``mode`` does not affect the result."""
+    shared = "async_finish=false;return_recv_hook=false"
+    modes = {"deepep": shared, "uccl": shared, "mori": "current-stream-ordered",
+             "deepep-v2": "async_with_compute_stream=false;do_cpu_sync=true",
+             "deepep-hybrid": "metadata-nonblocking=false;caller-stream-ordered"}
+    # Any backend without an entry has no known completion contract.
+    if backend_name not in modes:
+        raise ProbeError("unsupported-native-api")
+    return modes[backend_name]
+
+
+def _transport_evidence(backend_name: str, backend, args) -> dict[str, Any]:  # per-backend fallback detection
+    provenance = backend.backend_provenance
+    fallback = False
+    facts: list[str]
+    if backend_name == "deepep":
+        if args.scope == "scale-out" and int(provenance["num_rdma_bytes"]) <= 0:  # scale-out without RDMA buffer
+            fallback = True
+        if args.scale_up_transport == "mnnvl" and provenance["mnnvl_comm"] != "explicit-allow-mnnvl":
+            fallback = True
+        route = f"deepep-{backend.mode}"
+        facts = [
+            f"mnnvl={provenance['mnnvl_comm']}",
+            f"nvl-buffer={int(provenance['num_nvl_bytes']) > 0}",
+            f"rdma-buffer={int(provenance['num_rdma_bytes']) > 0}",
+        ]
+    elif backend_name == "uccl":
+        scratch_location = str(backend.buffer.scratch.device.type)
+        rdma_active = int(provenance["num_rdma_bytes"]) > 0
+        fallback = rdma_active and scratch_location != "cuda"  # RDMA in use but scratch is not on a CUDA device
+        route = f"uccl-proxy-{backend.mode}"
+        facts = [f"rdma-buffer={rdma_active}", f"rdma-memory={scratch_location}"]
+    elif backend_name == "deepep-v2":
+        expected_gin = args.scope == "scale-out"
+        fallback = bool(provenance["gin_enabled"]) != expected_gin  # GIN must be on exactly when scope is scale-out
+        route = str(provenance["communication_backend"])
+        facts = [
+            f"gin-enabled={bool(provenance['gin_enabled'])}",
+            f"nccl-communicator={provenance['nccl_communicator']}",
+        ]
+    elif backend_name == "deepep-hybrid":
+        route = str(provenance["transport"])
+        expected_build = "multinode-doca" if args.scope == "scale-out" else "intradomain"
+        realized_build = os.environ.get("DEEPEP_HYBRID_BUILD_MODE")
+        fallback = realized_build != expected_build  # a missing variable (None) also counts as a fallback
+        facts = [f"build-mode={realized_build or 'missing'}", f"domains={backend.communication_domains}"]
+    elif backend_name == "mori":
+        route = str(backend.kernel_generation)
+        expected_kernel = (  # scale-out takes precedence; otherwise the runner picks the intranode flavour
+            "inter-node-v1" if args.scope == "scale-out"
+            else "async-ll" if args.runner == "mi325x" else "intranode"
+        )
+        fallback = route != expected_kernel
+        facts = [
+            f"kernel={route}",
+            f"external-input={bool(provenance['use_external_inp_buf'])}",
+            f"qps={int(provenance['num_qps'])}",
+        ]
+    else:  # pragma: no cover - guarded by registry
+        raise ProbeError("unsupported-native-api")
+    return {
+        "evidence": sorted(facts),  # sorted so the fact list does not depend on construction order
+        "fallback_used": fallback,
+        "native_backend": backend_name,
+        "requested": str(args.transport),
+        "runtime_route": route,
+    }
+
+
+def _component_identities(backend_name: str, provenance: dict[str, Any]) -> list[dict[str, Any]]:
+    """List each backend component's revision and version as read from ``provenance``."""
+    # Per backend: (role, provenance key of the revision, provenance key of the version or None).
+    layouts = {
+        "deepep": [("deepep", "deepep_commit", "deepep_version")],
+        "deepep-v2": [
+            ("deepep-v2", "deepep_commit", "deepep_version"),
+            ("deepep-tree", "deepep_tree", None),
+            ("fmt", "fmt_commit", None),
+        ],
+        "deepep-hybrid": [
+            ("deepep-hybrid", "deepep_commit", None),
+            ("deepep-tree", "deepep_tree", None),
+        ],
+        "uccl": [
+            ("uccl", "uccl_commit", "uccl_version"),
+            ("uccl-wrapper", "uccl_wrapper_commit", None),
+        ],
+        "mori": [("mori", "mori_commit", None)],
+    }
+    if backend_name not in layouts:  # pragma: no cover
+        raise ProbeError("unsupported-native-api")
+    components = [
+        {"revision": provenance.get(revision_key), "role": role,
+         "version": provenance.get(version_key) if version_key is not None else None}
+        for role, revision_key, version_key in layouts[backend_name]
+    ]
+    # A component with neither a revision nor a version cannot be identified.
+    if any(item["revision"] is None and item["version"] is None for item in components):
+        raise ProbeError("unverified-execution-identity")
+    return components
+
+
+def _execution_identity(backend_name: str, backend) -> dict[str, Any]:
+    """Collect source and image identity from the environment plus backend provenance; ProbeError if unverified."""
+    env = os.environ
+    source_sha = env.get("COLLECTIVEX_SOURCE_SHA") or env.get("GITHUB_SHA")
+    image_reference = env.get("COLLECTIVEX_IMAGE")
+    image_digest = env.get("COLLECTIVEX_IMAGE_DIGEST")
+    identity_ok = (
+        isinstance(source_sha, str) and SHA40.fullmatch(source_sha)
+        and isinstance(image_reference, str) and image_reference
+        and isinstance(image_digest, str) and IMAGE_DIGEST.fullmatch(image_digest)
+        and env.get("COLLECTIVEX_IMAGE_DIGEST_VERIFIED") == "1"
+    )
+    if not identity_ok:
+        raise ProbeError("unverified-execution-identity")
+    provenance = backend.backend_provenance
+    return dict(
+        backend_components=_component_identities(backend_name, provenance),
+        backend_provenance_sha256=_sha(provenance),
+        image_digest=image_digest, image_digest_verified=True,
+        image_reference=image_reference, source_sha=source_sha,
+    )
+
+
+def _correctness_aggregate(records: list[dict[str, Any]]) -> dict[str, Any]:
+    """Combine per-rank correctness records: largest errors, summed saturation count, AND-ed verdicts."""
+    def tri_state(flags: list[Any]) -> bool | None:
+        # None when no rank reported the flag; otherwise True only if every rank reported exactly True.
+        return None if all(flag is None for flag in flags) else all(flag is True for flag in flags)
+
+    # Every rank must have evaluated the same precision profile.
+    profile_ids = {record["profile_id"] for record in records}
+    if len(profile_ids) != 1:
+        raise ProbeError("cross-rank-evidence-mismatch")
+    result: dict[str, Any] = {"profile_id": profile_ids.pop()}
+    for direction in ("dispatch", "combine"):
+        axes = [record[direction] for record in records]
+        result[direction] = {
+            "dequantized_semantics": all(axis["dequantized_semantics"] for axis in axes),
+            "encoded_payload_valid": all(axis["encoded_payload_valid"] for axis in axes),
+            "max_abs_error": max(float(axis["max_abs_error"]) for axis in axes),
+            "max_rel_error": max(float(axis["max_rel_error"]) for axis in axes),
+            "passed": all(axis["passed"] for axis in axes),
+            "saturation_count": sum(int(axis["saturation_count"]) for axis in axes),
+            "saturation_rate": max(float(axis["saturation_rate"]) for axis in axes),
+            "scales_finite": tri_state([axis["scales_finite"] for axis in axes]),
+            "scales_positive": tri_state([axis["scales_positive"] for axis in axes]),
+        }
+    # The overall verdict needs every rank's own verdict
+    # and both per-direction verdicts computed above.
+    every_rank_passed = all(record["passed"] for record in records)
+    every_direction_passed = all(result[name]["passed"] for name in ("dispatch", "combine"))
+    result["passed"] = every_rank_passed and every_direction_passed
+    return result
+
+
+def _topology_record(topology: dict[str, Any], placement_valid: bool) -> dict[str, Any]:  # topology fields + placement verdict, as handed to build_manifest
+    return {
+        "gpus_per_node": topology["gpus_per_node"],
+        "nodes": topology["nodes"],
+        "placement_valid": placement_valid,  # supplied by the caller, not read from the topology entry
+        "scale_up_domain": topology["scale_up_domain"],
+        "scale_up_transport": topology["scale_up_transport"],
+        "scale_out_transport": topology["scale_out_transport"],
+        "scope": topology["scope"],
+        "topology_class": topology["topology_class"],
+        "transport": topology["transport"],
+        "world_size": topology["nodes"] * topology["gpus_per_node"],  # derived here rather than copied
+    }
+
+
+def _backend_class(name: str):  # map a backend name to its adapter class; ProbeError for any other name
+    if name == "deepep":
+        from ep_deepep import DeepEPBackend  # imported inside the branch, so only the selected backend's module is loaded
+        return DeepEPBackend
+    if name == "deepep-v2":
+        from ep_deepep_v2 import DeepEPV2Backend
+        return DeepEPV2Backend
+    if name == "deepep-hybrid":
+        from ep_deepep_hybrid import DeepEPHybridBackend
+        return DeepEPHybridBackend
+    if name == "uccl":
+        from ep_uccl import UCCLBackend
+        return UCCLBackend
+    if name == "mori":
+        from ep_mori import MoRIBackend
+        return MoRIBackend
+    raise ProbeError("unsupported-native-api")  # name matched none of the five branches above
+
+
+def _runtime_args(target: dict[str, Any], topology: dict[str, Any], fingerprint: dict[str, Any]):  # attribute namespace later passed to the backend constructor
+    return SimpleNamespace(
+        backend=target["backend"],
+        eplb=False,
+        experts=256,  # the workload shape (experts/hidden/topk/tokens) is fixed here, not taken from the target
+        gpus_per_node=topology["gpus_per_node"],
+        hidden=7168,
+        mode=target["mode"],
+        num_logical_experts=256,
+        num_sms=24,
+        phase="decode",
+        precision_profile=target["precision_profile"],
+        runner=target["sku"],
+        runtime_fingerprint=fingerprint,
+        scale_out_transport=topology["scale_out_transport"] or "",  # a falsy value (e.g. None) becomes the empty string
+        scale_up_domain=topology["scale_up_domain"],
+        scale_up_transport=topology["scale_up_transport"],
+        scope=topology["scope"],
+        tokens_ladder="8",  # a string, unlike the integer fields around it
+        topk=8,
+        topology_class=topology["topology_class"],
+        transport=topology["transport"],
+    )
+
+
+def _init_distributed(torch_module, dist, backend_name: str, device, rank: int, world_size: int) -> None:
+    """Create the default process group once, with backend-specific arguments; no-op if one already exists."""
+    if not dist.is_initialized():
+        if backend_name == "mori":
+            dist.init_process_group(
+                backend="cpu:gloo,cuda:nccl", rank=rank, world_size=world_size, device_id=device
+            )
+        elif backend_name == "deepep-v2":
+            dist.init_process_group("nccl", device_id=device)
+        else:
+            dist.init_process_group("nccl")
+
+
+def _runtime_context(torch_module, dist, target: dict[str, Any], device, local_rank: int):  # returns (topology, placement, common fingerprint)
+    import run_ep
+
+    world_size = dist.get_world_size()
+    topology = capability.topology_for(target["sku"], target["ep"])
+    if topology is None or world_size != target["ep"]:  # unknown (sku, ep) pair, or launched world differs from the target EP size
+        raise ProbeError("runtime-identity-mismatch")
+    machine = {"x86_64": "amd64", "aarch64": "arm64"}.get(  # rename two known machine names; anything else passes through unchanged
+        platform.machine(), platform.machine()
+    )
+    properties = torch_module.cuda.get_device_properties(device)
+    if torch_module.version.hip:  # a truthy HIP version selects the AMD branch
+        vendor = "amd"
+        arch = str(getattr(properties, "gcnArchName", "")).split(":", 1)[0]  # keep only the text before the first ':'
+    else:
+        vendor = "nvidia"
+        major, minor = torch_module.cuda.get_device_capability(device)
+        arch = f"sm{major}{minor}"
+    fingerprint = run_ep._runtime_fingerprint(
+        torch_module, device, machine=machine, vendor=vendor, arch=arch
+    )
+    issues = capability.runtime_identity_issues(
+        target["sku"], vendor=vendor, arch=arch, machine=machine,
+        device_name=torch_module.cuda.get_device_name(device),
+        device_count=torch_module.cuda.device_count(), world_size=world_size,
+    )
+    records: list[Any] = [None] * world_size
+    dist.all_gather_object(records, (socket.gethostname(), local_rank, fingerprint, issues))  # tuple layout: (host, local_rank, fingerprint, issues)
+    if any(record[3] for record in records):  # a truthy issues value on any rank fails every rank alike
+        raise ProbeError("runtime-identity-mismatch")
+    placement = run_ep._summarize_realized_placement(
+        [(record[0], record[1]) for record in records],  # (host, local_rank) pairs only
+        expected_nodes=topology["nodes"],
+        expected_gpus_per_node=topology["gpus_per_node"],
+        expected_world_size=world_size,
+    )
+    common_fingerprint = run_ep._common_runtime_fingerprint([record[2] for record in records])
+    return topology, placement, common_fingerprint
+
+
+def _local_probe(torch_module, dist, target: dict[str, Any], backend, args, rank: int):
+    """Run one dispatch/combine round trip on this rank and return its evidence dict; ProbeError on any failed check."""
+    import routing
+
+    tokens = 8
+    global_idx, global_weights = routing.build_global_routing(
+        tokens * target["ep"], args.experts, args.topk, "uniform", ep_harness.ROUTING_SEED
+    )
+    local_idx, local_weights = routing.rank_slice(global_idx, global_weights, rank, tokens)
+    x = routing.rank_activations(
+        tokens, args.hidden, ep_harness.ROUTING_SEED, rank,
+        torch_module.device(f"cuda:{int(os.environ.get('LOCAL_RANK', '0'))}"),
+        torch_module.bfloat16,
+    )
+    problem = backend.make_problem(
+        tokens, local_idx.to(x.device), local_weights.to(x.device), x
+    )
+    oracle = ep_harness._run_expert_oracle(
+        torch_module, routing, backend, problem, global_idx, global_weights, rank,
+        args.experts // target["ep"], ep_harness.ROUTING_SEED,
+    )
+    if not oracle["passed"] or not oracle["_precision"]["passed"]:
+        raise ProbeError("precision-contract-mismatch")
+
+    # The verifier stream reads `combined` only after waiting on the caller stream's completion event.
+    caller = torch_module.cuda.Stream(device=x.device)
+    verifier = torch_module.cuda.Stream(device=x.device)
+    completion_event = torch_module.cuda.Event()
+    with torch_module.cuda.stream(caller):
+        handle = backend.dispatch(problem)
+        problem.recv_tokens = backend.recv_tokens(handle)
+        view = (
+            backend.inspect_expert_dispatch(problem, handle)
+            if target["mode"] == "low-latency"
+            else backend.inspect_dispatch(problem, handle)
+        )
+        backend.stage(problem, handle)
+        combined = backend.combine(problem, handle)
+        completion_event.record(caller)
+    with torch_module.cuda.stream(verifier):
+        verifier.wait_event(completion_event)
+        verifier_sentinel = combined.float().abs().sum()
+    verifier.synchronize()
+    completion = {
+        "caller_event_complete": bool(completion_event.query()),
+        "contract": FENCE_CONTRACT,
+        "mode": _completion_mode(target["backend"], target["mode"]),
+        "output_finite": bool(torch_module.isfinite(combined.float()).all().item()),
+        "verifier_stream_complete": bool(torch_module.isfinite(verifier_sentinel).item()),
+    }
+    if not all(
+        completion[field]
+        for field in ("caller_event_complete", "output_finite", "verifier_stream_complete")
+    ):
+        raise ProbeError("completion-fence-failed")
+
+    deferred = getattr(backend, "capture_deferred_provenance", None)
+    if deferred is not None:
+        deferred()
+    # Fall back with `is None`, never `or`: truth-testing a multi-element scale tensor raises RuntimeError.
+    packed = isinstance(problem.dispatch_x, tuple)
+    dispatch_input = problem.dispatch_x[0] if packed else problem.dispatch_x
+    dispatch_input_scales = problem.dispatch_x[1] if packed else getattr(problem, "dispatch_scales", None)
+    if dispatch_input_scales is None and not packed:
+        dispatch_input_scales = getattr(problem, "scales", None)
+    dispatch_axis = backend.communication_precision["dispatch"]
+    combine_axis = backend.communication_precision["combine"]
+    local = {
+        "api": _api_evidence(target["backend"], backend),
+        "completion": completion,
+        "identity": _execution_identity(target["backend"], backend),
+        "precision": {
+            "profile_id": backend.precision_profile_id,
+            "correctness": oracle["_precision"],
+            "dispatch": {
+                "accumulator_dtype": "not-applicable",
+                "accumulator_evidence": "not-applicable",
+                "api_input_dtype": dispatch_axis["api_input_dtype"],
+                "api_output_dtype": dispatch_axis["api_output_dtype"],
+                "communication_format": dispatch_axis["communication_format"],
+                "runtime_input": _local_tensor_summary(torch_module, dispatch_input),
+                "runtime_output": _local_tensor_summary(torch_module, view.encoded_payload),
+                "scale_contract": _scale_contract(
+                    torch_module, dispatch_axis,
+                    view.scales if view.scales is not None else dispatch_input_scales,
+                ),
+                "semantic_output": _local_tensor_summary(torch_module, view.payload),
+            },
+            "combine": {
+                "accumulator_dtype": "fp32",
+                "accumulator_evidence": "pinned-source-image-plus-runtime-oracle",
+                "api_input_dtype": combine_axis["api_input_dtype"],
+                "api_output_dtype": combine_axis["api_output_dtype"],
+                "communication_format": combine_axis["communication_format"],
+                "runtime_input": _local_tensor_summary(torch_module, handle.combine_input),
+                "runtime_output": _local_tensor_summary(torch_module, combined),
+                "scale_contract": _scale_contract(torch_module, combine_axis, None),
+                "semantic_output": _local_tensor_summary(torch_module, combined),
+            },
+        },
+        "transport": _transport_evidence(target["backend"], backend, args),
+    }
+    if local["transport"]["fallback_used"]:
+        raise ProbeError("transport-fallback-detected")
+    return local
+
+
+def _aggregate_local(records: list[dict[str, Any]]) -> dict[str, Any]:  # merge per-rank evidence; any cross-rank disagreement raises ProbeError
+    for field in ("api", "completion", "identity", "transport"):  # these four sections must be identical on every rank
+        values = {_canonical(record[field]) for record in records}
+        if len(values) != 1:
+            raise ProbeError("cross-rank-evidence-mismatch")
+    profile_ids = {record["precision"]["profile_id"] for record in records}
+    if len(profile_ids) != 1:
+        raise ProbeError("cross-rank-evidence-mismatch")
+    precision: dict[str, Any] = {
+        "profile_id": profile_ids.pop(),
+        "correctness": _correctness_aggregate([
+            record["precision"]["correctness"] for record in records
+        ]),
+    }
+    for direction in ("dispatch", "combine"):
+        axes = [record["precision"][direction] for record in records]
+        fixed = (  # fields that must match exactly across ranks
+            "accumulator_dtype", "accumulator_evidence", "api_input_dtype",
+            "api_output_dtype", "communication_format",
+        )
+        result: dict[str, Any] = {}
+        for field in fixed:
+            if len({axis[field] for axis in axes}) != 1:
+                raise ProbeError("cross-rank-evidence-mismatch")
+            result[field] = axes[0][field]
+        for field in ("runtime_input", "runtime_output", "semantic_output"):  # tensor summaries are combined, not compared for equality
+            result[field] = _aggregate_tensor_summaries([axis[field] for axis in axes])
+        result["scale_contract"] = _aggregate_scale_contracts([
+            axis["scale_contract"] for axis in axes
+        ])
+        precision[direction] = result
+    return {
+        "api": records[0]["api"],  # rank 0's copy stands for all ranks; equality was enforced above
+        "completion": records[0]["completion"],
+        "identity": records[0]["identity"],
+        "precision": precision,
+        "transport": records[0]["transport"],
+    }
+
+
+def _finalize(backend, dist) -> None:
+    """Call backend.finalize(0) when a backend exists; otherwise destroy any initialized process group."""
+    if backend is not None:
+        backend.finalize(0)
+    elif dist.is_initialized():
+        dist.destroy_process_group()
+
+
+def run_target(target: dict[str, Any], output: Path) -> int:  # probe one target across all ranks; rank 0 writes the manifest; returns 0
+    try:
+        import torch
+        import torch.distributed as dist
+    except Exception as exc:  # pragma: no cover - diagnostic runtime requirement
+        raise ProbeError("runtime-identity-mismatch") from exc
+    rank = int(os.environ.get("RANK", "0"))
+    world_size = int(os.environ.get("WORLD_SIZE", "1"))
+    local_rank = int(os.environ.get("LOCAL_RANK", "0"))
+    os.environ.setdefault("MASTER_ADDR", "localhost")  # rendezvous defaults apply only when the launcher set none
+    os.environ.setdefault("MASTER_PORT", "12355")
+    torch.cuda.set_device(local_rank)
+    device = torch.device(f"cuda:{local_rank}")
+    _init_distributed(torch, dist, target["backend"], device, rank, world_size)
+    backend = None
+    topology = capability.topology_for(target["sku"], target["ep"])
+    topology_record = _topology_record(topology, False) if topology is not None else {  # placeholder record for an unknown topology
+        "gpus_per_node": 1, "nodes": target["ep"], "placement_valid": False,
+        "scale_up_domain": 1, "scale_up_transport": "unknown",
+        "scale_out_transport": None, "scope": "scale-out",
+        "topology_class": "unknown", "transport": "unknown", "world_size": target["ep"],
+    }
+    try:
+        topology, placement, fingerprint = _runtime_context(
+            torch, dist, target, device, local_rank
+        )
+        topology_record = _topology_record(topology, bool(placement["valid"]))  # replace the provisional record with the realized placement verdict
+        args = _runtime_args(target, topology, fingerprint)
+        try:
+            backend = _backend_class(target["backend"])(
+                args, rank, world_size, local_rank, device
+            )
+            construction = {"ok": True}
+        except Exception:  # recorded rather than raised, so this rank still joins the gather below
+            construction = {"ok": False, "reason": "backend-construction-failed"}
+        gathered: list[Any] = [None] * world_size
+        dist.all_gather_object(gathered, construction)
+        if not all(record.get("ok") is True for record in gathered):  # one failed rank makes the whole target unsupported
+            manifest = build_manifest(
+                target=target, topology=topology_record, disposition="unsupported",
+                reason="backend-construction-failed", runtime_executed=True, evidence=None,
+            )
+        else:
+            try:
+                local = {"ok": True, "evidence": _local_probe(
+                    torch, dist, target, backend, args, rank
+                )}
+            except ProbeError as exc:
+                local = {"ok": False, "reason": exc.reason}
+            except Exception:  # any non-ProbeError failure is reported under one generic reason
+                local = {"ok": False, "reason": "native-operation-failed"}
+            gathered = [None] * world_size
+            dist.all_gather_object(gathered, local)
+            if not all(record.get("ok") is True for record in gathered):
+                reasons = {record.get("reason") for record in gathered}  # successful ranks contribute None to this set
+                reason = reasons.pop() if len(reasons) == 1 else "cross-rank-evidence-mismatch"  # so a partial failure reports a mismatch, not the failing rank's reason
+                manifest = build_manifest(
+                    target=target, topology=topology_record, disposition="unsupported",
+                    reason=reason, runtime_executed=True, evidence=None,
+                )
+            else:
+                evidence = _aggregate_local([record["evidence"] for record in gathered])
+                manifest = build_manifest(
+                    target=target, topology=topology_record, disposition="supported",
+                    reason=SUPPORTED_REASON, runtime_executed=True, evidence=evidence,
+                )
+    except ProbeError as exc:  # raised by _runtime_context or _aggregate_local; other exception types propagate uncaught
+        manifest = build_manifest(
+            target=target, topology=topology_record, disposition="unsupported",
+            reason=exc.reason, runtime_executed=False, evidence=None,
+        )
+    if rank == 0:  # only rank 0 writes and prints the manifest
+        _write_atomic(output, manifest)
+        print(json.dumps(manifest, allow_nan=False, sort_keys=True, separators=(",", ":")))
+    dist.barrier()  # every rank waits here before teardown
+    _finalize(backend, dist)
+    return 0
+
+
+def main() -> int:  # CLI entry: the first matching utility flag wins; with none set, run one probe cell
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("--list-targets", action="store_true")
+    parser.add_argument("--workflow-plan", action="store_true")
+    parser.add_argument("--extract-from", type=Path)
+    parser.add_argument("--probe-id")
+    parser.add_argument("--validate-control", type=Path)
+    parser.add_argument("--validate-manifest", type=Path, nargs="+")
+    parser.add_argument("--validate-bundle", type=Path)
+    parser.add_argument("--backend", choices=sorted(BACKENDS | {"all"}))
+    parser.add_argument("--sku")
+    parser.add_argument("--only-sku", default="")
+    parser.add_argument("--expect-sku")
+    parser.add_argument("--expect-backend")
+    parser.add_argument("--expect-nodes", type=int)
+    parser.add_argument("--ep", type=int)
+    parser.add_argument("--mode", choices=("normal", "low-latency"))
+    parser.add_argument("--precision-profile")
+    parser.add_argument("--out", type=Path)
+    args = parser.parse_args()
+    if args.list_targets:  # print the provisional target list as compact JSON
+        print(json.dumps(provisional_targets(), allow_nan=False, sort_keys=True, separators=(",", ":")))
+        return 0
+    if args.workflow_plan:  # plan goes to stdout unless --out is given
+        plan = workflow_plan(backend=args.backend or "all", only_sku=args.only_sku)
+        if args.out is None:
+            print(json.dumps(plan, allow_nan=False, sort_keys=True, separators=(",", ":")))
+        else:
+            _write_atomic(args.out, plan)
+        return 0
+    if args.extract_from is not None:
+        if None in (args.probe_id, args.expect_sku, args.expect_backend, args.expect_nodes, args.out):
+            parser.error("probe extraction requires ID, expected placement, and --out")  # parser.error exits the process
+        control = extract_control(
+            json.loads(args.extract_from.read_text()), probe_id=args.probe_id,
+            sku=args.expect_sku, backend=args.expect_backend, nodes=args.expect_nodes,
+        )
+        _write_atomic(args.out, control)
+        return 0
+    if args.validate_control is not None:
+        if None in (args.expect_sku, args.expect_backend, args.expect_nodes):
+            parser.error("control validation requires expected placement")
+        validate_control(
+            json.loads(args.validate_control.read_text()), sku=args.expect_sku,
+            backend=args.expect_backend, nodes=args.expect_nodes,
+        )
+        return 0
+    if args.validate_bundle is not None:  # checked before the bare --validate-manifest branch because it also consumes those paths
+        if not args.validate_manifest:
+            parser.error("bundle validation requires manifest paths")
+        validate_bundle(
+            json.loads(args.validate_bundle.read_text()),
+            [json.loads(path.read_text()) for path in args.validate_manifest],
+        )
+        return 0
+    if args.validate_manifest is not None:  # manifests on their own: validate each file independently
+        for path in args.validate_manifest:
+            validate_manifest(json.loads(path.read_text()))
+        return 0
+    if any(  # probe mode needs every cell coordinate plus --out
+        value is None
+        for value in (args.backend, args.sku, args.ep, args.mode, args.precision_profile, args.out)
+    ):
+        parser.error("one exact --backend/--sku/--ep/--mode/--precision-profile/--out cell is required")
+    try:
+        target = select_target(
+            backend=args.backend, sku=args.sku, ep=args.ep, mode=args.mode,
+            precision_profile=args.precision_profile,
+        )
+        return run_target(target, args.out)
+    except ProbeError as exc:  # reported as a usage error via parser.error, which exits
+        parser.error(exc.reason)
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())  # main()'s return value becomes the process exit status
diff --git a/experimental/CollectiveX/tests/routing.py b/experimental/CollectiveX/tests/routing.py
new file mode 100644
index 000000000..ee4eb12a8
--- /dev/null
+++ b/experimental/CollectiveX/tests/routing.py
@@ -0,0 +1,220 @@
+#!/usr/bin/env python3
+"""CollectiveX — deterministic, platform-independent MoE routing trace.
+
+Fair-comparison fix #1: routing (per-token expert IDs + gate weights) is generated
+ONCE from a fixed seed over the *global* token batch, indexed by global token id, and
+is identical on every SKU for the same (seed, routing, global_tokens, experts, top-k).
+Each rank materializes its slice `[rank*T,(rank+1)*T)`. Activations
+are per-rank (same rank ⇒ same x on any platform), so a given global token id has
+identical activation everywhere without materializing a global activation tensor.
+
+The v1 suite keeps two routing distributions:
+
+  * uniform   — top-k distinct experts drawn uniformly per token. The DEFAULT.
+                Expected fan-out for top-k=8, 256 experts, EP8 (32 experts/rank) ≈
+                8·(1 − C(224,8)/C(256,8)) ≈ 5.3 ranks/token. Load ~ Poisson.
+  * zipf      — expert popularity proportional to 1/(popularity rank), producing expert/rank load skew.
+
+Always publish the realized fan-out so the workload is never misread again
+(`routing_stats`).
+"""
+from __future__ import annotations
+
+import hashlib
+
+import torch
+
+ACTIVATION_GENERATOR = "collectivex-activation-counter-v4"
+SOURCE_ID_BITS = 32  # leading activation columns carrying the source-token ID, one sign-encoded bit per column
+SOURCE_CHECKSUM_BITS = 16  # columns right after the ID that carry its checksum, encoded the same way
+SOURCE_ID_COLUMNS = SOURCE_ID_BITS + SOURCE_CHECKSUM_BITS  # total width of the identity prefix; hidden must be at least this
+SOURCE_ID_CONTRACT = "bounded-sign-bit-source-v1"
+
+
+def build_global_routing(
+    global_tokens: int,
+    experts: int,
+    topk: int,
+    routing: str,
+    seed: int,
+    *,
+    token_offset: int = 0,
+):
+    """Return one byte-stable counter-generated routing window on CPU."""
+    import workload  # imported at call time; the rows themselves come from workload.canonical_routing_rows
+
+    indices, weights = workload.canonical_routing_rows(
+        int(global_tokens),
+        int(experts),
+        int(topk),
+        routing,
+        int(seed),
+        token_offset=token_offset,  # forwarded as given, unlike the int()-coerced arguments above
+    )
+    return (
+        torch.tensor(indices, dtype=torch.int64),  # expert IDs
+        torch.tensor(weights, dtype=torch.float32),  # gate weights
+    )
+
+
+def rank_slice(idx, weights, rank: int, tokens_per_rank: int):
+    window = slice(rank * tokens_per_rank, rank * tokens_per_rank + tokens_per_rank)  # this rank's contiguous row range
+    return idx[window].contiguous(), weights[window].contiguous()
+
+
+def rank_activations(tokens: int, hidden: int, seed: int, rank: int, device,
+                     dtype=torch.bfloat16):
+    """Build this rank's activations for global source IDs [rank*tokens, (rank+1)*tokens)."""
+    first_id = rank * tokens
+    return activations_for_source_ids(torch.arange(first_id, first_id + tokens, device=device, dtype=torch.int64), hidden, seed, dtype)
+
+
+def activations_for_source_ids(source, hidden: int, seed: int, dtype=torch.bfloat16):
+    """Materialize canonical activations for arbitrary global source-token IDs."""
+    if hidden < SOURCE_ID_COLUMNS:  # the identity prefix needs SOURCE_ID_COLUMNS columns
+        raise ValueError(f"hidden must be at least {SOURCE_ID_COLUMNS}")
+    source = source.to(torch.int64)
+    column = torch.arange(hidden, device=source.device, dtype=torch.int64)
+    values = (source[:, None] * 131 + column[None, :] * 17 + int(seed) * 19) % 257 - 128  # integer counter per (token, column)
+    output = values.to(dtype).mul_(1 / 64)  # scaled in place by 1/64
+    if bool((source < 0).any().item()) or bool((source >= (1 << SOURCE_ID_BITS)).any().item()):  # IDs must fit in SOURCE_ID_BITS unsigned bits
+        raise ValueError("source token ID is outside the bounded identity contract")
+    source_columns = torch.arange(SOURCE_ID_BITS, device=source.device, dtype=torch.int64)
+    source_bits = ((source[:, None] >> source_columns[None, :]) & 1) * 2 - 1  # bit j of the ID -> +1 / -1, least-significant bit first
+    checksum = (source * 0x9E37 + int(seed) * 0xA24B) & ((1 << SOURCE_CHECKSUM_BITS) - 1)  # same formula is recomputed by decode_source_ids
+    checksum_columns = torch.arange(
+        SOURCE_CHECKSUM_BITS, device=source.device, dtype=torch.int64
+    )
+    checksum_bits = ((checksum[:, None] >> checksum_columns[None, :]) & 1) * 2 - 1
+    # Magnitude one sits inside the ordinary [-2, 2] activation range, so the identity cannot set
+    # an FP8 block scale. Decode depends only on sign and remains stable after dequantization.
+    output[:, :SOURCE_ID_BITS] = source_bits.to(dtype)  # overwrite the first columns with the ID bits
+    output[:, SOURCE_ID_BITS:SOURCE_ID_COLUMNS] = checksum_bits.to(dtype)  # then the checksum bits
+    return output
+
+
+def decode_source_ids(payload, seed: int):
+    """Decode and validate source IDs carried by rank_activations."""
+    if payload.ndim != 2 or payload.shape[1] < SOURCE_ID_COLUMNS:  # must be 2-D and wide enough for the prefix
+        raise ValueError("received payload cannot carry the source-token prefix")
+    prefix = payload[:, :SOURCE_ID_COLUMNS].float()
+    if not bool(torch.isfinite(prefix).all().item()) or bool((prefix.abs() < 0.25).any().item()):  # reject non-finite or near-zero prefix values
+        raise ValueError("received source-token prefix is not quantization-stable")
+    bits = prefix >= 0  # only the sign carries information: non-negative -> bit 1
+    powers = 1 << torch.arange(SOURCE_ID_BITS, device=payload.device, dtype=torch.int64)
+    source = (bits[:, :SOURCE_ID_BITS].to(torch.int64) * powers).sum(dim=1)  # reassemble the ID, least-significant bit first
+    checksum_powers = 1 << torch.arange(
+        SOURCE_CHECKSUM_BITS, device=payload.device, dtype=torch.int64
+    )
+    observed_checksum = (
+        bits[:, SOURCE_ID_BITS:SOURCE_ID_COLUMNS].to(torch.int64) * checksum_powers
+    ).sum(dim=1)
+    checksum = (source * 0x9E37 + int(seed) * 0xA24B) & (  # recomputed from the decoded ID with the encoder's formula
+        (1 << SOURCE_CHECKSUM_BITS) - 1
+    )
+    if not torch.equal(checksum, observed_checksum):  # any single mismatching row fails the whole payload
+        raise ValueError("received source-token checksum differs")
+    return source
+
+
+def routing_locality(idx, experts_per_rank: int, ep_size: int, tokens_per_rank: int,
+                     gpus_per_node: int, scale_up_domain: int = None) -> dict:
+    """Locality of rank-deduplicated payload copies under packed placement."""
+    import torch as _t
+    gt = idx.shape[0]
+    assignments = (idx // experts_per_rank).clamp(max=ep_size - 1)  # destination rank per assignment, capped at the last rank
+    destinations = _t.zeros((gt, ep_size), dtype=_t.bool)  # allocated on the default device; assumes idx lives there too — TODO confirm
+    destinations.scatter_(1, assignments, True)  # collapses several experts on one rank into a single copy
+    token, dest = destinations.nonzero(as_tuple=True)
+    src = (token // max(1, tokens_per_rank)).clamp(max=ep_size - 1)  # source rank implied by the global token index
+    sud = scale_up_domain or (gpus_per_node * ep_size)                  # default: all one domain
+    phys = _t.arange(ep_size, dtype=_t.int64)  # identity mapping, so pd/ps below equal dest/src
+    pd, ps = phys[dest], phys[src]
+    local = (dest == src)
+    same_node = (pd // gpus_per_node) == (ps // gpus_per_node)
+    same_dom = (pd // sud) == (ps // sud)
+    n = dest.numel()
+    return {  # NOTE(review): with zero copies every mean below is NaN
+        "placement": "packed",
+        "local_rank_fraction": float(local.float().mean()),
+        "same_node_fraction": float(same_node.float().mean()),
+        "same_scaleup_domain_fraction": float(same_dom.float().mean()),
+        "cross_node_fraction": float((~same_node).float().mean()),
+        "cross_domain_fraction": float((~same_dom).float().mean()),
+        "gpus_per_node": gpus_per_node, "scale_up_domain": sud, "copies": int(n),
+    }
+
+
+def routing_stats(idx, experts: int, experts_per_rank: int, weights=None) -> dict:
+    """Realized routing properties for the GLOBAL trace — published per point so the
+    fan-out / load can never be silently misread. idx is the global [gt, topk] tensor;
+    weights the matching [gt, topk] gate weights (hashed too for workload identity).
+    """
+    ep = max(1, experts // max(1, experts_per_rank))  # rank count derived from the expert counts, floored at 1
+    ranks = (idx // experts_per_rank)                       # [gt, topk] destination rank per assignment
+    # unique destination ranks per token (fan-out)
+    onehot = torch.zeros(idx.shape[0], ep, dtype=torch.bool)
+    onehot.scatter_(1, ranks.clamp(max=ep - 1), True)
+    fanout = onehot.sum(dim=1)                              # [gt]
+    hist = torch.bincount(fanout, minlength=ep + 1)[1:ep + 1].tolist()  # counts for fan-out 1..ep
+    load = torch.bincount(idx.reshape(-1), minlength=experts).float()  # assignments received by each expert
+    # Keep expert assignments (compute load) separate from rank-deduplicated payload copies
+    # (network load). Conflating them overstates traffic when two experts share a rank.
+    assignment_load = torch.bincount(
+        ranks.reshape(-1).clamp(max=ep - 1), minlength=ep
+    ).float()
+    payload_load = onehot.sum(dim=0).float()
+    # One-number imbalance summaries so a row is self-describing for the distribution-sensitivity
+    # suite (no need to read the full histograms): CV = std/mean of the load; hotspot_ratio =
+    # worst expert load over the mean. Zipf should be more concentrated than uniform.
+    def _cv(t):
+        m = float(t.mean())
+        return float(t.std(unbiased=False) / m) if m > 0 else 0.0  # population std; 0.0 when the mean is not positive
+    expert_load_cv = _cv(load)
+    assignment_rank_cv = _cv(assignment_load)
+    payload_rank_cv = _cv(payload_load)
+    hotspot_ratio = float(load.max() / load.mean()) if float(load.mean()) > 0 else 0.0
+    # Empty experts capture compute skew; empty destination ranks capture network skew.
+    empty_expert_count = int((load == 0).sum())
+    empty_rank_count = int((payload_load == 0).sum())
+    # SHA-256 workload identity over both topk_idx and gate weights: a chart
+    # point's routing is provably identical across SKUs only if both hashes match.
+    idx_bytes = idx.to(torch.int32).cpu().numpy().tobytes()  # hashed as int32, whatever dtype idx arrived in
+    idx_hash = hashlib.sha256(idx_bytes).hexdigest()
+    if weights is not None:
+        w_bytes = weights.to(torch.float32).cpu().numpy().tobytes()
+        w_hash = hashlib.sha256(w_bytes).hexdigest()
+        routing_hash = hashlib.sha256(idx_bytes + w_bytes).hexdigest()
+    else:
+        w_hash, routing_hash = None, idx_hash  # without weights the routing hash is just the index hash
+    return {
+        "fanout_mean": float(fanout.float().mean()),
+        "fanout_min": int(fanout.min()), "fanout_max": int(fanout.max()),
+        "fanout_hist": hist,                               # index k-1 = #tokens with fan-out k
+        "expert_assignments_per_rank": [int(x) for x in assignment_load.tolist()],
+        "payload_copies_per_rank": [int(x) for x in payload_load.tolist()],
+        "routed_copies": int(fanout.sum()),                # total (token, dest-rank) pairs
+        "expert_load_min": int(load.min()), "expert_load_max": int(load.max()),
+        "expert_load_mean": float(load.mean()), "expert_load_cv": expert_load_cv,
+        "expert_assignment_rank_cv": assignment_rank_cv,
+        "payload_rank_cv": payload_rank_cv, "hotspot_ratio": hotspot_ratio,
+        "empty_expert_count": empty_expert_count, "empty_rank_count": empty_rank_count,
+        "routing_hash": routing_hash, "idx_hash": idx_hash, "weights_hash": w_hash,
+    }
+
+
+# --------------------------------------------------------------------------- self-test
+if __name__ == "__main__":
+    import sys
+    E, TOPK, EPR, GT = 256, 8, 32, 4096  # experts, top-k, experts per rank, global tokens
+    ui, _ = build_global_routing(GT, E, TOPK, "uniform", 67)
+    zi, _ = build_global_routing(GT, E, TOPK, "zipf", 67)
+    assert all(len(set(row.tolist())) == TOPK for row in ui[:16])  # first 16 uniform rows pick distinct experts
+    uniform, zipf = routing_stats(ui, E, EPR), routing_stats(zi, E, EPR)
+    assert uniform["hotspot_ratio"] < zipf["hotspot_ratio"]  # zipf must be the more concentrated distribution
+    dev = torch.device("cpu")
+    first = rank_activations(8, 256, 67, 0, dev, dtype=torch.float32)
+    second = rank_activations(8, 256, 67, 0, dev, dtype=torch.float32)
+    assert torch.equal(first, second) and torch.isfinite(first).all()  # same arguments give identical, finite activations
+    print("routing self-test: PASS")
+    sys.exit(0)
diff --git a/experimental/CollectiveX/tests/run_ep.py b/experimental/CollectiveX/tests/run_ep.py
new file mode 100644
index 000000000..7f3ca79d0
--- /dev/null
+++ b/experimental/CollectiveX/tests/run_ep.py
@@ -0,0 +1,529 @@
+#!/usr/bin/env python3
+"""CollectiveX v1 EP benchmark entrypoint for torchrun or rank environments."""
+
+from __future__ import annotations
+
+import argparse
+import ctypes
+import hashlib
+import hmac
+import json
+import os
+import platform
+import re
+import shlex
+import socket
+import subprocess
+import sys
+
+# Make the sibling tests/ modules importable when run as `tests/run_ep.py` under
+# torchrun (it executes the file as __main__, not as a package).
+HERE = os.path.dirname(os.path.abspath(__file__))
+sys.path[:0] = [HERE, os.path.dirname(HERE)]
+
+import ep_harness  # noqa: E402  (stdlib-only; safe before torch)
+import identity  # noqa: E402
+
+
+ALLOCATION_STRATUM_CONTRACT = "collectivex-allocation-stratum-v1"  # "contract" field of the HMAC payload
+PRIVATE_FABRIC_ENV = {  # stratum selector field -> environment variable it is read from
+    "ib_gid_index": "CX_IB_GID_INDEX",
+    "rdma_devices": "CX_RDMA_DEVICES",
+    "rdma_service_level": "CX_RDMA_SERVICE_LEVEL",
+    "socket_ifname": "CX_SOCKET_IFNAME",
+}
+
+
+def _numeric_version(command: list[str]) -> str | None:
+    """Run ``command`` and return the first dotted version in its stdout, else None."""
+    try:
+        completed = subprocess.run(
+            command, capture_output=True, check=False, text=True, timeout=10
+        )
+    except (OSError, subprocess.TimeoutExpired):
+        return None
+    dotted = r"\b[0-9]+(?:\.[0-9]+){1,3}\b"
+    hit = None if completed.returncode != 0 else re.search(dotted, completed.stdout)
+    return hit.group(0) if hit else None
+
+
+def _loaded_collective_version() -> str | None:
+    """Read the version of the one NCCL/RCCL library mapped into this process."""
+    try:
+        libraries: set[str] = set()
+        with open("/proc/self/maps", encoding="utf-8") as maps:
+            for entry in maps:
+                if "libnccl.so" not in entry and "librccl.so" not in entry:
+                    continue
+                candidate = entry.rstrip().split()[-1]
+                if os.path.isfile(candidate):
+                    libraries.add(os.path.realpath(candidate))
+        if len(libraries) != 1:  # nothing mapped, or more than one distinct file
+            return None
+        raw = ctypes.c_int()
+        status = ctypes.CDLL(libraries.pop()).ncclGetVersion(ctypes.byref(raw))
+        return None if status != 0 else ep_harness.format_collective_version(raw.value)
+    except (AttributeError, OSError):
+        return None
+
+
+def _runtime_fingerprint(
+    torch, device, *, machine: str, vendor: str, arch: str
+) -> dict:
+    """Describe this rank's runtime with no hosts, addresses, UUIDs, or paths.
+
+    Any vendor other than ``"nvidia"`` is probed with the ROCm/HIP/RCCL names.
+    """
+    props = torch.cuda.get_device_properties(device)
+    on_nvidia = vendor == "nvidia"
+    if on_nvidia:
+        query = ["nvidia-smi", "--query-gpu=driver_version", "--format=csv,noheader"]
+    else:
+        query = ["rocm-smi", "--showdriverversion"]
+    # Only the numeric driver version is kept from the tool output.
+    driver = _numeric_version(query)
+    runtime = {
+        "kind": "cuda" if on_nvidia else "hip",
+        "version": torch.version.cuda if on_nvidia else torch.version.hip,
+    }
+    collective = {
+        "kind": "nccl" if on_nvidia else "rccl",
+        "version": _loaded_collective_version(),
+    }
+    device_facts = {
+        "arch": arch,
+        "compute_units": int(props.multi_processor_count),
+        "memory_bytes": int(props.total_memory),
+        "product": torch.cuda.get_device_name(device),
+        "warp_size": int(props.warp_size),
+    }
+    return {
+        "accelerator_runtime": runtime,
+        "collective_library": collective,
+        "device": device_facts,
+        "driver_version": driver,
+        "framework": {"kind": "torch", "version": str(torch.__version__)},
+        "machine": machine,
+        "python_version": platform.python_version(),
+        "vendor": vendor,
+    }
+
+
+def _summarize_realized_placement(
+    records: list[tuple[str, int]],
+    *,
+    expected_nodes: int,
+    expected_gpus_per_node: int,
+    expected_world_size: int,
+) -> dict:
+    """Validate gathered (host, local rank) pairs and return host-free aggregates."""
+    if expected_nodes < 1 or expected_gpus_per_node < 1:
+        raise ValueError("requested placement dimensions must be positive")
+    if expected_world_size != expected_nodes * expected_gpus_per_node:
+        raise ValueError("requested nodes x GPUs per node differs from world size")
+    if expected_world_size != len(records):
+        raise ValueError("realized rank count differs from world size")
+
+    slots_by_host: dict[str, list[int]] = {}
+    for hostname, slot in records:
+        if not (isinstance(hostname, str) and hostname and type(slot) is int):
+            raise ValueError("realized placement record has invalid types")
+        slots_by_host.setdefault(hostname, []).append(slot)
+
+    per_host_counts = sorted(map(len, slots_by_host.values()))
+    full_node = list(range(expected_gpus_per_node))
+    every_slot_present = all(
+        sorted(slots) == full_node for slots in slots_by_host.values()
+    )
+    no_repeated_pair = len(set(records)) == len(records)
+    if len(slots_by_host) != expected_nodes:
+        raise ValueError(
+            f"realized node count {len(slots_by_host)} differs from requested {expected_nodes}"
+        )
+    if per_host_counts != [expected_gpus_per_node] * expected_nodes:
+        raise ValueError("realized ranks per node differ from requested GPUs per node")
+    if not (every_slot_present and no_repeated_pair):
+        raise ValueError("realized local ranks are incomplete or duplicated")
+    return {
+        "gpus_per_node": expected_gpus_per_node,
+        "nodes": expected_nodes,
+        "ranks_per_node": expected_gpus_per_node,
+        "unique_local_ranks": True,
+        "valid": True,
+    }
+
+
+def _common_runtime_fingerprint(records: list[dict]) -> dict:
+    """Return the fingerprint every rank reported; raise ValueError if they differ."""
+    if not records:
+        raise ValueError("runtime fingerprint evidence is empty")
+
+    def canonical(item: dict) -> str:
+        return json.dumps(item, allow_nan=False, sort_keys=True, separators=(",", ":"))
+
+    if len({canonical(item) for item in records}) != 1:
+        raise ValueError("runtime fingerprint differs across distributed ranks")
+    return records[0]
+
+
+def _allocation_stratum_sha256(
+    physical_hosts: list[str],
+    *,
+    audit_salt: str | None,
+    fabric_selectors: dict[str, str | None],
+    required: bool,
+) -> str | None:
+    """Return a salted HMAC-SHA256 over the allocation's hosts and fabric selectors.
+
+    Only the hex digest leaves this function. A missing salt yields None unless
+    ``required`` is set, in which case it is an error like any other bad input.
+    """
+    def is_clean_text(text: object, limit: int) -> bool:
+        return (
+            isinstance(text, str)
+            and 0 < len(text) <= limit
+            and not any(ord(char) < 32 or ord(char) == 127 for char in text)
+        )
+
+    if audit_salt in (None, ""):
+        if not required:
+            return None
+        raise ValueError("canonical execution requires a private allocation audit salt")
+    if not (isinstance(audit_salt, str) and re.fullmatch(r"[0-9a-f]{64}", audit_salt)):
+        raise ValueError("allocation audit salt is invalid")
+    if set(fabric_selectors) != set(PRIVATE_FABRIC_ENV):
+        raise ValueError("private fabric selector set differs from the stratum contract")
+    if any(
+        selector is not None and not is_clean_text(selector, 512)
+        for selector in fabric_selectors.values()
+    ):
+        raise ValueError("private fabric selector is invalid")
+    if not physical_hosts or not all(is_clean_text(host, 255) for host in physical_hosts):
+        raise ValueError("physical allocation host evidence is invalid")
+    committed = {
+        "contract": ALLOCATION_STRATUM_CONTRACT,
+        "fabric_selectors": fabric_selectors,
+        "physical_hosts": sorted(set(physical_hosts)),
+    }
+    message = json.dumps(
+        committed,
+        allow_nan=False, ensure_ascii=False, separators=(",", ":"), sort_keys=True,
+    ).encode("utf-8")
+    return hmac.new(bytes.fromhex(audit_salt), message, hashlib.sha256).hexdigest()
+
+
+def _common_allocation_stratum(
+    records: list[str | None], *, required: bool
+) -> str | None:
+    """Return the one stratum value every rank reported, or raise ValueError."""
+    def acceptable(entry: object) -> bool:
+        return entry is None or (
+            isinstance(entry, str) and re.fullmatch(r"[0-9a-f]{64}", entry) is not None
+        )
+
+    if not records or not all(acceptable(entry) for entry in records):
+        raise ValueError("allocation stratum evidence is invalid")
+    if len(set(records)) != 1:
+        raise ValueError("allocation stratum differs across distributed ranks")
+    shared = records[0]
+    if shared is None and required:
+        raise ValueError("canonical execution requires an allocation stratum")
+    return shared
+
+
+def main() -> int:
+    ap = argparse.ArgumentParser(description="CollectiveX EP dispatch/combine sweep")
+    ap.add_argument(
+        "--backend",
+        required=True,
+        choices=[
+            "deepep",
+            "deepep-v2",
+            "deepep-hybrid",
+            "mori",
+            "uccl",
+            "nccl-ep",
+        ],
+    )
+    ep_harness.add_common_args(ap)
+    args = ap.parse_args()
+
+    if args.mode == ep_harness.LOW_LATENCY_MODE:
+        if args.backend not in {"deepep", "uccl"}:
+            print(
+                "ERROR: low-latency mode is supported only by deepep and uccl",
+                file=sys.stderr,
+            )
+            return 2  # exit 2: arguments rejected before torch is imported
+        if args.phase != "decode":
+            print("ERROR: low-latency mode requires --phase decode", file=sys.stderr)
+            return 2
+    if args.case_id and not identity.is_typed_id(args.case_id, "case"):
+        print(f"ERROR: invalid native case ID {args.case_id!r}", file=sys.stderr)
+        return 2
+    if args.case_id and args.seed != ep_harness.ROUTING_SEED:
+        print(
+            f"ERROR: scheduled v1 cases require seed={ep_harness.ROUTING_SEED}; got {args.seed}",
+            file=sys.stderr,
+        )
+        return 2
+    if args.qualification_index not in range(1, ep_harness.QUALIFICATION_RUNS + 1):
+        print(
+            f"ERROR: qualification index must be in 1..{ep_harness.QUALIFICATION_RUNS}",
+            file=sys.stderr,
+        )
+        return 2
+
+    sampling_error = ep_harness.sampling_contract_error(
+        args.iters, args.trials, args.warmup
+    )
+    if sampling_error:
+        print(f"ERROR: {sampling_error}", file=sys.stderr)
+        return 2
+
+    try:
+        import torch
+        import torch.distributed as dist
+    except Exception as exc:  # pragma: no cover
+        print(f"ERROR: torch unavailable: {exc!r}", file=sys.stderr)
+        return 3  # exit 3: torch import failed
+
+    rank = int(os.environ.get("RANK", "0"))
+    world_size = int(os.environ.get("WORLD_SIZE", "1"))
+    local_rank = int(os.environ.get("LOCAL_RANK", "0"))
+    torch.cuda.set_device(local_rank)
+    device = torch.device(f"cuda:{local_rank}")
+    os.environ.setdefault("MASTER_ADDR", "localhost")  # only fills in what the launcher left unset
+    os.environ.setdefault("MASTER_PORT", "12355")
+
+    import capability
+
+    sku = capability.PLATFORMS.get(args.runner)
+    if sku is None:
+        print(f"ERROR: unknown runner identity {args.runner!r}", file=sys.stderr)
+        return 5  # exit 5: runner, topology, or capability mismatch
+    machine = {"x86_64": "amd64", "aarch64": "arm64"}.get(
+        platform.machine(), platform.machine()
+    )
+    props = torch.cuda.get_device_properties(device)
+    if torch.version.hip:
+        vendor = "amd"
+        accelerator = str(getattr(props, "gcnArchName", "")).split(":", 1)[0]
+    else:
+        vendor = "nvidia"
+        major, minor = torch.cuda.get_device_capability(device)
+        accelerator = f"sm{major}{minor}"
+    device_name = torch.cuda.get_device_name(device)
+    device_count = torch.cuda.device_count()
+    identity_issues = capability.runtime_identity_issues(
+        args.runner,
+        vendor=vendor,
+        arch=accelerator,
+        machine=machine,
+        device_name=device_name,
+        device_count=device_count,
+        world_size=world_size,
+    )
+    if identity_issues:
+        print(
+            f"ERROR: runtime identity does not match {args.runner}: "
+            + "; ".join(identity_issues),
+            file=sys.stderr,
+        )
+        return 5
+    observed_gpus_per_node = args.gpus_per_node or device_count  # falsy value falls back to device_count
+    if observed_gpus_per_node != sku["gpus_per_node"]:
+        print(
+            f"ERROR: {args.runner} requires {sku['gpus_per_node']} GPUs per node",
+            file=sys.stderr,
+        )
+        return 5
+    if world_size % observed_gpus_per_node:
+        print("ERROR: distributed world is not divisible by GPUs per node", file=sys.stderr)
+        return 5
+    observed_nodes = world_size // observed_gpus_per_node
+    topology = capability.topology_for(args.runner, world_size)
+    observed_topology = {
+        "nodes": observed_nodes,
+        "gpus_per_node": observed_gpus_per_node,
+        "scale_up_domain": args.scale_up_domain or observed_gpus_per_node,
+        "scope": args.scope,
+        "scale_up_transport": args.scale_up_transport,
+        "scale_out_transport": args.scale_out_transport or None,
+        "transport": args.transport,
+        "topology_class": args.topology_class,
+    }
+    if topology is None or any(
+        observed_topology[field] != topology[field] for field in observed_topology
+    ):
+        print(
+            f"ERROR: runtime topology does not match {args.runner} EP{world_size}",
+            file=sys.stderr,
+        )
+        return 5
+    schedulable, reason = capability.resolve(
+        args.runner,
+        args.backend,
+        ep=world_size,
+        nodes=observed_nodes,
+        routing=args.routing,
+        eplb=args.eplb,
+        mode=args.mode,
+    )
+    if not schedulable:
+        print(f"ERROR: scheduled case is unsupported: {reason}", file=sys.stderr)
+        return 5
+    args.runtime_device_product = device_name
+    args.runtime_device_count = device_count
+    args.allocation_execution_id = os.environ.get("COLLECTIVEX_EXECUTION_ID")
+
+    # EPLB bumps the expert count to PHYSICAL (logical + redundant) BEFORE backend construction
+    # so the backend sizes its buffers for the replicated set; ep_harness builds the LOGICAL
+    # routing trace and remaps it to the balanced physical placement (a pure routing transform,
+    # tests/eplb.py — no adapter change). Deterministic, so every rank agrees on the count.
+    if getattr(args, "eplb", False):
+        import eplb
+
+        args.num_logical_experts = args.experts
+        args.experts = eplb.physical_count(
+            args.experts, ep_harness.EPLB_REDUNDANT_EXPERTS, world_size
+        )
+
+    # Reproduction provenance (recorded in the artifact). Rack launchers provide ranks directly
+    # through srun, while single-node launchers use torchrun; do not claim torchrun for both.
+    if os.environ.get("TORCHELASTIC_RUN_ID"):
+        args.distributed_launcher = "torchrun"
+        prefix = f"torchrun --nproc_per_node={world_size}"
+    else:
+        args.distributed_launcher = "rank-environment"
+        prefix = f"RANK={rank} WORLD_SIZE={world_size} LOCAL_RANK={local_rank} python3"
+    args.reproduction_command = f"{prefix} tests/run_ep.py {shlex.join(sys.argv[1:])}"
+    args.image = os.environ.get("COLLECTIVEX_IMAGE", "")
+    args.image_digest = os.environ.get("COLLECTIVEX_IMAGE_DIGEST", "")
+    args.image_digest_verified = (
+        os.environ.get("COLLECTIVEX_IMAGE_DIGEST_VERIFIED") == "1"
+    )
+    # Container architecture and local squash hash for Enroot/Pyxis.
+    args.image_arch = machine
+    args.squash_sha256 = os.environ.get("COLLECTIVEX_SQUASH_SHA256")
+    # GitHub provenance: repo, run ID, attempt, ref, source SHA, job,
+    # artifact. A result is only publication-'official' when these are present (validity gate).
+    _run = {
+        "run_id": os.environ.get("GITHUB_RUN_ID"),
+        "run_attempt": os.environ.get("GITHUB_RUN_ATTEMPT"),
+        "ref": os.environ.get("GITHUB_REF_NAME") or os.environ.get("GITHUB_REF"),
+        "source_sha": os.environ.get("COLLECTIVEX_SOURCE_SHA")
+        or os.environ.get("GITHUB_SHA"),
+        "repo": os.environ.get("GITHUB_REPOSITORY"),
+        "job": os.environ.get("GITHUB_JOB"),
+        "artifact": os.environ.get("COLLECTIVEX_ARTIFACT_NAME"),
+    }
+    if any(_run.values()):  # at least one provenance variable is set
+        _run["qualification_index"] = args.qualification_index
+        args.git_run = _run
+    else:
+        args.git_run = None
+
+    # Import the backend class only after torch initializes. The selected mode is an
+    # explicit case dimension; adapters do not infer it from the token ladder.
+    if args.backend == "mori":
+        from ep_mori import MoRIBackend as Backend
+    elif args.backend == "nccl-ep":
+        from ep_nccl import NCCLBackend as Backend
+    elif args.backend == "uccl":
+        from ep_uccl import UCCLBackend as Backend
+    elif args.backend == "deepep-hybrid":
+        from ep_deepep_hybrid import DeepEPHybridBackend as Backend
+    elif args.backend == "deepep-v2":
+        from ep_deepep_v2 import DeepEPV2Backend as Backend
+    else:
+        from ep_deepep import DeepEPBackend as Backend
+
+    # MoRI uses the gloo+NCCL group shape from its reference; other adapters use NCCL/RCCL.
+    if not dist.is_initialized():
+        if args.backend == "mori":
+            dist.init_process_group(
+                backend="cpu:gloo,cuda:nccl",
+                rank=rank,
+                world_size=world_size,
+                device_id=device,
+            )
+        elif args.backend == "deepep-v2":
+            # PR #605 reuses PyTorch's NCCL communicator through ``_comm_ptr``. Supplying
+            # device_id eagerly forms it before ElasticBuffer construction.
+            dist.init_process_group("nccl", device_id=device)
+        else:
+            dist.init_process_group("nccl")
+
+    args.runtime_fingerprint = _runtime_fingerprint(
+        torch, device, machine=machine, vendor=vendor, arch=accelerator
+    )
+
+    gpus_per_node = args.gpus_per_node or sku["gpus_per_node"]
+    try:
+        expected_nodes = int(
+            os.environ.get("SLURM_NNODES", str(world_size // gpus_per_node))
+        )
+    except ValueError as exc:
+        raise ValueError("SLURM_NNODES must be a positive integer") from exc
+    realized_records: list[tuple[str, int, dict] | None] = [None] * world_size
+    dist.all_gather_object(
+        realized_records,
+        (socket.gethostname(), local_rank, args.runtime_fingerprint),
+    )
+    complete_records = [record for record in realized_records if record is not None]  # drop None placeholders
+    args.realized_placement = _summarize_realized_placement(
+        [(record[0], record[1]) for record in complete_records],
+        expected_nodes=expected_nodes,
+        expected_gpus_per_node=gpus_per_node,
+        expected_world_size=world_size,
+    )
+    args.runtime_fingerprint = _common_runtime_fingerprint(
+        [record[2] for record in complete_records]
+    )
+    canonical = bool(args.workload_dir)  # a workload dir makes the stratum mandatory (required=canonical)
+    local_stratum = _allocation_stratum_sha256(
+        [record[0] for record in complete_records],
+        audit_salt=os.environ.get("CX_AUDIT_SALT"),
+        fabric_selectors={
+            field: os.environ.get(environment) or None
+            for field, environment in PRIVATE_FABRIC_ENV.items()
+        },
+        required=canonical,
+    )
+    stratum_records: list[str | None] = [None] * world_size
+    dist.all_gather_object(stratum_records, local_stratum)
+    args.allocation_stratum_sha256 = _common_allocation_stratum(
+        stratum_records, required=canonical
+    )
+
+    # Construct + run inside a try so a backend exception (esp. a new adapter on GPU) prints its
+    # FULL traceback to STDOUT — torchrun captures per-rank stdout but only summarizes stderr, so an
+    # uncaught exception is otherwise invisible in CI. Print on every rank (prefixed) then re-raise.
+    try:
+        backend = Backend(args, rank, world_size, local_rank, device)
+        if rank == 0:
+            print(
+                f"[run_ep] backend={args.backend} phase={args.phase} mode={args.mode} "
+                f"world={world_size} ep_size={world_size} hidden={args.hidden} "
+                f"topk={args.topk} experts={args.experts} dtype=bf16 "
+                f"routing={args.routing} seed={args.seed} "
+                f"qualification_index={args.qualification_index}"
+            )
+        rc = ep_harness.run_sweep(args, backend, torch, dist, device, rank, world_size)
+    except Exception:
+        import traceback
+
+        print(
+            f"[run_ep][rank{rank}] backend={args.backend} FAILED:\n"
+            + traceback.format_exc(),
+            flush=True,
+        )
+        raise
+    # finalize() handles backend-specific teardown: DeepEP returns rc cleanly;
+    # MoRI hard-exits past its post-shmem_finalize teardown assertion.
+    return backend.finalize(rc)
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())  # main()'s return value becomes the process exit status
diff --git a/experimental/CollectiveX/tests/test_deepep_v2_contract.py b/experimental/CollectiveX/tests/test_deepep_v2_contract.py
new file mode 100644
index 000000000..c9f65c2c3
--- /dev/null
+++ b/experimental/CollectiveX/tests/test_deepep_v2_contract.py
@@ -0,0 +1,2151 @@
+#!/usr/bin/env python3
+"""CPU-only structural and registry tests for the pinned DeepEP V2 path."""
+from __future__ import annotations
+
+import ast
+import argparse
+import copy
+import ctypes
+import hashlib
+import json
+import os
+from pathlib import Path
+from pathlib import PurePosixPath
+import shutil
+import stat
+import subprocess
+import sys
+import tempfile
+import types
+import unittest
+
+HERE = Path(__file__).resolve().parent
+ROOT = HERE.parent
+sys.path.insert(0, str(ROOT))
+
+import capability  # noqa: E402
+import contracts  # noqa: E402
+import ep_harness  # noqa: E402
+import identity  # noqa: E402
+import run_ep  # noqa: E402
+
+
+COMMIT = "fa8a9b16898204afd347c663b89e65ef87dc6ce6"  # expected backend commit / DEEPEP_V2_COMMIT
+TREE = "29809e75c5874e6609dac4804e7b651d5226959f"  # expected DEEPEP_V2_TREE
+FMT_COMMIT = "a4c7e17133ee9cb6a2f45545f6e974dd3c393efa"  # expected DEEPEP_V2_FMT_COMMIT
+
+
+def deepep_v2_jit_provenance() -> list[dict[str, str]]:
+    """Build one synthetic record per sorted entry of contracts.DEEPEP_V2_JIT_KERNELS."""
+    records = []
+    for position, kernel in enumerate(sorted(contracts.DEEPEP_V2_JIT_KERNELS)):
+        # Digests are the hex of position+1/+2/+3 repeated 64 times.
+        record = {"cache_key": f"kernel.{kernel}.{position:032x}"}
+        for shift, field in enumerate(("cubin_sha256", "sass_sha256", "source_sha256"), 1):
+            record[field] = f"{position + shift:x}" * 64
+        records.append(record)
+    return records
+
+
+def hybrid_realized_config() -> dict[str, object]:
+    """Return a placeholder config: 1 per field, True for bool fields, UINT16 tokens."""
+    config = dict.fromkeys(contracts.HYBRID_REALIZED_CONFIG_FIELDS, 1)
+    config.update(dict.fromkeys(contracts.HYBRID_REALIZED_BOOL_FIELDS, True))
+    config["token_data_type"] = "UINT16"
+    return config
+
+
+def hybrid_jit_provenance(ranks: int = 2) -> tuple[list[str], list[dict[str, object]]]:
+    """Return three fixed kernel keys and, per key, one artifact record per rank."""
+    keys = ["combine-key", "dispatch-key", "preprocess-key"]
+    artifacts: list[dict[str, object]] = []
+    for position, kernel_key in enumerate(keys):
+        # Every rank reports the same size and digest for a given kernel.
+        digest = f"{position + 1:x}" * 64
+        per_rank = [
+            {"bytes": 10 + position, "rank": rank, "sha256": digest}
+            for rank in range(ranks)
+        ]
+        artifacts.append({"kernel_key": kernel_key, "rank_artifacts": per_rank})
+    return keys, artifacts
+
+
+def load_uccl_function(name: str, namespace: dict[str, object]):
+    """Compile only the top-level function ``name`` from ep_uccl.py into ``namespace``."""
+    path = HERE / "ep_uccl.py"
+    top_level = ast.parse(path.read_text()).body
+    # next() raises StopIteration when the file defines no such function.
+    target = next(n for n in top_level if isinstance(n, ast.FunctionDef) and n.name == name)
+    isolated = ast.Module(body=[target], type_ignores=[])
+    exec(compile(isolated, str(path), "exec"), namespace)
+    return namespace[name]
+
+
+def operator_config(root: Path) -> dict[str, object]:
+    """Build a schema-version-1 operator config fixture rooted at ``root``.
+
+    Every runner uses partition "test" and every path-valued setting is
+    ``str(root)``; the audit salt is 64 ``a`` characters.
+    """
+    location = str(root)
+    # Shared fragments; each runner below is assembled from fresh copies of them.
+    queue = {"partition": "test"}
+    billed_queue = {**queue, "account": "test"}
+    image_dirs = {"squash_dir": location, "stage_dir": location}
+    network = {"socket_ifname": "eth0", "rdma_devices": "mlx5_0:1"}
+
+    runners = {
+        "h100-dgxc": {**billed_queue, **image_dirs, **network},
+        # No account for this runner.
+        "h200-dgxc": {**queue, **image_dirs, **network},
+        "b200-dgxc": {**billed_queue, **image_dirs, **network},
+        "b300": {**billed_queue, **image_dirs, **network},
+        # gb200 lists storage roots instead of squash/stage directories.
+        "gb200": {**billed_queue, "storage_roots": [location]},
+        # gb300 adds an Enroot cache path and sets no network selectors.
+        "gb300": {**billed_queue, **image_dirs, "enroot_cache_path": location},
+        # mi325x and mi355x likewise carry no account.
+        "mi325x": {**queue, **image_dirs, **network},
+        "mi355x": {**queue, **image_dirs, **network},
+    }
+    return {
+        "schema_version": 1,
+        "audit_salt": "a" * 64,
+        "runners": runners,
+    }
+
+
+class DeepEPV2ContractTests(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls) -> None:
+        cls.path = HERE.joinpath("ep_deepep_v2.py")  # adapter source inspected by these tests
+        cls.tree = ast.parse(cls.path.read_text(), filename=str(cls.path))  # parsed once per class
+
+    def test_capability_is_explicit_for_every_sku(self) -> None:
+        """deepep-v2 pins its stack and declares a capability for every SKU."""
+        entry = capability.BACKENDS["deepep-v2"]
+        pinned = tuple(
+            entry[key] for key in ("implementation", "commit", "torch", "nccl")
+        )
+        self.assertEqual(pinned, ("deep_ep.ElasticBuffer", COMMIT, "2.10.0+cu130", "2.30.4"))
+        self.assertEqual(entry["source"], "deepseek-ai/DeepEP#605+#630")
+        self.assertEqual(entry["communication_backend"], "nccl-device-lsa")
+        per_sku = entry["sku_capabilities"]
+        self.assertEqual(set(per_sku), set(capability.PLATFORMS))
+        for sku, spec in capability.PLATFORMS.items():
+            schedulable, _reason = capability.resolve(sku, "deepep-v2")
+            # Expected: schedulable on every NVIDIA SKU except h100-dgxc.
+            expected = spec["vendor"] == "nvidia" and sku != "h100-dgxc"
+            self.assertEqual(schedulable, expected)
+            self.assertEqual(set(per_sku[sku]), {"basis", "schedulable"})
+        # h100-dgxc must record why it is not schedulable.
+        unavailable = "current-runner-nccl-device-api-symmetric-memory-unavailable"
+        self.assertEqual(
+            per_sku["h100-dgxc"], {"schedulable": False, "basis": unavailable}
+        )
+
+    def test_adapter_ast_pins_elastic_api_and_weight_semantics(self) -> None:
+        imports = {
+            alias.name
+            for node in ast.walk(self.tree)
+            if isinstance(node, ast.ImportFrom) and node.module == "deep_ep"
+            for alias in node.names
+        }
+        self.assertEqual(imports, {"ElasticBuffer"})
+        constants = {
+            node.targets[0].id: ast.literal_eval(node.value)
+            for node in self.tree.body
+            if isinstance(node, ast.Assign)
+            and len(node.targets) == 1
+            and isinstance(node.targets[0], ast.Name)
+            and isinstance(node.value, ast.Constant)
+        }
+        self.assertEqual(constants["DEEPEP_V2_COMMIT"], COMMIT)
+        self.assertEqual(constants["DEEPEP_V2_TREE"], TREE)
+        self.assertEqual(constants["DEEPEP_V2_FMT_COMMIT"], FMT_COMMIT)
+        self.assertEqual(constants["DEEPEP_V2_PR"], 605)
+        self.assertEqual(constants["DEEPEP_V2_FIX_PR"], 630)
+        self.assertEqual(
+            constants["DEEPEP_V2_JIT_RANDOM_SEED"],
+            "collectivex-deepep-v2-fa8a9b1",
+        )
+        self.assertEqual(constants["NCCL_VERSION"], "2.30.4")
+        self.assertEqual(constants["NVSHMEM_VERSION"], "3.3.9")
+        backend = next(
+            node for node in self.tree.body
+            if isinstance(node, ast.ClassDef) and node.name == "DeepEPV2Backend"
+        )
+        assignments = {
+            node.targets[0].id: ast.literal_eval(node.value)
+            for node in backend.body
+            if isinstance(node, ast.Assign)
+            and isinstance(node.targets[0], ast.Name)
+            and isinstance(node.value, ast.Constant)
+        }
+        self.assertEqual(assignments["combine_weight_semantics"], "unweighted-rank-sum")
+        methods = {node.name for node in backend.body if isinstance(node, ast.FunctionDef)}
+        self.assertTrue({
+            "dispatch", "inspect_dispatch", "combine_transformed", "capture_deferred_provenance",
+            "finalize",
+        } <= methods)
+        self.assertNotIn("expected", methods)
+        constructor = next(
+            node for node in ast.walk(backend)
+            if isinstance(node, ast.Call)
+            and isinstance(node.func, ast.Name)
+            and node.func.id == "ElasticBuffer"
+        )
+        deterministic = next(
+            keyword for keyword in constructor.keywords if keyword.arg == "deterministic"
+        )
+        self.assertIs(ast.literal_eval(deterministic.value), False)
+        self.assertIn("deterministic", contracts.REQUIRED_BACKEND_PROVENANCE["deepep-v2"])
+        self.assertIn("num_experts", contracts.REQUIRED_BACKEND_PROVENANCE["deepep-v2"])
+        self.assertIn("tuning_num_experts", contracts.REQUIRED_BACKEND_PROVENANCE["deepep-v2"])
+        self.assertIn("jit_random_seed", contracts.REQUIRED_BACKEND_PROVENANCE["deepep-v2"])
+        self.assertIn("gin_enabled", contracts.REQUIRED_BACKEND_PROVENANCE["deepep-v2"])
+        self.assertIn("communication_backend", contracts.REQUIRED_BACKEND_PROVENANCE["deepep-v2"])
+        self.assertIn("deepep_pr", contracts.REQUIRED_BACKEND_PROVENANCE["deepep-v2"])
+        self.assertIn("deepep_fix_pr", contracts.REQUIRED_BACKEND_PROVENANCE["deepep-v2"])
+        source = self.path.read_text()
+        self.assertIn('getattr(args, "num_logical_experts", args.experts)', source)
+        self.assertIn('"use_expanded_layout": False', source)
+        self.assertIn("allow_hybrid_mode = _configure_gin_mode(args, world_size)", source)
+        self.assertIn("get_theoretical_num_sms(tuning_num_experts, args.topk)", source)
+
+        jit_function = next(
+            node for node in self.tree.body
+            if isinstance(node, ast.FunctionDef) and node.name == "_jit_cache_key"
+        )
+        namespace = {"hashlib": __import__("hashlib"), "json": json}
+        exec(compile(ast.Module(body=[jit_function], type_ignores=[]), str(self.path), "exec"), namespace)
+        key = namespace["_jit_cache_key"]
+        baseline = types.SimpleNamespace(
+            runner="h100-dgxc", hidden=7168, topk=8, experts=256,
+            routing="uniform", eplb=False, case_id="uniform",
+        )
+        zipf = types.SimpleNamespace(**{**vars(baseline), "routing": "zipf", "case_id": "zipf"})
+        eplb = types.SimpleNamespace(
+            **{**vars(zipf), "experts": 288, "num_logical_experts": 256, "eplb": True}
+        )
+        realized = {
+            "num_sms": 24,
+            "num_qps": 9,
+            "allocated_qps": 17,
+            "logical_scaleout_ranks": 1,
+            "logical_scaleup_ranks": 8,
+            "physical_rdma_ranks": 2,
+            "physical_nvlink_ranks": 4,
+            "is_scaleup_nvlink": False,
+            "device_arch_major": 9,
+            "device_arch_minor": 0,
+            "device_sms": 132,
+            "device_smem_bytes": 232448,
+            "gpu_timeout_cycles": 198000000000,
+        }
+        direct = key(baseline, 8, 128, False, realized)
+        self.assertTrue(direct.startswith("jitcfg-v3-"))
+        self.assertEqual(direct, key(zipf, 8, 128, False, realized))
+        self.assertNotEqual(direct, key(zipf, 8, 128, True, realized))
+        self.assertNotEqual(direct, key(eplb, 8, 128, False, realized))
+        for field, value in realized.items():
+            changed = not value if type(value) is bool else value + 1
+            self.assertNotEqual(
+                direct,
+                key(baseline, 8, 128, False, {**realized, field: changed}),
+                field,
+            )
+        init = next(
+            node for node in backend.body
+            if isinstance(node, ast.FunctionDef) and node.name == "__init__"
+        )
+        buffer_call = next(
+            node for node in ast.walk(init)
+            if isinstance(node, ast.Call)
+            and isinstance(node.func, ast.Name)
+            and node.func.id == "ElasticBuffer"
+        )
+        jit_config_check = next(
+            node for node in ast.walk(init)
+            if isinstance(node, ast.Call)
+            and isinstance(node.func, ast.Name)
+            and node.func.id == "_require_cross_rank_equal"
+            and ast.literal_eval(node.args[1]) == "JIT configuration"
+        )
+        cache_assignment = next(
+            node for node in ast.walk(init)
+            if isinstance(node, ast.Assign)
+            and isinstance(node.targets[0], ast.Subscript)
+            and ast.unparse(node.targets[0].value) == "os.environ"
+            and ast.literal_eval(node.targets[0].slice) == "EP_JIT_CACHE_DIR"
+        )
+        self.assertLess(buffer_call.lineno, jit_config_check.lineno)
+        self.assertLess(jit_config_check.lineno, cache_assignment.lineno)
+        capture = next(
+            node for node in backend.body
+            if isinstance(node, ast.FunctionDef)
+            and node.name == "capture_deferred_provenance"
+        )
+        calls = [node for node in ast.walk(capture) if isinstance(node, ast.Call)]
+        barrier = next(
+            node for node in calls
+            if isinstance(node.func, ast.Attribute) and node.func.attr == "barrier"
+        )
+        self.assertEqual(
+            {keyword.arg: ast.literal_eval(keyword.value) for keyword in barrier.keywords},
+            {"use_comm_stream": True, "with_cpu_sync": True},
+        )
+        scan = next(
+            node for node in calls
+            if isinstance(node.func, ast.Name) and node.func.id == "_jit_artifact_evidence"
+        )
+        self.assertLess(barrier.lineno, scan.lineno)
+        realized_check = next(
+            node for node in ast.walk(backend)
+            if isinstance(node, ast.Call)
+            and isinstance(node.func, ast.Name)
+            and node.func.id == "_require_cross_rank_equal"
+            and len(node.args) > 1
+            and isinstance(node.args[1], ast.Constant)
+            and node.args[1].value == "realized tuning/topology"
+        )
+        self.assertIsInstance(realized_check, ast.Call)
+        self.assertEqual(
+            (ROOT / "tests" / "ep_harness.py").read_text().count(
+                "capture_deferred_provenance()"
+            ),
+            2,
+        )
+        schema = json.loads((ROOT / "schemas" / "raw-case-v1.schema.json").read_text())
+        provenance = schema["properties"]["implementation"]["properties"]["provenance"]
+        self.assertEqual(provenance["properties"]["deterministic"], {"type": "boolean"})
+        self.assertEqual(
+            provenance["properties"]["num_experts"],
+            {"minimum": 1, "type": "integer"},
+        )
+        self.assertEqual(
+            provenance["properties"]["tuning_num_experts"],
+            {"minimum": 1, "type": "integer"},
+        )
+        self.assertEqual(
+            provenance["properties"]["jit_cubins"]["items"],
+            {"$ref": "#/$defs/deepep_v2_jit_cubin"},
+        )
+        self.assertEqual(
+            (
+                provenance["properties"]["jit_cubins"]["minItems"],
+                provenance["properties"]["jit_cubins"]["maxItems"],
+            ),
+            (5, 5),
+        )
+        self.assertEqual(
+            provenance["properties"]["jit_random_seed"],
+            {"const": "collectivex-deepep-v2-fa8a9b1"},
+        )
+        self.assertEqual(provenance["properties"]["allow_hybrid_mode"], {"type": "boolean"})
+        self.assertEqual(provenance["properties"]["gin_enabled"], {"type": "boolean"})
+        self.assertEqual(provenance["properties"]["deepep_pr"], {"const": 605})
+        self.assertEqual(provenance["properties"]["deepep_fix_pr"], {"const": 630})
+        self.assertEqual(
+            provenance["properties"]["communication_backend"],
+            {"enum": ["nccl-device-lsa", "nccl-gin"]},
+        )
+        self.assertEqual(
+            provenance["properties"]["num_rdma_bytes"],
+            {"minimum": 0, "type": "integer"},
+        )
+        self.assertEqual(
+            provenance["properties"]["num_qps_per_rank"],
+            {"minimum": 1, "type": "integer"},
+        )
+        for field, value in (
+            ("num_experts", "288"),
+            ("tuning_num_experts", "not-an-integer"),
+            ("tuning_num_experts", 0),
+        ):
+            with self.subTest(provenance_field=field, value=value):
+                self.assertIn(
+                    field,
+                    contracts.backend_provenance_issues(
+                        "deepep-v2", {field: value}
+                    ),
+                )
+
+    def test_v2_gin_mode_uses_the_scale_up_domain_and_safe_fallbacks(self) -> None:  # Runs two helpers extracted from self.tree in isolation.
+        functions = {  # top-level function definitions of the parsed module, keyed by name
+            node.name: node for node in self.tree.body if isinstance(node, ast.FunctionDef)
+        }
+        namespace = {"os": os}  # the only global made available to the extracted helpers
+        exec(
+            compile(
+                ast.Module(
+                    body=[
+                        functions["_configure_gin_mode"],
+                        functions["_lsa_topology_is_valid"],
+                    ],
+                    type_ignores=[],
+                ),
+                str(self.path),
+                "exec",
+            ),
+            namespace,
+        )
+        configure = namespace["_configure_gin_mode"]
+        topology_is_valid = namespace["_lsa_topology_is_valid"]
+        original = os.environ.get("EP_DISABLE_GIN")  # saved so the finally block can restore it
+        try:
+            args = types.SimpleNamespace(scale_up_domain=72, gpus_per_node=4)
+            self.assertFalse(configure(args, 8))  # expected to return False ...
+            self.assertEqual(os.environ.get("EP_DISABLE_GIN"), "1")  # ... and to leave EP_DISABLE_GIN set to "1"
+
+            os.environ["EP_DISABLE_GIN"] = "stale"  # pre-existing value that a True result must remove
+            args = types.SimpleNamespace(scale_up_domain=8, gpus_per_node=4)
+            self.assertTrue(configure(args, 16))
+            self.assertNotIn("EP_DISABLE_GIN", os.environ)
+
+            args = types.SimpleNamespace(gpus_per_node=4)  # no scale_up_domain attribute at all
+            self.assertTrue(configure(args, 8))
+            self.assertNotIn("EP_DISABLE_GIN", os.environ)
+
+            self.assertFalse(configure(types.SimpleNamespace(), 8))  # neither attribute present
+            self.assertEqual(os.environ.get("EP_DISABLE_GIN"), "1")
+
+            topology = {  # mutated in place between the three checks below
+                "physical_rdma_ranks": 1,
+                "physical_nvlink_ranks": 8,
+                "logical_scaleout_ranks": 1,
+                "logical_scaleup_ranks": 8,
+                "is_scaleup_nvlink": True,
+            }
+            self.assertTrue(topology_is_valid(False, 8, 8, topology))  # NOTE(review): positional meanings are set by the helper, not visible here
+            topology["physical_rdma_ranks"] = 2
+            topology["logical_scaleout_ranks"] = 2
+            self.assertTrue(topology_is_valid(True, 16, 8, topology))
+            topology["physical_nvlink_ranks"] = 4
+            self.assertFalse(topology_is_valid(False, 8, 8, topology))
+        finally:  # put EP_DISABLE_GIN back exactly as it was before the test
+            if original is None:
+                os.environ.pop("EP_DISABLE_GIN", None)
+            else:
+                os.environ["EP_DISABLE_GIN"] = original
+
+    def test_ep_adapters_declare_unweighted_rank_sum(self) -> None:  # Static (AST-only) check of each adapter's backend class.
+        adapters = {  # adapter source file under HERE -> backend class expected in it
+            "ep_deepep.py": "DeepEPBackend",
+            "ep_deepep_v2.py": "DeepEPV2Backend",
+            "ep_deepep_hybrid.py": "DeepEPHybridBackend",
+            "ep_mori.py": "MoRIBackend",
+            "ep_nccl.py": "NCCLBackend",
+            "ep_uccl.py": "UCCLBackend",
+        }
+        for filename, class_name in adapters.items():
+            with self.subTest(adapter=filename):
+                tree = ast.parse((HERE / filename).read_text())  # parsed only; the adapter is never imported
+                backend = next(
+                    node for node in tree.body
+                    if isinstance(node, ast.ClassDef) and node.name == class_name
+                )
+                assignment = next(  # class-body assignment to combine_weight_semantics
+                    node for node in backend.body
+                    if isinstance(node, ast.Assign)
+                    and isinstance(node.targets[0], ast.Name)
+                    and node.targets[0].id == "combine_weight_semantics"
+                )
+                self.assertEqual(ast.literal_eval(assignment.value), "unweighted-rank-sum")
+                combine_methods = [
+                    item for item in backend.body
+                    if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef))
+                    and item.name in {"combine", "combine_transformed"}
+                ]
+                self.assertEqual(len(combine_methods), 2)  # both methods must be defined on the class
+                for method in combine_methods:
+                    source = ast.unparse(method)  # regenerated source, so string literals come out single-quoted
+                    if filename in {"ep_deepep.py", "ep_uccl.py"}:
+                        self.assertIn("self.mode == 'low-latency'", source)
+                    else:
+                        self.assertNotIn("topk_weights", source)
+                        self.assertNotIn("combine_topk_weights", source)  # already implied by the previous substring check
+
+    def test_low_latency_mode_parser_and_profile_are_explicit(self) -> None:  # Checks the --mode option and the "low-latency" case profile.
+        parser = argparse.ArgumentParser()
+        ep_harness.add_common_args(parser)  # options under test are registered by the harness
+        required = [  # arguments passed on every parse below; presumably the mandatory options (TODO confirm)
+            "--runner", "test", "--topology-class", "test",
+            "--scope", "scale-up", "--scale-up-transport", "nvlink",
+            "--out", "test.json",
+        ]
+        self.assertEqual(parser.parse_args(required).mode, "normal")  # value when --mode is omitted
+        self.assertEqual(
+            parser.parse_args([*required, "--mode", "low-latency"]).mode,
+            "low-latency",
+        )
+        profile = identity.case_profile("low-latency")  # the remaining assertions pin four keys of this mapping
+        self.assertEqual(profile["contract"], "expert-packed-weighted-combine-v1")
+        self.assertEqual(
+            profile["component_order_contract"],
+            "qualification-hash-rotated-components-v1",
+        )
+        self.assertEqual(
+            profile["correctness_scope"],
+            "expert-assignment-and-weighted-combine",
+        )
+        self.assertEqual(profile["payload_unit"], "token-expert")
+
+    def test_expert_packed_slot_map_reconstructs_exact_sources(self) -> None:  # One accepted input with a pinned result, then three rejected inputs.
+        pack = lambda begin, count: (begin << 32) | count  # begin goes above bit 32, count is OR-ed into the low bits
+        slots = ep_harness.expert_packed_slot_map(
+            [2, 1],
+            [[1, 0, 0, 0], [1, 0, 0, 0]],
+            [[pack(0, 1), pack(1, 1)], [pack(0, 0), pack(0, 1)]],
+            tokens_per_rank=2,
+            experts_per_rank=2,
+            world_size=2,
+        )
+        self.assertEqual(slots, [(0, 0, 1), (0, 1, 2), (1, 0, 3)])  # exact list (order included) returned for the input above
+
+        invalid = (  # each row is (counts, source, layout) and must be rejected
+            ([1], [[0]], [[pack(1, 1), pack(0, 0)]]),
+            ([1], [[2]], [[pack(0, 1), pack(1, 0)]]),
+            ([2], [[1, 1]], [[pack(0, 2), pack(2, 0)]]),
+        )
+        for counts, source, layout in invalid:
+            with self.subTest(counts=counts, source=source, layout=layout):
+                with self.assertRaises(ValueError):
+                    ep_harness.expert_packed_slot_map(
+                        counts,
+                        source,
+                        layout,
+                        tokens_per_rank=2,
+                        experts_per_rank=1,  # differs from the valid call above, which uses 2
+                        world_size=2,
+                    )
+
+    def test_deepep_and_uccl_expose_genuine_low_latency_calls(self) -> None:  # Plain substring checks on adapter and runner source text.
+        required_fragments = (  # each must occur verbatim in both adapter files
+            "Buffer.get_low_latency_rdma_size_hint(",
+            "low_latency_mode=True",
+            "num_qps_per_rank=num_qps_per_rank",
+            "self.buffer.clean_low_latency_buffer(",
+            "self.buffer.low_latency_dispatch(",
+            "use_fp8=False",
+            "self.buffer.low_latency_combine(",
+            "p.topk_weights",
+            'self.combine_weight_semantics = "gate-weighted-sum"',
+            "self.combine_needs_redispatch = True",
+            "def inspect_expert_dispatch(",
+        )
+        for filename in ("ep_deepep.py", "ep_uccl.py"):
+            source = (HERE / filename).read_text()  # raw text; formatting changes in the adapters can fail these checks
+            with self.subTest(adapter=filename):
+                for fragment in required_fragments:
+                    self.assertIn(fragment, source)
+                self.assertIn("self.max_tokens_per_rank = 128", source)
+                self.assertIn("async_finish=False", source)
+                self.assertIn("return_recv_hook=False", source)
+
+        run_ep_source = (HERE / "run_ep.py").read_text()
+        self.assertIn('args.backend not in {"deepep", "uccl"}', run_ep_source)  # only presence of the text is checked, not where it is used
+        self.assertIn('args.phase != "decode"', run_ep_source)
+
+    def test_deepep_v2_jit_evidence_is_strict_and_stable(self) -> None:  # Validator accept/reject cases, then capture_deferred_provenance run against stubs.
+        valid = deepep_v2_jit_provenance()  # helper defined outside this method; returns the accepted baseline list
+        self.assertTrue(contracts._deepep_v2_jit_cubins_are_valid(valid))
+        for invalid in (
+            [],  # empty list
+            [{**valid[0], "path": "/private/kernel.cubin"}],  # single item carrying an extra "path" key
+            [{**item, "cache_key": "dispatch"} for item in valid],  # every cache_key replaced by "dispatch"
+            [{**item, "cubin_sha256": "invalid"} for item in valid],  # every cubin digest replaced by "invalid"
+            valid[:-1],  # last item dropped
+            [*valid, valid[0]],  # first item repeated at the end
+            [  # extra item equal to the first except for the final 32 characters of cache_key
+                *valid,
+                {
+                    **valid[0],
+                    "cache_key": valid[0]["cache_key"][:-32] + "f" * 32,
+                },
+            ],
+        ):
+            with self.subTest(invalid=invalid):
+                self.assertFalse(contracts._deepep_v2_jit_cubins_are_valid(invalid))
+
+        backend = next(
+            node for node in self.tree.body
+            if isinstance(node, ast.ClassDef) and node.name == "DeepEPV2Backend"
+        )
+        capture = next(  # the method's AST node, compiled below as a module-level function
+            node for node in backend.body
+            if isinstance(node, ast.FunctionDef)
+            and node.name == "capture_deferred_provenance"
+        )
+        artifacts = copy.deepcopy(valid)  # mutable copy read by the _jit_artifact_evidence stub on every call
+
+        class FakeBuffer:  # stand-in for the real buffer; only barrier() is provided
+            @staticmethod
+            def barrier(*, use_comm_stream: bool, with_cpu_sync: bool) -> None:
+                self.assertTrue(use_comm_stream)  # `self` is the enclosing test case, captured by closure
+                self.assertTrue(with_cpu_sync)
+
+        namespace = {  # stubs for the globals supplied to the extracted method
+            "torch": types.SimpleNamespace(
+                cuda=types.SimpleNamespace(synchronize=lambda: None)
+            ),
+            "_jit_artifact_evidence": lambda: copy.deepcopy(artifacts),
+            "_require_cross_rank_equal": lambda _value, _label: None,
+        }
+        exec(
+            compile(ast.Module(body=[capture], type_ignores=[]), str(self.path), "exec"),
+            namespace,
+        )
+        state = types.SimpleNamespace(  # passed in place of `self` to the extracted method
+            buffer=FakeBuffer(),
+            _deferred_jit_snapshot=None,
+            backend_provenance={"jit_cubins": []},
+        )
+        namespace["capture_deferred_provenance"](state)
+        namespace["capture_deferred_provenance"](state)  # second call with unchanged artifacts must not raise
+        artifacts[0]["cubin_sha256"] = "f" * 64  # change one digest after the earlier captures
+        with self.assertRaisesRegex(RuntimeError, "changed after measurement"):
+            namespace["capture_deferred_provenance"](state)
+
+    def test_deepep_v2_jit_files_are_complete_regular_and_content_bound(self) -> None:  # Runs the extracted _jit_artifact_evidence against a temporary cache tree.
+        functions = [  # AST nodes for _sha256 and _jit_artifact_evidence, in module order
+            node for node in self.tree.body
+            if isinstance(node, ast.FunctionDef)
+            and node.name in {"_sha256", "_jit_artifact_evidence"}
+        ]
+        namespace = {  # globals supplied to the two extracted functions
+            "hashlib": hashlib,
+            "os": os,
+            "Path": Path,
+            "re": __import__("re"),
+            "DEEPEP_V2_JIT_KERNELS": contracts.DEEPEP_V2_JIT_KERNELS,
+        }
+        exec(compile(ast.Module(body=functions, type_ignores=[]), str(self.path), "exec"), namespace)
+        with tempfile.TemporaryDirectory() as temporary:
+            cache = Path(temporary) / "cache"
+            cache.mkdir()
+            for index, name in enumerate(sorted(contracts.DEEPEP_V2_JIT_KERNELS)):
+                kernel = cache / f"kernel.{name}.{index:032x}"  # directory name ends in a 32-digit hex suffix
+                kernel.mkdir()
+                for suffix in ("cu", "cubin", "sass"):  # three files per kernel directory
+                    (kernel / f"kernel.{suffix}").write_bytes(f"{name}-{suffix}".encode())
+            old_cache = os.environ.get("EP_JIT_CACHE_DIR")  # saved for restoration in the finally block
+            os.environ["EP_JIT_CACHE_DIR"] = temporary  # the parent of "cache", not the "cache" directory itself
+            try:
+                evidence = namespace["_jit_artifact_evidence"]()
+                self.assertEqual(len(evidence), len(contracts.DEEPEP_V2_JIT_KERNELS))
+                self.assertEqual(
+                    set(evidence[0]),
+                    {"cache_key", "cubin_sha256", "sass_sha256", "source_sha256"},
+                )
+                first = cache / evidence[0]["cache_key"]  # cache_key is used as a directory name under cache/
+                duplicate = cache / (evidence[0]["cache_key"][:-32] + "f" * 32)  # same key with the final 32 characters replaced
+                duplicate.mkdir()
+                for suffix in ("cu", "cubin", "sass"):
+                    (duplicate / f"kernel.{suffix}").write_bytes(b"duplicate")
+                with self.assertRaisesRegex(RuntimeError, "kernel set"):
+                    namespace["_jit_artifact_evidence"]()
+                shutil.rmtree(duplicate)  # remove the extra directory before the next scenario
+                (first / "kernel.sass").unlink()  # one of the three files is now missing
+                with self.assertRaisesRegex(RuntimeError, "incomplete"):
+                    namespace["_jit_artifact_evidence"]()
+                (first / "kernel.sass").symlink_to(first / "kernel.cubin")  # file exists again, but as a symlink
+                with self.assertRaisesRegex(RuntimeError, "regular file"):
+                    namespace["_jit_artifact_evidence"]()
+            finally:
+                if old_cache is None:
+                    os.environ.pop("EP_JIT_CACHE_DIR", None)
+                else:
+                    os.environ["EP_JIT_CACHE_DIR"] = old_cache
+
+    def test_runtime_and_shared_version_formatter_are_valid(self) -> None:  # Shell syntax check, version formatting, and two extracted helpers run against stubs.
+        subprocess.run(
+            ["bash", "-n", str(ROOT / "runtime" / "run_in_container.sh")],  # -n: parse the script without executing it
+            check=True,
+        )
+        self.assertEqual(ep_harness.format_collective_version(23004), "2.30.4")  # integer form
+        self.assertEqual(ep_harness.format_collective_version((2, 30, 4)), "2.30.4")  # tuple form
+        source = self.path.read_text()  # used by the assertNotIn check further down
+        version_function = next(
+            node for node in self.tree.body
+            if isinstance(node, ast.FunctionDef) and node.name == "_loaded_nccl_version"
+        )
+
+        class FakeNccl:  # object returned by the stubbed CDLL
+            @staticmethod
+            def ncclGetVersion(pointer) -> int:
+                pointer._obj.value = 23004  # write through the object wrapped by ctypes.byref
+                return 0
+
+        namespace = {
+            "ctypes": types.SimpleNamespace(  # CDLL is faked; byref and c_int are the real ctypes objects
+                CDLL=lambda _path: FakeNccl(), byref=ctypes.byref, c_int=ctypes.c_int,
+            ),
+            "ep_harness": ep_harness,
+            "os": os,
+            "_loaded_library_paths": lambda: {"/safe/libnccl.so.2"},
+        }
+        exec(
+            compile(ast.Module(body=[version_function], type_ignores=[]), str(self.path), "exec"),
+            namespace,
+        )
+        self.assertEqual(namespace["_loaded_nccl_version"](), "2.30.4")
+        for paths in (set(), {"/safe/libnccl.so.2", "/other/libnccl.so.2"}):  # zero paths, then two libnccl paths
+            namespace["_loaded_library_paths"] = lambda paths=paths: paths  # default argument binds this iteration's value
+            with self.assertRaisesRegex(RuntimeError, "exactly one"):
+                namespace["_loaded_nccl_version"]()
+        evidence_function = next(
+            node for node in self.tree.body
+            if isinstance(node, ast.FunctionDef) and node.name == "_loaded_library_evidence"
+        )
+        paths = {  # fake loaded-library paths returned by the stub installed below
+            "/safe/_C.cpython-310-x86_64-linux-gnu.so",
+            "/safe/libnccl.so.2",
+            "/safe/libnvshmem_host.so.3",
+        }
+        namespace.update(
+            _loaded_library_paths=lambda: paths,
+            _sha256=lambda _path: "a" * 64,  # fixed digest, so no file is read
+        )
+        exec(
+            compile(ast.Module(body=[evidence_function], type_ignores=[]), str(self.path), "exec"),
+            namespace,
+        )
+        evidence = namespace["_loaded_library_evidence"]()
+        self.assertIn(
+            {"name": "deep_ep._C", "role": "deepep-extension", "sha256": "a" * 64},
+            evidence,
+        )
+        self.assertTrue(
+            contracts._content_evidence_is_valid(
+                evidence, {"deepep-extension", "nccl", "nvshmem"}
+            )
+        )
+        self.assertNotIn("torch.cuda.nccl.version()", source)  # this exact call text must be absent from the module source
+        fingerprint = {"runtime": "cuda", "version": "13.0"}
+        self.assertIs(  # identity check: the first list element itself must be returned
+            run_ep._common_runtime_fingerprint([fingerprint, dict(fingerprint)]),
+            fingerprint,
+        )
+        with self.assertRaises(ValueError):  # entries that differ in "version" are rejected
+            run_ep._common_runtime_fingerprint([fingerprint, {"runtime": "cuda", "version": "12.8"}])
+
+    def test_conditioning_contract_is_exact_for_each_phase(self) -> None:  # Accepts only the exact contract for each phase; five mutations and the other phase must fail.
+        expected = {  # phase -> ladder that validate_conditioning_contract must accept
+            "decode": [1, 2, 4, 8, 16, 32, 64, 128],
+            "prefill": [1, 2, 4, 8, 16, 32, 64, 128, 256, 512],
+        }
+        for phase, ladder in expected.items():
+            valid = {
+                "contract": "fixed-phase-ramp-8-roundtrips-v1",
+                "ladder": ladder,
+                "roundtrips_per_shape": 8,
+            }
+            self.assertIs(contracts.validate_conditioning_contract(valid, phase), valid)  # the same object is returned
+            for mutate in (  # each callable edits a deep copy in place
+                lambda item: item["ladder"].reverse(),  # reversed order
+                lambda item: item["ladder"].pop(),  # last step removed
+                lambda item: item.update(ladder=[1.0, *item["ladder"][1:]]),  # first step as float 1.0
+                lambda item: item.update(roundtrips_per_shape=7),  # wrong count
+                lambda item: item.update(roundtrips_per_shape=8.0),  # float 8.0 instead of int 8
+            ):
+                changed = copy.deepcopy(valid)  # deep copy keeps `valid` and `expected` untouched
+                mutate(changed)
+                with self.assertRaises(contracts.ContractError):
+                    contracts.validate_conditioning_contract(changed, phase)
+            other = "prefill" if phase == "decode" else "decode"
+            with self.assertRaises(contracts.ContractError):  # this phase's contract checked against the other phase
+                contracts.validate_conditioning_contract(valid, other)
+
+    def test_content_manifest_evidence_is_stable_and_content_sensitive(self) -> None:  # Checks contracts.content_manifest_evidence on two temporary files.
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary)
+            first, second = root / "first", root / "second"
+            first.write_bytes(b"first")
+            second.write_bytes(b"second")
+            files = [("pkg/first", first), ("pkg/second", second)]  # (manifest name, on-disk path) pairs
+            evidence = contracts.content_manifest_evidence(
+                role="test-content", name="test-build", files=files,
+            )
+            self.assertNotIn(temporary, json.dumps(evidence))  # the temporary directory path must not appear in the result
+            self.assertEqual(  # same result when the entries arrive in reverse order
+                evidence,
+                contracts.content_manifest_evidence(
+                    role="test-content", name="test-build", files=reversed(files),
+                ),
+            )
+            self.assertRegex(evidence["sha256"], r"^[0-9a-f]{64}$")  # 64 lowercase hex characters
+            second.write_bytes(b"changed")  # change one file's bytes; names and paths stay the same
+            self.assertNotEqual(
+                evidence,
+                contracts.content_manifest_evidence(
+                    role="test-content", name="test-build", files=files,
+                ),
+            )
+            for invalid in (
+                [("../first", first)],  # name containing a parent-directory component
+                [("same", first), ("same", second)],  # same name used twice
+                [("missing", root / "missing")],  # path that was never created
+            ):
+                with self.assertRaises(contracts.ContractError):
+                    contracts.content_manifest_evidence(
+                        role="test-content", name="test-build", files=invalid,
+                    )
+
+    def test_hybrid_realized_config_and_jit_evidence_are_path_free(self) -> None:  # Runs helpers extracted from ep_deepep_hybrid.py against fakes and temporary files.
+        path = HERE / "ep_deepep_hybrid.py"
+        tree = ast.parse(path.read_text(), str(path))
+        selected = [  # module-level nodes to compile: one constant assignment plus three functions
+            node for node in tree.body
+            if (
+                isinstance(node, ast.Assign)
+                and any(
+                    isinstance(target, ast.Name) and target.id == "HYBRID_CONFIG_FIELDS"
+                    for target in node.targets
+                )
+            )
+            or isinstance(node, ast.FunctionDef)  # `and` binds tighter than `or`, so this pairs with the name test below
+            and node.name in {
+                "_hybrid_realized_config", "_sha256_with_size", "_hybrid_jit_evidence",
+            }
+        ]
+        namespace = {"Path": Path, "hashlib": hashlib, "re": __import__("re")}  # globals supplied to the extracted code
+        exec(compile(ast.Module(body=selected, type_ignores=[]), str(path), "exec"), namespace)
+        fields = namespace["HYBRID_CONFIG_FIELDS"]
+        self.assertEqual(set(fields), contracts.HYBRID_REALIZED_CONFIG_FIELDS)  # adapter constant and contract constant must agree
+
+        class TokenType:  # fake token-type value: str() yields `label`; `name` exists only when given
+            def __init__(self, label: str, name: str | None = None) -> None:
+                self.label = label
+                if name is not None:
+                    self.name = name
+
+            def __str__(self) -> str:
+                return self.label
+
+        values = {field: 1 for field in fields}  # every field starts as the integer 1
+        values.update({field: True for field in contracts.HYBRID_REALIZED_BOOL_FIELDS})  # boolean fields become True
+        for raw, expected in (("uint16_t", "UINT16"), ("uint8_t", "UINT8")):  # str() label -> expected realized value
+            values["token_data_type"] = TokenType(raw)
+            config = types.SimpleNamespace(**values)
+            realized = namespace["_hybrid_realized_config"](config)
+            self.assertEqual(realized["token_data_type"], expected)
+            self.assertEqual(set(realized), contracts.HYBRID_REALIZED_CONFIG_FIELDS)
+        values["token_data_type"] = TokenType("opaque-enum", "UINT16")  # unrecognized label, but a `name` attribute is present
+        self.assertEqual(
+            namespace["_hybrid_realized_config"](types.SimpleNamespace(**values))[
+                "token_data_type"
+            ],
+            "UINT16",
+        )
+        values["token_data_type"] = TokenType("UINT16")  # label "UINT16" with no `name` attribute must be rejected
+        with self.assertRaisesRegex(RuntimeError, "token_data_type is invalid"):
+            namespace["_hybrid_realized_config"](types.SimpleNamespace(**values))
+        values["token_data_type"] = TokenType("uint16_t")  # back to an accepted value before the missing-field case
+        config = types.SimpleNamespace(**values)
+        delattr(config, "hidden_dim")  # config object lacking one field
+        with self.assertRaisesRegex(RuntimeError, "omits hidden_dim"):
+            namespace["_hybrid_realized_config"](config)
+
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary)
+            for key, payload in (  # three <key>.so files, created out of alphabetical order
+                ("preprocess-key", b"pre"),
+                ("combine-key", b"combine"),
+                ("dispatch-key", b"dispatch"),
+            ):
+                (root / f"{key}.so").write_bytes(payload)
+            evidence = namespace["_hybrid_jit_evidence"](root)
+            self.assertEqual(  # expected order is alphabetical by kernel_key
+                [item["kernel_key"] for item in evidence],
+                ["combine-key", "dispatch-key", "preprocess-key"],
+            )
+            self.assertNotIn(temporary, json.dumps(evidence))  # the directory path must not appear in the evidence
+            (root / "dispatch-key.so").write_bytes(b"changed")  # new content must change the evidence
+            self.assertNotEqual(evidence, namespace["_hybrid_jit_evidence"](root))
+            (root / "extra-key.so").write_bytes(b"extra")  # fourth .so file
+            with self.assertRaisesRegex(RuntimeError, "expected 3"):
+                namespace["_hybrid_jit_evidence"](root)
+            (root / "extra-key.so").unlink()
+            (root / "bad key.so").write_bytes(b"bad")  # file name containing a space
+            with self.assertRaisesRegex(RuntimeError, "kernel key"):
+                namespace["_hybrid_jit_evidence"](root)
+            (root / "bad key.so").unlink()
+            (root / "combine-key.so").unlink()
+            (root / "combine-key.so").symlink_to(root / "dispatch-key.so")  # same name recreated as a symlink
+            with self.assertRaisesRegex(RuntimeError, "regular file"):
+                namespace["_hybrid_jit_evidence"](root)
+            empty = root / "empty"
+            empty.mkdir()  # directory containing no .so files
+            with self.assertRaisesRegex(RuntimeError, "expected 3"):
+                namespace["_hybrid_jit_evidence"](empty)
+
+    def test_hybrid_uses_communication_domains_not_physical_hosts(self) -> None:  # Table-driven check of _hybrid_topology extracted from ep_deepep_hybrid.py.
+        path = HERE / "ep_deepep_hybrid.py"
+        function = next(
+            node for node in ast.parse(path.read_text(), str(path)).body
+            if isinstance(node, ast.FunctionDef) and node.name == "_hybrid_topology"
+        )
+        namespace: dict[str, object] = {}  # empty: the extracted function is given no extra globals
+        exec(compile(ast.Module(body=[function], type_ignores=[]), str(path), "exec"), namespace)
+        resolve = namespace["_hybrid_topology"]
+        cases = (  # columns: world, gpus_per_node, scale_up_domain, scope, up, out, then expected ranks, domains, hosts
+            (8, 8, 8, "scale-up", "nvlink", "", 8, 1, 1),
+            (16, 8, 8, "scale-out", "nvlink", "rdma", 8, 2, 2),
+            (8, 4, 72, "scale-up", "mnnvl", "", 8, 1, 2),
+            (16, 4, 72, "scale-up", "mnnvl", "", 16, 1, 4),
+        )
+        for world, gpn, domain, scope, up, out, ranks, domains, hosts in cases:
+            with self.subTest(world=world, gpus_per_node=gpn, transport=up):
+                topology = resolve(types.SimpleNamespace(
+                    gpus_per_node=gpn,
+                    scale_up_domain=domain,
+                    scope=scope,
+                    scale_up_transport=up,
+                    scale_out_transport=out,
+                    transport=up if not out else f"{up}-{out}",  # "<up>" alone, or "<up>-<out>" when out is non-empty
+                ), world)
+                self.assertEqual(
+                    (topology["domain_ranks"], topology["communication_domains"],
+                     topology["physical_nodes"]),
+                    (ranks, domains, hosts),
+                )
+        with self.assertRaisesRegex(RuntimeError, "outside the fixed v1 matrix"):  # same arguments as the first row, but world 16
+            resolve(types.SimpleNamespace(
+                gpus_per_node=8, scale_up_domain=8, scope="scale-up",
+                scale_up_transport="nvlink", scale_out_transport="", transport="nvlink",
+            ), 16)
+
+    def test_mori_ep16_pins_upstream_internode_v1_resources(self) -> None:  # Substring checks on the raw text of ep_mori.py.
+        source = (HERE / "ep_mori.py").read_text()
+        for fragment in (  # each fragment must occur verbatim at least once
+            'kernel_enum.InterNodeV1',
+            'self.block_num = self._block_target = 96',
+            'self.rdma_block_num = 64',
+            'self.dispatch_warps = self.combine_warps = 8',
+            'self.num_qps = 1',
+            '"gpu_per_node": gpus_per_node',
+            '"rdma_block_num": self.rdma_block_num',
+            '"num_qp_per_pe": self.num_qps',
+            '"use_external_inp_buf": self._external_input',
+            'os.environ["MORI_EP_LAUNCH_CONFIG_MODE"] = "MANUAL"',
+            'rdma_block_num=self.rdma_block_num',
+        ):
+            self.assertIn(fragment, source)
+        self.assertGreaterEqual(source.count("rdma_block_num=self.rdma_block_num"), 2)  # this keyword argument must appear at least twice
+
+    def test_hybrid_deferred_provenance_wraps_before_conditioning_and_recaptures(self) -> None:
+        path = HERE / "ep_deepep_hybrid.py"  # backend source; inspected as text/AST in this test
+        source = path.read_text()
+        tree = ast.parse(source, str(path))
+        backend = next(
+            node for node in tree.body
+            if isinstance(node, ast.ClassDef) and node.name == "DeepEPHybridBackend"
+        )
+        methods = {node.name for node in backend.body if isinstance(node, ast.FunctionDef)}
+        self.assertIn("capture_deferred_provenance", methods)
+        constructor = next(node for node in backend.body if isinstance(node, ast.FunctionDef) and node.name == "__init__")
+        buffer_call = next(
+            node for node in ast.walk(constructor)
+            if isinstance(node, ast.Call) and isinstance(node.func, ast.Name)
+            and node.func.id == "HybridEPBuffer"
+        )
+        wrapper_install = next(
+            node for node in ast.walk(constructor)
+            if isinstance(node, ast.Assign)
+            and any(
+                isinstance(target, ast.Attribute)
+                and target.attr == "update_template_config"
+                for target in node.targets
+            )
+        )
+        cache_line = source[:source.index('os.environ["HYBRID_EP_CACHE_DIR"]')].count("\n") + 1  # 1-based line
+        self.assertLess(cache_line, buffer_call.lineno)  # cache-dir text precedes the buffer call
+        self.assertLess(buffer_call.lineno, wrapper_install.lineno)  # buffer precedes the wrapper assignment
+
+        capture = next(
+            node for node in backend.body
+            if isinstance(node, ast.FunctionDef) and node.name == "capture_deferred_provenance"
+        )
+        called = {
+            node.func.id if isinstance(node.func, ast.Name) else node.func.attr
+            for node in ast.walk(capture) if isinstance(node, ast.Call)
+            and isinstance(node.func, (ast.Name, ast.Attribute))
+        }
+        self.assertTrue({"_hybrid_jit_evidence", "_require_cross_rank_equal", "all_gather_object"} <= called)
+        self.assertIn("changed after measurement", ast.get_source_segment(source, capture))
+
+        artifacts = [[  # outer list is a mutable cell read by the fake evidence helper below
+            {"bytes": 1, "kernel_key": key, "sha256": digit * 64}
+            for key, digit in (("a", "1"), ("b", "2"), ("c", "3"))
+        ]]
+
+        class FakeCuda:
+            @staticmethod
+            def synchronize() -> None:
+                return None
+
+        class FakeDist:
+            @staticmethod
+            def barrier() -> None:
+                return None
+
+            @staticmethod
+            def get_world_size() -> int:
+                return 2
+
+            @staticmethod
+            def all_gather_object(output, value) -> None:
+                output[:] = [copy.deepcopy(value), copy.deepcopy(value)]  # two ranks, identical values
+
+        namespace = {  # globals for the extracted method: fakes only
+            "torch": types.SimpleNamespace(cuda=FakeCuda),
+            "dist": FakeDist,
+            "_hybrid_jit_evidence": lambda _root: copy.deepcopy(artifacts[0]),
+            "_require_cross_rank_equal": lambda _value, _label: None,
+        }
+        exec(compile(ast.Module(body=[capture], type_ignores=[]), str(path), "exec"), namespace)
+        state = types.SimpleNamespace(
+            _deferred_jit_diagnostics=None,
+            _deferred_semantic_snapshot=None,
+            _jit_root=Path("private-cache"),
+            _realized_config=hybrid_realized_config(),
+            backend_provenance={},
+        )
+        namespace["capture_deferred_provenance"](state)  # baseline capture; expected not to raise
+        artifacts[0][0]["kernel_key"] = "changed"  # change only one kernel key
+        with self.assertRaisesRegex(RuntimeError, "kernel set changed"):
+            namespace["capture_deferred_provenance"](state)
+        artifacts[0][0]["kernel_key"] = "a"
+        artifacts[0][0]["sha256"] = "f" * 64  # key restored above; now only the digest differs
+        with self.assertRaisesRegex(RuntimeError, "artifacts changed"):
+            namespace["capture_deferred_provenance"](state)
+
+        harness = (HERE / "ep_harness.py").read_text()
+        captures = [  # every offset where the harness text has a capture call
+            index for index in range(len(harness))
+            if harness.startswith("capture_deferred_provenance()", index)
+        ]
+        self.assertEqual(len(captures), 2)  # exactly two call sites
+        self.assertLess(harness.index("for wt in conditioning_ladder:"), captures[0])  # after the ladder loop header
+        self.assertLess(captures[0], harness.index("oracle = _run_expert_oracle("))  # before the oracle call
+        self.assertLess(harness.index("trace_sig = hashlib.sha256"), captures[1])  # after the trace signature
+
+    def test_hybrid_diagnostic_hashes_do_not_split_series_identity(self) -> None:
+        keys, artifacts = hybrid_jit_provenance()
+        provenance = {
+            "deepep_tree": "b" * 40,
+            "jit_kernel_keys": keys,
+            "jit_shared_objects": artifacts,
+            "loaded_libraries": [{
+                "name": "hybrid_ep_cpp", "role": "deepep-hybrid-extension",
+                "sha256": "a" * 64,
+            }],
+            "realized_config": hybrid_realized_config(),
+        }
+        baseline = ep_harness._series_provenance(provenance)
+        changed = copy.deepcopy(provenance)
+        changed["jit_shared_objects"][0]["rank_artifacts"][0]["sha256"] = "f" * 64
+        self.assertEqual(ep_harness._series_provenance(changed), baseline)
+        changed = copy.deepcopy(provenance)
+        changed["loaded_libraries"][0]["sha256"] = "f" * 64
+        self.assertEqual(ep_harness._series_provenance(changed), baseline)
+        changed = copy.deepcopy(provenance)
+        changed["jit_kernel_keys"][0] = "changed-key"
+        self.assertNotEqual(ep_harness._series_provenance(changed), baseline)
+        changed = copy.deepcopy(provenance)
+        changed["realized_config"]["num_of_blocks_dispatch_api"] += 1
+        self.assertNotEqual(ep_harness._series_provenance(changed), baseline)
+        changed = copy.deepcopy(provenance)
+        changed["deepep_tree"] = "c" * 40
+        self.assertNotEqual(ep_harness._series_provenance(changed), baseline)
+
+    def test_v2_series_identity_uses_source_and_sass_not_container_metadata(self) -> None:
+        provenance = {
+            "deepep_tree": "a" * 40,
+            "loaded_libraries": [
+                {"name": "deep_ep._C.so", "role": "deepep-extension", "sha256": "1" * 64},
+                {"name": "libnccl.so.2", "role": "nccl", "sha256": "2" * 64},
+            ],
+            "jit_cubins": deepep_v2_jit_provenance(),
+            "jit_random_seed": "collectivex-deepep-v2-fa8a9b1",
+        }
+        baseline = contracts.series_provenance(provenance)
+        changed = copy.deepcopy(provenance)
+        changed["loaded_libraries"][0]["sha256"] = "f" * 64
+        changed["jit_cubins"][0]["cubin_sha256"] = "e" * 64
+        self.assertEqual(contracts.series_provenance(changed), baseline)
+        for mutate in (
+            lambda item: item["loaded_libraries"][1].update(sha256="f" * 64),
+            lambda item: item["jit_cubins"][0].update(source_sha256="f" * 64),
+            lambda item: item["jit_cubins"][0].update(sass_sha256="f" * 64),
+            lambda item: item.update(deepep_tree="f" * 40),
+        ):
+            changed = copy.deepcopy(provenance)
+            mutate(changed)
+            self.assertNotEqual(contracts.series_provenance(changed), baseline)
+
+    def test_mnnvl_resolution_has_no_ambiguous_signature_fallback(self) -> None:
+        self.assertEqual(
+            contracts.resolve_deepep_mnnvl(
+                requested=False, signature_parameters=(), deepep_commit=None,
+            ),
+            ({}, "not-requested"),
+        )
+        self.assertEqual(
+            contracts.resolve_deepep_mnnvl(
+                requested=True, signature_parameters=("allow_mnnvl",),
+                deepep_commit="a" * 40,
+            ),
+            ({"allow_mnnvl": True}, "explicit-allow-mnnvl"),
+        )
+        with self.assertRaises(contracts.ContractError):
+            contracts.resolve_deepep_mnnvl(
+                requested=True, signature_parameters=(),
+                deepep_commit="814e508537c6ffc775d59f6f1b9ba43f3a65968c",
+            )
+
+    def test_backend_provenance_requires_lineage_and_content_hashes(self) -> None:
+        def record(role: str, name: str, digit: str) -> dict[str, str]:  # minimal library record
+            return {"role": role, "name": name, "sha256": digit * 64}
+
+        hybrid_keys, hybrid_artifacts = hybrid_jit_provenance()
+        v2 = {
+            **contracts.DEEPEP_V2_V1_PROVENANCE,
+            "api_signature_sha256": "c" * 64,
+            "loaded_libraries": [
+                record("deepep-extension", "deep_ep._C", "1"),
+                record("nccl", "libnccl.so.2", "2"),
+                record("nvshmem", "libnvshmem_host.so.3", "3"),
+            ],
+            "jit_cubins": deepep_v2_jit_provenance(),
+            "jit_random_seed": "collectivex-deepep-v2-fa8a9b1",
+            "deterministic": False,
+            "num_experts": 256,
+            "tuning_num_experts": 256,
+            "allow_hybrid_mode": False,
+            "gin_enabled": False,
+            "communication_backend": "nccl-device-lsa",
+        }
+        deepep = {
+            "deepep_version": "1.1.0", "deepep_commit": "a" * 40,
+            "backend_lineage": "deepep-v1", "allow_mnnvl": False,
+            "mnnvl_comm": "not-requested", "mode": "normal",
+            "num_nvl_bytes": 1024, "num_rdma_bytes": 0,
+        }
+        hybrid = {
+            "deepep_commit": "a" * 40, "deepep_tree": "b" * 40,
+            "branch": "hybrid-ep", "backend_lineage": "deepep-hybrid",
+            "loaded_libraries": [
+                record("deepep-extension", "deep_ep_cpp", "1"),
+                record("deepep-hybrid-extension", "hybrid_ep_cpp", "2"),
+            ],
+            "jit_kernel_keys": hybrid_keys,
+            "jit_shared_objects": hybrid_artifacts,
+            "realized_config": hybrid_realized_config(),
+        }
+        uccl = {
+            "uccl_version": "0.1.1", "uccl_commit": "pkg-0.1.1",
+            "uccl_wrapper_commit": "c" * 40, "backend_lineage": "uccl",
+            "uccl_dependency_versions": dict(contracts.UCCL_DEPENDENCY_VERSIONS),
+            "loaded_libraries": [
+                record("uccl-distribution", "uccl-0.1.1", "3"),
+                record("uccl-wrapper", "uccl-deepep-wrapper", "4"),
+                record("intervaltree-distribution", "intervaltree-3.1.0", "5"),
+                record("sortedcontainers-distribution", "sortedcontainers-2.4.0", "6"),
+                record("cuda-runtime", "nvidia-cuda-runtime-cu12-12.9.79", "7"),
+            ],
+            "mode": "normal", "num_nvl_bytes": 1024, "num_rdma_bytes": 0,
+        }
+        reference = {
+            "nccl_version": "2.30.4", "collective_library": "nccl",
+            "backend_lineage": "nccl",
+        }
+        for backend, provenance in (
+            ("deepep", deepep), ("deepep-v2", v2), ("deepep-hybrid", hybrid),
+            ("uccl", uccl), ("nccl-ep", reference),
+        ):
+            self.assertEqual(contracts.backend_provenance_issues(backend, provenance), [])  # fixture is clean
+            changed = copy.deepcopy(provenance)
+            if "loaded_libraries" in changed:  # corrupt a library digest when the fixture has libraries
+                changed["loaded_libraries"][0]["sha256"] = "invalid"
+                expected = "loaded_libraries"
+            else:  # otherwise corrupt the lineage label
+                changed["backend_lineage"] = "wrong"
+                expected = "backend_lineage"
+            self.assertIn(expected, contracts.backend_provenance_issues(backend, changed))
+
+        changed = copy.deepcopy(uccl)
+        changed["uccl_dependency_versions"]["intervaltree"] = "3.2.0"  # expected to differ from the pinned version
+        self.assertIn(
+            "uccl_dependency_versions",
+            contracts.backend_provenance_issues("uccl", changed),
+        )
+        changed = copy.deepcopy(uccl)
+        changed["loaded_libraries"] = [  # remove the sortedcontainers record
+            item
+            for item in changed["loaded_libraries"]
+            if item["role"] != "sortedcontainers-distribution"
+        ]
+        self.assertIn(
+            "loaded_libraries", contracts.backend_provenance_issues("uccl", changed)
+        )
+
+        for field, mutate in (
+            ("realized_config", lambda item: item["realized_config"].pop("hidden_dim")),
+            ("jit_kernel_keys", lambda item: item["jit_kernel_keys"].reverse()),  # same keys, different order
+            (
+                "jit_shared_objects",
+                lambda item: item["jit_shared_objects"][0]["rank_artifacts"][0].update(
+                    sha256="invalid"
+                ),
+            ),
+        ):
+            with self.subTest(hybrid_field=field):
+                changed = copy.deepcopy(hybrid)
+                mutate(changed)
+                self.assertIn(
+                    field,
+                    contracts.backend_provenance_issues("deepep-hybrid", changed),
+                )
+
+        for field, value in (
+            ("jit_cubins", [{"cache_key": "invalid", "cubin_sha256": "4" * 64}]),
+            ("jit_random_seed", "different-seed"),
+        ):
+            with self.subTest(v2_field=field):
+                changed = copy.deepcopy(v2)
+                changed[field] = value
+                self.assertIn(
+                    field,
+                    contracts.backend_provenance_issues("deepep-v2", changed),
+                )
+
+        changed = copy.deepcopy(v2)
+        changed["gin_enabled"] = True
+        self.assertIn("gin_enabled", contracts.backend_provenance_issues("deepep-v2", changed))
+        changed = copy.deepcopy(v2)
+        changed["communication_backend"] = "nccl-gin"
+        self.assertIn(
+            "communication_backend", contracts.backend_provenance_issues("deepep-v2", changed)
+        )
+        changed = copy.deepcopy(v2)
+        changed.update(  # hybrid mode together with GIN and nccl-gin is accepted
+            allow_hybrid_mode=True,
+            gin_enabled=True,
+            communication_backend="nccl-gin",
+        )
+        self.assertEqual(
+            contracts.backend_provenance_issues("deepep-v2", changed),
+            [],
+        )
+        changed["allow_hybrid_mode"] = False  # same GIN settings without hybrid mode: three issues
+        self.assertEqual(
+            contracts.backend_provenance_issues("deepep-v2", changed),
+            ["allow_hybrid_mode", "communication_backend", "gin_enabled"],
+        )
+        for field, expected in contracts.DEEPEP_V2_V1_PROVENANCE.items():
+            with self.subTest(v2_pin_field=field):
+                changed = copy.deepcopy(v2)
+                changed[field] = not expected if type(expected) is bool else "wrong"  # flip bools, else "wrong"
+                self.assertIn(
+                    field,
+                    contracts.backend_provenance_issues("deepep-v2", changed),
+                )
+
+        schema = json.loads((ROOT / "schemas" / "raw-case-v1.schema.json").read_text())  # raw-case schema checks
+        provenance_schema = schema["properties"]["implementation"]["properties"]["provenance"]
+        self.assertEqual(
+            provenance_schema["properties"]["realized_config"],
+            {"$ref": "#/$defs/hybrid_realized_config"},
+        )
+        self.assertFalse(schema["$defs"]["hybrid_realized_config"]["additionalProperties"])
+        self.assertEqual(provenance_schema["properties"]["jit_kernel_keys"]["minItems"], 3)
+        self.assertEqual(provenance_schema["properties"]["jit_shared_objects"]["minItems"], 3)
+
+        self.assertEqual(contracts.collective_kernel_generation("nccl"), "nccl")
+        self.assertEqual(contracts.collective_kernel_generation("rccl"), "rccl")
+        with self.assertRaises(contracts.ContractError):  # unrecognized library names are rejected
+            contracts.collective_kernel_generation("unknown")
+
+    def test_transport_resource_provenance_is_exact(self) -> None:
+        self.assertEqual(contracts.hybrid_communication_domains(8, 8), (8, 1))
+        self.assertEqual(contracts.hybrid_communication_domains(16, 8), (8, 2))
+        self.assertEqual(contracts.hybrid_communication_domains(8, 72), (8, 1))
+        self.assertEqual(contracts.hybrid_communication_domains(16, 72), (16, 1))
+
+        profile = contracts.project_resource_profile({
+            "num_nvl_bytes": 1024, "num_rdma_bytes": 2048,
+            "num_qps_per_rank": 32, "heap_size": "6G",
+        })
+        self.assertEqual(profile["persistent_bytes"], 3072)
+        self.assertEqual(profile["qps_per_rank"], 32)
+        self.assertEqual(
+            contracts.project_resource_profile({
+                "num_nvl_bytes": 0, "num_rdma_bytes": 0, "heap_size": "6G",
+            })["persistent_bytes"],
+            0,
+        )
+        self.assertEqual(
+            contracts.project_resource_profile({"heap_size": "6G"})[
+                "persistent_bytes"
+            ],
+            "6G",
+        )
+
+        mori = {
+            "mori_commit": "a" * 40, "kernel_type": "InterNodeV1",
+            "block_num": 96, "rdma_block_num": 64,
+            "dispatch_warps": 8, "combine_warps": 8, "num_qps": 1,
+            "use_external_inp_buf": True, "gpus_per_node": 8,
+        }
+        self.assertEqual(contracts.backend_provenance_issues("mori", mori), [])
+        for field in (
+            "block_num", "rdma_block_num", "dispatch_warps", "combine_warps",
+            "num_qps", "use_external_inp_buf", "gpus_per_node",
+        ):
+            changed = copy.deepcopy(mori)
+            changed[field] = False if field == "use_external_inp_buf" else 0
+            with self.subTest(mori_field=field):
+                self.assertIn(
+                    field, contracts.backend_provenance_issues("mori", changed)
+                )
+
+    def test_routing_control_binds_binary_but_allows_treatment_configuration(self) -> None:
+        hybrid_keys, hybrid_artifacts = hybrid_jit_provenance()
+        implementation = {
+            "kernel_generation": "hybrid",
+            "name": "deepep-hybrid",
+            "provenance": {
+                "deepep_tree": "a" * 40,
+                "loaded_libraries": [{
+                    "role": "deepep-extension", "name": "deep_ep_cpp", "sha256": "1" * 64,
+                }],
+                "local_experts": 32,
+                "num_experts": 256,
+                "num_sms": 24,
+                "jit_cache_key": "case-one",
+                "jit_cubins": [{"cache_key": "one", "cubin_sha256": "2" * 64}],
+                "jit_kernel_keys": hybrid_keys,
+                "jit_shared_objects": hybrid_artifacts,
+                "realized_config": hybrid_realized_config(),
+            },
+            "resource_profile": {"configured_units": 24},
+        }
+        baseline = contracts.routing_implementation_control_sha256(implementation)
+        treatment = copy.deepcopy(implementation)
+        treatment["provenance"].update({
+            "local_experts": 36,
+            "num_experts": 288,
+            "jit_cache_key": "case-two",
+            "jit_cubins": [{"cache_key": "two", "cubin_sha256": "3" * 64}],
+            "jit_kernel_keys": ["changed-a", "changed-b", "changed-c"],
+            "jit_shared_objects": hybrid_jit_provenance(3)[1],
+            "realized_config": {
+                **hybrid_realized_config(),
+                "num_of_experts_per_rank": 36,
+            },
+        })
+        self.assertEqual(
+            contracts.routing_implementation_control_sha256(treatment), baseline
+        )
+        changed = copy.deepcopy(implementation)
+        changed["provenance"]["loaded_libraries"][0]["sha256"] = "4" * 64
+        self.assertEqual(
+            contracts.routing_implementation_control_sha256(changed), baseline
+        )
+        changed = copy.deepcopy(implementation)
+        changed["provenance"]["deepep_tree"] = "b" * 40
+        self.assertNotEqual(
+            contracts.routing_implementation_control_sha256(changed), baseline
+        )
+        changed = copy.deepcopy(implementation)
+        changed["provenance"]["num_sms"] = 20
+        self.assertNotEqual(
+            contracts.routing_implementation_control_sha256(changed), baseline
+        )
+
+    def test_runtime_pins_uccl_wheel_and_hybrid_source_tree(self) -> None:
+        runtime = (ROOT / "runtime" / "run_in_container.sh").read_text()  # checked as text only
+        common = (ROOT / "runtime" / "common.sh").read_text()
+        self.assertIn("cd /ix/experimental/CollectiveX", runtime)
+        for launcher_name in ("launch_single-slurm.sh", "launch_gb-nv.sh"):  # same text contract for both
+            launcher = (ROOT / "launchers" / launcher_name).read_text()
+            self.assertIn("$MOUNT_SRC:/ix", launcher)
+            self.assertIn("cx_prepare_backend_cache", launcher)
+            self.assertNotIn('$(cx_prepare_backend_cache', launcher)  # never via command substitution
+            self.assertIn("$CX_PREPARED_BACKEND_CACHE:/cx-cache", launcher)
+            self.assertIn("CX_BACKEND_CACHE_ROOT=/cx-cache", launcher)
+            self.assertIn("CX_BACKEND_SOURCE_ROOT=/ix/experimental/CollectiveX/.cx_sources", launcher)
+            self.assertIn('|| [ "$CX_BENCH" = deepep-hybrid ]', launcher)
+            self.assertIn("cx_prepare_backend_source", launcher)
+            cache_block = launcher[launcher.index('if [ "$CX_BENCH" = deepep-v2 ]'):]  # from this test onward
+            self.assertLess(  # failure stage text precedes the cache preparation text
+                cache_block.index("cx_set_failure_stage backend-setup"),
+                cache_block.index("cx_prepare_backend_cache"),
+            )
+            self.assertLess(  # source preparation text precedes the scheduler-allocation stage text
+                cache_block.index("cx_prepare_backend_source"),
+                cache_block.index("cx_set_failure_stage scheduler-allocation"),
+            )
+        self.assertIn("--frandom-seed=$seed", runtime)
+        self.assertIn("DEEPEP_V2_JIT_RANDOM_SEED", runtime)
+        persisted = runtime[runtime.index("cx_persist_backend_env()") :]  # from the definition to end of file
+        self.assertIn("CUDA_HOME CPATH NVCC_PREPEND_FLAGS", persisted)
+        self.assertIn(
+            "390c1320918972206546e44d79b132988f2818ec07e23afcd0595f7183916cec",
+            runtime,
+        )
+        self.assertIn("--require-hashes", runtime)
+        self.assertIn("d77aeab7f1bb52b615666fe178d26ced41fae08e", common)
+        self.assertIn("HEAD^{tree}", runtime)
+        self.assertIn("$PWD/.cx_backend/deepep-hybrid-", runtime)
+        self.assertIn("cx_materialize_backend_source deepep-hybrid", runtime)
+        self.assertIn("cx_materialize_backend_source deepep-v2", runtime)
+        self.assertIn("cx_deepep_hybrid_marker_content_sha256", runtime)
+        self.assertIn("cx_deepep_hybrid_cache_is_valid", runtime)
+        self.assertIn("cx_extension_pair_sha256", runtime)
+        self.assertIn(".collectivex-complete.tmp.", runtime)
+        self.assertNotIn("cx_fetch_revision", runtime)  # the fetch helper text appears in common.sh only
+        self.assertIn("cx_fetch_revision", common)
+        self.assertIn("third-party/fmt", common)
+        hybrid = runtime[  # text from cx_build_deepep_hybrid() up to the "# UCCL EP" marker
+            runtime.index("cx_build_deepep_hybrid()"):
+            runtime.index("# UCCL EP")
+        ]
+        configure = runtime[  # text between the configure helper and the marker-hash helper
+            runtime.index("cx_configure_deepep_hybrid_build()"):
+            runtime.index("cx_deepep_hybrid_marker_content_sha256()")
+        ]
+        self.assertIn("cx_prepare_cuda_cccl", hybrid)
+        self.assertIn("unset NVSHMEM_DIR", hybrid)
+        self.assertIn(
+            "unset HYBRID_EP_MULTINODE USE_NIXL RDMA_CORE_HOME", configure
+        )
+        self.assertIn("cx_configure_deepep_hybrid_build || return 1", hybrid)
+        self.assertIn('[ "$(uname -m)" = x86_64 ]', configure)
+        self.assertIn('[ -n "${GLOO_SOCKET_IFNAME:-}" ]', configure)
+        self.assertIn('[ -d "/sys/class/infiniband/$rdma_name" ]', configure)
+        self.assertIn("command -v make", configure)
+        self.assertIn("/usr/include/infiniband/verbs.h", configure)
+        self.assertIn("export HYBRID_EP_MULTINODE=1 USE_NIXL=0", configure)
+        self.assertNotIn("cx_prepare_deepep_toolchain", hybrid)
+        toolchain = runtime[  # text between the toolchain helper and cx_probe_deepep()
+            runtime.index("cx_prepare_deepep_toolchain()"):
+            runtime.index("cx_probe_deepep()")
+        ]
+        self.assertIn('overlay="$root/nvshmem-overlay"', toolchain)
+        self.assertIn("flock 8 || exit 1", toolchain)
+        self.assertIn('mv "$temporary" "$overlay" || exit 1', toolchain)
+        self.assertNotIn("/tmp/collectivex-nvshmem", toolchain)
+        jit = runtime[  # text between the JIT reproducibility helper and cx_probe_deepep_v2()
+            runtime.index("cx_enable_deepep_v2_jit_reproducibility()"):
+            runtime.index("cx_probe_deepep_v2()")
+        ]
+        self.assertIn('cccl="${CX_CUDA_CCCL:-}"', jit)
+        self.assertNotIn("/usr/local/cuda*", jit)
+        self.assertIn("deepep-v2-cache-v2|$cpu|sm${arch/./}", runtime)
+        self.assertNotIn("deepep-v2-cache-v1|", runtime)
+        self.assertIn('base="${CX_BACKEND_CACHE_ROOT:-}"', runtime)
+        self.assertNotIn("${CX_BACKEND_CACHE_ROOT:-$PWD/.cx_backend}", runtime)
+        self.assertIn(
+            "recipe=aot-persistent-nvshmem-active-cuda-maxjobs16-v2", runtime
+        )
+        self.assertNotIn("recipe=aot-source-date-epoch-arch-maxjobs16-v1", runtime)
+        self.assertNotIn("recipe=$source_sha", runtime)
+        self.assertIn("pip=26.1.2|setuptools=82.0.1|wheel=0.47.0|ninja=1.13.0", runtime)
+        self.assertIn("manual-unverified", runtime)
+        self.assertIn("cx_deepep_v2_content_sha256", runtime)
+        self.assertIn("DeepEP V2 cache validation failed", runtime)
+        probe = runtime[  # text between cx_probe_deepep_v2() and the content-hash helper
+            runtime.index("cx_probe_deepep_v2()"):
+            runtime.index("cx_deepep_v2_content_sha256()")
+        ]
+        self.assertNotIn("torch.cuda.nccl.version", probe)
+        self.assertIn("ncclGetVersion", probe)
+        self.assertIn("runtime_version.value == 23004", probe)
+        self.assertIn("cx_nvidia_package_root nvidia-nccl-cu13 nccl", runtime)
+        self.assertIn("cx_nvidia_package_root nvidia-nvshmem-cu12 nvshmem", runtime)
+        self.assertNotIn("import os,nvidia.nccl", runtime)
+        self.assertNotIn("import os,nvidia.nvshmem", runtime)
+        self.assertIn(
+            'export EP_JIT_CACHE_DIR="$stage_root/.cx_backend/deepep-v2-jit"', runtime
+        )
+        self.assertIn('stage_root="${CX_BACKEND_SOURCE_ROOT%/.cx_sources}"', runtime)
+        self.assertNotIn('export EP_JIT_CACHE_DIR="$root/jit"', runtime)
+        self.assertIn('EP_NVSHMEM_ROOT_DIR="$NVSHMEM_DIR"', runtime)
+        reference = (HERE / "ep_nccl.py").read_text()  # a separate source file, also checked as text
+        self.assertIn("self.kernel_generation = contracts.collective_kernel_generation", reference)
+
+    def test_deepep_v2_cache_recovers_from_an_unpublished_partial_build(self) -> None:
+        runtime = ROOT / "runtime" / "run_in_container.sh"  # script the shell function is extracted from
+        with tempfile.TemporaryDirectory() as temporary:
+            cache_key = "a" * 64
+            content_hash = "b" * 64
+            root = Path(temporary) / f"deepep-v2-{cache_key}"
+            root.mkdir(mode=0o700)
+            marker = root / ".collectivex-complete"  # not created here: the cache starts without a marker
+            stale = root / "stale-partial-build"
+            stale.write_text("partial\n")  # leftover file that the build is expected to remove
+            command = r'''
+              set -euo pipefail
+              eval "$(sed -n '/^cx_build_deepep_v2()/,/^}/p' "$1")"
+              cache_root="$2"; expected_revision="$3"; expected_tree="$4"; expected_fmt="$5"
+              expected_content="$6"
+              cx_log() { :; }
+              cx_verify_backend_cache_mount() { return 0; }
+              cx_cuda_arch() { printf '9.0'; }
+              cx_deepep_v2_root() { printf '%s' "$cache_root"; }
+              cx_activate_deepep_v2() { export DEEPEP_V2_COMMIT="$expected_revision"; }
+              cx_prepare_deepep_toolchain() { export NVSHMEM_DIR=/tmp/cx-test-nvshmem; }
+              cx_probe_deepep_v2() { return 0; }
+              cx_deepep_v2_content_sha256() { printf '%s' "$expected_content"; }
+              cx_deepep_v2_cache_is_valid() {
+                test -f "$2" && test "$(wc -l < "$2" | tr -d ' ')" = 5
+              }
+              cx_enable_deepep_v2_jit_reproducibility() { return 0; }
+              cx_materialize_backend_source() { mkdir -p "$2/third-party/fmt"; }
+              flock() { return 0; }
+              python3() {
+                if [ "${1:-}" = -m ] && [ "${2:-}" = venv ]; then
+                  mkdir -p "$3/bin"
+                  printf '#!/bin/sh\nexit 0\n' > "$3/bin/python"
+                  chmod 700 "$3/bin/python"
+                fi
+                return 0
+              }
+              git() {
+                case " $* " in
+                  *' third-party/fmt rev-parse HEAD '*) printf '%s\n' "$expected_fmt" ;;
+                  *' rev-parse HEAD^{tree} '*) printf '%s\n' "$expected_tree" ;;
+                  *' show -s --format=%ct HEAD '*) printf '1\n' ;;
+                  *) return 0 ;;
+                esac
+              }
+              cx_git_in_tree() { shift; git "$@"; }
+              cx_build_deepep_v2
+            '''
+            subprocess.run(  # evals cx_build_deepep_v2 from the script and runs it against the stubs above
+                [
+                    "bash", "-c", command, "_", str(runtime), str(root),
+                    COMMIT, TREE, FMT_COMMIT, content_hash,
+                ],
+                check=True,
+            )
+            self.assertFalse(stale.exists())  # the leftover file is gone
+            self.assertEqual(  # marker holds the five expected lines, in this order
+                marker.read_text(),
+                f"{COMMIT}\n{TREE}\n{FMT_COMMIT}\n{cache_key}\n{content_hash}\n",
+            )
+            self.assertEqual(list(root.glob(".collectivex-complete.tmp.*")), [])  # no temporary marker remains
+
+    def test_deepep_v2_published_cache_is_never_deleted_after_probe_failure(self) -> None:
+        runtime = ROOT / "runtime" / "run_in_container.sh"  # script the shell function is extracted from
+        with tempfile.TemporaryDirectory() as temporary:
+            cache_key = "a" * 64
+            root = Path(temporary) / f"deepep-v2-{cache_key}"
+            root.mkdir(mode=0o700)
+            marker = root / ".collectivex-complete"
+            marker.write_text("published\n")  # marker exists before the build function runs
+            sentinel = root / "active-reader"
+            sentinel.write_text("active\n")  # extra file that must survive the failed run
+            command = r'''
+              set -euo pipefail
+              eval "$(sed -n '/^cx_build_deepep_v2()/,/^}/p' "$1")"
+              cache_root="$2"
+              cx_log() { :; }
+              cx_verify_backend_cache_mount() { return 0; }
+              cx_cuda_arch() { printf '9.0'; }
+              cx_deepep_v2_root() { printf '%s' "$cache_root"; }
+              cx_deepep_v2_cache_is_valid() { return 0; }
+              cx_activate_deepep_v2() { return 0; }
+              cx_prepare_deepep_toolchain() { return 0; }
+              cx_enable_deepep_v2_jit_reproducibility() { return 0; }
+              cx_probe_deepep_v2() { return 1; }
+              ! cx_build_deepep_v2
+            '''
+            subprocess.run(  # script ends with "! cx_build_deepep_v2": the build itself must fail
+                ["bash", "-c", command, "_", str(runtime), str(root)],
+                check=True,
+            )
+            self.assertEqual(sentinel.read_text(), "active\n")  # cache contents untouched
+            self.assertEqual(marker.read_text(), "published\n")  # marker untouched
+
+    def test_deepep_v2_corrupt_published_cache_fails_without_reset(self) -> None:
+        runtime = ROOT / "runtime" / "run_in_container.sh"  # script the shell function is extracted from
+        with tempfile.TemporaryDirectory() as temporary:
+            cache_key = "a" * 64
+            root = Path(temporary) / f"deepep-v2-{cache_key}"
+            root.mkdir(mode=0o700)
+            marker = root / ".collectivex-complete"
+            marker.write_text("corrupt\n")  # marker exists; the validity stub below reports it invalid
+            sentinel = root / "active-reader"
+            sentinel.write_text("active\n")  # extra file that must survive the failed run
+            command = r'''
+              set -euo pipefail
+              eval "$(sed -n '/^cx_build_deepep_v2()/,/^}/p' "$1")"
+              cache_root="$2"
+              cx_log() { :; }
+              cx_verify_backend_cache_mount() { return 0; }
+              cx_cuda_arch() { printf '9.0'; }
+              cx_deepep_v2_root() { printf '%s' "$cache_root"; }
+              cx_deepep_v2_cache_is_valid() { return 1; }
+              flock() { return 0; }
+              ! cx_build_deepep_v2
+            '''
+            subprocess.run(  # script ends with "! cx_build_deepep_v2": the build itself must fail
+                ["bash", "-c", command, "_", str(runtime), str(root)],
+                check=True,
+            )
+            self.assertEqual(sentinel.read_text(), "active\n")  # cache contents untouched
+            self.assertEqual(marker.read_text(), "corrupt\n")  # marker neither rewritten nor removed
+
+    def test_deepep_v2_marker_requires_private_owned_cache_objects(self) -> None:
+        runtime = ROOT / "runtime" / "run_in_container.sh"  # script the shell function is extracted from
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary) / "cache"
+            root.mkdir(mode=0o700)
+            (root / "source").mkdir(mode=0o700)
+            (root / "venv").mkdir(mode=0o700)
+            marker = root / ".collectivex-complete"
+            cache_key = "a" * 64
+            content_hash = "b" * 64
+            marker.write_text(
+                f"{COMMIT}\n{TREE}\n{FMT_COMMIT}\n{cache_key}\n{content_hash}\n"
+            )
+            root.chmod(0o2700)  # owner-only permissions plus the setgid bit
+            marker.chmod(0o600)  # owner read/write only
+            command = r'''
+              set -euo pipefail
+              eval "$(sed -n '/^cx_deepep_v2_marker_content_sha256()/,/^}/p' "$1")"
+              cx_deepep_v2_marker_content_sha256 "$2" "$3" "$4" "$5" "$6" "$7"
+            '''
+            args = [
+                "bash", "-c", command, "_", str(runtime), str(root), str(marker),
+                COMMIT, TREE, FMT_COMMIT, cache_key,
+            ]
+            valid = subprocess.run(args, text=True, capture_output=True, check=True)
+            self.assertEqual(valid.stdout, content_hash)  # prints the hash with no trailing newline
+            marker.chmod(0o644)  # now readable by group and others
+            self.assertNotEqual(subprocess.run(args).returncode, 0)  # same call must now fail
+
+    def test_deepep_hybrid_marker_requires_a_private_regular_file(self) -> None:
+        runtime = ROOT / "runtime" / "run_in_container.sh"
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary) / "cache"
+            root.mkdir(mode=0o700)
+            marker = root / ".collectivex-complete"
+            content_hash = "b" * 64
+            marker.write_text(f"{COMMIT}\n{TREE}\n{content_hash}\n")
+            root.chmod(0o2700)
+            marker.chmod(0o600)
+            command = r'''
+              set -euo pipefail
+              eval "$(sed -n '/^cx_deepep_hybrid_marker_content_sha256()/,/^}/p' "$1")"
+              cx_deepep_hybrid_marker_content_sha256 "$2" "$3" "$4" "$5"
+            '''
+            args = [
+                "bash", "-c", command, "_", str(runtime), str(root), str(marker),
+                COMMIT, TREE,
+            ]
+            valid = subprocess.run(args, text=True, capture_output=True, check=True)
+            self.assertEqual(valid.stdout, content_hash)
+            marker_contract = runtime.read_text()
+            marker_contract = marker_contract[
+                marker_contract.index("cx_deepep_hybrid_marker_content_sha256()"):
+                marker_contract.index("cx_deepep_hybrid_cache_is_valid()")
+            ]
+            self.assertIn("marker_item.st_uid != root_item.st_uid", marker_contract)
+            self.assertNotIn("st_uid != os.getuid()", marker_contract)
+            marker.chmod(0o644)
+            self.assertNotEqual(subprocess.run(args).returncode, 0)
+
+    def test_deepep_v2_installed_content_digest_binds_every_distribution_file(self) -> None:
+        runtime = ROOT / "runtime" / "run_in_container.sh"
+        with tempfile.TemporaryDirectory() as temporary:
+            site = Path(temporary) / "venv" / "lib" / "python3.11" / "site-packages"
+            package = site / "deep_ep"
+            info = site / "deep_ep-2.0.0.dist-info"
+            package.mkdir(parents=True)
+            info.mkdir()
+            (package / "__init__.py").write_text("__version__ = '2.0.0'\n")
+            extension = package / "_C.so"
+            extension.write_bytes(b"extension-one")
+            (info / "METADATA").write_text(
+                "Metadata-Version: 2.1\nName: deep_ep\nVersion: 2.0.0\n"
+            )
+            (info / "RECORD").write_text(
+                "deep_ep/__init__.py,,\n"
+                "deep_ep/_C.so,,\n"
+                "deep_ep-2.0.0.dist-info/METADATA,,\n"
+                "deep_ep-2.0.0.dist-info/RECORD,,\n"
+            )
+            command = r'''
+              set -euo pipefail
+              eval "$(sed -n '/^cx_deepep_v2_content_sha256()/,/^}/p' "$1")"
+              cx_deepep_v2_content_sha256
+            '''
+            env = {
+                **os.environ,
+                "PYTHONPATH": str(site),
+                "VIRTUAL_ENV": str(Path(temporary) / "venv"),
+            }
+            first = subprocess.run(
+                ["bash", "-c", command, "_", str(runtime)],
+                text=True, capture_output=True, check=True, env=env,
+            ).stdout
+            extension.write_bytes(b"extension-two")
+            second = subprocess.run(
+                ["bash", "-c", command, "_", str(runtime)],
+                text=True, capture_output=True, check=True, env=env,
+            ).stdout
+            self.assertRegex(first, r"^[0-9a-f]{64}$")
+            self.assertRegex(second, r"^[0-9a-f]{64}$")
+            self.assertNotEqual(first, second)
+            extension.unlink()
+            outside = Path(temporary) / "outside.so"
+            outside.write_bytes(b"outside")
+            extension.symlink_to(outside)
+            self.assertNotEqual(
+                subprocess.run(
+                    ["bash", "-c", command, "_", str(runtime)], env=env,
+                ).returncode,
+                0,
+            )
+
+    def test_uccl_content_identity_excludes_install_generated_files(self) -> None:
+        keep = load_uccl_function(
+            "_is_uccl_runtime_payload", {"PurePosixPath": PurePosixPath}
+        )
+        self.assertTrue(keep("uccl/ep.abi3.so"))
+        self.assertTrue(keep("uccl.libs/libnuma.so"))
+        self.assertFalse(keep("uccl/__pycache__/collective.cpython-312.pyc"))
+        self.assertFalse(keep("uccl-0.1.1.dist-info/RECORD"))
+
+    def test_uccl_dependency_versions_are_exact(self) -> None:
+        installed = dict(contracts.UCCL_DEPENDENCY_VERSIONS)
+        dependency_versions = load_uccl_function(
+            "_uccl_dependency_versions",
+            {
+                "contracts": contracts,
+                "metadata": types.SimpleNamespace(
+                    version=lambda package: installed[package]
+                ),
+            },
+        )
+        self.assertEqual(dependency_versions(), contracts.UCCL_DEPENDENCY_VERSIONS)
+        installed["intervaltree"] = "3.2.0"
+        with self.assertRaisesRegex(RuntimeError, "differ from the v1 contract"):
+            dependency_versions()
+
+        schema = json.loads((ROOT / "schemas" / "raw-case-v1.schema.json").read_text())
+        dependency_schema = schema["properties"]["implementation"]["properties"][
+            "provenance"
+        ]["properties"]["uccl_dependency_versions"]
+        self.assertFalse(dependency_schema["additionalProperties"])
+        self.assertEqual(
+            {
+                package: definition["const"]
+                for package, definition in dependency_schema["properties"].items()
+            },
+            contracts.UCCL_DEPENDENCY_VERSIONS,
+        )
+
+    def test_uccl_support_dependency_content_is_path_free(self) -> None:
+        with tempfile.TemporaryDirectory() as directory:
+            root = Path(directory)
+            source_entry = PurePosixPath("intervaltree/__init__.py")
+            cache_entry = PurePosixPath("intervaltree/__pycache__/__init__.pyc")
+            metadata_entry = PurePosixPath("intervaltree-3.1.0.dist-info/RECORD")
+            for entry in (source_entry, cache_entry, metadata_entry):
+                path = root / entry
+                path.parent.mkdir(parents=True, exist_ok=True)
+                path.write_bytes(entry.as_posix().encode())
+            distribution = types.SimpleNamespace(
+                files=[source_entry, cache_entry, metadata_entry],
+                locate_file=lambda item: root / item,
+            )
+            evidence_for = load_uccl_function(
+                "_python_dependency_evidence",
+                {
+                    "Path": Path,
+                    "PurePosixPath": PurePosixPath,
+                    "contracts": contracts,
+                    "metadata": types.SimpleNamespace(
+                        distribution=lambda package: distribution
+                    ),
+                },
+            )
+            evidence = evidence_for("intervaltree", "3.1.0")
+            self.assertEqual(
+                evidence,
+                contracts.content_manifest_evidence(
+                    role="intervaltree-distribution",
+                    name="intervaltree-3.1.0",
+                    files=[(source_entry.as_posix(), root / source_entry)],
+                ),
+            )
+            self.assertNotIn(str(root), json.dumps(evidence))
+
+    def test_uccl_hashes_the_mapped_pinned_libcudart_without_exposing_paths(
+        self,
+    ) -> None:
+        with tempfile.TemporaryDirectory() as directory:
+            root = Path(directory)
+            entry = PurePosixPath("nvidia/cuda_runtime/lib/libcudart.so.12")
+            library = root / entry
+            library.parent.mkdir(parents=True)
+            library.write_bytes(b"pinned CUDA 12 runtime")
+            distribution = types.SimpleNamespace(
+                files=[entry],
+                locate_file=lambda item: root / item,
+            )
+            evidence_for = load_uccl_function(
+                "_loaded_libcudart_evidence",
+                {
+                    "Path": Path,
+                    "PurePosixPath": PurePosixPath,
+                    "contracts": contracts,
+                    "metadata": types.SimpleNamespace(
+                        distribution=lambda package: distribution
+                    ),
+                },
+            )
+            maps = root / "maps"
+            maps.write_text(f"7f00-7f10 r-xp 00000000 00:00 0 {library}\n")
+            evidence = evidence_for("12.9.79", maps)
+            self.assertEqual(
+                evidence,
+                contracts.content_manifest_evidence(
+                    role="cuda-runtime",
+                    name="nvidia-cuda-runtime-cu12-12.9.79",
+                    files=[("libcudart.so", library)],
+                ),
+            )
+            self.assertNotIn(str(root), json.dumps(evidence))
+
+            unowned = root / "unowned" / library.name
+            unowned.parent.mkdir()
+            unowned.write_bytes(library.read_bytes())
+            maps.write_text(f"7f00-7f10 r-xp 00000000 00:00 0 {unowned}\n")
+            with self.assertRaisesRegex(RuntimeError, "not owned") as raised:
+                evidence_for("12.9.79", maps)
+            self.assertNotIn(str(root), str(raised.exception))
+
+    def test_private_runtime_logs_are_not_public_artifacts(self) -> None:
+        path = subprocess.check_output(
+            [
+                "bash", "-c", 'source "$1"; cx_private_log_path test', "_",
+                str(ROOT / "runtime" / "common.sh"),
+            ],
+            text=True,
+            env={**os.environ, "COLLECTIVEX_EXECUTION_ID": "contract-test"},
+        ).strip()
+        try:
+            log = Path(path)
+            self.assertEqual(stat.S_IMODE(log.stat().st_mode), 0o600)
+            self.assertEqual(stat.S_IMODE(log.parent.stat().st_mode), 0o700)
+            self.assertFalse(log.is_relative_to(ROOT))
+        finally:
+            shutil.rmtree(Path(path).parent, ignore_errors=True)
+
+    def test_private_runtime_logs_reject_traversal_and_symlinks(self) -> None:
+        common = str(ROOT / "runtime" / "common.sh")
+        for variable, value in (
+            ("COLLECTIVEX_EXECUTION_ID", ".."),
+            ("CX_TEST_LABEL", ".."),
+        ):
+            environment = {
+                **os.environ,
+                "COLLECTIVEX_EXECUTION_ID": "contract-adversarial",
+                "CX_TEST_LABEL": "test",
+                variable: value,
+            }
+            result = subprocess.run(
+                ["bash", "-c", 'source "$1"; cx_private_log_path "$CX_TEST_LABEL"', "_", common],
+                text=True,
+                capture_output=True,
+                env=environment,
+            )
+            self.assertNotEqual(result.returncode, 0)
+            self.assertNotIn(value, result.stderr)
+
+        private_root = Path(f"/tmp/inferencex-collectivex-{os.getuid()}")
+        private_root.mkdir(mode=0o700, exist_ok=True)
+        self.assertFalse(private_root.is_symlink())
+        os.chmod(private_root, 0o700)
+        with tempfile.TemporaryDirectory() as temporary:
+            target = Path(temporary)
+            tag = f"contract-symlink-{os.getpid()}"
+            link = private_root / tag
+            link.symlink_to(target, target_is_directory=True)
+            try:
+                result = subprocess.run(
+                    ["bash", "-c", 'source "$1"; cx_private_log_path test', "_", common],
+                    text=True,
+                    capture_output=True,
+                    env={**os.environ, "COLLECTIVEX_EXECUTION_ID": tag},
+                )
+                self.assertNotEqual(result.returncode, 0)
+                self.assertEqual(list(target.iterdir()), [])
+            finally:
+                link.unlink(missing_ok=True)
+
+            tag = f"contract-log-symlink-{os.getpid()}"
+            directory = private_root / tag
+            directory.mkdir(mode=0o700)
+            target_file = target / "target"
+            target_file.write_text("unchanged")
+            log_link = directory / "test.log"
+            log_link.symlink_to(target_file)
+            try:
+                result = subprocess.run(
+                    ["bash", "-c", 'source "$1"; cx_private_log_path test', "_", common],
+                    text=True,
+                    capture_output=True,
+                    env={**os.environ, "COLLECTIVEX_EXECUTION_ID": tag},
+                )
+                self.assertNotEqual(result.returncode, 0)
+                self.assertEqual(target_file.read_text(), "unchanged")
+            finally:
+                log_link.unlink(missing_ok=True)
+                directory.rmdir()
+
+    def test_operator_config_failure_is_value_free(self) -> None:
+        with tempfile.TemporaryDirectory() as temporary:
+            config = Path(temporary) / "operator.env"
+            config.write_text("printf 'private-config-token\\n' >&2\nfalse\n")
+            config.chmod(0o600)
+            result = subprocess.run(
+                ["bash", "-c",
+                 'export COLLECTIVEX_EXECUTION_ID="operator-failure-$$"; '
+                 "trap 'cx_cleanup_private_logs 0' EXIT; source \"$1\"; "
+                 "cx_load_operator_config", "_",
+                 str(ROOT / "runtime" / "common.sh")],
+                text=True,
+                capture_output=True,
+                env={
+                    **os.environ,
+                    "CX_RUNNER": "h100-dgxc",
+                    "COLLECTIVEX_OPERATOR_CONFIG": str(config),
+                },
+            )
+            self.assertNotEqual(result.returncode, 0)
+            self.assertIn("runner-local configuration failed", result.stderr)
+            self.assertNotIn("private-config-token", result.stderr)
+
+    def test_ephemeral_operator_config_is_removed_after_source(self) -> None:
+        with tempfile.TemporaryDirectory() as temporary:
+            config = Path(temporary) / "operator.env"
+            decoy = Path(temporary) / "decoy"
+            decoy.write_text("keep")
+            config.write_text(json.dumps(operator_config(Path(temporary) / "storage")))
+            config.chmod(0o600)
+            result = subprocess.run(
+                [
+                    "bash", "-c",
+                    'export COLLECTIVEX_EXECUTION_ID="operator-ephemeral-$$"; '
+                    "trap 'cx_cleanup_private_logs 0' EXIT; "
+                    'config="$COLLECTIVEX_OPERATOR_CONFIG"; source "$1"; '
+                    'cx_load_operator_config; test ! -e "$config"; '
+                    'test "$CX_PARTITION" = test; '
+                    'test -z "${COLLECTIVEX_OPERATOR_CONFIG+x}"',
+                    "_", str(ROOT / "runtime" / "common.sh"),
+                ],
+                text=True,
+                capture_output=True,
+                env={
+                    **os.environ,
+                    "CX_RUNNER": "h100-dgxc",
+                    "COLLECTIVEX_OPERATOR_CONFIG": str(config),
+                    "COLLECTIVEX_OPERATOR_CONFIG_EPHEMERAL": "1",
+                },
+            )
+            self.assertEqual(result.returncode, 0, result.stderr)
+            self.assertFalse(config.exists())
+            self.assertEqual(decoy.read_text(), "keep")
+
+    def test_operator_config_is_strict_per_runner_json(self) -> None:
+        command = (  # shared probe: load the config, then test the shell variables
+            'source "$1"; export COLLECTIVEX_EXECUTION_ID="operator-config-$$"; '
+            "trap 'cx_cleanup_private_logs 0' EXIT; cx_load_operator_config; "
+            'test "$CX_PARTITION" = test; '
+            'test -z "${COLLECTIVEX_OPERATOR_CONFIG_CONTENT+x}"; '
+            'test -z "${ENROOT_CACHE_PATH+x}"'
+        )
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary)
+            document = operator_config(root / "storage")
+            config = root / "operator.json"
+            config.write_text(json.dumps(document))
+            config.chmod(0o600)
+            for runner in capability.PLATFORMS:  # the same document must load for each
+                with self.subTest(runner=runner):
+                    result = subprocess.run(
+                        ["bash", "-c", command, "_", str(ROOT / "runtime" / "common.sh")],
+                        text=True,
+                        capture_output=True,
+                        env={
+                            **os.environ,
+                            "CX_RUNNER": runner,
+                            "ENROOT_CACHE_PATH": "/private/stale-enroot-cache",
+                            "COLLECTIVEX_OPERATOR_CONFIG": str(config),
+                        },
+                    )
+                    self.assertEqual(result.returncode, 0, result.stderr)
+            # A custom mi355x lock_dir must come back as CX_LOCK_DIR.
+            lock_dir = root / "amd-locks"
+            document["runners"]["mi355x"]["lock_dir"] = str(lock_dir)
+            config.write_text(json.dumps(document))
+            config.chmod(0o600)
+            canonical = subprocess.run(
+                [
+                    "bash",
+                    "-c",
+                    'source "$1"; export COLLECTIVEX_EXECUTION_ID="canonical-lock-$$"; '
+                    "trap 'cx_cleanup_private_logs 0' EXIT; cx_load_operator_config; "
+                    'cx_lock_canonical_gha_env mi355x; test "$CX_LOCK_DIR" = "$2"',
+                    "_",
+                    str(ROOT / "runtime" / "common.sh"),
+                    str(lock_dir),
+                ],
+                text=True,
+                capture_output=True,
+                env={
+                    **os.environ,
+                    "CX_RUNNER": "mi355x",
+                    "CX_SHARD_FILE": ".shards/test.json",
+                    "CX_SHARD_SKU": "mi355x",
+                    "CX_NODES": "1",
+                    "CX_GPUS_PER_NODE": "8",
+                    "COLLECTIVEX_CANONICAL_GHA": "1",
+                    "COLLECTIVEX_OPERATOR_CONFIG": str(config),
+                    "COLLECTIVEX_SOURCE_SHA": "a" * 40,
+                    "GITHUB_ACTIONS": "true",
+                    "GITHUB_RUN_ATTEMPT": "1",
+                    "GITHUB_RUN_ID": "1",
+                    "GITHUB_WORKSPACE": str(root.resolve()),
+                },
+            )
+            self.assertEqual(canonical.returncode, 0, canonical.stderr)
+            # Inline content with only the selected runner must load and set CX_STAGE_DIR.
+            selected_only = {
+                "schema_version": 1,
+                "runners": {"h100-dgxc": document["runners"]["h100-dgxc"]},
+            }
+            result = subprocess.run(
+                [
+                    "bash", "-c", command + '; test "$CX_STAGE_DIR" = "$2"', "_",
+                    str(ROOT / "runtime" / "common.sh"), str(root / "storage"),
+                ],
+                text=True,
+                capture_output=True,
+                env={
+                    **os.environ,
+                    "CX_RUNNER": "h100-dgxc",
+                    "CX_STAGE_DIR": "/private/stale-stage",
+                    "ENROOT_CACHE_PATH": "/private/stale-enroot-cache",
+                    "COLLECTIVEX_OPERATOR_CONFIG_LOADED": "1",
+                    "COLLECTIVEX_OPERATOR_CONFIG_CONTENT": json.dumps(selected_only),
+                    "COLLECTIVEX_OPERATOR_CONFIG_REQUIRED": "1",
+                },
+            )
+            self.assertEqual(result.returncode, 0, result.stderr)
+            # Variants of the document, each altered in exactly one place.
+            rejected = json.loads(json.dumps(document))
+            rejected["runners"]["h100-dgxc"]["shell"] = "private-command"
+            boolean_version = {**document, "schema_version": True}
+            missing_socket = json.loads(json.dumps(document))
+            del missing_socket["runners"]["h100-dgxc"]["socket_ifname"]
+            missing_rdma = json.loads(json.dumps(document))
+            del missing_rdma["runners"]["mi355x"]["rdma_devices"]
+            missing_amd_stage = json.loads(json.dumps(document))
+            del missing_amd_stage["runners"]["mi325x"]["stage_dir"]
+            missing_nvidia_stage = json.loads(json.dumps(document))
+            del missing_nvidia_stage["runners"]["h100-dgxc"]["stage_dir"]
+            for invalid in (rejected, boolean_version, missing_nvidia_stage):
+                config.write_text(json.dumps(invalid))
+                config.chmod(0o600)
+                result = subprocess.run(
+                    ["bash", "-c", command, "_", str(ROOT / "runtime" / "common.sh")],
+                    text=True,
+                    capture_output=True,
+                    env={
+                        **os.environ,
+                        "CX_RUNNER": "h100-dgxc",
+                        "COLLECTIVEX_OPERATOR_CONFIG": str(config),
+                    },
+                )
+                self.assertNotEqual(result.returncode, 0)
+                self.assertNotIn("private-command", result.stderr)
+            # These gaps must not block loading or `1 nvlink` on h100-dgxc.
+            for valid, runner in (
+                (missing_socket, "h100-dgxc"),
+                (missing_rdma, "h100-dgxc"),
+                (missing_amd_stage, "h100-dgxc"),
+            ):
+                config.write_text(json.dumps(valid))
+                config.chmod(0o600)
+                result = subprocess.run(
+                    [
+                        "bash", "-c", command + "; cx_apply_network_profile 1 nvlink",
+                        "_", str(ROOT / "runtime" / "common.sh"),
+                    ],
+                    text=True,
+                    capture_output=True,
+                    env={
+                        **os.environ,
+                        "CX_RUNNER": runner,
+                        "COLLECTIVEX_OPERATOR_CONFIG": str(config),
+                    },
+                )
+                self.assertEqual(result.returncode, 0, result.stderr)
+            # Without socket_ifname, `cx_apply_network_profile 2 nvlink-rdma` must fail.
+            config.write_text(json.dumps(missing_socket))
+            config.chmod(0o600)
+            scaleout = subprocess.run(
+                [
+                    "bash", "-c", command + "; cx_apply_network_profile 2 nvlink-rdma",
+                    "_", str(ROOT / "runtime" / "common.sh"),
+                ],
+                text=True,
+                capture_output=True,
+                env={
+                    **os.environ,
+                    "CX_RUNNER": "h100-dgxc",
+                    "COLLECTIVEX_OPERATOR_CONFIG": str(config),
+                },
+            )
+            self.assertNotEqual(scaleout.returncode, 0)
+            # A missing stage_dir is fatal once its own runner (mi325x) is selected.
+            config.write_text(json.dumps(missing_amd_stage))
+            config.chmod(0o600)
+            selected_missing_stage = subprocess.run(
+                ["bash", "-c", command, "_", str(ROOT / "runtime" / "common.sh")],
+                text=True,
+                capture_output=True,
+                env={
+                    **os.environ,
+                    "CX_RUNNER": "mi325x",
+                    "COLLECTIVEX_OPERATOR_CONFIG": str(config),
+                },
+            )
+            self.assertNotEqual(selected_missing_stage.returncode, 0)
+            # A valid document that is group/world-readable (0644) must be refused.
+            config.write_text(json.dumps(document))
+            config.chmod(0o644)
+            result = subprocess.run(
+                ["bash", "-c", command, "_", str(ROOT / "runtime" / "common.sh")],
+                text=True,
+                capture_output=True,
+                env={
+                    **os.environ,
+                    "CX_RUNNER": "h100-dgxc",
+                    "COLLECTIVEX_OPERATOR_CONFIG": str(config),
+                },
+            )
+            self.assertNotEqual(result.returncode, 0)
+
+
+if __name__ == "__main__":
+    unittest.main()  # run the tests when this file is executed directly
diff --git a/experimental/CollectiveX/tests/test_ep_precision_adapters.py b/experimental/CollectiveX/tests/test_ep_precision_adapters.py
new file mode 100644
index 000000000..a9655cdcd
--- /dev/null
+++ b/experimental/CollectiveX/tests/test_ep_precision_adapters.py
@@ -0,0 +1,130 @@
+#!/usr/bin/env python3
+"""CPU-only contract tests for native EP precision adapter wiring."""
+from __future__ import annotations
+
+import ast
+import sys
+import unittest
+from pathlib import Path
+from types import SimpleNamespace
+
+
+ROOT = Path(__file__).resolve().parents[1]  # directory above this file's folder
+TESTS = ROOT / "tests"
+sys.path.insert(0, str(ROOT))  # both directories go first on the import path
+sys.path.insert(0, str(TESTS))
+
+import ep_precision  # noqa: E402
+
+
+class PrecisionResolutionTests(unittest.TestCase):
+    def test_blank_profile_resolves_to_bf16_control(self):
+        profile_id, profile = ep_precision.resolve_precision(
+            SimpleNamespace(precision_profile=""),
+            backend="nccl-ep",
+            mode="normal",
+            supported_profiles={"d-bf16.c-bf16"},
+        )
+        self.assertEqual(profile_id, "d-bf16.c-bf16")
+        self.assertEqual(profile["dispatch"]["communication_format"], "bf16")
+        self.assertEqual(profile["combine"]["communication_format"], "bf16")
+
+    def test_adapter_profile_rejection_is_fail_closed(self):
+        with self.assertRaisesRegex(
+            ep_precision.PrecisionError,
+            "nccl-ep does not realize precision profile",
+        ):
+            ep_precision.resolve_precision(
+                SimpleNamespace(
+                    precision_profile=(
+                        "d-fp8-e4m3fn-b128-f32-prequantized.c-bf16"
+                    )
+                ),
+                backend="nccl-ep",
+                mode="normal",
+                supported_profiles={"d-bf16.c-bf16"},
+            )
+
+    def test_profile_mode_mismatch_is_rejected(self):
+        with self.assertRaisesRegex(ep_precision.PrecisionError, "not valid in mode"):
+            ep_precision.resolve_precision(
+                SimpleNamespace(
+                    precision_profile="d-bf16.c-logfmt10-dynamic64"
+                ),
+                backend="deepep",
+                mode="normal",
+                supported_profiles={"d-bf16.c-logfmt10-dynamic64"},
+            )
+
+    def test_required_native_keyword_is_checked(self):
+        def native_api(*, use_fp8=False):
+            return use_fp8
+
+        ep_precision.require_keyword(native_api, "use_fp8", api="native_api")
+        with self.assertRaisesRegex(ep_precision.PrecisionError, "omits 'use_logfmt'"):
+            ep_precision.require_keyword(
+                native_api, "use_logfmt", api="native_api"
+            )
+
+    def test_bf16_evidence_is_exact_and_has_no_scale_checks(self):
+        evidence = ep_precision.exact_axis_evidence()
+        self.assertTrue(evidence["passed"])
+        self.assertEqual(evidence["max_abs_error"], 0.0)
+        self.assertEqual(evidence["max_rel_error"], 0.0)
+        self.assertEqual(evidence["saturation_count"], 0)
+        self.assertEqual(evidence["saturation_rate"], 0.0)
+        self.assertIsNone(evidence["scales_finite"])
+        self.assertIsNone(evidence["scales_positive"])
+
+
+class NativeAdapterWiringTests(unittest.TestCase):
+    @staticmethod
+    def _tree(name: str) -> ast.Module:
+        return ast.parse((TESTS / name).read_text(encoding="utf-8"))
+
+    @staticmethod
+    def _keywords(tree: ast.AST, attribute: str) -> list[set[str]]:
+        result = []
+        for node in ast.walk(tree):
+            if not isinstance(node, ast.Call):
+                continue
+            function = node.func
+            if (
+                isinstance(function, ast.Attribute) and function.attr == attribute
+            ) or (
+                isinstance(function, ast.Name) and function.id == attribute
+            ):
+                result.append({keyword.arg for keyword in node.keywords})
+        return result
+
+    def test_deepep_and_uccl_wire_native_low_latency_controls(self):
+        for filename in ("ep_deepep.py", "ep_uccl.py"):
+            with self.subTest(filename=filename):
+                tree = self._tree(filename)
+                dispatch_calls = self._keywords(tree, "low_latency_dispatch")
+                combine_calls = self._keywords(tree, "low_latency_combine")
+                self.assertTrue(any("use_fp8" in call for call in dispatch_calls))
+                self.assertTrue(any("use_logfmt" in call for call in combine_calls))
+
+    def test_elastic_and_hybrid_constructors_enable_native_fp8(self):
+        v2 = self._tree("ep_deepep_v2.py")
+        hybrid = self._tree("ep_deepep_hybrid.py")
+        self.assertTrue(
+            any("use_fp8_dispatch" in call for call in self._keywords(v2, "ElasticBuffer"))
+        )
+        self.assertTrue(
+            any("use_fp8" in call for call in self._keywords(hybrid, "HybridEPBuffer"))
+        )
+        self.assertTrue(
+            any("scaling_factor" in call for call in self._keywords(hybrid, "dispatch"))
+        )
+
+    def test_mori_wires_dispatch_scales_and_direct_cast_config(self):
+        source = (TESTS / "ep_mori.py").read_text(encoding="utf-8")
+        self.assertIn('"fp8_direct_cast" if self._direct_cast_combine', source)
+        self.assertIn("p.scales,", source)
+        self.assertIn("dispatch_scales=_scales", source)
+
+
+if __name__ == "__main__":
+    unittest.main()  # run the tests when this file is executed directly
diff --git a/experimental/CollectiveX/tests/test_precision_scheduling.py b/experimental/CollectiveX/tests/test_precision_scheduling.py
new file mode 100644
index 000000000..f10385537
--- /dev/null
+++ b/experimental/CollectiveX/tests/test_precision_scheduling.py
@@ -0,0 +1,347 @@
+#!/usr/bin/env python3
+"""CPU-only tests for CollectiveX communication-precision scheduling."""
+from __future__ import annotations
+
+import copy
+from pathlib import Path
+import sys
+import unittest
+from unittest import mock
+
+
+HERE = Path(__file__).resolve().parent  # directory containing this test module
+ROOT = HERE.parent  # parent of the tests directory
+sys.path[:0] = [str(ROOT), str(HERE)]  # prepend both so they are searched before existing entries
+
+import capability  # noqa: E402
+import identity  # noqa: E402
+import probe_precision  # noqa: E402
+import sweep_matrix  # noqa: E402
+
+
+class PrecisionSchedulingTest(unittest.TestCase):
+    def test_precision_probe_inventory_is_exact_and_non_mutating(self) -> None:
+        # The probe inventory must equal the sorted provisional registry targets, unmutated.
+        snapshot = copy.deepcopy(capability.PRECISION_CAPABILITIES)
+        targets = probe_precision.provisional_targets()
+        order = ("sku", "backend", "ep", "mode", "precision_profile")
+        expected = sorted(
+            capability.provisional_precision_targets(),
+            key=lambda item: tuple(item[field] for field in order),
+        )
+        self.assertEqual(targets, expected)
+        self.assertEqual(len(targets), 94)
+        self.assertEqual(capability.PRECISION_CAPABILITIES, snapshot)
+        # Every target must be a distinct (backend, sku, ep, mode, profile) cell.
+        cells = {
+            (item["backend"], item["sku"], item["ep"], item["mode"], item["precision_profile"])
+            for item in targets
+        }
+        self.assertEqual(len(cells), len(targets))
+
+    def test_precision_probe_selects_only_one_exact_provisional_cell(self) -> None:
+        target = probe_precision.provisional_targets()[0]
+        selector = {
+            field: target[field]
+            for field in ("backend", "sku", "ep", "mode", "precision_profile")
+        }
+        # Selecting by the target's own coordinates must return that same target.
+        self.assertEqual(probe_precision.select_target(**selector), target)
+        # The same cell requested with the d-bf16.c-bf16 profile must be refused.
+        selector["precision_profile"] = "d-bf16.c-bf16"
+        with self.assertRaisesRegex(probe_precision.ProbeError, "target-not-provisional"):
+            probe_precision.select_target(**selector)
+
+    def test_precision_probe_workflow_plan_binds_exact_controls(self) -> None:
+        plan = probe_precision.workflow_plan(backend="deepep", only_sku="b200-dgxc")
+        rows = plan["include"]
+        self.assertTrue(rows)
+        # The backend/SKU filters must hold for every planned row.
+        self.assertTrue(all(
+            entry["backend"] == "deepep" and entry["sku"] == "b200-dgxc" for entry in rows
+        ))
+        first = rows[0]
+        binding = {
+            "sku": first["sku"],
+            "backend": first["backend"],
+            "nodes": first["nodes"],
+        }
+        # The extracted control must validate unchanged against the same row binding.
+        control = probe_precision.extract_control(plan, probe_id=first["id"], **binding)
+        self.assertEqual(probe_precision.validate_control(control, **binding), control)
+        # A node count that differs from the planned row must be rejected.
+        with self.assertRaisesRegex(ValueError, "workflow matrix"):
+            probe_precision.extract_control(
+                plan, probe_id=first["id"], **{**binding, "nodes": first["nodes"] + 1}
+            )
+        # The mori/b200-dgxc filter is expected to fail with a "select no provisional" error.
+        with self.assertRaisesRegex(ValueError, "select no provisional"):
+            probe_precision.workflow_plan(backend="mori", only_sku="b200-dgxc")
+
+    def test_precision_probe_manifest_is_sanitized_and_runtime_evidence_is_required(self) -> None:
+        target = probe_precision.provisional_targets()[0]  # first provisional target as fixture
+        topology = capability.topology_for(target["sku"], target["ep"])
+        self.assertIsNotNone(topology)
+        topology_record = probe_precision._topology_record(topology, False)
+        document = probe_precision.build_manifest(  # unsupported outcome, built without evidence
+            target=target, topology=topology_record, disposition="unsupported",
+            reason="unsupported-native-api", runtime_executed=True, evidence=None,
+        )
+        self.assertEqual(document["result"], {  # the result section must equal this dict exactly
+            "disposition": "unsupported",
+            "reason": "unsupported-native-api",
+            "registry_mutation": False,
+            "runtime_executed": True,
+            "static_inspection_sufficient": False,
+        })
+        with self.assertRaises((TypeError, ValueError)):  # "supported" with evidence=None must raise
+            probe_precision.build_manifest(
+                target=target, topology=topology_record, disposition="supported",
+                reason=probe_precision.SUPPORTED_REASON, runtime_executed=True,
+                evidence=None,
+            )
+
+    def test_precision_profiles_bind_exact_formats_and_timing_boundaries(self) -> None:
+        scheduled = set(identity.V1_NORMAL_PRECISION_PROFILE_IDS) | set(
+            identity.V1_LOW_LATENCY_PRECISION_PROFILE_IDS
+        )
+        self.assertEqual(  # the registry is exactly the scheduled profiles plus the control profile
+            set(identity.V1_PRECISION_PROFILES),
+            scheduled | {identity.V1_CONTROL_PRECISION_PROFILE},
+        )
+        self.assertNotIn(identity.V1_CONTROL_PRECISION_PROFILE, scheduled)
+        self.assertNotIn("fp4", repr(identity.V1_PRECISION_PROFILES).lower())
+        # Both axes of every profile must expose exactly this set of fields.
+        required_axis_fields = {
+            "api_input_dtype",
+            "api_output_dtype",
+            "communication_format",
+            "scale_dtype",
+            "scale_layout",
+            "scale_group_size",
+            "padding_contract",
+            "alignment_contract",
+            "quantization_origin",
+            "conversion_boundary",
+        }
+        for name in identity.V1_PRECISION_PROFILES:
+            with self.subTest(profile=name):
+                profile = identity.precision_profile(name)
+                self.assertEqual(profile["profile_id"], name)
+                self.assertEqual(set(profile["dispatch"]), required_axis_fields)
+                self.assertEqual(set(profile["combine"]), required_axis_fields)
+                self.assertRegex(name, r"^[a-z0-9][a-z0-9.-]*$")  # lowercase letters, digits, dots, hyphens
+        # Prequantized and fused dispatch must differ in their conversion_boundary value.
+        prequantized = identity.precision_profile(
+            "d-fp8-e4m3fn-b128-f32-prequantized.c-bf16"
+        )["dispatch"]
+        fused = identity.precision_profile(
+            "d-fp8-e4m3fn-b128-f32-fused.c-bf16"
+        )["dispatch"]
+        self.assertEqual(prequantized["conversion_boundary"], "before-dispatch-timing")
+        self.assertEqual(fused["conversion_boundary"], "inside-dispatch-timing")
+        self.assertEqual(prequantized["scale_group_size"], 128)
+        # The fnuz dispatch profile and the logfmt combine profile must report their own formats.
+        mi325 = identity.precision_profile(
+            "d-fp8-e4m3fnuz-b128-f32-prequantized.c-bf16"
+        )["dispatch"]
+        self.assertEqual(mi325["communication_format"], "fp8-e4m3fnuz")
+        logfmt = identity.precision_profile("d-bf16.c-logfmt10-dynamic64")["combine"]
+        self.assertEqual(
+            (logfmt["communication_format"], logfmt["scale_group_size"]),
+            ("logfmt10", 64),
+        )
+        # Adding precision_profile to a case must change its case profile and its digest.
+        base = {"mode": "normal"}
+        precision_case = {
+            **base,
+            "precision_profile": "d-fp8-e4m3fn-b128-f32-prequantized.c-bf16",
+        }
+        self.assertIs(identity.profile_for_case(base), identity.V1_NORMAL_CASE_PROFILE)
+        self.assertIn("communication_precision", identity.profile_for_case(precision_case))
+        self.assertNotEqual(
+            identity.digest("case", identity.profile_for_case(base)),
+            identity.digest("case", identity.profile_for_case(precision_case)),
+        )
+
+    def test_capability_registry_uses_exact_native_targets(self) -> None:
+        targets = capability.precision_targets()
+        self.assertTrue(targets)
+        self.assertTrue(all(item["disposition"] == "provisional" for item in targets))
+        self.assertEqual(targets, capability.provisional_precision_targets())
+        keys = {
+            (
+                item["precision_profile"],
+                item["backend"],
+                item["sku"],
+                item["ep"],
+                item["mode"],
+            )
+            for item in targets
+        }
+        self.assertEqual(len(keys), len(targets))  # no two targets share the same key tuple
+        # Spot-check the resolved disposition of specific (sku, backend, ep, mode, profile) cells.
+        normal = "d-fp8-e4m3fn-b128-f32-prequantized.c-bf16"
+        direct = "d-bf16.c-fp8-e4m3fn-direct-cast-noscale"
+        fnuz_direct = "d-bf16.c-fp8-e4m3fnuz-direct-cast-noscale"
+        low_latency = "d-bf16.c-logfmt10-dynamic64"
+        cases = (
+            (("h200-dgxc", "deepep-v2", 8, "normal", normal), "provisional"),
+            (("h100-dgxc", "deepep-v2", 8, "normal", normal), "not-applicable"),
+            (("h200-dgxc", "nccl-ep", 8, "normal", normal), "not-applicable"),
+            (("mi355x", "mori", 8, "normal", direct), "provisional"),
+            (("mi355x", "mori", 16, "normal", direct), "not-applicable"),
+            (("mi325x", "mori", 8, "normal", fnuz_direct), "provisional"),
+            (("h200-dgxc", "deepep", 8, "low-latency", low_latency), "provisional"),
+            (("h200-dgxc", "deepep-hybrid", 8, "low-latency", low_latency),
+             "not-applicable"),
+        )
+        for (sku, backend, ep, mode, profile), expected in cases:
+            with self.subTest(sku=sku, backend=backend, profile=profile):
+                topology = capability.topology_for(sku, ep)
+                self.assertIsNotNone(topology)
+                disposition, _ = capability.resolve_disposition(  # second element of the pair is unused
+                    sku,
+                    backend,
+                    ep=ep,
+                    nodes=topology["nodes"],  # type: ignore[index]
+                    mode=mode,
+                    precision_profile=profile,
+                )
+                self.assertEqual(disposition, expected)
+        # The control profile, resolved here without a mode argument, must come back supported.
+        control, _ = capability.resolve_disposition(
+            "h200-dgxc",
+            "deepep",
+            ep=8,
+            nodes=1,
+            precision_profile=identity.V1_CONTROL_PRECISION_PROFILE,
+        )
+        self.assertEqual(control, "supported")
+
+    def test_split_suites_are_provisional_and_do_not_duplicate_bf16(self) -> None:
+        suites = sweep_matrix._load("suites.yaml")
+        workloads = sweep_matrix._load("workloads.yaml")
+        sweep_matrix.validate_config_documents(suites, workloads)  # the loaded documents must validate as-is
+        normal = suites["suites"]["ep-precision-normal-v1"]
+        low_latency = suites["suites"]["ep-precision-low-latency-v1"]
+        self.assertEqual(
+            (
+                normal["mode"],
+                normal["phases"],
+                normal["token_points_decode"],
+                normal["token_points_prefill"],
+            ),
+            ("normal", ["decode", "prefill"], [128], [512]),
+        )
+        self.assertEqual(
+            (
+                low_latency["mode"],
+                low_latency["phases"],
+                low_latency["token_points_decode"],
+            ),
+            ("low-latency", ["decode"], [128]),
+        )
+        self.assertTrue(normal["provisional"])
+        self.assertTrue(low_latency["provisional"])
+        self.assertEqual(
+            normal["required_publication"], "comparable-experimental"
+        )
+        self.assertEqual(
+            low_latency["required_publication"], "comparable-experimental"
+        )
+        listed = normal["precision_profiles"] + low_latency["precision_profiles"]
+        self.assertNotIn(identity.V1_CONTROL_PRECISION_PROFILE, listed)
+        self.assertEqual(len(listed), len(set(listed)))  # no profile is listed by both suites
+        # The default matrix carries no precision cases; requesting a precision suite must exit.
+        matrix = sweep_matrix.validate_matrix_document(
+            sweep_matrix.resolve_matrix(backends="all")
+        )
+        self.assertFalse(any("precision_profile" in item["case"] for item in matrix["requested_cases"]))
+        with self.assertRaisesRegex(SystemExit, "provisional precision suites"):
+            sweep_matrix.resolve_matrix(
+                suites="ep-precision-normal-v1", backends="all"
+            )
+        # Flipping provisional to False on a copy of the loaded suites must fail validation.
+        stale = copy.deepcopy(suites)
+        stale["suites"]["ep-precision-normal-v1"]["provisional"] = False
+        with self.assertRaisesRegex(SystemExit, "must track unresolved"):
+            sweep_matrix.validate_config_documents(stale, workloads)
+
+    def test_resolved_profiles_schedule_without_cartesian_fill(self) -> None:
+        suites = sweep_matrix._load("suites.yaml")
+        workloads = sweep_matrix._load("workloads.yaml")
+        promoted = copy.deepcopy(capability.PRECISION_CAPABILITIES)  # edited copy; installed via mock below
+        for rules in promoted.values():
+            for rule in rules:
+                rule["disposition"] = "supported"
+        normal_profile = "d-fp8-e4m3fn-b128-f32-prequantized.c-bf16"
+        promoted[normal_profile][0]["disposition"] = "unsupported"  # demote only this profile's first rule
+        resolved_suites = copy.deepcopy(suites)
+        for name in ("ep-precision-normal-v1", "ep-precision-low-latency-v1"):
+            resolved_suites["suites"][name]["provisional"] = False
+        # Stand-in for sweep_matrix._load that serves the edited suites and the loaded workloads.
+        def load_config(name: str) -> dict[str, object]:
+            if name == "suites.yaml":
+                return resolved_suites
+            if name == "workloads.yaml":
+                return workloads
+            raise AssertionError(name)
+        # Expected size: one case per suite phase for every target in the unpatched registry.
+        suite_names = "ep-precision-normal-v1,ep-precision-low-latency-v1"
+        expected_cases = sum(
+            len(resolved_suites["suites"][
+                "ep-precision-normal-v1"
+                if target["mode"] == "normal"
+                else "ep-precision-low-latency-v1"
+            ]["phases"])
+            for target in capability.precision_targets()
+        )
+        unsupported_targets = [  # NOTE(review): assumes promoted[normal_profile][0] is the deepep rule - confirm
+            target for target in capability.precision_targets([normal_profile])
+            if target["backend"] == "deepep"
+        ]
+        with mock.patch.object(capability, "PRECISION_CAPABILITIES", promoted):
+            with self.assertRaisesRegex(SystemExit, "must track unresolved"):
+                sweep_matrix.validate_config_documents(suites, workloads)  # unedited suites, patched registry
+            with mock.patch.object(sweep_matrix, "_load", side_effect=load_config):
+                matrix = sweep_matrix.validate_matrix_document(
+                    sweep_matrix.resolve_matrix(suites=suite_names, backends="all")
+                )
+        # Unsupported cases must match the filtered targets, once per phase of the normal suite.
+        unsupported = [
+            item for item in matrix["requested_cases"]
+            if item["disposition"] == "unsupported"
+        ]
+        self.assertEqual(
+            len(unsupported),
+            len(unsupported_targets)
+            * len(resolved_suites["suites"]["ep-precision-normal-v1"]["phases"]),
+        )
+        self.assertTrue(all(
+            item["reason"] == "precision-profile-unsupported" for item in unsupported
+        ))
+        self.assertTrue(any(
+            item["disposition"] == "runnable" for item in matrix["requested_cases"]
+        ))
+        # The matrix must hold exactly the expected cases: scheduled profiles only, no control.
+        self.assertEqual(len(matrix["requested_cases"]), expected_cases)
+        self.assertEqual(
+            {item["case"]["precision_profile"] for item in matrix["requested_cases"]},
+            set(identity.V1_NORMAL_PRECISION_PROFILE_IDS)
+            | set(identity.V1_LOW_LATENCY_PRECISION_PROFILE_IDS),
+        )
+        self.assertFalse(any(
+            item["case"]["precision_profile"] == identity.V1_CONTROL_PRECISION_PROFILE
+            for item in matrix["requested_cases"]
+        ))
+        direct_cases = [
+            item for item in matrix["requested_cases"]
+            if "direct-cast" in item["case"]["precision_profile"]
+        ]
+        self.assertTrue(direct_cases)
+        self.assertEqual({item["case"]["ep"] for item in direct_cases}, {8})  # direct-cast only at ep == 8
+
+
+if __name__ == "__main__":  # run this module's tests when executed as a script
+    unittest.main()
diff --git a/experimental/CollectiveX/tests/test_publisher.py b/experimental/CollectiveX/tests/test_publisher.py
new file mode 100644
index 000000000..abc43c6ec
--- /dev/null
+++ b/experimental/CollectiveX/tests/test_publisher.py
@@ -0,0 +1,2986 @@
+#!/usr/bin/env python3
+"""Focused end-to-end tests for the isolated CollectiveX publisher."""
+from __future__ import annotations
+
+import copy
+import hashlib
+import itertools
+import json
+import os
+from pathlib import Path
+import subprocess
+import sys
+import tempfile
+import types
+import unittest
+from unittest import mock
+import zipfile
+
+HERE = Path(__file__).resolve().parent  # directory containing this test module
+ROOT = HERE.parent  # parent of the tests directory
+sys.path[:0] = [str(ROOT), str(HERE)]  # prepend both so they are searched before existing entries
+
+import contracts  # noqa: E402
+import identity  # noqa: E402
+import publisher  # noqa: E402
+import summarize  # noqa: E402
+import sweep_matrix  # noqa: E402
+
+
+RUN = {  # default run identity used by the fixture helpers in this module
+    "repository": "SemiAnalysisAI/InferenceX",
+    "run_id": "12345",  # kept as a string
+    "run_attempt": 1,  # kept as an int; helpers stringify it where they need to
+    "qualification_index": 1,
+    "source_sha": "a" * 40,  # 40-character placeholder
+}
+
+
+def _unsupported_delivery(  # writes a one-case matrix file plus terminal documents under root
+    root: Path, ordinals: tuple[int, ...] = (1,), run: dict = RUN,
+) -> tuple[Path, Path]:
+    matrix = sweep_matrix.resolve_matrix(backends="all")
+    wrapper = next(item for item in matrix["requested_cases"] if item["disposition"] == "unsupported")
+    matrix = {  # rebound: minimal matrix document holding only the selected unsupported case
+        "format": "collectivex.matrix.v1",
+        "schema_version": 1,
+        "requested_cases": [wrapper],
+        "include": [],
+    }
+    case = {key: value for key, value in wrapper["case"].items() if key != "case_id"}  # case minus its id
+    artifact_name = f"cxunsupported-{run['run_id']}-{run['run_attempt']}"
+    git_run = {
+        "artifact": artifact_name,
+        "job": "setup",
+        "ref": "collectivex",
+        "repo": run["repository"],
+        "qualification_index": run["qualification_index"],
+        "run_attempt": str(run["run_attempt"]),
+        "run_id": run["run_id"],
+        "source_sha": run["source_sha"],
+    }
+    allocation = {
+        "artifact": artifact_name,
+        "execution_id": f"{run['run_id']}_{run['run_attempt']}_unsupported",
+        "job": "setup",
+        "qualification_index": run["qualification_index"],
+        "repo": run["repository"],
+        "run_attempt": str(run["run_attempt"]),
+        "run_id": run["run_id"],
+        "runner": "capability-resolver",
+        "source_sha": run["source_sha"],
+    }
+    matrix_path = root / "matrix.json"
+    artifact = root / artifact_name
+    artifact.mkdir()  # no exist_ok: raises if the directory is already present
+    matrix_path.write_text(json.dumps(matrix))
+    control_sha256 = hashlib.sha256(matrix_path.read_bytes()).hexdigest()  # digest of the file as written
+    for ordinal in ordinals:  # one terminal document per requested attempt ordinal
+        terminal = contracts.make_terminal_document(
+            allocation_factors=allocation, attempt_ordinal=ordinal, case=case,
+            case_factors={"case": case, "profile": identity.V1_CASE_PROFILE,
+                          "sku": wrapper["sku"]},
+            control_sha256=control_sha256, failure_mode="capability",
+            generated_at="2026-07-04T00:00:00Z", git_run=git_run,
+            reason=wrapper["reason"], return_code=5, source="matrix-capability-resolver",
+            status="unsupported", expected_case_id=wrapper["case"]["case_id"],
+        )
+        (artifact / f"unsupported-{ordinal}.json").write_text(json.dumps(terminal))
+    return matrix_path, artifact  # (matrix file, artifact directory)
+
+
+def _args(
+    store: Path, matrix: Path, artifact: Path, run: dict = RUN
+) -> types.SimpleNamespace:
+    """Bundle the stringified paths and the run identity fields into a SimpleNamespace."""
+    run_fields = (
+        "repository", "run_id", "run_attempt", "qualification_index", "source_sha",
+    )
+    return types.SimpleNamespace(
+        store_root=str(store),
+        matrix=str(matrix),
+        artifact=[str(artifact)],
+        **{field: run[field] for field in run_fields},
+    )
+
+
+def _ids(seed: str) -> tuple[str, str, str, str, str, str]:
+    """Derive case, allocation, attempt, series, point, and evidence ids from one seed."""
+    case_id = identity.digest("case", {"seed": seed})
+    allocation_id = identity.allocation_id({"seed": seed})
+    attempt_id = identity.attempt_id(allocation=allocation_id, case=case_id, ordinal=1)
+    series_id = identity.series_id({"seed": seed})
+    point_id = identity.point_id(series=series_id, tokens_per_rank=8)
+    return case_id, allocation_id, attempt_id, series_id, point_id, identity.evidence_id(
+        point=point_id, allocation=allocation_id, attempt=attempt_id, sample_sha256="b" * 64
+    )
+
+
+def _component(scale: float = 1.0) -> dict:
+    """Build a measured-component fixture whose latency percentiles are scaled by ``scale``."""
+    base_us = (("p50", 10.0), ("p90", 12.0), ("p95", 14.0), ("p99", 20.0))
+    latency = {name: value * scale for name, value in base_us}
+    byte_provenance = {
+        "accounting_contract": "activation-data-plus-scales-v1",
+        "activation_data_bytes": 100_000,
+        "scale_bytes": 0,
+        "total_logical_bytes": 100_000,
+    }
+
+    def rate(field: str) -> dict:
+        # Per percentile: the named byte count divided by (latency * 1000.0).
+        return {name: byte_provenance[field] / (us * 1000.0) for name, us in latency.items()}
+
+    return {
+        "origin": "measured",
+        "latency_us": latency,
+        "byte_provenance": byte_provenance,
+        "activation_data_rate_gbps_at_latency_percentile": rate("activation_data_bytes"),
+        "total_logical_data_rate_gbps_at_latency_percentile": rate("total_logical_bytes"),
+        "sample_count": 512,
+    }
+
+
+def _precision_axis_evidence() -> dict:
+    """Return a fresh passing per-axis evidence fixture; both scale checks are None."""
+    return dict(
+        encoded_payload_valid=True,
+        scales_finite=None,
+        scales_positive=None,
+        dequantized_semantics=True,
+        saturation_count=0,
+        saturation_rate=0.0,
+        max_abs_error=0.0, max_rel_error=0.0,
+        passed=True,
+    )
+
+
+def _precision_evidence(
+    profile_id: str = identity.V1_CONTROL_PRECISION_PROFILE,
+) -> dict:
+    """Wrap two independent passing axis fixtures under the given profile id."""
+    return {
+        "profile_id": profile_id,
+        "dispatch": _precision_axis_evidence(),
+        "combine": _precision_axis_evidence(),
+        "passed": True,
+    }
+
+
+def _hybrid_provenance(ep_size: int = 1) -> dict:  # deepep-hybrid provenance fixture sized by ep_size
+    realized = {field: 1 for field in contracts.HYBRID_REALIZED_CONFIG_FIELDS}  # every field starts at 1
+    for field in contracts.HYBRID_REALIZED_BOOL_FIELDS:
+        realized[field] = True  # the boolean fields are then set to True
+    realized.update({  # explicit values applied last
+        "num_of_experts_per_rank": 1,
+        "num_of_nodes": 1,
+        "num_of_ranks_per_node": ep_size,
+        "token_data_type": "UINT16",
+    })
+    kernel_keys = ["combine-key", "dispatch-key", "preprocess-key"]
+    return {
+        "backend_lineage": "deepep-hybrid", "branch": "hybrid-ep",
+        "deepep_commit": "a" * 40, "deepep_tree": "b" * 40,
+        "device_sms": 1,
+        "jit_kernel_keys": kernel_keys,
+        "jit_shared_objects": [  # one entry per kernel key
+            {
+                "kernel_key": key,
+                "rank_artifacts": [
+                    {"bytes": 1, "rank": rank, "sha256": f"{index + 1:x}" * 64}  # hex of index + 1, repeated 64x
+                    for rank in range(ep_size)
+                ],
+            }
+            for index, key in enumerate(kernel_keys)
+        ],
+        "loaded_libraries": [
+            {"name": "deep_ep_cpp", "role": "deepep-extension", "sha256": "4" * 64},
+            {"name": "hybrid_ep_cpp", "role": "deepep-hybrid-extension", "sha256": "5" * 64},
+        ],
+        "realized_config": realized,
+        "resource_mode": "fixed-profile",
+        "tuned_source": "deepep-hybrid-configurer-autotune-v1",
+    }
+
+
+def _native_fixture(backend: str = "nccl-ep") -> tuple[dict, dict]:
+    def digest(value: object) -> str:
+        return hashlib.sha256(contracts.canonical_json_bytes(value)).hexdigest()
+
+    scheduled = {
+        "backend": backend, "canonical": True, "eplb": False, "ep": 1,
+        "experts": 1, "gpus_per_node": 1, "hidden": 1, "ladder": "1", "nodes": 1,
+        "mode": "normal", "phase": "decode", "required_publication": "official",
+        "routing": "uniform", "samples_per_point": 512,
+        "scale_out_transport": None, "scale_up_domain": 1,
+        "scale_up_transport": "nvlink", "scope": "scale-up", "suite": "ep-core-v1",
+        "timing": "8:64:32", "topk": 1,
+        "topology_class": "fixture", "transport": "nvlink",
+        "warmup_semantics": "full-roundtrip-before-each-component-trial-point-v1",
+        "workload": "deepseek-v3-v1",
+    }
+    communication_precision = identity.precision_profile(
+        identity.V1_CONTROL_PRECISION_PROFILE
+    )
+    case_factors = {
+        "case": scheduled, "profile": identity.V1_NORMAL_CASE_PROFILE, "sku": "fixture"
+    }
+    case_id = identity.digest("case", case_factors)
+    git_run = {
+        "artifact": "cxshard-fixture-999-1", "job": "sweep", "ref": "collectivex",
+        "repo": RUN["repository"], "qualification_index": 1,
+        "run_attempt": "1", "run_id": "999",
+        "source_sha": RUN["source_sha"],
+    }
+    allocation_factors = {
+        "artifact": git_run["artifact"], "execution_id": "999_1_fixture",
+        "job": git_run["job"], "qualification_index": 1,
+        "repo": git_run["repo"], "run_attempt": "1",
+        "run_id": "999", "runner": "fixture", "source_sha": git_run["source_sha"],
+    }
+    allocation_id = identity.allocation_id(allocation_factors)
+    attempt_id = identity.attempt_id(allocation=allocation_id, case=case_id, ordinal=1)
+    member_id, member_checksums, routing_hash, routing_rows, routing_weights = (
+        contracts._expected_canonical_trace(
+        "uniform", hidden=1, topk=1, logical_experts=1, physical_experts=1,
+        ep_size=1, tokens_per_rank=1, seed=67, eplb_enabled=False,
+        reference_tokens_per_rank=2048,
+        )
+    )
+    workload_id = identity.workload_id({
+        "members": [{"checksums": member_checksums, "workload_id": member_id}]
+    })
+    runtime = {
+        "accelerator_runtime": {"kind": "cuda", "version": "13.0"},
+        "collective_library": {"kind": "nccl", "version": "2.30.4"},
+        "device": {
+            "arch": "sm100", "compute_units": 1, "memory_bytes": 1,
+            "product": "Fixture GPU", "warp_size": 32,
+        },
+        "driver_version": "1", "framework": {"kind": "torch", "version": "2.10.0"},
+        "machine": "fixture", "python_version": "3.12", "vendor": "nvidia",
+    }
+    implementation_provenance = (
+        {
+            "backend": "nccl-ep", "backend_lineage": "nccl",
+            "collective_library": "nccl", "nccl_version": "2.30.4",
+            "reference_semantics": "fixture-v1",
+        }
+        if backend == "nccl-ep"
+        else _hybrid_provenance()
+    )
+    kernel_generation = "nccl" if backend == "nccl-ep" else "hybrid"
+    implementation = {
+        "kernel_generation": kernel_generation,
+        "name": backend,
+        "provenance": implementation_provenance,
+        "resource_profile": contracts.project_resource_profile(implementation_provenance),
+    }
+    public_config = contracts.public_series_config(
+        kernel_generation=implementation["kernel_generation"],
+        provenance=implementation_provenance,
+        resource_profile=implementation["resource_profile"],
+        resource_mode="fixed-profile",
+        device_product=runtime["device"]["product"],
+    )
+    series_factors = {
+        "backend": backend, "case_id": case_id,
+        "image_digest": "sha256:" + "d" * 64,
+        "implementation_contract_sha256": digest({
+            **implementation,
+            "provenance": contracts.series_provenance(implementation_provenance),
+        }),
+        "public_config_sha256": contracts.public_series_config_sha256(public_config),
+        "routing_control_sha256": contracts.routing_implementation_control_sha256(
+            implementation
+        ),
+        "runtime_fingerprint_sha256": digest(runtime),
+        "source_sha": RUN["source_sha"], "squash_sha256": "e" * 64,
+        "workload_id": workload_id,
+    }
+    series_id = identity.series_id(series_factors)
+    point_id = identity.point_id(series=series_id, tokens_per_rank=1)
+    sample_components = {
+        name: {
+            "availability": "measured", "sample_count": 512,
+            "trials": [[latency] * 8 for _ in range(64)],
+        }
+        for name, latency in (("combine", 20.0), ("dispatch", 10.0), ("roundtrip", 40.0))
+    }
+    sample_components["stage"] = {
+        "availability": "unavailable", "sample_count": 0, "trials": None,
+    }
+    sample_sha = digest({"components": sample_components, "tokens_per_rank": 1})
+    evidence_id = identity.evidence_id(
+        point=point_id, allocation=allocation_id, attempt=attempt_id,
+        sample_sha256=sample_sha,
+    )
+    samples = {
+        "allocation_id": allocation_id, "attempt_id": attempt_id, "case_id": case_id,
+        "format": contracts.SAMPLES_FORMAT,
+        "points": [{
+            "components": sample_components, "evidence_id": evidence_id,
+            "point_id": point_id, "sample_sha256": sample_sha, "tokens_per_rank": 1,
+        }],
+        "sampling": {
+            "iterations_per_trial": 8, "reduction": "cross-rank-max-per-iteration",
+            "trials": 64,
+        },
+        "qualification_index": 1, "schema_version": 1, "series_id": series_id,
+    }
+    sample_bytes = contracts.canonical_json_bytes(samples)
+    oracle = {
+        "atol": 0.02,
+        "checks": {name: True for name in (
+            "combine_values", "counts", "metadata", "multiplicity", "payload",
+            "source_set", "weights",
+        )},
+        "combine_weight_semantics": "unweighted-rank-sum",
+        "contract": "expert-specific-transform-v1", "dispatch_sha256": "1" * 64,
+        "max_absolute_error": 0.0, "max_elementwise_relative_error": 0.0,
+        "max_relative_error": 0.0, "max_weight_error": 0.0,
+        "order_sha256": "2" * 64, "ordering_contract": "fixture-order-v1",
+        "passed": True, "receive_count": 1, "rtol": 0.05,
+    }
+    def pct(value: float) -> dict[str, float]:
+        return {name: value for name in ("p50", "p90", "p95", "p99")}
+
+    def measured(value: float) -> dict:
+        return {
+            "availability": "measured", "origin": "measured",
+            "percentiles_us": pct(value), "sample_count": 512,
+        }
+    row = {
+        "anomalies": [],
+        "components": {
+            "combine": measured(20.0), "dispatch": measured(10.0),
+            "isolated_sum": {
+                "availability": "derived", "origin": "derived-percentile-sum",
+                "percentiles_us": pct(30.0), "sample_count": 0,
+            },
+            "roundtrip": measured(40.0),
+            "stage": {
+                "availability": "unavailable", "origin": None,
+                "percentiles_us": None, "sample_count": 0,
+            },
+        },
+        "correctness": {
+            "contract": "expert-specific-transform-v1", "max_relative_error": 0.0,
+            "passed": True, "precision": _precision_evidence(),
+            "rank_evidence": [{
+                "input_unchanged": True, "order_stable": True,
+                "post_timing": copy.deepcopy(oracle), "pre_timing": copy.deepcopy(oracle),
+                "rank": 0,
+            }],
+            "scope": "dispatch-metadata-and-transformed-combine",
+        },
+        "evidence_id": evidence_id, "global_tokens": 1,
+        "byte_provenance": {
+            "combine": {
+                "accounting_contract": "activation-data-plus-scales-v1",
+                "activation_data_bytes": 2, "scale_bytes": 0,
+                "total_logical_bytes": 2,
+            },
+            "dispatch": {
+                "accounting_contract": "activation-data-plus-scales-v1",
+                "activation_data_bytes": 2, "scale_bytes": 0,
+                "total_logical_bytes": 2,
+            },
+            "roundtrip": {
+                "accounting_contract": "activation-data-plus-scales-v1",
+                "activation_data_bytes": 4, "scale_bytes": 0,
+                "total_logical_bytes": 4,
+            },
+            "stage": {
+                "accounting_contract": "activation-data-plus-scales-v1",
+                "activation_data_bytes": 0, "scale_bytes": 0,
+                "total_logical_bytes": 0,
+            },
+        },
+        "point_id": point_id,
+        "receive": {"max": 1, "mean": 1.0, "min": 1, "total": 1},
+        "routing": contracts._expected_routing_summary(
+            routing_rows,
+            routing_weights,
+            physical_experts=1,
+            ep_size=1,
+            tokens_per_rank=1,
+            gpus_per_node=1,
+            scale_up_domain=1,
+        ),
+        "sample_histograms": {
+            name: contracts._expected_histogram([value] * 512)
+            for name, value in (("combine", 20.0), ("dispatch", 10.0), ("roundtrip", 40.0))
+        },
+        "sample_sha256": sample_sha,
+        "token_rate_at_latency_percentile": pct(25_000.0), "tokens_per_rank": 1,
+    }
+    row["sample_histograms"]["stage"] = None
+    raw = {
+        "case": {
+            "attempt_ordinal": 1, "backend": backend,
+            "eplb": {
+                "calibration_token_offset": None, "calibration_trace_sha256": None,
+                "calibration_window": None, "calibration_workload_id": None,
+                "enabled": False, "imbalance_after": None, "imbalance_before": None,
+                "mapping_hash": None, "max_replicas": None, "num_logical_experts": 1,
+                "num_physical_experts": 1, "num_redundant": 0, "planner": None,
+                "reference_tokens_per_rank": None, "replicated_experts": 0,
+            },
+            "ep_size": 1, "mode": "normal", "phase": "decode",
+            "required_publication": "official", "resource_mode": "fixed-profile", "runner": "fixture",
+            "shape": {
+                "activation_profile": "canonical-counter-source-v4",
+                "precision_profile": identity.V1_CONTROL_PRECISION_PROFILE,
+                "dispatch_precision": communication_precision["dispatch"],
+                "combine_precision": communication_precision["combine"],
+                "eplb": False, "experts": 1, "experts_per_rank": 1, "hidden": 1,
+                "kernel_gen": kernel_generation, "num_logical_experts": 1,
+                "routing": "uniform", "topk": 1,
+            },
+            "suite": "ep-core-v1", "workload_name": "deepseek-v3-v1",
+        },
+        "format": contracts.RAW_FORMAT, "generated_at": "2026-07-04T00:00:00Z",
+        "identity": {
+            "allocation_factors": allocation_factors, "allocation_id": allocation_id,
+            "attempt_id": attempt_id, "attempt_ordinal": 1, "case_factors": case_factors,
+            "case_id": case_id, "series_factors": series_factors, "series_id": series_id,
+        },
+        "implementation": implementation,
+        "measurement": {
+            "component_order_contract": "qualification-hash-rotated-components-v1",
+            "conditioning": {
+                "contract": "fixed-phase-ramp-8-roundtrips-v1",
+                "ladder": [1, 2, 4, 8, 16, 32, 64, 128],
+                "roundtrips_per_shape": 8,
+            },
+            "contract": "layout-and-dispatch-v1",
+            "execution_order_sha256": "9" * 64,
+            "qualification_index": 1,
+            "rows": [row],
+            "sampling": {
+                "contract": "fixed-512-v1", "iterations_per_trial": 8,
+                "percentile_method": "nearest-rank",
+                "reduction": "cross-rank-max-per-iteration", "samples_per_component": 512,
+                "trials": 64, "warmup_iterations": 32,
+                "warmup_semantics": "full-roundtrip-before-each-component-trial-point-v1",
+            },
+            "source_allocation": "even",
+        },
+        "outcome": {
+            "publication_status": "diagnostic", "reasons": [], "status": "success",
+            "validity": {
+                "anomaly_free": True, "execution_status": "complete",
+                "measurement_conformance": "conformant", "provenance_complete": True,
+                "resource_conformance": implementation["resource_profile"]["conformance_class"],
+                "sampling_conformance": "conformant",
+                "semantic_correctness": "pass",
+                "workload_identity": "consistent-across-ranks",
+                "workload_source": "canonical-serialized",
+            },
+        },
+        "provenance": {
+            "allocation_stratum_sha256": "f" * 64,
+            "command": "run_ep", "distributed_launcher": "torchrun", "git_run": git_run,
+            "image": {
+                "arch": "amd64", "digest": "sha256:" + "d" * 64,
+                "digest_verified": True, "reference": "fixture:1", "squash_sha256": "e" * 64,
+            },
+            "redaction": "sanitized-v1",
+        },
+        "record_type": "case-attempt",
+        "runtime_fingerprint": runtime,
+        "sample_artifact": {
+            "bytes": len(sample_bytes), "format": contracts.SAMPLES_FORMAT,
+            "path": "samples.json", "sha256": hashlib.sha256(sample_bytes).hexdigest(),
+        },
+        "schema_version": 1,
+        "topology": {
+            "device_count": 1, "device_product": "Fixture GPU", "gpus_per_node": 1,
+            "nodes": 1, "placement": "packed",
+            "realized_placement": {
+                "gpus_per_node": 1, "nodes": 1, "ranks_per_node": 1,
+                "unique_local_ranks": True, "valid": True,
+            },
+            "scale_out_transport": None, "scale_up_domain": 1,
+            "scale_up_transport": "nvlink", "scope": "scale-up",
+            "topology_class": "fixture", "transport": "nvlink",
+            "world_size": 1,
+        },
+        "workload": {
+            "activation_generator": "collectivex-activation-counter-v4",
+            "activation_identity": hashlib.sha256(
+                b"counter|seed=67|hidden=1|gen=collectivex-activation-counter-v4"
+            ).hexdigest(),
+            "activation_profile": "canonical-counter-source-v4", "cross_rank_consistent": True,
+            "manifest_checksums": {member_id: member_checksums}, "members": [member_id],
+            "routing_generator": "collectivex-routing-counter-v3", "source": "canonical-serialized",
+            "trace_hashes": [routing_hash],
+            "trace_signature": hashlib.sha256(routing_hash.encode()).hexdigest(),
+            "workload_id": workload_id,
+        },
+    }
+    return raw, samples
+
+
+def _series(seed: str, backend: str, *, decision_grade: bool = False) -> tuple[dict, dict]:
+    """Build one public series fixture for ``backend`` plus its internal run data.
+
+    Args:
+        seed: Fixture seed; feeds ``_ids``, the allocation ids, and the evidence
+            sample hash.
+        backend: Backend id; it is looked up in ``publisher.BACKEND_LABELS``.
+        decision_grade: When true the series carries three allocations and three
+            qualification indices with complete stability data; otherwise it is a
+            single-allocation diagnostic series.
+
+    Returns:
+        ``(item, internal)``. ``item`` is the series record with its
+        content-derived ids filled in. ``internal`` holds ``run_metrics`` and
+        ``trial_blocks``, each keyed by run id ("0", "1", "2") and then by
+        tokens_per_rank (8); three runs are always generated.
+    """
+    # Provisional ids: case, series, point, attempt and evidence are re-derived
+    # further down once the item content is final. Only `allocation` is kept.
+    case, allocation, attempt, series_id, point_id, evidence = _ids(seed)
+    allocations = [identity.allocation_id({"seed": seed, "run": run}) for run in range(3)]
+    eligibility = publisher._eligibility_record(
+        allocations if decision_grade else [allocation],
+        complete=decision_grade,
+        correct=True,
+        measured=True,
+        stable_ordering=True,
+        p50_ratio=1.01 if decision_grade else None,
+        p99_ratio=1.02 if decision_grade else None,
+    )
+    # deepep gets 1.0, every other backend 1.2 -- presumably a latency scale;
+    # TODO confirm against _component.
+    component = _component(1.0 if backend == "deepep" else 1.2)
+    qualification_indices = [1, 2, 3] if decision_grade else [1]
+    communication_precision = identity.precision_profile(
+        identity.V1_CONTROL_PRECISION_PROFILE
+    )
+    item = {
+        "series_id": series_id,
+        "label": f"H100 / {backend}",
+        "status": "decision-grade" if decision_grade else "diagnostic",
+        "case_ids": [case],
+        "allocation_ids": allocations if decision_grade else [allocation],
+        "model": "deepseek-v3-v1",
+        "suite": "ep-core-v1",
+        "mode": "normal",
+        "phase": "decode",
+        "publication_tier": "official",
+        "backend": {
+                    "id": backend, "label": publisher.BACKEND_LABELS[backend],
+                    "role": "reference" if backend == "nccl-ep" else "library",
+                    "generation": "nccl" if backend == "nccl-ep" else None,
+                    "version": "1.0"},
+        "build": {
+            "implementation_contract_sha256": hashlib.sha256(backend.encode()).hexdigest(),
+            # Placeholder; overwritten after the item literal is complete.
+            "public_config_sha256": "0" * 64,
+            "routing_control_sha256": hashlib.sha256(backend.encode()).hexdigest(),
+            "runtime_fingerprint_sha256": "3" * 64,
+            "image_digest": "sha256:" + "1" * 64,
+            "source_sha": "a" * 40,
+            "squash_sha256": "2" * 64,
+        },
+        "system": {
+            "sku": "h100-dgxc", "label": "NVIDIA H100", "vendor": "nvidia",
+            "topology_class": "h100-nvlink-island", "transport": "nvlink",
+            "scale_up_transport": "nvlink", "scale_out_transport": None,
+            "scope": "scale-up", "nodes": 1, "gpus_per_node": 8,
+            "scale_up_domain": 8,
+            "world_size": 8, "ep_size": 8, "placement": "packed",
+        },
+        "workload": {
+            "workload_id": identity.workload_id({"shape": "shared"}),
+            "hidden": 7168, "top_k": 8, "experts": 256,
+            "routing": "uniform", "eplb": False,
+            "precision_profile": identity.V1_CONTROL_PRECISION_PROFILE,
+            "dispatch_precision": communication_precision["dispatch"],
+            "combine_precision": communication_precision["combine"],
+            "activation_profile": "canonical-counter-source-v4",
+        },
+        "eplb": {
+            "enabled": False,
+            "calibration_workload_id": None, "calibration_trace_sha256": None,
+            "calibration_window": None, "calibration_token_offset": None,
+            "planner": None, "mapping_sha256": None,
+            "logical_experts": 256, "physical_experts": 256,
+            "redundant_experts": 0, "reference_tokens_per_rank": None,
+            "replicated_experts": 0, "max_replicas": None,
+            "imbalance_before": None, "imbalance_after": None,
+        },
+        "resource": {"mode": "fixed-profile", "profile": "profile-1", "comm_units_kind": "sm", "configured_units": 24},
+        "measurement": {
+            "contract": "layout-and-dispatch-v1",
+            "component_order_contract": "qualification-hash-rotated-components-v1",
+            "combine_semantics": "activation-only", "payload_unit": "token-rank",
+            "sampling_contract": "fixed-512-v1",
+            "iters": 8, "trials": 64, "warmups": 32, "samples_per_component": 512,
+            "qualification_indices": qualification_indices,
+            "headline_component": "roundtrip", "headline_percentile": "p99",
+        },
+        "points": [{
+            "point_id": point_id, "tokens_per_rank": 8, "global_tokens": 64,
+            "anomalies": [],
+            "correctness": {"semantic_pass": True, "precision": _precision_evidence()},
+            "stability": {
+                "complete": decision_grade,
+                "qualification_indices": qualification_indices,
+                "p50_max_min_ratio": 1.02 if decision_grade else None,
+                "p99_max_min_ratio": 1.02 if decision_grade else None,
+                "stable_p50": decision_grade, "stable_p99": decision_grade,
+            },
+            "trial_diagnostics": {
+                "flagged": False,
+                "reasons": [],
+                "components": {
+                    "dispatch": None,
+                    "stage": None,
+                    "combine": None,
+                    "roundtrip": {
+                        "drift_flagged": False,
+                        "first_last_median_ratio": 1.0,
+                        "outlier_flagged": False,
+                        "robust_outlier_fraction": 0.0,
+                        "trial_count": 192,
+                    },
+                },
+            },
+            "routing": {
+                "fanout_mean": 4.0, "recv_tokens_max": 64,
+                "expert_load_cv": 0.5, "payload_rank_cv": 0.25,
+                "hotspot_ratio": 2.0, "empty_expert_count": 0,
+                "empty_rank_count": 0, "routed_copies": 256,
+            },
+            # Only the roundtrip component is populated in this fixture.
+            "components": {"dispatch": None, "stage": None, "combine": None,
+                           "roundtrip": component, "isolated_sum": None},
+            # global_tokens (64) divided by each latency converted from us to s.
+            "roundtrip_token_rate_at_latency_percentile": {
+                name: 64 / (latency * 1e-6)
+                for name, latency in component["latency_us"].items()
+            },
+            "evidence_ids": [evidence],
+        }],
+        "eligibility": eligibility,
+    }
+    # Replace the placeholder config hash with one derived from the finished item.
+    item["build"]["public_config_sha256"] = contracts.public_series_config_sha256(
+        publisher._public_series_config(item)
+    )
+    # Re-derive the id chain from the item itself:
+    # case -> series -> point -> attempt -> evidence.
+    case = identity.digest("case", publisher._public_case_factors(item))
+    item["case_ids"] = [case]
+    build = item["build"]
+    series_id = identity.series_id({
+        "backend": item["backend"]["id"],
+        "case_id": case,
+        "image_digest": build["image_digest"],
+        "implementation_contract_sha256": build["implementation_contract_sha256"],
+        "public_config_sha256": build["public_config_sha256"],
+        "routing_control_sha256": build["routing_control_sha256"],
+        "runtime_fingerprint_sha256": build["runtime_fingerprint_sha256"],
+        "source_sha": build["source_sha"],
+        "squash_sha256": build["squash_sha256"],
+        "workload_id": item["workload"]["workload_id"],
+    })
+    item["series_id"] = series_id
+    point_id = identity.point_id(series=series_id, tokens_per_rank=8)
+    item["points"][0]["point_id"] = point_id
+    attempt = identity.attempt_id(allocation=allocation, case=case, ordinal=1)
+    evidence = identity.evidence_id(
+        point=point_id, allocation=allocation, attempt=attempt,
+        sample_sha256=hashlib.sha256(seed.encode()).hexdigest(),
+    )
+    item["points"][0]["evidence_ids"] = [evidence]
+    # Run r scales the p50/p99 latencies by (1 + r/100) and divides both data
+    # rates by the same factor, so run "0" equals the published component.
+    runs = {
+        str(run): {8: {
+            "latency_us": {
+                statistic: component["latency_us"][statistic] * (1 + run / 100)
+                for statistic in ("p50", "p99")
+            },
+            "activation_data_rate_gbps_at_latency_percentile": {
+                statistic: component["activation_data_rate_gbps_at_latency_percentile"][statistic] / (1 + run / 100)
+                for statistic in ("p50", "p99")
+            },
+            "total_logical_data_rate_gbps_at_latency_percentile": {
+                statistic: component["total_logical_data_rate_gbps_at_latency_percentile"][statistic] / (1 + run / 100)
+                for statistic in ("p50", "p99")
+            },
+        }}
+        for run in range(3)
+    }
+    # Per run: 64 trials of 8 samples, every sample equal to that run's p99
+    # latency; only the roundtrip component has trial data.
+    trial_blocks = {
+        run_id: {8: {
+            "dispatch": None,
+            "stage": None,
+            "combine": None,
+            "roundtrip": tuple(
+                tuple(metrics[8]["latency_us"]["p99"] for _ in range(8))
+                for _ in range(64)
+            ),
+        }}
+        for run_id, metrics in runs.items()
+    }
+    internal = {"run_metrics": runs, "trial_blocks": trial_blocks}
+    return item, internal
+
+
+def _precision_series(
+    seed: str,
+    precision_profile: str,
+    *,
+    tokens: tuple[int, ...] = (128,),
+) -> tuple[dict, dict]:
+    item, internal = _series(seed, "deepep", decision_grade=True)
+    precision = identity.precision_profile(precision_profile)
+    if precision_profile != identity.V1_CONTROL_PRECISION_PROFILE:
+        item["suite"] = "ep-precision-normal-v1"
+        item["publication_tier"] = "comparable-experimental"
+    item["workload"].update({
+        "precision_profile": precision_profile,
+        "dispatch_precision": precision["dispatch"],
+        "combine_precision": precision["combine"],
+    })
+    template = item["points"][0]
+    old_token = template["tokens_per_rank"]
+    old_metrics = {
+        run_id: metrics[old_token]
+        for run_id, metrics in internal["run_metrics"].items()
+    }
+    old_trials = {
+        run_id: metrics[old_token]
+        for run_id, metrics in internal["trial_blocks"].items()
+    }
+    item["points"] = []
+    for token in tokens:
+        point = copy.deepcopy(template)
+        point["tokens_per_rank"] = token
+        point["global_tokens"] = token * item["system"]["ep_size"]
+        point["point_id"] = identity.point_id(
+            series=item["series_id"], tokens_per_rank=token
+        )
+        point["correctness"]["precision"] = _precision_evidence(precision_profile)
+        point["roundtrip_token_rate_at_latency_percentile"] = {
+            name: point["global_tokens"] / (latency * 1e-6)
+            for name, latency in point["components"]["roundtrip"]["latency_us"].items()
+        }
+        item["points"].append(point)
+    internal["run_metrics"] = {
+        run_id: {token: copy.deepcopy(metrics) for token in tokens}
+        for run_id, metrics in old_metrics.items()
+    }
+    internal["trial_blocks"] = {
+        run_id: {token: copy.deepcopy(metrics) for token in tokens}
+        for run_id, metrics in old_trials.items()
+    }
+    return item, internal
+
+
+def _dataset() -> dict:
+    """Return a diagnostic ``collectivex.public.v1`` dataset fixture.
+
+    The dataset holds one non-decision-grade deepep series (seed "one"), one
+    coverage case with a single measured point, and one selected successful
+    attempt; the decision lists (cohorts, rankings, recommendations,
+    sensitivities) are empty.
+    """
+    # The internal run data returned by _series is not needed here.
+    item, _ = _series("one", "deepep")
+    case = item["case_ids"][0]
+    allocation = item["allocation_ids"][0]
+    # Recompute the attempt id from the item's final case and allocation ids.
+    attempt = identity.attempt_id(allocation=allocation, case=case, ordinal=1)
+    evidence = item["points"][0]["evidence_ids"][0]
+    return {
+        "format": "collectivex.public.v1", "schema_version": 1,
+        "generated_at": "2026-07-04T00:00:00Z", "source_bundle_ids": ["c" * 64],
+        "promotion": {
+            "status": "diagnostic", "reason": None, "matrix_id": "d" * 64,
+            "allocation_ids": [allocation], "required_allocations": 3,
+            "qualification_indices": [1],
+            "requested_cases": 1, "terminal_cases": 1,
+            "measured_cases": 1, "unsupported_cases": 0,
+            "requested_points": 1, "terminal_points": 1,
+            "measured_points": 1, "unsupported_points": 0,
+            "policy": "collectivex-decision-grade-v1",
+        },
+        # Coverage fields are read back from the series item where it carries them.
+        "coverage": [{
+            "case_id": case, "label": "case", "required": True, "sku": "h100-dgxc",
+            "suite": item["suite"], "workload": item["model"],
+            "publication_tier": item["publication_tier"],
+            "backend": "deepep", "backend_generation": item["backend"]["generation"],
+            "mode": "normal", "phase": "decode",
+            "routing": item["workload"]["routing"], "eplb": False,
+            "precision_profile": item["workload"]["precision_profile"],
+            "dispatch_precision": item["workload"]["dispatch_precision"],
+            "combine_precision": item["workload"]["combine_precision"],
+            "resource": item["resource"],
+            "topology": publisher._coverage_topology(item["system"]),
+            "points": [{
+                "point_id": item["points"][0]["point_id"],
+                "series_id": item["series_id"],
+                "tokens_per_rank": 8, "global_tokens": 64,
+                "terminal_status": "measured", "reason": None,
+            }],
+            "disposition": "runnable",
+            "selected_attempt_id": attempt,
+            "outcome": "success", "failure_mode": None, "reason": None,
+            "attempt_ids": [attempt],
+        }],
+        "attempts": [{
+            "attempt_id": attempt,
+            "evidence": [{"evidence_id": evidence,
+                          "point_id": item["points"][0]["point_id"]}],
+            "case_id": case,
+            "allocation_id": allocation, "run_id": "1", "run_attempt": 1,
+            "qualification_index": 1,
+            "attempt_index": 1,
+            "selected": True, "outcome": "success", "failure_mode": None, "reason": None,
+            "series_id": item["series_id"],
+            "completed_at": "2026-07-04T00:00:00Z",
+        }],
+        "series": [item], "cohorts": [], "rankings": [], "recommendations": [],
+        "sensitivities": [],
+    }
+
+
+def _promoted_dataset(*, precision_profiles: tuple[str, ...] = ()) -> dict:
+    """Build a promoted ``collectivex.public.v1`` dataset fixture.
+
+    Seven decision-grade series are always generated; each entry of
+    ``precision_profiles`` adds one more deepep series in the
+    ``ep-precision-normal-v1`` suite. One unsupported coverage case taken from
+    ``publisher._canonical_coverage_cases()`` is appended with three
+    unsupported attempts.
+
+    Args:
+        precision_profiles: Extra precision profile names, one series each.
+
+    Returns:
+        The dataset dict. Coverage, attempts and series are sorted by their ids;
+        cohorts, rankings, recommendations and sensitivities come from
+        ``publisher.build_decisions``.
+
+    Raises:
+        StopIteration: If no canonical coverage case matches the unsupported
+            mi355x / deepep selection below.
+    """
+    # Each entry: (seed, backend, peer_sku, reference, precision_profile).
+    specifications = [
+        ("library-fast", "deepep", None, False, None),
+        ("library-slow", "uccl", None, False, None),
+        ("chip-peer", "deepep", "h200-dgxc", False, None),
+        ("system-one", "nccl-ep", None, True, None),
+        ("system-two", "nccl-ep", "h200-dgxc", True, None),
+        ("routing-zipf", "deepep", None, False, None),
+        ("routing-zipf-eplb", "deepep", None, False, None),
+    ]
+    specifications.extend(
+        (f"precision-{index}", "deepep", None, False, precision_profile)
+        for index, precision_profile in enumerate(precision_profiles)
+    )
+    series = []
+    internals = {}
+    attempts = []
+    coverage = []
+    for seed, backend, peer_sku, reference, precision_profile in specifications:
+        item, internal = _series(seed, backend, decision_grade=True)
+        # Apply the per-specification overrides before any id is derived.
+        if peer_sku:
+            platform = publisher.capability.PLATFORMS[peer_sku]
+            item["system"].update({
+                "sku": peer_sku,
+                "label": f"NVIDIA {platform['product'].upper()}",
+                "topology_class": platform["topology_class"],
+                "transport": platform["transport"],
+            })
+        if reference:
+            item["backend"]["role"] = "reference"
+        # Both "routing-zipf" and "routing-zipf-eplb" match this prefix.
+        if seed.startswith("routing-zipf"):
+            item["suite"] = "ep-routing-v1"
+            item["publication_tier"] = "comparable-experimental"
+            item["workload"]["routing"] = "zipf"
+        if seed == "routing-zipf-eplb":
+            item["workload"]["eplb"] = True
+            plan, calibration = contracts._expected_eplb_calibration(
+                "zipf", 7168, 8, 256, 288, item["system"]["ep_size"], 67, 2048
+            )
+            item["eplb"] = {
+                "enabled": True, **calibration, "planner": "greedy-rank-major-v1",
+                "mapping_sha256": contracts.eplb_contract.mapping_hash(plan),
+                "logical_experts": 256, "physical_experts": 288,
+                "redundant_experts": 32, "reference_tokens_per_rank": 2048,
+                "replicated_experts": plan["replicated_experts"],
+                "max_replicas": plan["max_replicas"],
+                "imbalance_before": plan["imbalance_before"],
+                "imbalance_after": plan["imbalance_after"],
+            }
+            # NOTE(review): presumably gives the EPLB series a distinct
+            # implementation contract from the plain zipf series -- confirm.
+            item["build"]["implementation_contract_sha256"] = "8" * 64
+        if precision_profile is not None:
+            precision = identity.precision_profile(precision_profile)
+            item["suite"] = "ep-precision-normal-v1"
+            item["publication_tier"] = "comparable-experimental"
+            item["workload"].update({
+                "precision_profile": precision_profile,
+                "dispatch_precision": precision["dispatch"],
+                "combine_precision": precision["combine"],
+            })
+            item["points"][0]["correctness"]["precision"] = _precision_evidence(
+                precision_profile
+            )
+        # Re-derive case id, config hash, series id and point id from the
+        # overridden item.
+        case_id = identity.digest("case", publisher._public_case_factors(item))
+        item["case_ids"] = [case_id]
+        build = item["build"]
+        build["public_config_sha256"] = contracts.public_series_config_sha256(
+            publisher._public_series_config(item)
+        )
+        item["series_id"] = identity.series_id({
+            "backend": item["backend"]["id"],
+            "case_id": case_id,
+            "image_digest": build["image_digest"],
+            "implementation_contract_sha256": build["implementation_contract_sha256"],
+            "public_config_sha256": build["public_config_sha256"],
+            "routing_control_sha256": build["routing_control_sha256"],
+            "runtime_fingerprint_sha256": build["runtime_fingerprint_sha256"],
+            "source_sha": build["source_sha"],
+            "squash_sha256": build["squash_sha256"],
+            "workload_id": item["workload"]["workload_id"],
+        })
+        point = item["points"][0]
+        point["point_id"] = identity.point_id(
+            series=item["series_id"], tokens_per_rank=point["tokens_per_rank"]
+        )
+        case_attempts = []
+        evidence_ids = []
+        # One selected, successful attempt per allocation; run ids count from 1
+        # and double as the qualification index.
+        for run_id, allocation_id in enumerate(item["allocation_ids"], 1):
+            attempt_id = identity.attempt_id(
+                allocation=allocation_id, case=case_id, ordinal=1
+            )
+            evidence_id = identity.evidence_id(
+                point=point["point_id"], allocation=allocation_id,
+                attempt=attempt_id,
+                sample_sha256=hashlib.sha256(f"{seed}-{run_id}".encode()).hexdigest(),
+            )
+            attempts.append({
+                "attempt_id": attempt_id,
+                "evidence": [{"evidence_id": evidence_id, "point_id": point["point_id"]}],
+                "case_id": case_id, "allocation_id": allocation_id,
+                "run_id": str(run_id), "run_attempt": 1,
+                "qualification_index": run_id,
+                "attempt_index": 1, "selected": True,
+                "outcome": "success", "failure_mode": None, "reason": None,
+                "series_id": item["series_id"],
+                "completed_at": "2026-07-04T00:00:00Z",
+            })
+            case_attempts.append(attempt_id)
+            evidence_ids.append(evidence_id)
+        point["evidence_ids"] = evidence_ids
+        coverage.append({
+            "case_id": case_id, "label": seed, "required": True,
+            "sku": item["system"]["sku"], "suite": item["suite"],
+            "workload": item["model"], "publication_tier": item["publication_tier"],
+            "backend": backend, "backend_generation": item["backend"]["generation"],
+            "mode": item["mode"], "phase": item["phase"],
+            "routing": item["workload"]["routing"], "eplb": item["workload"]["eplb"],
+            "precision_profile": item["workload"]["precision_profile"],
+            "dispatch_precision": item["workload"]["dispatch_precision"],
+            "combine_precision": item["workload"]["combine_precision"],
+            "resource": item["resource"], "disposition": "runnable",
+            "topology": publisher._coverage_topology(item["system"]),
+            "points": [{
+                "point_id": point["point_id"], "series_id": item["series_id"],
+                "tokens_per_rank": point["tokens_per_rank"],
+                "global_tokens": point["global_tokens"],
+                "terminal_status": "measured", "reason": None,
+            }],
+            # The last attempt of the case is recorded as the selected one.
+            "selected_attempt_id": case_attempts[-1], "outcome": "success",
+            "failure_mode": None, "reason": None, "attempt_ids": case_attempts,
+        })
+        series.append(item)
+        internals[item["series_id"]] = internal
+
+    # First canonical case matching mi355x / deepep / decode / uniform routing /
+    # no EPLB / EP 8; next() has no default, so a miss raises StopIteration.
+    unsupported_case, unsupported = next(
+        (case_id, case)
+        for case_id, case in publisher._canonical_coverage_cases().items()
+        if case["sku"] == "mi355x" and case["backend"] == "deepep"
+        and case["phase"] == "decode" and case["routing"] == "uniform"
+        and not case["eplb"] and case["ep"] == 8
+    )
+    unsupported_attempts = []
+    # Three evidence-free "unsupported" attempts, one per run id 1-3.
+    for run_id in range(1, 4):
+        allocation_id = identity.allocation_id(
+            {"seed": "planned-unsupported", "run": run_id}
+        )
+        attempt_id = identity.attempt_id(
+            allocation=allocation_id, case=unsupported_case, ordinal=1
+        )
+        attempts.append({
+            "attempt_id": attempt_id, "evidence": [], "case_id": unsupported_case,
+            "allocation_id": allocation_id, "run_id": str(run_id),
+            "run_attempt": 1, "qualification_index": run_id,
+            "attempt_index": 1, "selected": True, "outcome": "unsupported",
+            "failure_mode": "capability", "reason": "backend-platform-unsupported",
+            "series_id": None, "completed_at": "2026-07-04T00:00:00Z",
+        })
+        unsupported_attempts.append(attempt_id)
+    coverage.append({
+        "case_id": unsupported_case, "label": "planned unsupported", "required": True,
+        "sku": unsupported["sku"], "suite": unsupported["suite"],
+        "workload": unsupported["workload"],
+        "publication_tier": unsupported["required_publication"],
+        "backend": unsupported["backend"], "backend_generation": None,
+        "mode": unsupported["mode"], "phase": unsupported["phase"],
+        "routing": unsupported["routing"], "eplb": unsupported["eplb"],
+        "precision_profile": identity.V1_CONTROL_PRECISION_PROFILE,
+        "dispatch_precision": identity.precision_profile(
+            identity.V1_CONTROL_PRECISION_PROFILE
+        )["dispatch"],
+        "combine_precision": identity.precision_profile(
+            identity.V1_CONTROL_PRECISION_PROFILE
+        )["combine"],
+        "resource": {
+            "mode": "fixed-profile", "profile": None,
+            "comm_units_kind": None, "configured_units": None,
+        },
+        "topology": publisher._coverage_topology(unsupported),
+        # One id-less unsupported point per whitespace-separated ladder entry.
+        "points": [{
+            "point_id": None, "series_id": None,
+            "tokens_per_rank": token, "global_tokens": token * unsupported["ep"],
+            "terminal_status": "unsupported",
+            "reason": "backend-platform-unsupported",
+        } for token in map(int, unsupported["ladder"].split())],
+        "disposition": "unsupported", "selected_attempt_id": unsupported_attempts[-1],
+        "outcome": "unsupported", "failure_mode": "capability",
+        "reason": "backend-platform-unsupported", "attempt_ids": unsupported_attempts,
+    })
+    cohorts, rankings, recommendations, sensitivities = publisher.build_decisions(
+        series, internals
+    )
+    return {
+        "format": "collectivex.public.v1", "schema_version": 1,
+        "generated_at": "2026-07-04T00:00:00Z",
+        "source_bundle_ids": ["a" * 64, "b" * 64, "c" * 64],
+        "promotion": {
+            "status": "promoted", "reason": None,
+            "matrix_id": publisher.CANONICAL_FULL_V1_MATRIX_SHA256,
+            "allocation_ids": sorted({item["allocation_id"] for item in attempts}),
+            "required_allocations": 3, "qualification_indices": [1, 2, 3],
+            "requested_cases": len(coverage), "terminal_cases": len(coverage),
+            # Every coverage case is measured except the single unsupported one.
+            "measured_cases": len(coverage) - 1, "unsupported_cases": 1,
+            "requested_points": sum(len(item["points"]) for item in coverage),
+            "terminal_points": sum(len(item["points"]) for item in coverage),
+            "measured_points": sum(
+                point["terminal_status"] == "measured"
+                for item in coverage for point in item["points"]
+            ),
+            "unsupported_points": sum(
+                point["terminal_status"] == "unsupported"
+                for item in coverage for point in item["points"]
+            ),
+            "policy": "collectivex-decision-grade-v1",
+        },
+        "coverage": sorted(coverage, key=lambda item: item["case_id"]),
+        "attempts": sorted(attempts, key=lambda item: item["attempt_id"]),
+        "series": sorted(series, key=lambda item: item["series_id"]),
+        "cohorts": cohorts, "rankings": rankings,
+        "recommendations": recommendations, "sensitivities": sensitivities,
+    }
+
+
+def _cohort_counts(dataset: dict) -> dict[str, int]:
+    return {
+        kind: sum(item["kind"] == kind for item in dataset["cohorts"])
+        for kind in ("library", "system", "routing")
+    }
+
+
+class PublisherTest(unittest.TestCase):
+    def test_trial_diagnostics_flag_drift_and_robust_outliers(self) -> None:
+        def runs() -> dict[str, dict[int, dict[str, object]]]:  # fresh baseline on every call
+            return {
+                str(index): {
+                    8: {
+                        "dispatch": tuple(tuple([10.0] * 8) for _ in range(64)),
+                        "stage": None,
+                        "combine": tuple(tuple([10.0] * 8) for _ in range(64)),
+                        "roundtrip": tuple(tuple([10.0] * 8) for _ in range(64)),
+                    }
+                }
+                for index in range(1, 4)  # keys "1", "2", "3"
+            }
+        # Baseline: every timed component is a constant 10.0, so nothing should be flagged.
+        stable = publisher._trial_diagnostics(runs(), 8)
+        self.assertFalse(stable["flagged"])
+        # Drift: in run "1" only the last 8 of 64 roundtrip trials step up from 10.0 to 12.0.
+        drift = runs()
+        drift["1"][8]["roundtrip"] = tuple(
+            tuple([12.0 if trial >= 56 else 10.0] * 8) for trial in range(64)
+        )
+        self.assertEqual(publisher._trial_diagnostics(drift, 8)["reasons"], ["trial-drift"])
+        # Outliers: in run "1" roundtrip trials 20..35 (16 of 64) jump from 10.0 to 100.0.
+        outliers = runs()
+        outliers["1"][8]["roundtrip"] = tuple(
+            tuple([100.0 if 20 <= trial < 36 else 10.0] * 8) for trial in range(64)
+        )
+        summary = publisher._trial_diagnostics(outliers, 8)
+        self.assertEqual(summary["reasons"], ["trial-outliers"])
+        self.assertGreater(
+            summary["components"]["roundtrip"]["robust_outlier_fraction"],
+            publisher.TRIAL_OUTLIER_FRACTION_LIMIT,
+        )
+
+    def test_terminal_allocation_and_source_status_are_bound(self) -> None:
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve()
+            matrix, artifact = _unsupported_delivery(root)
+            path = next(artifact.glob("*.json"))
+            terminal = contracts.strict_load(path)
+            self.assertIs(contracts.validate_terminal_document(terminal), terminal)
+            self.assertEqual(
+                contracts.validate_delivery(
+                    [str(path)], str(matrix), disposition="unsupported"
+                ),
+                1,
+            )
+            # A missing or all-zero control_sha256 must fail delivery validation.
+            for control_sha256 in (None, "0" * 64):
+                broken = copy.deepcopy(terminal)
+                broken["provenance"]["control_sha256"] = control_sha256
+                path.write_text(json.dumps(broken))
+                with self.assertRaisesRegex(contracts.ContractError, "exact control document"):
+                    contracts.validate_delivery(
+                        [str(path)], str(matrix), disposition="unsupported"
+                    )
+            path.write_text(json.dumps(terminal))  # put the unmodified document back on disk
+            # Forging one allocation factor is rejected even with both ids re-derived from it.
+            for field in (
+                "artifact", "job", "repo", "run_attempt", "run_id", "source_sha", "runner"
+            ):
+                broken = copy.deepcopy(terminal)
+                broken["identity"]["allocation_factors"][field] = f"forged-{field}"
+                allocation_id = identity.allocation_id(
+                    broken["identity"]["allocation_factors"]
+                )
+                broken["identity"]["allocation_id"] = allocation_id
+                broken["identity"]["attempt_id"] = identity.attempt_id(
+                    allocation=allocation_id,
+                    case=broken["identity"]["case_id"],
+                    ordinal=broken["identity"]["attempt_ordinal"],
+                )
+                with self.assertRaisesRegex(
+                    contracts.ContractError, "allocation factors differ"
+                ):
+                    contracts.validate_terminal_document(broken)
+            # Changing only the status, or only the source, trips the "source and outcome" check.
+            broken = copy.deepcopy(terminal)
+            broken["outcome"]["status"] = "failed"
+            with self.assertRaisesRegex(contracts.ContractError, "source and outcome"):
+                contracts.validate_terminal_document(broken)
+            broken = copy.deepcopy(terminal)
+            broken["provenance"]["source"] = "runtime-emitter"
+            with self.assertRaisesRegex(contracts.ContractError, "source and outcome"):
+                contracts.validate_terminal_document(broken)
+            # Each replacement below must fail both the JSON schema and the contract validator.
+            for path_parts, replacement in (
+                (("provenance", "source"), "unregistered-producer"),
+                (("outcome", "failure_mode"), "unsupported-capability"),
+                (("outcome", "reason"), "unregistered-capability"),
+            ):
+                with self.subTest(path=path_parts):
+                    broken = copy.deepcopy(terminal)
+                    broken[path_parts[0]][path_parts[1]] = replacement
+                    with self.assertRaises(publisher.PublisherError):
+                        publisher._schema("terminal-outcome-v1.schema.json", broken)
+                    with self.assertRaises(contracts.ContractError):
+                        contracts.validate_terminal_document(broken)
+            # A failed "runtime-emitter" terminal passes the schema; a different reason does not.
+            runtime_allocation = copy.deepcopy(
+                terminal["identity"]["allocation_factors"]
+            )
+            runtime_allocation["runner"] = terminal["identity"]["case_factors"]["sku"]
+            runtime = contracts.make_terminal_document(
+                allocation_factors=runtime_allocation,
+                attempt_ordinal=1,
+                case=terminal["case"],
+                case_factors=terminal["identity"]["case_factors"],
+                control_sha256=terminal["provenance"]["control_sha256"],
+                failure_mode="setup",
+                generated_at=terminal["generated_at"],
+                git_run=terminal["provenance"]["git_run"],
+                reason="launcher-setup-failed",
+                return_code=1,
+                source="runtime-emitter",
+                status="failed",
+                expected_case_id=terminal["identity"]["case_id"],
+            )
+            publisher._schema("terminal-outcome-v1.schema.json", runtime)
+            broken = copy.deepcopy(runtime)
+            broken["outcome"]["reason"] = "backend-setup-failed"
+            with self.assertRaises(publisher.PublisherError):
+                publisher._schema("terminal-outcome-v1.schema.json", broken)
+            with self.assertRaises(contracts.ContractError):
+                contracts.validate_terminal_document(broken)
+
+    def test_post_emit_demotion_uses_closed_failure_taxonomy(self) -> None:
+        raw, _ = _native_fixture()
+        expected = {  # return code -> failure_mode that demote_raw_attempt must report
+            5: "runtime-identity",
+            6: "execution",
+            124: "timeout",
+            137: "timeout",
+            134: "execution",
+            9: "execution",
+        }
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary)
+            for return_code, failure_mode in expected.items():
+                with self.subTest(return_code=return_code):
+                    path = root / f"attempt-{return_code}.json"
+                    path.write_text(json.dumps(raw))
+                    terminal = contracts.demote_raw_attempt(path, return_code)
+                    self.assertEqual(
+                        terminal["outcome"],
+                        {
+                            "failure_mode": failure_mode,
+                            "reason": "post-emit-distributed-command-failed",
+                            "return_code": return_code,
+                            "status": "failed",
+                        },
+                    )
+                    self.assertEqual(terminal["provenance"]["source"], "post-emit-command")
+                    publisher._schema("terminal-outcome-v1.schema.json", terminal)
+                    # An altered reason string must fail both the schema and the contract check.
+                    broken = copy.deepcopy(terminal)
+                    broken["outcome"]["reason"] = "distributed-command-failed"
+                    with self.assertRaises(publisher.PublisherError):
+                        publisher._schema("terminal-outcome-v1.schema.json", broken)
+                    with self.assertRaises(contracts.ContractError):
+                        contracts.validate_terminal_document(broken)
+
+    def test_artifact_safety_accepts_current_v1_fixtures(self) -> None:
+        # The resolved matrix, the native fixture pair, and both datasets must not be refused.
+        raw_doc, sample_doc = _native_fixture()
+        candidates = [
+            sweep_matrix.resolve_matrix(backends="all"),
+            raw_doc, sample_doc,
+            _dataset(), _promoted_dataset(),
+        ]
+        publisher.artifact_safety.assert_publication_safe(candidates)
+
+    def test_native_raw_and_sample_schema_match_semantic_validator(self) -> None:
+        raw, samples = _native_fixture()
+        publisher._schema("samples-v1.schema.json", samples)
+        publisher._schema("raw-case-v1.schema.json", raw)
+        self.assertIs(contracts.validate_raw_document(raw, samples), raw)  # returns its input
+        provenance = raw["provenance"]
+        image = provenance["image"]
+        self.assertTrue(contracts.provenance_complete(
+            raw["implementation"]["provenance"], raw["case"]["backend"],
+            provenance["git_run"],
+            allocation_stratum_sha256=provenance["allocation_stratum_sha256"],
+            image_digest=image["digest"], image_verified=image["digest_verified"],
+            squash_sha256=image["squash_sha256"],
+        ))
+        self.assertFalse(contracts.provenance_complete(  # same call, stratum hash withheld
+            raw["implementation"]["provenance"], raw["case"]["backend"],
+            provenance["git_run"], allocation_stratum_sha256=None,
+            image_digest=image["digest"], image_verified=image["digest_verified"],
+            squash_sha256=image["squash_sha256"],
+        ))
+        missing_stratum = copy.deepcopy(raw)  # null stratum: schema and contract both reject
+        missing_stratum["provenance"]["allocation_stratum_sha256"] = None
+        with self.assertRaises(publisher.PublisherError):
+            publisher._schema("raw-case-v1.schema.json", missing_stratum)
+        with self.assertRaisesRegex(contracts.ContractError, "allocation stratum"):
+            contracts.validate_raw_document(missing_stratum, samples)
+        with tempfile.TemporaryDirectory() as temporary:  # canonical bytes load back equal
+            root = Path(temporary).resolve()
+            (root / "samples.json").write_bytes(contracts.canonical_json_bytes(samples))
+            (root / "raw.json").write_bytes(contracts.canonical_json_bytes(raw))
+            self.assertEqual(contracts.load_raw_attempt(root / "raw.json"), raw)
+        for target in ("raw", "samples"):  # an extra top-level key in either document
+            broken_raw, broken_samples = copy.deepcopy((raw, samples))
+            broken = broken_raw if target == "raw" else broken_samples
+            broken["unexpected"] = True
+            with self.assertRaises(publisher.PublisherError):
+                publisher._schema(
+                    "raw-case-v1.schema.json" if target == "raw" else "samples-v1.schema.json",
+                    broken,
+                )
+            with self.assertRaises(contracts.ContractError):
+                contracts.validate_raw_document(broken_raw, broken_samples)
+        tampered = copy.deepcopy(raw)  # doubled p50 token rate must be caught
+        tampered["measurement"]["rows"][0]["token_rate_at_latency_percentile"]["p50"] *= 2
+        with self.assertRaisesRegex(contracts.ContractError, "token_rate_at_latency_percentile"):
+            contracts.validate_raw_document(tampered, samples)
+        tampered = copy.deepcopy(raw)  # changed hidden size must be caught
+        tampered["case"]["shape"]["hidden"] = 2
+        with self.assertRaises(contracts.ContractError):
+            contracts.validate_raw_document(tampered, samples)
+        tampered = copy.deepcopy(raw)  # configured_units bumped by one (or set to 1 if null)
+        configured = tampered["implementation"]["resource_profile"]["configured_units"]
+        tampered["implementation"]["resource_profile"]["configured_units"] = (
+            1 if configured is None else configured + 1
+        )
+        with self.assertRaisesRegex(contracts.ContractError, "resource profile"):
+            contracts.validate_raw_document(tampered, samples)
+        tampered = copy.deepcopy(raw)  # one pre-timing correctness check flipped to False
+        oracle = tampered["measurement"]["rows"][0]["correctness"]["rank_evidence"][0]
+        oracle["pre_timing"]["checks"]["combine_values"] = False
+        with self.assertRaisesRegex(contracts.ContractError, "passed differs"):
+            contracts.validate_raw_document(tampered, samples)
+
+    def test_hybrid_raw_binds_realized_config_and_every_rank_artifact(self) -> None:
+        raw, samples = _native_fixture("deepep-hybrid")
+        publisher._schema("raw-case-v1.schema.json", raw)
+        self.assertIs(contracts.validate_raw_document(raw, samples), raw)
+        # Each mutation edits one realized_config field or adds a rank artifact to every JIT object.
+        mutations = {
+            "hidden_dim": lambda provenance: provenance["realized_config"].update(
+                hidden_dim=2
+            ),
+            "experts_per_rank": lambda provenance: provenance["realized_config"].update(
+                num_of_experts_per_rank=2
+            ),
+            "ranks_per_node": lambda provenance: provenance["realized_config"].update(
+                num_of_ranks_per_node=2
+            ),
+            "num_nodes": lambda provenance: provenance["realized_config"].update(
+                num_of_nodes=2
+            ),
+            "token_data_type": lambda provenance: provenance["realized_config"].update(
+                token_data_type="UINT8"
+            ),
+            "rank_coverage": lambda provenance: [
+                artifact["rank_artifacts"].append({
+                    "bytes": 1, "rank": 1, "sha256": "9" * 64,
+                })
+                for artifact in provenance["jit_shared_objects"]
+            ],
+        }
+        for name, mutate in mutations.items():
+            with self.subTest(name=name):
+                changed = copy.deepcopy(raw)  # every mutation starts from the valid fixture
+                mutate(changed["implementation"]["provenance"])
+                with self.assertRaisesRegex(
+                    contracts.ContractError,
+                    "DeepEP Hybrid realized config/JIT evidence differs",
+                ):
+                    contracts.validate_raw_document(changed, samples)
+
+    def test_native_contract_recomputes_routing_receive_histograms_and_anomalies(self) -> None:
+        raw, samples = _native_fixture()
+        # Doubling routed_copies together with the matching byte counts is still rejected.
+        tampered = copy.deepcopy(raw)
+        changed = tampered["measurement"]["rows"][0]
+        changed["routing"]["routed_copies"] *= 2
+        for name in ("combine", "dispatch", "roundtrip"):
+            byte_provenance = changed["byte_provenance"][name]
+            byte_provenance["activation_data_bytes"] *= 2
+            byte_provenance["total_logical_bytes"] *= 2
+        with self.assertRaisesRegex(contracts.ContractError, "routing.routed_copies"):
+            contracts.validate_raw_document(tampered, samples)
+        # Rewriting payload_copies_per_rank together with the receive summary is still rejected.
+        tampered = copy.deepcopy(raw)
+        changed = tampered["measurement"]["rows"][0]
+        changed["routing"]["payload_copies_per_rank"] = [2]
+        changed["receive"] = {"max": 2, "mean": 2.0, "min": 2, "total": 2}
+        with self.assertRaisesRegex(contracts.ContractError, "payload_copies_per_rank"):
+            contracts.validate_raw_document(tampered, samples)
+        # Replacing the roundtrip histogram counts is rejected.
+        tampered = copy.deepcopy(raw)
+        tampered["measurement"]["rows"][0]["sample_histograms"]["roundtrip"][
+            "counts"
+        ] = [511]
+        with self.assertRaisesRegex(contracts.ContractError, "sample_histograms"):
+            contracts.validate_raw_document(tampered, samples)
+        # Declaring an anomaly while the measured components stay unchanged is rejected.
+        tampered = copy.deepcopy(raw)
+        tampered["measurement"]["rows"][0]["anomalies"] = [{
+            "type": "roundtrip_gt_isolated_sum",
+            "T": 1,
+            "roundtrip_p99": 40.0,
+            "isolated_sum_p99": 30.0,
+            "ratio": 1.33,
+            "threshold": 3.0,
+        }]
+        tampered["outcome"]["validity"]["anomaly_free"] = False
+        with self.assertRaisesRegex(contracts.ContractError, "anomalies"):
+            contracts.validate_raw_document(tampered, samples)
+        # Positive case: rebuild samples, hashes, ids, percentiles, histogram, and anomalies together.
+        anomalous_raw, anomalous_samples = copy.deepcopy((raw, samples))
+        sample_point = anomalous_samples["points"][0]
+        sample_point["components"]["roundtrip"]["trials"] = [
+            [100.0] * 8 for _ in range(64)
+        ]
+        sample_core = {
+            "components": sample_point["components"],
+            "tokens_per_rank": sample_point["tokens_per_rank"],
+        }
+        sample_sha = hashlib.sha256(
+            contracts.canonical_json_bytes(sample_core)
+        ).hexdigest()
+        point_id = sample_point["point_id"]
+        evidence_id = identity.evidence_id(
+            point=point_id,
+            allocation=anomalous_raw["identity"]["allocation_id"],
+            attempt=anomalous_raw["identity"]["attempt_id"],
+            sample_sha256=sample_sha,
+        )
+        sample_point.update({"sample_sha256": sample_sha, "evidence_id": evidence_id})
+        changed = anomalous_raw["measurement"]["rows"][0]
+        changed["sample_sha256"] = sample_sha
+        changed["evidence_id"] = evidence_id
+        changed["components"]["roundtrip"]["percentiles_us"] = {
+            name: 100.0 for name in ("p50", "p90", "p95", "p99")
+        }
+        changed["token_rate_at_latency_percentile"] = {
+            name: 10_000.0 for name in ("p50", "p90", "p95", "p99")
+        }
+        changed["sample_histograms"]["roundtrip"] = contracts._expected_histogram(
+            [100.0] * 512
+        )
+        changed["anomalies"] = contracts._expected_anomalies(1, changed["components"])
+        anomalous_raw["outcome"]["validity"]["anomaly_free"] = False
+        sample_bytes = contracts.canonical_json_bytes(anomalous_samples)
+        anomalous_raw["sample_artifact"].update({
+            "bytes": len(sample_bytes),
+            "sha256": hashlib.sha256(sample_bytes).hexdigest(),
+        })
+        self.assertIs(
+            contracts.validate_raw_document(anomalous_raw, anomalous_samples),
+            anomalous_raw,
+        )
+        changed["anomalies"] = []  # clearing the anomalies again must be rejected
+        anomalous_raw["outcome"]["validity"]["anomaly_free"] = True
+        with self.assertRaisesRegex(contracts.ContractError, "anomalies"):
+            contracts.validate_raw_document(anomalous_raw, anomalous_samples)
+
+    def test_native_contract_rejects_every_schema_only_nested_mutation(self) -> None:
+        raw, samples = _native_fixture()
+        self.assertIs(contracts.validate_raw_document(raw, samples), raw)
+        # locate: index into `document` with each element of `path` in turn; return the result.
+        def locate(document: object, path: tuple[object, ...]) -> object:
+            value = document
+            for part in path:
+                value = value[part]  # type: ignore[index]
+            return value
+        # reject_raw: the document must fail both the JSON schema and the contract validator.
+        def reject_raw(document: dict) -> None:
+            with self.assertRaises(publisher.PublisherError):
+                publisher._schema("raw-case-v1.schema.json", document)
+            with self.assertRaises(contracts.ContractError):
+                contracts.validate_raw_document(document, samples)
+        # (container path, key): the key is deleted once and one unknown key is added once.
+        required_fields = (
+            (("measurement", "rows", 0, "receive"), "total"),
+            (("measurement", "rows", 0, "routing"), "fanout_mean"),
+            (("measurement", "rows", 0, "routing", "source_token_stats"), "ranks"),
+            (("measurement", "rows", 0, "sample_histograms"), "roundtrip"),
+            (("measurement", "rows", 0, "sample_histograms", "roundtrip"), "n"),
+            (("runtime_fingerprint", "accelerator_runtime"), "kind"),
+            (("runtime_fingerprint", "collective_library"), "kind"),
+            (("runtime_fingerprint", "framework"), "kind"),
+        )
+        for path, required in required_fields:
+            with self.subTest(path=path, mutation="missing"):
+                broken = copy.deepcopy(raw)
+                del locate(broken, path)[required]  # type: ignore[index]
+                reject_raw(broken)
+            with self.subTest(path=path, mutation="extra"):
+                broken = copy.deepcopy(raw)
+                locate(broken, path)["unexpected"] = None  # type: ignore[index]
+                reject_raw(broken)
+        # (path to a leaf, replacement value) pairs that both validators must reject.
+        invalid_values = (
+            (("measurement", "rows", 0, "receive", "mean"), "one"),
+            (("measurement", "rows", 0, "routing", "fanout_mean"), "one"),
+            (("measurement", "rows", 0, "sample_histograms", "roundtrip", "bins"), 0),
+            (("provenance", "image", "arch"), "AMD64"),
+            (("runtime_fingerprint", "accelerator_runtime", "kind"), "rocm"),
+        )
+        for path, invalid in invalid_values:
+            with self.subTest(path=path, mutation="value"):
+                broken = copy.deepcopy(raw)
+                parent = locate(broken, path[:-1])
+                parent[path[-1]] = invalid  # type: ignore[index]
+                reject_raw(broken)
+        # reject_samples: the samples-document counterpart of reject_raw.
+        def reject_samples(document: dict) -> None:
+            with self.assertRaises(publisher.PublisherError):
+                publisher._schema("samples-v1.schema.json", document)
+            with self.assertRaises(contracts.ContractError):
+                contracts.validate_samples_document(document)
+        # Deleting any one of these keys from the samples document must be rejected.
+        for path, required in (
+            (("points", 0), "evidence_id"),
+            (("points", 0, "components"), "roundtrip"),
+            (("points", 0, "components", "roundtrip"), "trials"),
+            (("sampling",), "reduction"),
+        ):
+            with self.subTest(path=path, artifact="samples"):
+                broken = copy.deepcopy(samples)
+                del locate(broken, path)[required]  # type: ignore[index]
+                reject_samples(broken)
+
+    def test_terminal_contract_and_schema_reject_the_same_shape_gaps(self) -> None:
+        with tempfile.TemporaryDirectory() as temporary:
+            _, artifact = _unsupported_delivery(Path(temporary).resolve())
+            terminal = contracts.strict_load(next(artifact.glob("*.json")))
+        publisher._schema("terminal-outcome-v1.schema.json", terminal)
+        self.assertIs(contracts.validate_terminal_document(terminal), terminal)
+        # reject: the document must fail both the JSON schema and the contract validator.
+        def reject(document: dict) -> None:
+            with self.assertRaises(publisher.PublisherError):
+                publisher._schema("terminal-outcome-v1.schema.json", document)
+            with self.assertRaises(contracts.ContractError):
+                contracts.validate_terminal_document(document)
+        # (path, invalid value): "Not Safe" strings, a 241-character reason, an empty git ref.
+        for path, invalid in (
+            (("outcome", "failure_mode"), "Not Safe"),
+            (("outcome", "reason"), "x" * 241),
+            (("provenance", "source"), "Not Safe"),
+            (("provenance", "git_run", "ref"), ""),
+        ):
+            with self.subTest(path=path):
+                broken = copy.deepcopy(terminal)
+                parent = broken
+                for part in path[:-1]:  # walk down to the dict that holds the final key
+                    parent = parent[part]
+                parent[path[-1]] = invalid
+                reject(broken)
+
+    def test_invalid_retry_is_quarantined_before_valid_retry_upload(self) -> None:
+        raw, samples = _native_fixture()
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve()
+            sample_bytes = contracts.canonical_json_bytes(samples)
+            bad = copy.deepcopy(raw)  # attempt a01: same samples, p50 token rate doubled
+            bad["sample_artifact"].update({
+                "path": "a01.samples.json", "bytes": len(sample_bytes),
+                "sha256": hashlib.sha256(sample_bytes).hexdigest(),
+            })
+            bad["measurement"]["rows"][0]["token_rate_at_latency_percentile"]["p50"] *= 2
+            (root / "a01.samples.json").write_bytes(sample_bytes)
+            (root / "a01.json").write_bytes(contracts.canonical_json_bytes(bad))
+            self.assertTrue(contracts.quarantine_invalid_attempt(root / "a01.json"))
+            valid = copy.deepcopy(raw)  # attempt a02: only its sample_artifact entry is updated
+            valid["sample_artifact"].update({
+                "path": "a02.samples.json", "bytes": len(sample_bytes),
+                "sha256": hashlib.sha256(sample_bytes).hexdigest(),
+            })
+            (root / "a02.samples.json").write_bytes(sample_bytes)
+            (root / "a02.json").write_bytes(contracts.canonical_json_bytes(valid))
+            paths = sorted(str(path) for path in root.glob("*.json"))  # globbed after quarantine
+            self.assertEqual(contracts.validate_attempt_paths(paths), 1)
+            self.assertTrue((root / "a01.json.quarantine").is_file())  # both a01 files renamed
+            self.assertTrue((root / "a01.samples.json.quarantine").is_file())
+
+    def test_ingest_archives_without_publishing_a_channel(self) -> None:
+        with tempfile.TemporaryDirectory() as scratch:
+            base = Path(scratch).resolve()
+            matrix, artifact = _unsupported_delivery(base)
+            self.assertEqual(len(summarize.load_results(str(artifact), None, None)), 1)
+            outcome = publisher.ingest_command(_args(base / "store", matrix, artifact))
+            store = publisher.Store(base / "store")
+            self.assertEqual(outcome["status"], "accepted")
+            bundle_dir = store.bundles / outcome["bundle_id"]
+            for finished in (store.incoming / outcome["incoming_id"], bundle_dir):
+                self.assertTrue((finished / "COMPLETE").is_file())  # COMPLETE marker present
+            for unpublished in (store.channels, store.datasets):
+                self.assertEqual(list(unpublished.iterdir()), [])  # nothing published yet
+            for node, mode in ((store.private, 0o700), (store.public, 0o755), (bundle_dir, 0o500)):
+                self.assertEqual(os.stat(node).st_mode & 0o777, mode)
+
+    def test_repeated_ingest_is_content_idempotent(self) -> None:
+        # Ingesting the same arguments twice must return equal results and add no new entries.
+        with tempfile.TemporaryDirectory() as scratch:
+            base = Path(scratch).resolve()
+            matrix, artifact = _unsupported_delivery(base)
+            arguments = _args(base / "store", matrix, artifact)
+            initial = publisher.ingest_command(arguments)
+            store = publisher.Store(base / "store")
+            repeat = publisher.ingest_command(arguments)
+            self.assertEqual(repeat, initial)
+            expected_entries = (
+                (store.incoming, 1), (store.bundles, 1), (store.datasets, 0), (store.channels, 0),
+            )
+            for directory, count in expected_entries:
+                self.assertEqual(len(list(directory.iterdir())), count)
+            manifest = publisher.strict_load(store.bundles / initial["bundle_id"] / "bundle.json")
+            terminal = publisher.strict_load(next(artifact.glob("*.json")))
+            self.assertEqual(manifest["created_at"], terminal["generated_at"])
+
+    def test_dataset_is_invariant_to_bundle_argument_order(self) -> None:
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve()
+            store_root = root / "store"
+            bundle_ids = []
+            for run_id in (9, 11, 10):  # ingested out of numeric order
+                run = {**RUN, "run_id": str(run_id)}
+                delivery = root / f"run-{run_id}"
+                delivery.mkdir()
+                matrix, artifact = _unsupported_delivery(delivery, run=run)
+                result = publisher.ingest_command(
+                    _args(store_root, matrix, artifact, run=run)
+                )
+                bundle_ids.append(result["bundle_id"])
+            datasets = [
+                publisher.build_dataset(
+                    publisher.Store(store_root), order, promote=False,
+                )
+                for order in itertools.permutations(bundle_ids)  # all 6 argument orders
+            ]
+            self.assertTrue(all(dataset == datasets[0] for dataset in datasets[1:]))
+            self.assertEqual(datasets[0]["generated_at"], "2026-07-04T00:00:00Z")
+            selected = datasets[0]["coverage"][0]["selected_attempt_id"]
+            selected_attempt = next(
+                item for item in datasets[0]["attempts"]
+                if item["attempt_id"] == selected
+            )
+            self.assertEqual(selected_attempt["run_id"], "11")  # not the string-greater "9"
+
+    def test_diagnostic_dataset_orders_reruns_by_run_attempt(self) -> None:
+        # Builds a promote=False dataset over two ingests that differ only in run_attempt.
+        with tempfile.TemporaryDirectory() as scratch:
+            base = Path(scratch).resolve()
+            store_dir = base / "store"
+            ingested = []
+            # Ingest one unsupported delivery per run attempt (1, then 2) into the same store.
+            for ordinal in (1, 2):
+                rerun = dict(RUN, run_attempt=ordinal)
+                workdir = base / f"attempt-{ordinal}"
+                workdir.mkdir()
+                matrix, artifact = _unsupported_delivery(workdir, run=rerun)
+                arguments = _args(store_dir, matrix, artifact, run=rerun)
+                receipt = publisher.ingest_command(arguments)
+                ingested.append(receipt["bundle_id"])
+            store = publisher.Store(store_dir)
+            dataset = publisher.build_dataset(store, ingested, promote=False)
+            # The attempt that coverage selects must be the one recorded with run_attempt 2.
+            chosen_id = dataset["coverage"][0]["selected_attempt_id"]
+            chosen = next(
+                entry for entry in dataset["attempts"] if entry["attempt_id"] == chosen_id
+            )
+            self.assertEqual(chosen["run_attempt"], 2)
+
+    def test_promotion_requires_every_runnable_case_to_succeed_in_every_bundle(self) -> None:
+        cases = {
+            "runnable": {"_disposition": "runnable"},
+            "planned-unsupported": {"_disposition": "unsupported"},
+        }
+        bundles = []
+        for _ in range(3):  # three bundles, each with a success and a planned-unsupported case
+            runnable = {
+                "identity": {"case_id": "runnable"},
+                "outcome": {"status": "success"},
+            }
+            unsupported = {
+                "identity": {"case_id": "planned-unsupported"},
+                "outcome": {"status": "unsupported"},
+            }
+            bundles.append({
+                "selected": {"runnable": runnable, "planned-unsupported": unsupported},
+                "documents": {"runnable": runnable, "planned-unsupported": unsupported},
+            })
+        publisher._require_runnable_promotion_success(bundles, cases)  # baseline must not raise
+        # Each of these statuses on the runnable case's selected attempt in one bundle must raise.
+        for status in ("failed", "invalid", "unsupported", "diagnostic"):
+            with self.subTest(status=status):
+                broken = copy.deepcopy(bundles)
+                broken[1]["selected"]["runnable"]["outcome"]["status"] = status
+                with self.assertRaisesRegex(
+                    publisher.PublisherError, "every runnable matrix case"
+                ):
+                    publisher._require_runnable_promotion_success(broken, cases)
+        # An extra failed document for the runnable case, with selections untouched, also raises.
+        broken = copy.deepcopy(bundles)
+        broken[1]["documents"]["retry"] = {
+            "identity": {"case_id": "runnable"},
+            "outcome": {"status": "failed"},
+        }
+        with self.assertRaisesRegex(publisher.PublisherError, "rejects runnable cases"):
+            publisher._require_runnable_promotion_success(broken, cases)
+
+    def test_promoted_public_dataset_rejects_failed_retry_history(self) -> None:
+        dataset = _promoted_dataset()
+        successful = next(
+            item for item in dataset["attempts"]
+            if item["outcome"] == "success"
+        )
+        failed = copy.deepcopy(successful)  # becomes attempt 1; the success moves to ordinal 2
+        old_attempt_id = successful["attempt_id"]
+        successful["attempt_index"] = 2
+        successful["attempt_id"] = identity.attempt_id(
+            allocation=successful["allocation_id"], case=successful["case_id"], ordinal=2
+        )
+        failed.update({
+            "attempt_id": old_attempt_id,
+            "attempt_index": 1,
+            "outcome": "failed",
+            "failure_mode": "execution",
+            "reason": "execution-failed",
+            "series_id": None,
+            "selected": False,
+            "evidence": [],
+        })
+        dataset["attempts"].append(failed)
+        dataset["attempts"].sort(key=lambda item: item["attempt_id"])
+        coverage = next(
+            item for item in dataset["coverage"]
+            if item["case_id"] == failed["case_id"]
+        )
+        coverage["attempt_ids"] = [  # swap the old id for the success's new ordinal-2 id
+            successful["attempt_id"] if value == old_attempt_id else value
+            for value in coverage["attempt_ids"]
+        ]
+        coverage["attempt_ids"].append(failed["attempt_id"])
+        coverage["attempt_ids"].sort()
+        if coverage["selected_attempt_id"] == old_attempt_id:
+            coverage["selected_attempt_id"] = successful["attempt_id"]
+        # Pin the canonical catalog hash to this fixture's coverage before validating.
+        fixture_catalog = publisher._case_disposition_catalog_sha256(dataset["coverage"])
+        with mock.patch.object(
+            publisher, "CANONICAL_FULL_V1_CASE_CATALOG_SHA256", fixture_catalog
+        ), self.assertRaisesRegex(publisher.PublisherError, "rejects runnable cases"):
+            publisher.validate_public_dataset(dataset)
+
+    def test_unselected_success_does_not_reference_an_unpublished_series(self) -> None:
+        native, _unused = _native_fixture()
+        # Convert the same raw document twice: first unselected, then selected.
+        kept, chosen = (publisher._public_attempt(native, selected=flag) for flag in (False, True))
+        self.assertEqual(kept["outcome"], "success")
+        self.assertIsNone(kept["series_id"])
+        self.assertEqual(chosen["series_id"], native["identity"]["series_id"])
+
+    def test_public_dataset_selects_latest_derived_retry(self) -> None:
+        dataset = _dataset()
+        first = dataset["attempts"][0]
+        second = copy.deepcopy(first)  # an unselected ordinal-2 retry of the same case
+        second.update({
+            "attempt_id": identity.attempt_id(
+                allocation=first["allocation_id"], case=first["case_id"], ordinal=2
+            ),
+            "attempt_index": 2,
+            "selected": False,
+            "series_id": None,
+            "evidence": [],
+        })
+        dataset["attempts"].append(second)
+        dataset["attempts"].sort(key=lambda item: item["attempt_id"])
+        dataset["coverage"][0]["attempt_ids"].append(second["attempt_id"])
+        dataset["coverage"][0]["attempt_ids"].sort()
+        with self.assertRaisesRegex(publisher.PublisherError, "select the latest retry"):
+            publisher.validate_public_dataset(dataset)
+        # An attempt_id not derived from allocation, case, and ordinal is rejected as well.
+        second["attempt_id"] = identity.digest("attempt", {"not": "derived"})
+        dataset["attempts"].sort(key=lambda item: item["attempt_id"])
+        dataset["coverage"][0]["attempt_ids"] = [
+            item["attempt_id"] for item in dataset["attempts"]
+        ]
+        with self.assertRaisesRegex(publisher.PublisherError, "retry identity differs"):
+            publisher.validate_public_dataset(dataset)
+
+    def test_promotion_requires_an_eligible_cohort_for_every_comparison_kind(self) -> None:
+        stable_fast, stable_fast_internal = _series(
+            "stable-fast", "deepep", decision_grade=True
+        )
+        stable_slow, stable_slow_internal = _series(
+            "stable-slow", "uccl", decision_grade=True
+        )
+        unstable_fast, unstable_fast_internal = _series(
+            "unstable-fast", "deepep", decision_grade=True
+        )
+        unstable_slow, unstable_slow_internal = _series(
+            "unstable-slow", "uccl", decision_grade=True
+        )
+        unstable_fast["phase"] = unstable_slow["phase"] = "prefill"  # move this pair to prefill
+        unstable_fast["series_id"] = identity.series_id({"test": "unstable-fast"})
+        unstable_slow["series_id"] = identity.series_id({"test": "unstable-slow"})
+        for statistic in ("p50", "p99"):
+            unstable_slow_internal["run_metrics"]["1"][8]["latency_us"][statistic] = (
+                unstable_fast_internal["run_metrics"]["1"][8]["latency_us"][statistic]
+                / 2  # half the latency of unstable_fast for this entry
+            )
+            for field in (
+                "activation_data_rate_gbps_at_latency_percentile",
+                "total_logical_data_rate_gbps_at_latency_percentile",
+            ):
+                unstable_slow_internal["run_metrics"]["1"][8][field][statistic] = (
+                    unstable_fast_internal["run_metrics"]["1"][8][field][statistic] * 2
+                )
+        series = [stable_fast, stable_slow, unstable_fast, unstable_slow]
+        internals = {
+            stable_fast["series_id"]: stable_fast_internal,
+            stable_slow["series_id"]: stable_slow_internal,
+            unstable_fast["series_id"]: unstable_fast_internal,
+            unstable_slow["series_id"]: unstable_slow_internal,
+        }
+        cohorts, _, _, _ = publisher.build_decisions(series, internals)
+        eligible = [item for item in cohorts if item["eligibility"]["decision_grade"]]
+        ineligible = [item for item in cohorts if not item["eligibility"]["decision_grade"]]
+        self.assertEqual({item["kind"] for item in eligible}, {"library"})  # only library is eligible
+        self.assertTrue(ineligible)
+        anchor_series = [  # minimal series stubs sharing one contract hash
+            {
+                "series_id": name,
+                "workload": {"routing": routing, "eplb": eplb},
+                "build": {"implementation_contract_sha256": "1" * 64},
+            }
+            for name, routing, eplb in (
+                ("uniform", "uniform", False),
+                ("zipf", "zipf", False),
+                ("zipf-eplb", "zipf", True),
+            )
+        ]
+        required = eligible + [  # add one synthetic eligible cohort per non-library kind
+            {
+                "kind": kind,
+                "eligibility": {"decision_grade": True},
+                **({"series_ids": [item["series_id"] for item in anchor_series]}
+                   if kind == "routing" else {}),
+            }
+            for kind in publisher.REQUIRED_COHORT_KINDS
+            if kind != "library"
+        ]
+        with mock.patch.object(
+            publisher, "REQUIRED_PROMOTION_COHORT_COUNTS", {}
+        ), mock.patch.object(
+            publisher, "_expected_chip_cohort_count", return_value=1
+        ):
+            publisher._require_promotion_cohorts(  # the complete set must not raise
+                required + ineligible, anchor_series
+            )
+            for kind in publisher.REQUIRED_COHORT_KINDS:
+                with self.subTest(missing_kind=kind), self.assertRaisesRegex(
+                    publisher.PublisherError, rf"cohort kinds:.*{kind}"
+                ):
+                    publisher._require_promotion_cohorts([  # drop only the eligible cohorts of `kind`
+                        item for item in required + ineligible
+                        if item["kind"] != kind or not item["eligibility"]["decision_grade"]
+                    ], anchor_series)
+
+    def test_promotion_requires_exact_counts_and_routing_anchors(self) -> None:
+        dataset = _promoted_dataset()
+        counts = _cohort_counts(dataset)  # also reused for the fresh dataset built below
+        with mock.patch.object(
+            publisher, "REQUIRED_PROMOTION_COHORT_COUNTS", counts
+        ):
+            publisher._require_promotion_cohorts(  # the unmodified fixture must not raise
+                dataset["cohorts"], dataset["series"]
+            )
+            routing = next(
+                item for item in dataset["cohorts"] if item["kind"] == "routing"
+            )
+            eplb = next(
+                item for item in dataset["series"]
+                if item["series_id"] in routing["series_ids"]
+                and item["workload"]["eplb"]
+            )
+            eplb["workload"]["eplb"] = False  # turn EPLB off on the cohort's EPLB series
+            with self.assertRaisesRegex(publisher.PublisherError, "exact uniform"):
+                publisher._require_promotion_cohorts(
+                    dataset["cohorts"], dataset["series"]
+                )
+
+        dataset = _promoted_dataset()  # start again from an unmutated fixture
+        routing = next(item for item in dataset["cohorts"] if item["kind"] == "routing")
+        zipf = next(
+            item for item in dataset["series"]
+            if item["series_id"] in routing["series_ids"]
+            and item["workload"]["routing"] == "zipf"
+            and not item["workload"]["eplb"]
+        )
+        zipf["build"]["implementation_contract_sha256"] = "f" * 64  # change its contract hash
+        with mock.patch.object(
+            publisher, "REQUIRED_PROMOTION_COHORT_COUNTS", counts
+        ), self.assertRaisesRegex(publisher.PublisherError, "identical off-EPLB"):
+            publisher._require_promotion_cohorts(dataset["cohorts"], dataset["series"])
+
+        wrong_counts = {**counts, "library": counts["library"] + 1}  # one library cohort too many
+        with mock.patch.object(
+            publisher, "REQUIRED_PROMOTION_COHORT_COUNTS", wrong_counts
+        ), self.assertRaisesRegex(publisher.PublisherError, "exactly"):
+            publisher._require_promotion_cohorts(
+                dataset["cohorts"], dataset["series"]
+            )
+
+    def test_promotion_requires_every_derived_chip_cohort_to_be_stable(self) -> None:
+        dataset = _promoted_dataset()
+        chip = next(item for item in dataset["cohorts"] if item["kind"] == "chip")
+        self.assertEqual(  # derived count equals the number of chip cohorts in the fixture
+            publisher._expected_chip_cohort_count(dataset["series"]),
+            sum(item["kind"] == "chip" for item in dataset["cohorts"]),
+        )
+        with mock.patch.object(
+            publisher, "REQUIRED_PROMOTION_COHORT_COUNTS", _cohort_counts(dataset)
+        ):
+            missing = [item for item in dataset["cohorts"] if item is not chip]  # drop that cohort
+            with self.assertRaisesRegex(publisher.PublisherError, "derived chip cohorts"):
+                publisher._require_promotion_cohorts(missing, dataset["series"])
+
+            chip["eligibility"]["decision_grade"] = False  # keep it, but not decision-grade
+            with self.assertRaisesRegex(publisher.PublisherError, "derived chip cohorts"):
+                publisher._require_promotion_cohorts(
+                    dataset["cohorts"], dataset["series"]
+                )
+
+    def test_promotion_rejects_more_than_three_bundles(self) -> None:
+        bundles = {
+            str(run_id): {
+                "id": str(run_id), "cases": [],
+                "manifest": {
+                    "matrix": {"sha256": publisher.CANONICAL_FULL_V1_MATRIX_SHA256},
+                    "run": {
+                        "run_id": str(run_id), "run_attempt": 1,
+                        "qualification_index": min(run_id, 3),  # run ids 3 and 4 both get index 3
+                    },
+                },
+            }
+            for run_id in range(1, 5)  # four bundles: run ids 1..4
+        }
+        with mock.patch.object(
+            publisher, "load_bundle", side_effect=lambda _, bundle_id: bundles[bundle_id]
+        ), self.assertRaisesRegex(publisher.PublisherError, "qualification indices"):
+            publisher.build_dataset(object(), list(bundles), promote=True)
+
+        dataset = _promoted_dataset()
+        dataset["source_bundle_ids"].append("d" * 64)  # one extra source bundle id
+        counts = _cohort_counts(dataset)
+        with mock.patch.object(
+            publisher,
+            "CANONICAL_FULL_V1_CASE_CATALOG_SHA256",
+            publisher._case_disposition_catalog_sha256(dataset["coverage"]),
+        ), mock.patch.object(
+            publisher, "REQUIRED_PROMOTION_COHORT_COUNTS", counts
+        ), self.assertRaisesRegex(publisher.PublisherError, "complete coverage"):
+            publisher.validate_public_dataset(dataset)
+
+    def test_standalone_promotion_binds_matrix_and_requested_dispositions(self) -> None:
+        dataset = _promoted_dataset()
+        fixture_catalog = publisher._case_disposition_catalog_sha256(dataset["coverage"])
+        with self.assertRaisesRegex(  # unpatched: the fixture catalog is expected to be rejected
+            publisher.PublisherError, "canonical case/disposition catalog"
+        ):
+            publisher.validate_public_dataset(dataset)
+        with mock.patch.object(  # catalog digest and counts patched to the fixture: must pass
+            publisher, "CANONICAL_FULL_V1_CASE_CATALOG_SHA256", fixture_catalog
+        ), mock.patch.object(
+            publisher,
+            "REQUIRED_PROMOTION_COHORT_COUNTS",
+            _cohort_counts(dataset),
+        ):
+            publisher.validate_public_dataset(dataset)
+
+        diagnostic = copy.deepcopy(dataset)
+        item = diagnostic["series"][0]
+        item["status"] = "diagnostic"  # demote the first series
+        item["eligibility"].update({
+            "decision_grade": False,
+            "stable_p50": False,
+            "p50_max_min_ratio": 1.20,
+            "reasons": ["unstable-p50"],
+        })
+        with mock.patch.object(
+            publisher, "CANONICAL_FULL_V1_CASE_CATALOG_SHA256", fixture_catalog
+        ), mock.patch.object(
+            publisher,
+            "REQUIRED_PROMOTION_COHORT_COUNTS",
+            _cohort_counts(dataset),
+        ), self.assertRaisesRegex(
+            publisher.PublisherError, "unstable or incomplete required series"
+        ):
+            publisher.validate_public_dataset(diagnostic)
+
+        broken = copy.deepcopy(dataset)
+        broken["promotion"]["matrix_id"] = "d" * 64  # replace the promotion's matrix id
+        with self.assertRaisesRegex(publisher.PublisherError, "canonical full-v1 matrix"):
+            publisher.validate_public_dataset(broken)
+
+        for original, replacement in (("runnable", "unsupported"),  # flip one entry each way
+                                      ("unsupported", "runnable")):
+            with self.subTest(original=original):
+                broken = copy.deepcopy(dataset)
+                item = next(
+                    coverage for coverage in broken["coverage"]
+                    if coverage["disposition"] == original
+                )
+                item["disposition"] = replacement
+                with mock.patch.object(
+                    publisher,
+                    "CANONICAL_FULL_V1_CASE_CATALOG_SHA256",
+                    publisher._case_disposition_catalog_sha256(broken["coverage"]),  # mutated digest
+                ), self.assertRaisesRegex(
+                    publisher.PublisherError,
+                    "requested dispositions" if original == "runnable"
+                    else "coverage dimensions",
+                ):
+                    publisher.validate_public_dataset(broken)
+
+    def test_workflow_matrix_and_catalog_digests_do_not_drift(self) -> None:
+        with tempfile.TemporaryDirectory() as temporary:
+            matrix_path = Path(temporary) / "matrix_full.json"
+            result = subprocess.run(  # generate the matrix with sweep_matrix.py
+                [
+                    sys.executable, str(ROOT / "sweep_matrix.py"),
+                    "--suites", "all", "--max-cases", "128",
+                    "--backends", "all", "--out", str(matrix_path),
+                ],
+                text=True,
+                capture_output=True,
+            )
+            self.assertEqual(result.returncode, 0, result.stderr)  # stderr shown on failure
+            if publisher.capability.provisional_precision_targets():
+                workflow = (
+                    ROOT.parent.parent / ".github" / "workflows" / "collectivex-sweep.yml"
+                ).read_text()
+                self.assertIn(
+                    "V1 sweeps require every precision capability cell to be resolved",
+                    workflow,
+                )
+                return  # digest checks below are skipped while provisional targets exist
+            self.assertEqual(  # generated file must hash to the pinned matrix digest
+                hashlib.sha256(matrix_path.read_bytes()).hexdigest(),
+                publisher.CANONICAL_FULL_V1_MATRIX_SHA256,
+            )
+            matrix = contracts.strict_load(matrix_path)  # loaded before the temp dir is removed
+        coverage = [
+            {
+                "case_id": item["case"]["case_id"],
+                "disposition": item["disposition"],
+            }
+            for item in matrix["requested_cases"]
+        ]
+        self.assertEqual(
+            publisher._case_disposition_catalog_sha256(coverage),
+            publisher.CANONICAL_FULL_V1_CASE_CATALOG_SHA256,
+        )
+        self.assertEqual(
+            (
+                len(matrix["include"]), len(coverage),
+                sum(item["disposition"] == "runnable" for item in coverage),
+                sum(item["disposition"] == "unsupported" for item in coverage),
+                sum(  # ladder entries over all requested cases
+                    len(item["case"]["ladder"].split())
+                    for item in matrix["requested_cases"]
+                ),
+                sum(  # ladder entries over runnable cases
+                    len(item["case"]["ladder"].split())
+                    for item in matrix["requested_cases"]
+                    if item["disposition"] == "runnable"
+                ),
+                sum(  # ladder entries over unsupported cases
+                    len(item["case"]["ladder"].split())
+                    for item in matrix["requested_cases"]
+                    if item["disposition"] == "unsupported"
+                ),
+            ),
+            (58, 608, 364, 244, 1600, 940, 660),  # pinned totals, same order as the tuple above
+        )
+        library: dict[tuple, set[str]] = {}  # (sku, shape, route) -> backends
+        system: dict[tuple, set[str]] = {}  # (shape, route) -> skus, nccl-ep cases only
+        routing: dict[tuple, list[tuple[str, bool]]] = {}  # (sku, backend, shape) -> routes
+        for requested in matrix["requested_cases"]:
+            if requested["disposition"] != "runnable":
+                continue
+            case = requested["case"]
+            shape = tuple(
+                case[field]
+                for field in (
+                    "workload", "mode", "hidden", "topk", "experts", "ep", "phase"
+                )
+            )
+            route = (case["routing"], case["eplb"])
+            if case["backend"] != "nccl-ep":
+                library.setdefault((requested["sku"], shape, route), set()).add(
+                    case["backend"]
+                )
+            else:
+                system.setdefault((shape, route), set()).add(requested["sku"])
+            routing.setdefault(  # recorded for every runnable case, whatever the backend
+                (requested["sku"], case["backend"], shape), []
+            ).append(route)
+        anchors = {("uniform", False), ("zipf", False), ("zipf", True)}
+        self.assertEqual(
+            {
+                "library": sum(len(variants) >= 2 for variants in library.values()),
+                "system": sum(len(variants) >= 2 for variants in system.values()),
+                "routing": sum(  # groups with exactly the three anchor routes
+                    len(variants) == 3 and set(variants) == anchors
+                    for variants in routing.values()
+                ),
+            },
+            publisher.REQUIRED_PROMOTION_COHORT_COUNTS,
+        )
+
+    def test_build_promotion_requires_canonical_full_matrix(self) -> None:
+        bundles = {
+            str(run_id): {
+                "id": str(run_id), "cases": [],
+                "manifest": {
+                    "matrix": {"sha256": "d" * 64},  # placeholder, presumably not the canonical digest
+                    "run": {
+                        "run_id": str(run_id), "run_attempt": 1,
+                        "qualification_index": run_id,
+                    },
+                },
+            }
+            for run_id in range(1, 4)  # three bundles: run ids 1..3
+        }
+        with mock.patch.object(
+            publisher, "load_bundle", side_effect=lambda _, bundle_id: bundles[bundle_id]
+        ), self.assertRaisesRegex(publisher.PublisherError, "canonical full-v1 matrix"):
+            publisher.build_dataset(object(), list(bundles), promote=True)
+
+    def test_rejection_is_quarantined_without_updating_dev_latest(self) -> None:
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve()
+            matrix, artifact = _unsupported_delivery(root)
+            store = publisher.Store(root / "store")
+            sentinel = b"existing-promoted-pointer\n"  # pre-existing pointer content
+            (store.channels / "dev-latest.json").write_bytes(sentinel)
+            (artifact / "unknown.json").write_text('{"format":"unknown"}')  # extra unknown-format file
+            with self.assertRaises(publisher.PublisherError):
+                publisher.ingest_command(_args(store.root, matrix, artifact))
+            self.assertEqual((store.channels / "dev-latest.json").read_bytes(), sentinel)
+            self.assertFalse((store.channels / "latest-attempt.json").exists())
+            self.assertEqual(list(store.datasets.iterdir()), [])  # no dataset was written
+            self.assertTrue(any(store.quarantine.iterdir()))  # something was quarantined
+
+    def test_repeated_rejection_is_content_idempotent(self) -> None:
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve()
+            matrix, artifact = _unsupported_delivery(root)
+            store = publisher.Store(root / "store")
+            (artifact / "unknown.json").write_text('{"format":"unknown"}')
+            with self.assertRaises(publisher.PublisherError):
+                publisher.ingest_command(_args(store.root, matrix, artifact))
+            counts = tuple(  # entry count per store directory after the first rejection
+                len(list(path.iterdir()))
+                for path in (store.incoming, store.quarantine, store.datasets, store.channels)
+            )
+            with self.assertRaises(publisher.PublisherError):  # same delivery, rejected again
+                publisher.ingest_command(_args(store.root, matrix, artifact))
+            self.assertEqual(
+                tuple(
+                    len(list(path.iterdir()))
+                    for path in (
+                        store.incoming, store.quarantine, store.datasets, store.channels
+                    )
+                ),
+                counts,  # directory entry counts are unchanged
+            )
+
+    def test_distinct_rejections_create_distinct_quarantine_objects(self) -> None:
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve()
+            matrix, artifact = _unsupported_delivery(root)
+            store = publisher.Store(root / "store")
+            unknown = artifact / "unknown.json"
+            unknown.write_text('{"format":"unknown-one"}')
+            with self.assertRaises(publisher.PublisherError):
+                publisher.ingest_command(_args(store.root, matrix, artifact))
+            first = {path.name for path in store.quarantine.iterdir()}
+            unknown.write_text('{"format":"unknown-two"}')  # different content for the second try
+            with self.assertRaises(publisher.PublisherError):
+                publisher.ingest_command(_args(store.root, matrix, artifact))
+            second = {path.name for path in store.quarantine.iterdir()}
+            self.assertNotEqual(second, first)
+            self.assertEqual(len(second), 2)  # two quarantine entries in total
+            self.assertEqual(list(store.datasets.iterdir()), [])
+            self.assertEqual(list(store.channels.iterdir()), [])
+
+    def test_zip_traversal_is_rejected(self) -> None:
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve()
+            archive = root / "bad.zip"
+            with zipfile.ZipFile(archive, "w") as handle:
+                handle.writestr("../escape.json", "{}")  # member name with a parent-directory step
+            with self.assertRaisesRegex(publisher.PublisherError, "escapes"):
+                publisher.extract_archive(archive, root / "out")
+
+    def test_store_and_directory_archive_reject_symlinks(self) -> None:
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve()
+            real = root / "real"
+            real.mkdir()
+            alias = root / "alias"
+            alias.symlink_to(real, target_is_directory=True)  # alias -> real (directory symlink)
+            with self.assertRaisesRegex(publisher.PublisherError, "symlinked parent"):
+                publisher.Store(alias / "store")
+            self.assertFalse((real / "store").exists())  # nothing created through the symlink
+            artifact = root / f"cxunsupported-{RUN['run_id']}-{RUN['run_attempt']}"
+            artifact.mkdir()
+            target = root / "target.json"
+            target.write_text("{}")
+            (artifact / "linked.json").symlink_to(target)  # symlinked file inside the artifact dir
+            with self.assertRaisesRegex(publisher.PublisherError, "symlink"):
+                publisher._archive_download_directory(artifact, root / "artifact.zip")
+
+    def test_offline_caller_metadata_is_validated_before_store_creation(self) -> None:
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve()
+            matrix, artifact = _unsupported_delivery(root)
+            store_root = root / "store"
+            args = _args(store_root, matrix, artifact)
+            args.run_id = "0"  # run id expected to be rejected
+            with self.assertRaisesRegex(publisher.PublisherError, "run-id"):
+                publisher.ingest_command(args)
+            self.assertFalse(store_root.exists())  # store directory was never created
+
+            promote = types.SimpleNamespace(
+                store_root=str(store_root), bundle=["not-a-digest"]
+            )
+            with self.assertRaisesRegex(publisher.PublisherError, "bundle IDs"):
+                publisher.promote_command(promote)
+            self.assertFalse(store_root.exists())  # still not created
+            with self.assertRaisesRegex(publisher.PublisherError, "absolute path"):
+                publisher._store_from_args(types.SimpleNamespace(store_root="relative-store"))
+
+    def test_store_rejects_group_or_world_writable_root(self) -> None:
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve() / "unsafe-store"
+            root.mkdir()
+            root.chmod(0o772)  # rwxrwx-w-: writable by group and by others
+            with self.assertRaisesRegex(publisher.PublisherError, "group/world writable"):
+                publisher.Store(root)
+
+    def test_retry_ordinals_must_be_contiguous_from_one(self) -> None:
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve()
+            matrix, artifact = _unsupported_delivery(root, (1, 3))  # presumably ordinals 1 and 3; verify
+            with self.assertRaisesRegex(publisher.PublisherError, "contiguous ordinals"):
+                publisher.ingest_command(_args(root / "store", matrix, artifact))
+
+    def test_delivery_rejects_extra_archive_and_non_native_member(self) -> None:
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve()
+            matrix, artifact = _unsupported_delivery(root)
+            extra = root / f"cxshard-extra-{RUN['run_id']}-{RUN['run_attempt']}"
+            extra.mkdir()
+            (extra / "extra.json").write_text("{}")
+            args = _args(root / "store-extra", matrix, artifact)
+            args.artifact.append(str(extra))  # pass a second artifact directory
+            with self.assertRaisesRegex(publisher.PublisherError, "archive set"):
+                publisher.ingest_command(args)
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve()
+            matrix, artifact = _unsupported_delivery(root)
+            (artifact / "notes.txt").write_text("not native evidence")  # stray text member
+            with self.assertRaisesRegex(publisher.PublisherError, "unconsumed"):
+                publisher.ingest_command(_args(root / "store-member", matrix, artifact))
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve()
+            matrix, artifact = _unsupported_delivery(root)
+            path = next(artifact.glob("*.json"))
+            terminal = json.loads(path.read_text())
+            terminal["outcome"]["reason"] = next(  # swap in any other capability failure reason
+                reason for reason in contracts.CAPABILITY_FAILURE_REASONS
+                if reason != terminal["outcome"]["reason"]
+            )
+            path.write_text(json.dumps(terminal))
+            with self.assertRaisesRegex(publisher.PublisherError, "reason differs"):
+                publisher.ingest_command(_args(root / "store-reason", matrix, artifact))
+
+    def test_rates_invert_latency_and_global_tokens_use_ep_size(self) -> None:
+        dataset = _dataset()
+        publisher.validate_public_dataset(dataset)  # the unmodified fixture must validate
+        rates = dataset["series"][0]["points"][0]["components"]["roundtrip"][
+            "activation_data_rate_gbps_at_latency_percentile"
+        ]
+        self.assertGreater(rates["p50"], rates["p99"])  # rate at p50 exceeds rate at p99
+        broken = copy.deepcopy(dataset)
+        broken["series"][0]["points"][0]["global_tokens"] = 128  # override global_tokens
+        with self.assertRaisesRegex(publisher.PublisherError, "EP size"):
+            publisher.validate_public_dataset(broken)
+        broken = copy.deepcopy(dataset)
+        broken["series"][0]["points"][0]["roundtrip_token_rate_at_latency_percentile"]["p99"] *= 2
+        with self.assertRaisesRegex(publisher.PublisherError, "token throughput"):
+            publisher.validate_public_dataset(broken)
+        broken = copy.deepcopy(dataset)
+        broken["attempts"][0]["evidence"][0]["point_id"] = identity.point_id(  # id for 16 tokens/rank
+            series=broken["series"][0]["series_id"], tokens_per_rank=16
+        )
+        with self.assertRaisesRegex(publisher.PublisherError, "point evidence"):
+            publisher.validate_public_dataset(broken)
+        broken = copy.deepcopy(dataset)
+        broken["attempts"][0]["series_id"] = None  # drop the first attempt's series link
+        with self.assertRaisesRegex(publisher.PublisherError, "present exactly for selected success"):
+            publisher.validate_public_dataset(broken)
+        broken = copy.deepcopy(dataset)
+        component = broken["series"][0]["points"][0]["components"]["roundtrip"]
+        component["activation_data_rate_gbps_at_latency_percentile"] = None  # remove the rates
+        with self.assertRaisesRegex(publisher.PublisherError, "measured data rates are missing"):
+            publisher.validate_public_dataset(broken)
+
+        for mutate in (  # each callable changes one model/workload field of a series
+            lambda item: item.update({"model": "different-model"}),
+            lambda item: item["workload"].update({"hidden": 4096}),
+            lambda item: item["workload"].update({"top_k": 4}),
+            lambda item: item["workload"].update({"experts": 128}),
+        ):
+            broken = copy.deepcopy(dataset)
+            mutate(broken["series"][0])
+            with self.assertRaisesRegex(publisher.PublisherError, "frozen v1"):
+                publisher.validate_public_dataset(broken)
+
+        broken = copy.deepcopy(dataset)
+        broken["series"][0]["eplb"]["mapping_sha256"] = "f" * 64  # overwrite the mapping digest
+        with self.assertRaisesRegex(publisher.PublisherError, "claims a plan"):
+            publisher.validate_public_dataset(broken)
+
+        broken = copy.deepcopy(dataset)
+        broken["series"][0]["backend"].update({  # relabel the series backend as nccl-ep
+            "id": "nccl-ep", "label": publisher.BACKEND_LABELS["nccl-ep"],
+            "role": "reference", "generation": "rccl",
+        })
+        broken["coverage"][0]["backend"] = "nccl-ep"
+        with self.assertRaisesRegex(publisher.PublisherError, "configuration"):
+            publisher.validate_public_dataset(broken)
+
+    def test_public_coverage_binds_exact_topology_and_case_identity(self) -> None:
+        dataset = _promoted_dataset()
+        dataset["promotion"]["status"] = "diagnostic"  # NOTE(review): presumably skips promotion-only checks; confirm
+        self.assertEqual(  # fixture contains both dispositions
+            {item["disposition"] for item in dataset["coverage"]},
+            {"runnable", "unsupported"},
+        )
+        for item in dataset["coverage"]:
+            self.assertEqual(  # topology keys match the declared fields, in order
+                tuple(item["topology"]), publisher.COVERAGE_TOPOLOGY_FIELDS
+            )
+        publisher.validate_public_dataset(dataset)
+
+        broken = copy.deepcopy(dataset)
+        unsupported = next(
+            item for item in broken["coverage"]
+            if item["disposition"] == "unsupported"
+        )
+        unsupported["topology"]["nodes"] = 2  # change only the node count
+        with self.assertRaisesRegex(publisher.PublisherError, "capability registry"):
+            publisher.validate_public_dataset(broken)
+
+        broken = copy.deepcopy(dataset)
+        unsupported = next(
+            item for item in broken["coverage"]
+            if item["disposition"] == "unsupported"
+        )
+        unsupported["sku"] = "mi325x"  # move the entry to another SKU
+        topology = publisher.capability.topology_for("mi325x", 8)
+        self.assertIsNotNone(topology)
+        unsupported["topology"] = publisher._coverage_topology({  # rebuilt from that SKU's topology
+            "ep_size": 8, **topology,
+        })
+        with self.assertRaisesRegex(publisher.PublisherError, "case identity"):
+            publisher.validate_public_dataset(broken)
+
+    def test_cohort_contract_and_labels_name_mode_explicitly(self) -> None:
+        dataset = _promoted_dataset()
+        dataset["promotion"]["status"] = "diagnostic"
+        publisher.validate_public_dataset(dataset)  # the baseline must validate
+        for cohort in dataset["cohorts"]:
+            self.assertIn("mode", cohort["controlled_factors"])
+            self.assertIn("/ normal /", cohort["label"])  # label contains the mode segment
+
+        broken = copy.deepcopy(dataset)
+        cohort = broken["cohorts"][0]
+        cohort["controlled_factors"].remove("mode")  # drop "mode" from the controlled factors
+        cohort["cohort_id"] = publisher._derived_id("cxcohort-v1-", {  # recompute from edited factors
+            "kind": cohort["kind"], "series_ids": cohort["series_ids"],
+            "controlled_factors": cohort["controlled_factors"],
+            "varying_factors": cohort["varying_factors"],
+        })
+        broken["cohorts"].sort(key=lambda item: item["cohort_id"])
+        with self.assertRaisesRegex(publisher.PublisherError, "cohort factors"):
+            publisher.validate_public_dataset(broken)
+
+    def test_routing_and_eplb_facts_must_match_across_repeats(self) -> None:
+        raw, _ = _native_fixture()
+        descriptor = publisher._eplb_descriptor(raw)
+        facts = publisher._routing_facts(raw["measurement"]["rows"][0])
+        self.assertEqual(  # identical repeats yield the shared value
+            publisher._exact_repeat_value([descriptor, copy.deepcopy(descriptor)], "EPLB"),
+            descriptor,
+        )
+        self.assertEqual(
+            publisher._exact_repeat_value([facts, copy.deepcopy(facts)], "routing"),
+            facts,
+        )
+        changed = copy.deepcopy(facts)
+        changed["hotspot_ratio"] += 0.1  # make one repeat differ
+        with self.assertRaisesRegex(publisher.PublisherError, "routing differs"):
+            publisher._exact_repeat_value([facts, changed], "routing")
+
+        dataset = _promoted_dataset()
+        dataset["promotion"]["status"] = "diagnostic"
+        eplb = next(item for item in dataset["series"] if item["eplb"]["enabled"])
+        eplb["points"][0]["routing"]["empty_expert_count"] = 280  # accepted by the call below
+        publisher.validate_public_dataset(dataset)
+        eplb["points"][0]["routing"]["empty_expert_count"] = 288  # rejected by the call below
+        with self.assertRaisesRegex(publisher.PublisherError, "routing/load facts"):
+            publisher.validate_public_dataset(dataset)
+
+        for field, value in (  # each override is expected to invalidate the descriptor
+            ("mapping_sha256", "0" * 64),
+            ("redundant_experts", 31),
+            ("replicated_experts", 1),
+            ("max_replicas", 2),
+            ("replicated_experts", 257),
+            ("max_replicas", 999),
+            ("imbalance_after", 0.4),
+            ("planner", "different-planner"),
+            ("reference_tokens_per_rank", 1024),
+        ):
+            broken = _promoted_dataset()  # fresh fixture for every override
+            broken["promotion"]["status"] = "diagnostic"
+            descriptor = next(
+                item["eplb"] for item in broken["series"] if item["eplb"]["enabled"]
+            )
+            descriptor[field] = value
+            with self.subTest(eplb_field=field), self.assertRaisesRegex(
+                publisher.PublisherError, "EPLB descriptor"
+            ):
+                publisher.validate_public_dataset(broken)
+
+    def test_publisher_owns_stable_rankings_and_recommendations(self) -> None:
+        fast, fast_internal = _series("fast", "deepep", decision_grade=True)
+        slow, slow_internal = _series("slow", "uccl", decision_grade=True)
+        reference, reference_internal = _series("reference", "nccl-ep", decision_grade=True)
+        reference_peer, reference_peer_internal = _series(
+            "reference-peer", "nccl-ep", decision_grade=True
+        )
+        reference["backend"]["role"] = "reference"  # both nccl-ep series act as reference backends
+        reference_peer["backend"]["role"] = "reference"
+        reference_peer["system"].update({"sku": "h200-dgxc", "label": "NVIDIA H200"})  # other system
+        cohorts, rankings, recommendations, _ = publisher.build_decisions(
+            [fast, slow, reference, reference_peer], {
+                fast["series_id"]: fast_internal,
+                slow["series_id"]: slow_internal,
+                reference["series_id"]: reference_internal,
+                reference_peer["series_id"]: reference_peer_internal,
+            }
+        )
+        library = next(item for item in cohorts if item["kind"] == "library")
+        ranking = next(item for item in rankings if item["cohort_id"] == library["cohort_id"]
+                       and item["metric"]["measure"] == "latency_us"
+                       and item["metric"]["statistic"] == "p99")  # the p99 latency ranking
+        self.assertTrue(library["eligibility"]["decision_grade"])
+        self.assertEqual(ranking["entries"][0]["series_id"], fast["series_id"])  # "fast" leads
+        self.assertTrue(any(item["series_id"] == fast["series_id"] for item in recommendations))
+        self.assertFalse(any(  # the reference series never appears in a library ranking
+            entry["series_id"] == reference["series_id"]
+            for item in rankings if item["cohort_id"] == library["cohort_id"]
+            for entry in item["entries"]
+        ))
+        self.assertTrue(any(  # ...but it is a member of some "system" cohort
+            item["kind"] == "system" and reference["series_id"] in item["series_ids"]
+            for item in cohorts
+        ))
+
+    def test_routing_evidence_is_experimental_and_not_a_configuration_recommendation(self) -> None:
+        dataset = _promoted_dataset()
+        routing = next(item for item in dataset["cohorts"] if item["kind"] == "routing")
+        members = [  # the series objects that belong to the routing cohort
+            item for item in dataset["series"]
+            if item["series_id"] in routing["series_ids"]
+        ]
+        self.assertEqual(  # members cover exactly these (routing, eplb) combinations
+            {(item["workload"]["routing"], item["workload"]["eplb"]) for item in members},
+            {("uniform", False), ("zipf", False), ("zipf", True)},
+        )
+        self.assertIn("implementation-static-build", routing["controlled_factors"])
+        self.assertIn("resource", routing["controlled_factors"])
+        self.assertEqual(
+            routing["varying_factors"],
+            ["workload.routing", "workload.eplb", "implementation-config"],
+        )
+        self.assertEqual(  # one shared routing-control hash across members
+            len({item["build"]["routing_control_sha256"] for item in members}),
+            1,
+        )
+        self.assertGreater(  # but more than one implementation contract hash
+            len({item["build"]["implementation_contract_sha256"] for item in members}),
+            1,
+        )
+        self.assertEqual(len({json.dumps(item["resource"], sort_keys=True) for item in members}), 1)
+        self.assertEqual(routing["publication_tier"], "comparable-experimental")
+        self.assertTrue(any(  # the routing cohort is ranked
+            item["cohort_id"] == routing["cohort_id"] for item in dataset["rankings"]
+        ))
+        self.assertFalse(any(  # ...but never recommended
+            item["cohort_id"] == routing["cohort_id"] for item in dataset["recommendations"]
+        ))
+        self.assertTrue(all(  # every recommendation is official tier
+            item["publication_tier"] == "official"
+            for item in dataset["recommendations"]
+        ))
+        self.assertFalse(any(  # no recommendation points at any experimental-tier cohort
+            dataset_cohort["publication_tier"] == "comparable-experimental"
+            and item["cohort_id"] == dataset_cohort["cohort_id"]
+            for item in dataset["recommendations"]
+            for dataset_cohort in dataset["cohorts"]
+        ))
+        self.assertTrue(all(  # sensitivities of the routing cohort carry the experimental tier
+            item["publication_tier"] == "comparable-experimental"
+            for item in dataset["sensitivities"]
+            if item["cohort_id"] == routing["cohort_id"]
+        ))
+
+    def test_routing_implementation_mismatch_blocks_all_decisions(self) -> None:
+        dataset = _promoted_dataset()
+        published = next(item for item in dataset["cohorts"] if item["kind"] == "routing")
+        members = [
+            item for item in dataset["series"]
+            if item["series_id"] in published["series_ids"]
+        ]
+        zipf = next(  # the zipf member without EPLB
+            item for item in members
+            if item["workload"]["routing"] == "zipf" and not item["workload"]["eplb"]
+        )
+        zipf["build"]["implementation_contract_sha256"] = "f" * 64  # overwrite its contract hash
+        internals = {}  # per-series internals rebuilt from the published roundtrip values
+        for member in members:
+            point = member["points"][0]
+            roundtrip = point["components"]["roundtrip"]
+            metrics = {  # p50/p99 of latency and of both data-rate fields
+                "latency_us": {
+                    name: roundtrip["latency_us"][name] for name in ("p50", "p99")
+                },
+                **{
+                    field: {
+                        name: roundtrip[field][name] for name in ("p50", "p99")
+                    }
+                    for field in (
+                        "activation_data_rate_gbps_at_latency_percentile",
+                        "total_logical_data_rate_gbps_at_latency_percentile",
+                    )
+                },
+            }
+            internals[member["series_id"]] = {
+                "run_metrics": {  # runs "0".."2" all share the same metrics object
+                    str(run): {point["tokens_per_rank"]: metrics}
+                    for run in range(3)
+                }
+            }
+        cohorts, rankings, recommendations, sensitivities = publisher.build_decisions(
+            members, internals
+        )
+        routing = next(item for item in cohorts if item["kind"] == "routing")
+        self.assertFalse(routing["eligibility"]["decision_grade"])
+        self.assertIn(
+            "implementation-config-mismatch", routing["eligibility"]["reasons"]
+        )
+        self.assertEqual((rankings, recommendations, sensitivities), ([], [], []))  # nothing emitted
+
+    def test_promoted_series_fields_are_bound_to_case_and_series_identities(self) -> None:
+        dataset = _promoted_dataset()
+        changed = copy.deepcopy(dataset)
+        series = next(
+            item for item in changed["series"]
+            if item["system"]["sku"] == "h100-dgxc"
+        )
+        series["system"].update({  # relabel an H100 series as H200
+            "sku": "h200-dgxc", "label": "NVIDIA H200",
+            "topology_class": "h200-nvlink-island",
+        })
+        for case_id in series["case_ids"]:  # rewrite the matching coverage rows consistently
+            coverage = next(
+                item for item in changed["coverage"] if item["case_id"] == case_id
+            )
+            coverage["sku"] = "h200-dgxc"
+            coverage["topology"] = publisher._coverage_topology(series["system"])
+        with self.assertRaisesRegex(publisher.PublisherError, "configuration|case identity"):
+            publisher.validate_public_dataset(changed)
+
+        for field, value in (  # changing any single build field must be rejected
+            ("source_sha", "b" * 40),
+            ("image_digest", "sha256:" + "4" * 64),
+            ("squash_sha256", "5" * 64),
+            ("runtime_fingerprint_sha256", "6" * 64),
+            ("implementation_contract_sha256", "7" * 64),
+            ("public_config_sha256", "9" * 64),
+            ("routing_control_sha256", "8" * 64),
+        ):
+            changed = copy.deepcopy(dataset)
+            changed["series"][0]["build"][field] = value
+            with self.subTest(build_field=field), self.assertRaisesRegex(
+                publisher.PublisherError, "commit"
+            ):
+                publisher.validate_public_dataset(changed)
+        changed = copy.deepcopy(dataset)
+        changed["series"][0]["workload"]["workload_id"] = identity.workload_id(  # foreign workload id
+            {"changed": True}
+        )
+        with self.assertRaisesRegex(publisher.PublisherError, "committed factors"):
+            publisher.validate_public_dataset(changed)
+
+        for mutate, message in (  # (in-place mutation, expected error pattern) pairs
+            (lambda item: item["backend"].update({
+                "generation": "fabricated", "version": "fabricated-999",
+            }), "configuration"),
+            (lambda item: item["resource"].update({
+                "profile": "profile-fabricated", "configured_units": 99,
+            }), "configuration"),
+            (lambda item: item["system"].update({"label": "Fabricated H100"}), "projection|commit"),
+        ):
+            changed = copy.deepcopy(dataset)
+            mutate(changed["series"][0])
+            with self.assertRaisesRegex(publisher.PublisherError, message):
+                publisher.validate_public_dataset(changed)
+
+        diagnostic = _dataset()  # the _dataset() fixture must reject a changed source_sha too
+        diagnostic["series"][0]["build"]["source_sha"] = "b" * 40
+        with self.assertRaisesRegex(publisher.PublisherError, "committed factors"):
+            publisher.validate_public_dataset(diagnostic)
+
+    def test_all_decision_metrics_require_stable_repeat_ordering(self) -> None:
+        fast, fast_internal = _series("ordering-fast", "deepep", decision_grade=True)
+        slow, slow_internal = _series("ordering-slow", "uccl", decision_grade=True)
+        internals = {
+            fast["series_id"]: fast_internal,
+            slow["series_id"]: slow_internal,
+        }
+
+        cohorts, rankings, recommendations, _ = publisher.build_decisions(  # untouched baseline
+            [fast, slow], internals
+        )
+        library = next(item for item in cohorts if item["kind"] == "library")
+        self.assertTrue(library["eligibility"]["decision_grade"])
+        self.assertEqual(  # six rankings are expected for the library cohort
+            len([item for item in rankings if item["cohort_id"] == library["cohort_id"]]),
+            6,
+        )
+        self.assertEqual(  # and exactly one recommendation
+            len([
+                item for item in recommendations
+                if item["cohort_id"] == library["cohort_id"]
+            ]),
+            1,
+        )
+
+        for statistic in ("p50", "p99"):  # perturb only run "1": slow's data rates become 2x fast's
+            for field in (
+                "activation_data_rate_gbps_at_latency_percentile",
+                "total_logical_data_rate_gbps_at_latency_percentile",
+            ):
+                slow_internal["run_metrics"]["1"][8][field][statistic] = (
+                    fast_internal["run_metrics"]["1"][8][field][statistic] * 2
+                )
+        cohorts, rankings, recommendations, _ = publisher.build_decisions(  # rebuild after perturbing
+            [fast, slow], internals
+        )
+        library = next(item for item in cohorts if item["kind"] == "library")
+        self.assertFalse(library["eligibility"]["decision_grade"])
+        self.assertIn("unstable-ordering", library["eligibility"]["reasons"])
+        self.assertFalse(any(  # no ranking survives for the cohort
+            item["cohort_id"] == library["cohort_id"] for item in rankings
+        ))
+        self.assertFalse(any(  # and no recommendation either
+            item["cohort_id"] == library["cohort_id"] for item in recommendations
+        ))
+
+    def test_p99_bootstrap_is_deterministic_and_dataset_bound(self) -> None:
+        fast, fast_internal = _series("bootstrap-fast", "deepep", decision_grade=True)
+        slow, slow_internal = _series("bootstrap-slow", "uccl", decision_grade=True)
+        internals = {
+            fast["series_id"]: fast_internal,
+            slow["series_id"]: slow_internal,
+        }
+
+        first = publisher._hierarchical_p99_ratio(
+            fast["series_id"], slow["series_id"], 8, internals, "a" * 64
+        )
+        repeated = publisher._hierarchical_p99_ratio(  # same arguments as `first`
+            fast["series_id"], slow["series_id"], 8, internals, "a" * 64
+        )
+        rebound = publisher._hierarchical_p99_ratio(  # only the last argument differs
+            fast["series_id"], slow["series_id"], 8, internals, "b" * 64
+        )
+
+        self.assertEqual(first, repeated)  # identical inputs give an identical result
+        self.assertEqual(first["resamples"], 10_000)
+        self.assertEqual(first["confidence"], 0.95)
+        self.assertEqual(first["equivalence_band"], 0.05)
+        self.assertTrue(first["all_runs_agree"])
+        self.assertTrue(first["baseline_wins"])
+        self.assertGreater(first["ci95"][0], 1.05)  # first ci95 entry exceeds 1.05
+        self.assertNotEqual(first["seed_sha256"], rebound["seed_sha256"])  # seed follows the binding
+
+    def test_p99_equivalence_band_emits_competition_tie_without_recommendation(self) -> None:
+        fast, fast_internal = _series("tie-fast", "deepep", decision_grade=True)
+        near, near_internal = _series("tie-near", "uccl", decision_grade=True)
+        fast_point = fast["points"][0]
+        near_point = near["points"][0]
+        fast_component = fast_point["components"]["roundtrip"]
+        near_component = near_point["components"]["roundtrip"]
+        for statistic, latency in fast_component["latency_us"].items():
+            near_latency = latency * 1.03  # public point: "near" latency is 1.03x "fast"
+            near_component["latency_us"][statistic] = near_latency
+            for field, byte_field in (  # recompute both rates as bytes / (latency * 1000)
+                ("activation_data_rate_gbps_at_latency_percentile", "activation_data_bytes"),
+                ("total_logical_data_rate_gbps_at_latency_percentile", "total_logical_bytes"),
+            ):
+                near_component[field][statistic] = (
+                    near_component["byte_provenance"][byte_field]
+                    / (near_latency * 1000.0)
+                )
+            near_point["roundtrip_token_rate_at_latency_percentile"][statistic] = (
+                near_point["global_tokens"] / (near_latency * 1e-6)
+            )
+        for run_id, fast_metrics in fast_internal["run_metrics"].items():  # same 1.03x per run
+            for statistic in ("p50", "p99"):
+                latency = fast_metrics[8]["latency_us"][statistic] * 1.03
+                near_internal["run_metrics"][run_id][8]["latency_us"][statistic] = latency
+                for field, byte_field in (
+                    ("activation_data_rate_gbps_at_latency_percentile", "activation_data_bytes"),
+                    ("total_logical_data_rate_gbps_at_latency_percentile", "total_logical_bytes"),
+                ):
+                    near_internal["run_metrics"][run_id][8][field][statistic] = (
+                        near_component["byte_provenance"][byte_field]
+                        / (latency * 1000.0)
+                    )
+            near_internal["trial_blocks"][run_id][8]["roundtrip"] = tuple(  # scale every sample
+                tuple(sample * 1.03 for sample in block)
+                for block in fast_internal["trial_blocks"][run_id][8]["roundtrip"]
+            )
+        internals = {
+            fast["series_id"]: fast_internal,
+            near["series_id"]: near_internal,
+        }
+
+        cohorts, rankings, recommendations, _ = publisher.build_decisions(
+            [fast, near], internals, dataset_binding="c" * 64
+        )
+        library = next(item for item in cohorts if item["kind"] == "library")
+        ranking = next(  # the p99 latency ranking of the library cohort
+            item for item in rankings
+            if item["cohort_id"] == library["cohort_id"]
+            and item["metric"]["measure"] == "latency_us"
+            and item["metric"]["statistic"] == "p99"
+        )
+        self.assertEqual([entry["rank"] for entry in ranking["entries"]], [1, 1])  # both rank 1
+        self.assertFalse(any(  # a tie yields no recommendation for the cohort
+            item["cohort_id"] == library["cohort_id"]
+            for item in recommendations
+        ))
+        self.assertNotIn(  # "trial_blocks" must not appear in the serialized series or rankings
+            "trial_blocks", json.dumps({"series": [fast, near], "rankings": rankings})
+        )
+
+    def test_p99_winner_requires_every_run_to_agree(self) -> None:
+        fast, fast_internal = _series("run-fast", "deepep", decision_grade=True)
+        slow, slow_internal = _series("run-slow", "uccl", decision_grade=True)
+        ratios = {"0": 0.98, "1": 1.20, "2": 1.20}  # run "0" scales below 1, the others above
+        for run_id, ratio in ratios.items():
+            slow_internal["trial_blocks"][run_id][8]["roundtrip"] = tuple(  # slow = fast * ratio
+                tuple(sample * ratio for sample in block)
+                for block in fast_internal["trial_blocks"][run_id][8]["roundtrip"]
+            )
+        result = publisher._hierarchical_p99_ratio(
+            fast["series_id"], slow["series_id"], 8,
+            {
+                fast["series_id"]: fast_internal,
+                slow["series_id"]: slow_internal,
+            },
+            "d" * 64,
+        )
+        self.assertFalse(result["all_runs_agree"])
+        self.assertFalse(result["baseline_wins"])
+        self.assertTrue(result["tie"])  # disagreement is reported as a tie
+
+    def test_precision_cohorts_isolate_axes_and_never_recommend(self) -> None:
+        profiles = (  # the control profile first, then three other profiles
+            identity.V1_CONTROL_PRECISION_PROFILE,
+            "d-fp8-e4m3fn-b128-f32-fused.c-bf16",
+            "d-bf16.c-logfmt10-dynamic64",
+            "d-fp8-e4m3fn-b128-f32-fused.c-logfmt10-dynamic64",
+        )
+        series = []
+        internals = {}
+        for index, profile_id in enumerate(profiles):
+            item, internal = _series(
+                f"precision-{index}", "deepep", decision_grade=True
+            )
+            precision = identity.precision_profile(profile_id)
+            item["suite"] = (  # index 0 (control) gets the non-precision suite name
+                "ep-low-latency-v1"
+                if index == 0
+                else "ep-precision-low-latency-v1"
+            )
+            item["mode"] = "low-latency"
+            item["publication_tier"] = (  # only the control series is official tier
+                "official" if index == 0 else "comparable-experimental"
+            )
+            item["workload"].update({
+                "precision_profile": profile_id,
+                "dispatch_precision": precision["dispatch"],
+                "combine_precision": precision["combine"],
+            })
+            item["series_id"] = identity.series_id({"precision-fixture": profile_id})  # id per profile
+            series.append(item)
+            internals[item["series_id"]] = internal
+
+        cohorts, rankings, recommendations, sensitivities = publisher.build_decisions(
+            series, internals, dataset_binding="e" * 64
+        )
+        precision_cohorts = [
+            cohort for cohort in cohorts
+            if cohort["kind"] in publisher.PRECISION_COHORT_KINDS
+        ]
+        self.assertEqual(  # expected number of cohorts per precision kind
+            {kind: sum(cohort["kind"] == kind for cohort in precision_cohorts)
+             for kind in publisher.PRECISION_COHORT_KINDS},
+            {"dispatch-precision": 2, "combine-precision": 2, "precision-pair": 1},
+        )
+        self.assertTrue(all(  # all precision cohorts: experimental tier and decision grade
+            cohort["publication_tier"] == "comparable-experimental"
+            and cohort["eligibility"]["decision_grade"]
+            for cohort in precision_cohorts
+        ))
+        self.assertEqual(len(rankings), 30)
+        self.assertEqual(len(sensitivities), 24)
+        self.assertEqual(recommendations, [])  # precision cohorts never produce recommendations
+        pair = next(
+            cohort for cohort in precision_cohorts
+            if cohort["kind"] == "precision-pair"
+        )
+        self.assertEqual(
+            pair["varying_factors"],
+            [
+                "dispatch-precision", "combine-precision", "precision-profile",
+                "resource",
+            ],
+        )
+        self.assertNotIn("resource", pair["controlled_factors"])  # resource varies for the pair
+        self.assertFalse(any(  # the pair cohort has no sensitivities
+            sensitivity["cohort_id"] == pair["cohort_id"]
+            for sensitivity in sensitivities
+        ))
+
+    def test_private_trial_copy_is_component_extensible(self) -> None:
+        blocks = [[float(trial + iteration + 1) for iteration in range(8)]  # 64 lists of 8 floats
+                  for trial in range(64)]
+        copied = publisher._private_trial_components({
+            "points": [{
+                "tokens_per_rank": 8,
+                "components": {
+                    "roundtrip": {"availability": "measured", "trials": blocks},
+                    "stage": {"availability": "measured", "trials": blocks},
+                    "combine": {"availability": "not-applicable", "trials": None},
+                },
+            }],
+        })
+        self.assertEqual(set(copied[8]), {"roundtrip", "stage", "combine"})  # keyed by tokens_per_rank
+        self.assertEqual(len(copied[8]["stage"]), 64)
+        self.assertIsNone(copied[8]["combine"])  # a component with trials=None stays None
+
+    def test_missing_private_trials_blocks_decision_grade(self) -> None:
+        fast, fast_internal = _series("trials-fast", "deepep", decision_grade=True)
+        slow, slow_internal = _series("trials-slow", "uccl", decision_grade=True)
+        del slow_internal["trial_blocks"]  # remove the private trial data of one series
+        cohorts, rankings, recommendations, sensitivities = publisher.build_decisions(
+            [fast, slow], {
+                fast["series_id"]: fast_internal,
+                slow["series_id"]: slow_internal,
+            }
+        )
+        library = next(item for item in cohorts if item["kind"] == "library")
+        self.assertFalse(library["eligibility"]["decision_grade"])
+        self.assertIn("missing-trial-blocks", library["eligibility"]["reasons"])
+        self.assertEqual((rankings, recommendations, sensitivities), ([], [], []))  # nothing emitted
+
+    def test_extra_eligibility_reason_blocks_decision_grade(self) -> None:
+        allocations = [identity.allocation_id({"run": run}) for run in range(3)]
+        eligibility = publisher._eligibility_record(  # only extra_reasons is non-empty here
+            allocations, complete=True, correct=True, measured=True,
+            stable_ordering=True, p50_ratio=1.01, p99_ratio=1.02,
+            extra_reasons=["incomplete-provenance"],
+        )
+        self.assertFalse(eligibility["decision_grade"])
+        self.assertEqual(eligibility["reasons"], ["incomplete-provenance"])
+        self.assertIs(publisher._eligibility(eligibility, "fixture"), eligibility)  # same object back
+        broken = {**eligibility, "decision_grade": True}  # grade contradicts the recorded reason
+        with self.assertRaisesRegex(publisher.PublisherError, "promotion gates"):
+            publisher._eligibility(broken, "fixture")
+
+    def test_schema_is_strict_and_channel_target_must_be_complete(self) -> None:
+        dataset = _dataset()
+        dataset["unexpected"] = True  # an unknown top-level key must be rejected
+        with self.assertRaises(publisher.PublisherError):
+            publisher.validate_public_dataset(dataset)
+        with mock.patch.object(publisher, "MAX_PUBLIC_DATASET_BYTES", 1), self.assertRaisesRegex(
+            publisher.PublisherError, "serving size limit"
+        ):
+            publisher.validate_public_dataset(_dataset())  # limit patched to 1 byte
+        with tempfile.TemporaryDirectory() as temporary:
+            store = publisher.Store(Path(temporary).resolve())
+            dataset = _promoted_dataset()
+            with mock.patch.object(  # patch the expected catalog hash and cohort counts to the fixture
+                publisher, "CANONICAL_FULL_V1_CASE_CATALOG_SHA256",
+                publisher._case_disposition_catalog_sha256(dataset["coverage"]),
+            ), mock.patch.object(
+                publisher, "REQUIRED_PROMOTION_COHORT_COUNTS", _cohort_counts(dataset),
+            ):
+                digest, size = store.install_dataset(dataset)
+                store.update_channel("dev-latest", digest, size, dataset["generated_at"])
+                self.assertEqual(
+                    store.verify_channel("dev-latest")["dataset"]["sha256"], digest
+                )
+                channel_path = store.channels / "dev-latest.json"
+                pointer = publisher.strict_load(channel_path)
+                pointer["generated_at"] = "2099-01-01T00:00:00Z"  # tamper with the stored pointer
+                channel_path.write_bytes(contracts.canonical_json_bytes(pointer))
+                with self.assertRaisesRegex(publisher.PublisherError, "metadata differs"):
+                    store.verify_channel("dev-latest")
+                store.update_channel("dev-latest", digest, size, dataset["generated_at"])  # must succeed
+                with self.assertRaisesRegex(publisher.PublisherError, "metadata differs"):
+                    store.update_channel(  # wrong size
+                        "dev-latest", digest, size + 1, dataset["generated_at"]
+                    )
+                with self.assertRaisesRegex(publisher.PublisherError, "metadata differs"):
+                    store.update_channel(  # different timestamp
+                        "dev-latest", digest, size, "2026-07-05T00:00:00Z"
+                    )
+                os.chmod(channel_path, 0o666)  # channel file with mode 666 is rejected
+                with self.assertRaisesRegex(publisher.PublisherError, "regular 644"):
+                    store.verify_channel("dev-latest")
+                os.chmod(channel_path, 0o644)
+                dataset_dir = store.datasets / digest
+                os.chmod(dataset_dir, 0o755)  # dataset directory with mode 755 is rejected
+                with self.assertRaisesRegex(publisher.PublisherError, "mode differs"):
+                    store.verify_channel("dev-latest")
+                os.chmod(dataset_dir, 0o555)
+                os.chmod(dataset_dir / "dataset.json", 0o644)  # dataset file with mode 644 is rejected
+                with self.assertRaisesRegex(publisher.PublisherError, "mode differs"):
+                    store.verify_channel("dev-latest")
+                os.chmod(dataset_dir / "dataset.json", 0o444)
+                os.chmod(dataset_dir, 0o755)  # make the directory writable to delete the marker
+                (dataset_dir / "COMPLETE").unlink()
+                os.chmod(dataset_dir, 0o555)  # restore the mode so only the missing marker differs
+                with self.assertRaisesRegex(publisher.PublisherError, "incomplete"):
+                    store.verify_channel("dev-latest")
+
+    def test_store_modes_do_not_depend_on_process_umask(self) -> None:
+        with tempfile.TemporaryDirectory() as temporary:
+            previous = os.umask(0o077)  # restrictive umask for all store operations below
+            try:
+                store = publisher.Store(Path(temporary).resolve())
+                dataset = _promoted_dataset()
+                with mock.patch.object(  # patch the expected catalog hash and cohort counts
+                    publisher, "CANONICAL_FULL_V1_CASE_CATALOG_SHA256",
+                    publisher._case_disposition_catalog_sha256(dataset["coverage"]),
+                ), mock.patch.object(
+                    publisher, "REQUIRED_PROMOTION_COHORT_COUNTS",
+                    _cohort_counts(dataset),
+                ):
+                    digest, size = store.install_dataset(dataset)
+                    store.update_channel(
+                        "dev-latest", digest, size, dataset["generated_at"]
+                    )
+                with store.locked():  # enter the lock once; publisher.lock is stat'ed below
+                    pass
+            finally:
+                os.umask(previous)  # always restore the process umask
+            self.assertEqual(  # store root
+                store.root.stat().st_mode & 0o777,
+                0o750,
+            )
+            self.assertEqual(  # channel pointer file
+                (store.channels / "dev-latest.json").stat().st_mode & 0o777,
+                0o644,
+            )
+            self.assertEqual(  # installed dataset file
+                (store.datasets / digest / "dataset.json").stat().st_mode & 0o777,
+                0o444,
+            )
+            self.assertEqual(  # lock file
+                (store.locks / "publisher.lock").stat().st_mode & 0o777,
+                0o600,
+            )
+
+    def test_verify_requires_a_promoted_dev_latest_channel(self) -> None:
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve()
+            args = types.SimpleNamespace(  # stand-in for parsed CLI arguments, no channel given
+                store_root=str(root / "store"), channel=None, bundle=[]
+            )
+            with self.assertRaises(publisher.PublisherError):  # nothing has been published yet
+                publisher.verify_command(args)
+            store = publisher.Store(args.store_root)
+            dataset = _promoted_dataset()
+            with mock.patch.object(  # patch the expected catalog hash and cohort counts
+                publisher, "CANONICAL_FULL_V1_CASE_CATALOG_SHA256",
+                publisher._case_disposition_catalog_sha256(dataset["coverage"]),
+            ), mock.patch.object(
+                publisher, "REQUIRED_PROMOTION_COHORT_COUNTS", _cohort_counts(dataset),
+            ):
+                digest, size = store.install_dataset(dataset)
+                store.update_channel(
+                    "dev-latest", digest, size, dataset["generated_at"]
+                )
+                result = publisher.verify_command(args)
+                self.assertEqual(set(result["channels"]), {"dev-latest"})  # channel=None checks dev-latest
+                explicit = types.SimpleNamespace(
+                    store_root=args.store_root, channel=["dev-latest"], bundle=[]
+                )
+                self.assertEqual(  # naming the channel explicitly gives the same result
+                    publisher.verify_command(explicit)["channels"], result["channels"]
+                )
+            unknown = types.SimpleNamespace(
+                store_root=args.store_root, channel=["latest-attempt"], bundle=[]
+            )
+            with self.assertRaisesRegex(publisher.PublisherError, "unknown channel"):
+                publisher.verify_command(unknown)
+
+
+if __name__ == "__main__":  # run the tests when the file is executed directly
+    unittest.main()
diff --git a/experimental/CollectiveX/tests/test_qualification_planning.py b/experimental/CollectiveX/tests/test_qualification_planning.py
new file mode 100644
index 000000000..4718818ed
--- /dev/null
+++ b/experimental/CollectiveX/tests/test_qualification_planning.py
@@ -0,0 +1,247 @@
+#!/usr/bin/env python3
+"""CPU-only tests for qualification-specific shard execution planning."""
+from __future__ import annotations
+
+import copy
+import hashlib
+import json
+import os
+from pathlib import Path
+import sys
+import tempfile
+import unittest
+from unittest import mock
+
+
+HERE = Path(__file__).resolve().parent  # directory that contains this test module
+ROOT = HERE.parent  # parent directory of HERE
+sys.path[:0] = [str(ROOT), str(HERE)]  # prepend both before the project imports below
+
+import identity  # noqa: E402
+import sweep_matrix  # noqa: E402
+
+
+def _canonical(value: object) -> bytes:
+    """Serialize ``value`` as compact, key-sorted, ASCII-only JSON bytes."""
+    text = json.dumps(value, ensure_ascii=True, sort_keys=True, separators=(",", ":"))
+    return text.encode()
+
+
+class QualificationPlanningTest(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls) -> None:
+        """Resolve and validate one deepep/h100-dgxc matrix; keep a shard with n >= 3."""
+        resolved = sweep_matrix.resolve_matrix(
+            backend="deepep", only_sku="h100-dgxc", max_cases=128
+        )
+        cls.matrix = sweep_matrix.validate_matrix_document(resolved)
+        cls.shard = next(entry for entry in cls.matrix["include"] if entry["n"] >= 3)
+
+    def test_matrix_semantics_do_not_depend_on_qualification_index(self) -> None:
+        """The canonical matrix bytes do not change with CX_QUALIFICATION_INDEX."""
+        baseline = _canonical(self.matrix)
+        for repeat in (1, 2, 3):
+            override = {"CX_QUALIFICATION_INDEX": str(repeat)}
+            with mock.patch.dict(os.environ, override):
+                # Resolve and validate while the environment override is active.
+                resolved = sweep_matrix.resolve_matrix(
+                    backend="deepep", only_sku="h100-dgxc", max_cases=128
+                )
+                observed = sweep_matrix.validate_matrix_document(resolved)
+            rendered = _canonical(observed)
+            self.assertEqual(rendered, baseline)
+
+    def test_extract_shard_has_deterministic_distinct_exact_plans(self) -> None:
+        """Check shard extraction for every qualification repeat.
+
+        For repeats 1..3 the same shard is extracted twice and both results
+        must compare equal. Each control must carry exactly the shard's case
+        ids, a digest matching its ordered (case_id, precision_profile) pairs,
+        and pass validation. Across repeats the digests and case orders must
+        all differ, and the matrix file on disk must be left unchanged.
+        """
+        selectors = {
+            "sku": self.shard["sku"],
+            "backend": self.shard["backend"],
+            "nodes": self.shard["nodes"],
+        }
+        with tempfile.TemporaryDirectory() as scratch:
+            workdir = Path(scratch)
+            matrix_path = workdir / "matrix.json"
+            matrix_path.write_bytes(_canonical(self.matrix) + b"\n")
+            digest_before = hashlib.sha256(matrix_path.read_bytes()).hexdigest()
+
+            def extract(output_name, qualification_index):
+                return sweep_matrix.extract_shard(
+                    matrix_path,
+                    self.shard["id"],
+                    workdir / output_name,
+                    qualification_index=qualification_index,
+                    **selectors,
+                )
+
+            extracted = []
+            for qualification_index in (1, 2, 3):
+                # Two extractions per repeat, written to separate output files.
+                first = extract(f"q{qualification_index}-first.json", qualification_index)
+                again = extract(f"q{qualification_index}-repeated.json", qualification_index)
+                self.assertEqual(first, again)
+                self.assertEqual(first["qualification_index"], qualification_index)
+                extracted_ids = {case["case_id"] for case in first["cases"]}
+                self.assertEqual(extracted_ids, set(self.shard["case_ids"]))
+                # Rebuild the ordered pairs the recorded digest is expected to cover.
+                ordered_pairs = []
+                for case in first["cases"]:
+                    precision = case.get(
+                        "precision_profile",
+                        identity.V1_CONTROL_PRECISION_PROFILE,
+                    )
+                    ordered_pairs.append([case["case_id"], precision])
+                encoded = json.dumps(ordered_pairs, separators=(",", ":")).encode()
+                self.assertEqual(
+                    first["execution_plan_sha256"],
+                    hashlib.sha256(encoded).hexdigest(),
+                )
+                sweep_matrix.validate_shard_control(
+                    first, qualification_index=qualification_index, **selectors
+                )
+                extracted.append(first)
+            # The three repeats must be pairwise distinct in digest and in order.
+            plan_digests = {
+                control["execution_plan_sha256"] for control in extracted
+            }
+            self.assertEqual(len(plan_digests), 3)
+            case_orders = {
+                tuple(case["case_id"] for case in control["cases"])
+                for control in extracted
+            }
+            self.assertEqual(len(case_orders), 3)
+            # Extraction must leave the matrix file it reads unmodified.
+            digest_after = hashlib.sha256(matrix_path.read_bytes()).hexdigest()
+            self.assertEqual(digest_after, digest_before)
+
+    def test_precision_profiles_and_cases_rotate_across_repeats(self) -> None:
+        """Three repeats reorder both the fixture cases and their precision profiles."""
+        profiles = list(identity.V1_PRECISION_PROFILES)[:3]
+        cases = []
+        for index in range(9):
+            fixture = {
+                "case_id": identity.digest("case", {"fixture": index}),
+                "precision_profile": profiles[index % len(profiles)],
+            }
+            cases.append(fixture)
+        plans = []
+        for qualification_index in (1, 2, 3):
+            ordered = sweep_matrix.qualification_execution_order(
+                "qualification-fixture", cases, qualification_index
+            )
+            plans.append(ordered)
+        expected_ids = {case["case_id"] for case in cases}
+        # Every plan must cover exactly the fixture case ids.
+        covered = [{case["case_id"] for case in plan} == expected_ids for plan in plans]
+        self.assertTrue(all(covered))
+        id_orders = {tuple(case["case_id"] for case in plan) for plan in plans}
+        self.assertEqual(len(id_orders), 3)
+        profile_orders = {
+            tuple(case["precision_profile"] for case in plan) for plan in plans
+        }
+        self.assertEqual(len(profile_orders), 3)
+
+    def test_matrix_execution_plan_digest_is_repeat_specific_and_stable(self) -> None:
+        """Plan digests differ per repeat, survive a deep copy, and reject bad weights."""
+        plan_digest = sweep_matrix.qualification_execution_plan_sha256
+        repeats = (1, 2, 3)
+        digests = [plan_digest(self.matrix, repeat) for repeat in repeats]
+        # Three repeats must give three distinct 64-character digests.
+        self.assertEqual(len(set(digests)), 3)
+        self.assertTrue(all(len(digest) == 64 for digest in digests))
+        # Recomputing from an independent copy must reproduce every digest.
+        recomputed = [
+            plan_digest(copy.deepcopy(self.matrix), repeat) for repeat in repeats
+        ]
+        self.assertEqual(digests, recomputed)
+        weights = [shard["execution_weight"] for shard in self.matrix["include"]]
+        self.assertTrue(all(weight > 0 for weight in weights))
+        # Inflating one shard's weight must be detected against its cases.
+        tampered = copy.deepcopy(self.matrix)
+        first_shard = tampered["include"][0]
+        first_shard["execution_weight"] = first_shard["execution_weight"] + 1
+        with self.assertRaisesRegex(
+            sweep_matrix.MatrixError, "execution_weight differs from its cases"
+        ):
+            plan_digest(tampered, 1)
+
+    def test_frontend_catalog_covers_every_requested_case_and_point(self) -> None:
+        """The frontend catalog accounts for every requested case and ladder point."""
+        catalog = sweep_matrix.frontend_catalog(self.matrix)
+        requested = self.matrix["requested_cases"]
+        self.assertEqual(catalog["format"], "collectivex.frontend-catalog.v1")
+        self.assertEqual(catalog["case_count"], len(requested))
+        # A point is one whitespace-separated entry of a case's ladder string.
+        ladder_points = sum(
+            len(item["case"]["ladder"].split()) for item in requested
+        )
+        self.assertEqual(catalog["point_count"], ladder_points)
+        catalog_ids = {item["case_id"] for item in catalog["cases"]}
+        requested_ids = {item["case"]["case_id"] for item in requested}
+        self.assertEqual(catalog_ids, requested_ids)
+        serialized_size = len(_canonical(catalog)) + 1
+        self.assertLess(serialized_size, 1024 * 1024)
+
+    def test_invalid_qualification_controls_are_rejected(self) -> None:
+        """Reject bad qualification indexes and tampered shard controls.
+
+        Covers explicit indexes 0, 4 and True, a non-numeric environment
+        value, the default of 1 when the environment is empty, and controls
+        whose index or plan digest was altered after extraction.
+        """
+        out_of_range = "integer in 1..3"
+        with tempfile.TemporaryDirectory() as scratch:
+            workdir = Path(scratch)
+            matrix_path = workdir / "matrix.json"
+            matrix_path.write_bytes(_canonical(self.matrix) + b"\n")
+            arguments = {
+                "sku": self.shard["sku"],
+                "backend": self.shard["backend"],
+                "nodes": self.shard["nodes"],
+            }
+
+            def extract(output_name, **extra):
+                return sweep_matrix.extract_shard(
+                    matrix_path,
+                    self.shard["id"],
+                    workdir / output_name,
+                    **extra,
+                    **arguments,
+                )
+
+            # bool is an int subclass, so True is checked alongside 0 and 4.
+            for rejected in (0, 4, True):
+                with self.subTest(qualification_index=rejected):
+                    with self.assertRaisesRegex(sweep_matrix.MatrixError, out_of_range):
+                        extract("invalid.json", qualification_index=rejected)
+            # A non-numeric environment value is rejected with the same message.
+            with mock.patch.dict(os.environ, {"CX_QUALIFICATION_INDEX": "invalid"}):
+                with self.assertRaisesRegex(sweep_matrix.MatrixError, out_of_range):
+                    extract("invalid-env.json")
+
+            # With the environment emptied, extraction falls back to index 1.
+            with mock.patch.dict(os.environ, {}, clear=True):
+                control = extract("default.json")
+            self.assertEqual(control["qualification_index"], 1)
+
+            # Altering the index or the digest of a valid control must be caught.
+            wrong_index = copy.deepcopy(control)
+            wrong_index["qualification_index"] = 4
+            with self.assertRaisesRegex(sweep_matrix.MatrixError, out_of_range):
+                sweep_matrix.validate_shard_control(wrong_index, **arguments)
+            wrong_digest = copy.deepcopy(control)
+            wrong_digest["execution_plan_sha256"] = "0" * 64
+            with self.assertRaisesRegex(
+                sweep_matrix.MatrixError, "differs from its ordered cases"
+            ):
+                sweep_matrix.validate_shard_control(wrong_digest, **arguments)
+
+
+if __name__ == "__main__":  # allow running this test file directly as a script
+    unittest.main()
diff --git a/experimental/CollectiveX/tests/test_sampling_contract.py b/experimental/CollectiveX/tests/test_sampling_contract.py
new file mode 100644
index 000000000..e0c10225f
--- /dev/null
+++ b/experimental/CollectiveX/tests/test_sampling_contract.py
@@ -0,0 +1,3433 @@
+#!/usr/bin/env python3
+"""CPU-only behavioral tests for the CollectiveX v1 execution contract."""
+from __future__ import annotations
+
+import argparse
+import ast
+import copy
+import hashlib
+import io
+import json
+import os
+from pathlib import Path
+import re
+import stat
+import subprocess
+import sys
+import tarfile
+import tempfile
+import types
+import unittest
+from unittest import mock
+
+import numpy as np
+
+HERE = Path(__file__).resolve().parent  # directory that contains this test module
+ROOT = HERE.parent  # parent directory of HERE
+sys.path[:0] = [str(ROOT), str(HERE)]  # prepend both before the project imports below
+
+import artifact_safety  # noqa: E402
+import capability  # noqa: E402
+import contracts  # noqa: E402
+import eplb  # noqa: E402
+import ep_harness  # noqa: E402
+import identity  # noqa: E402
+import run_ep  # noqa: E402
+import source_archive  # noqa: E402
+import summarize  # noqa: E402
+import sweep_matrix  # noqa: E402
+import workload  # noqa: E402
+
+
+class SamplingContractTest(unittest.TestCase):
+    def test_identity_and_fixed_sampling_profile(self) -> None:
+        """Pin the identity vector, the frozen v1 profile, and the sampling constants.
+
+        The harness must expose the fixed-512-v1 contract (8 timed iterations,
+        64 trials, 512 samples, 32 warmup iterations), the argument parser
+        must default to it, and ``sampling_contract_error`` must accept only
+        the (8, 64, 32) profile.
+        """
+        identity.verify_test_vector()
+        series_id = identity.IDENTITY_TEST_VECTOR["series_id"]
+        self.assertTrue(identity.is_typed_id(series_id, "series"))
+        self.assertEqual(ep_harness.SAMPLING_CONTRACT, "fixed-512-v1")
+        # Order: timed iterations, trials, samples per point, warmup iterations.
+        harness_constants = (
+            ep_harness.TIMED_ITERS_PER_TRIAL,
+            ep_harness.TRIALS_PER_POINT,
+            ep_harness.TIMED_SAMPLES_PER_POINT,
+            ep_harness.WARMUP_ITERS_PER_TRIAL,
+        )
+        self.assertEqual(harness_constants, (8, 64, 512, 32))
+        frozen_fields = {
+            "activation_profile": "canonical-counter-source-v4",
+            "activation_generator": "collectivex-activation-counter-v4",
+            "sampling_contract": "fixed-512-v1",
+            "percentile_method": "nearest-rank",
+            "rank_reduction": "cross-rank-max-per-iteration",
+            "oracle_contract": "expert-specific-transform-v1",
+        }
+        for field, frozen in frozen_fields.items():
+            self.assertEqual(identity.V1_CASE_PROFILE[field], frozen)
+        self.assertEqual(set(identity.V1_CASE_PROFILES), {"normal", "low-latency"})
+        self.assertEqual(
+            identity.V1_LOW_LATENCY_CASE_PROFILE["payload_unit"], "token-expert"
+        )
+        # The normal and low-latency profiles must not share a case digest.
+        normal_digest = identity.digest("case", identity.V1_NORMAL_CASE_PROFILE)
+        low_latency_digest = identity.digest("case", identity.V1_LOW_LATENCY_CASE_PROFILE)
+        self.assertNotEqual(normal_digest, low_latency_digest)
+        parser = argparse.ArgumentParser()
+        ep_harness.add_common_args(parser)
+        argv = [
+            "--runner", "test", "--topology-class", "test",
+            "--scope", "scale-up", "--scale-up-transport", "nvlink",
+            "--out", "result.json",
+        ]
+        args = parser.parse_args(argv)
+        # No sampling flags are passed, so these values are the parser defaults.
+        self.assertEqual((args.iters, args.trials, args.warmup), (8, 64, 32))
+        self.assertEqual(args.qualification_index, 1)
+        canonical = (8, 64, 32)
+        # Only the canonical profile may come back without an error.
+        for profile in (canonical, (128, 4, 32), (8, 1, 4), (0, 64, 32)):
+            with self.subTest(profile=profile):
+                accepted = ep_harness.sampling_contract_error(*profile) is None
+                self.assertEqual(accepted, profile == canonical)
+
+    def test_nearest_rank_percentiles_use_all_512_samples(self) -> None:
+        """p50 and p99 of the samples 1..512 are 256 and 507."""
+        samples = list(range(1, 513))
+        self.assertEqual([ep_harness.percentile(samples, q) for q in (50, 99)], [256, 507])
+
+    def test_qualification_order_is_deterministic_and_position_balanced(self) -> None:
+        """Orders repeat exactly, permute the ladder, and balance every position."""
+        values = [1, 2, 4, 8, 16, 32, 64, 128]
+        trials = range(64)
+        balanced = dict.fromkeys(values, 8)
+        for repeat in range(1, ep_harness.QUALIFICATION_RUNS + 1):
+            orders = [
+                ep_harness.qualification_order(values, repeat, trial) for trial in trials
+            ]
+            replay = [
+                ep_harness.qualification_order(values, repeat, trial) for trial in trials
+            ]
+            self.assertEqual(orders, replay)
+            self.assertTrue(all(sorted(order) == values for order in orders))
+            for position in range(len(values)):
+                column = [order[position] for order in orders]
+                counts = {value: column.count(value) for value in values}
+                # 64 trials over 8 values: each value fills each slot 8 times.
+                self.assertEqual(counts, balanced)
+        # A zero repeat index and a ladder with duplicates must both raise.
+        with self.assertRaises(ValueError):
+            ep_harness.qualification_order(values, 0, 0)
+        with self.assertRaises(ValueError):
+            ep_harness.qualification_order([1, 1], 1, 0)
+
+    def test_sample_evidence_preserves_exact_trial_blocks(self) -> None:
+        """Evidence returns the 64x8 trial blocks in a new list and rejects bad input."""
+        trials = []
+        for trial in range(64):
+            trials.append([float(trial * 8 + sample) for sample in range(8)])
+        evidence = ep_harness.sampled_component_evidence(trials)
+        self.assertEqual(evidence["availability"], "measured")
+        self.assertEqual(evidence["sample_count"], 512)
+        # Equal contents, but not the same object as the input list.
+        self.assertEqual(evidence["trials"], trials)
+        self.assertIsNot(evidence["trials"], trials)
+        unavailable = {"availability": "unavailable", "sample_count": 0, "trials": None}
+        self.assertEqual(ep_harness.sampled_component_evidence([]), unavailable)
+        truncated = (trials[:-1], [*trials[:-1], trials[-1][:-1]])
+        for malformed in truncated:
+            with self.assertRaises(ValueError):
+                ep_harness.sampled_component_evidence(malformed)
+        poisoned = copy.deepcopy(trials)
+        poisoned[0][0] = float("nan")
+        with self.assertRaises(ValueError):
+            ep_harness.sampled_component_evidence(poisoned)
+
+    def test_terminal_summary_uses_bound_sku_and_route(self) -> None:
+        """The summary identity tuple combines the bound SKU with the case's fields."""
+        case = {
+            "backend": "deepep", "phase": "prefill", "ep": 8,
+            "suite": "ep-routing-v1", "routing": "zipf", "eplb": True,
+            "required_publication": "comparable-experimental",
+        }
+        terminal = {
+            "format": contracts.TERMINAL_FORMAT,
+            "case": case,
+            "identity": {"case_factors": {"sku": "h100-dgxc"}},
+        }
+        expected = (
+            "h100-dgxc", "ep-routing-v1", "zipf", "prefill", True,
+            "comparable-experimental", 8,
+        )
+        self.assertEqual(summarize._identity(terminal), expected)
+
+    def test_matrix_cases_and_shards_are_identity_bound(self) -> None:  # pins the all-backend matrix
+        matrix = sweep_matrix.validate_matrix_document(
+            sweep_matrix.resolve_matrix(backends="all")
+        )
+        requested = {item["case"]["case_id"]: item for item in matrix["requested_cases"]}
+        assigned = [case_id for shard in matrix["include"] for case_id in shard["case_ids"]]
+        runnable = {
+            case_id for case_id, item in requested.items()
+            if item["disposition"] == "runnable"
+        }
+        runnable_cases = [
+            item for item in matrix["requested_cases"]
+            if item["disposition"] == "runnable"
+        ]
+        unsupported_cases = [
+            item for item in matrix["requested_cases"]
+            if item["disposition"] == "unsupported"
+        ]
+        self.assertEqual(  # entry counts first, then ladder-point totals
+            (
+                len(matrix["include"]),
+                len(matrix["requested_cases"]),
+                len(runnable_cases),
+                len(unsupported_cases),
+                sum(
+                    len(item["case"]["ladder"].split())
+                    for item in matrix["requested_cases"]
+                ),
+                sum(len(item["case"]["ladder"].split()) for item in runnable_cases),
+                sum(len(item["case"]["ladder"].split()) for item in unsupported_cases),
+            ),
+            (58, 608, 364, 244, 1600, 940, 660),
+        )
+        expected_topologies = {}  # (sku, ep) -> tuple compared against TOPOLOGY_FIELDS values
+        for sku, product in (
+            ("h100-dgxc", "h100"), ("h200-dgxc", "h200"),
+            ("b200-dgxc", "b200"), ("b300", "b300"),
+        ):
+            expected_topologies[sku, 8] = (
+                1, 8, 8, "scale-up", "nvlink", None, "nvlink",
+                f"{product}-nvlink-island",
+            )
+            expected_topologies[sku, 16] = (
+                2, 8, 8, "scale-out", "nvlink", "rdma", "nvlink-rdma",
+                f"{product}-nvlink-rdma",
+            )
+        for sku in ("gb200", "gb300"):
+            topology_class = f"{sku}-nvl72-mnnvl"
+            expected_topologies[sku, 8] = (
+                2, 4, 72, "scale-up", "mnnvl", None, "mnnvl", topology_class,
+            )
+            expected_topologies[sku, 16] = (
+                4, 4, 72, "scale-up", "mnnvl", None, "mnnvl", topology_class,
+            )
+        for sku in ("mi325x", "mi355x"):
+            expected_topologies[sku, 8] = (
+                1, 8, 8, "scale-up", "xgmi", None, "xgmi", f"{sku}-xgmi",
+            )
+            expected_topologies[sku, 16] = (
+                2, 8, 8, "scale-out", "xgmi", "rdma", "xgmi-rdma",
+                f"{sku}-xgmi-rdma",
+            )
+        topology_fields = sweep_matrix.TOPOLOGY_FIELDS
+        observed_topologies: dict[tuple[str, int], set[tuple[object, ...]]] = {}
+        for item in matrix["requested_cases"]:
+            case = item["case"]
+            observed_topologies.setdefault((item["sku"], case["ep"]), set()).add(
+                tuple(case[field] for field in topology_fields)
+            )
+        self.assertEqual(  # requested cases agree with the expected table
+            {key: next(iter(values)) for key, values in observed_topologies.items()},
+            expected_topologies,
+        )
+        self.assertTrue(all(len(values) == 1 for values in observed_topologies.values()))
+        self.assertEqual(  # the capability registry agrees with the same table
+            {
+                (sku, ep): tuple(topology[field] for field in topology_fields)
+                for sku, platform in capability.PLATFORMS.items()
+                for ep, topology in platform["topologies"].items()
+            },
+            expected_topologies,
+        )
+        self.assertEqual(
+            {shard["n"] for shard in matrix["include"]}, {6, 7}
+        )
+        self.assertEqual(
+            sum(shard["n"] == 7 for shard in matrix["include"]), 16
+        )
+        ll_cases = [
+            item for item in matrix["requested_cases"]
+            if item["case"]["mode"] == "low-latency"
+        ]
+        self.assertEqual(len(ll_cases), 32)
+        self.assertTrue(all(
+            item["case"]["suite"] == "ep-low-latency-v1"
+            and item["case"]["backend"] in {"deepep", "uccl"}
+            and item["case"]["phase"] == "decode"
+            and item["case"]["routing"] == "uniform"
+            and not item["case"]["eplb"]
+            and item["case"]["ladder"] == "1 2 4 8 16 32 64 128"
+            for item in ll_cases
+        ))
+        for shard in matrix["include"]:
+            ep = next(  # EP degree of the shard's first listed case
+                requested[case_id]["case"]["ep"] for case_id in shard["case_ids"]
+            )
+            self.assertEqual(
+                tuple(shard[field] for field in topology_fields),
+                expected_topologies[shard["sku"], ep],
+            )
+        routing_points = {
+            phase: {
+                int(point)
+                for item in matrix["requested_cases"]
+                if item["case"]["suite"] == "ep-routing-v1"
+                and item["case"]["phase"] == phase
+                for point in item["case"]["ladder"].split()
+            }
+            for phase in ("decode", "prefill")
+        }
+        self.assertEqual(routing_points, {"decode": {128}, "prefill": {512}})
+        skus = sorted({shard["sku"] for shard in matrix["include"]})
+        self.assertEqual(  # the leading shards list each SKU once, in sorted order
+            [shard["sku"] for shard in matrix["include"][:len(skus)]],
+            skus,
+        )
+        self.assertEqual(set(assigned), runnable)
+        self.assertEqual(len(assigned), len(set(assigned)))  # no case id sits in two shards
+        self.assertEqual({item["case"]["ep"] for item in matrix["requested_cases"]}, {8, 16})
+        self.assertFalse(capability.resolve("gb200", "deepep", ep=8, nodes=1)[0])
+        excluded = {
+            "uccl": {"b200-dgxc", "b300"},
+        }
+        for backend, skus in excluded.items():  # NOTE: rebinds the ``skus`` list built above
+            for sku in skus:
+                with self.subTest(backend=backend, sku=sku):
+                    self.assertFalse(capability.resolve(sku, backend)[0])
+        for case_id, item in requested.items():
+            case = {key: value for key, value in item["case"].items() if key != "case_id"}
+            self.assertEqual(
+                case_id,
+                identity.case_id(
+                    sku=item["sku"], profile=identity.profile_for_case(case), case=case
+                ),
+            )
+            self.assertEqual(case["timing"], "8:64:32")
+            self.assertEqual(case["samples_per_point"], 512)
+        # A boolean schema_version must be rejected by matrix validation.
+        bad_matrix = copy.deepcopy(matrix)
+        bad_matrix["schema_version"] = True
+        with self.assertRaises(sweep_matrix.MatrixError):
+            sweep_matrix.validate_matrix_document(bad_matrix)
+        # An altered case factor, even with a recomputed id, must fail the "frozen v1" check.
+        bad_catalog = copy.deepcopy(matrix)
+        wrapper = next(
+            item for item in bad_catalog["requested_cases"]
+            if item["disposition"] == "runnable"
+        )
+        old_id = wrapper["case"]["case_id"]
+        wrapper["case"]["hidden"] = 1
+        factors = {key: value for key, value in wrapper["case"].items() if key != "case_id"}
+        new_id = identity.case_id(
+            sku=wrapper["sku"], profile=identity.V1_CASE_PROFILE, case=factors
+        )
+        wrapper["case"]["case_id"] = new_id
+        for shard in bad_catalog["include"]:
+            shard["case_ids"] = [new_id if value == old_id else value for value in shard["case_ids"]]
+        with self.assertRaisesRegex(sweep_matrix.MatrixError, "frozen v1"):
+            sweep_matrix.validate_matrix_document(bad_catalog)
+        # A transport that disagrees with the platform registry must be rejected.
+        bad_topology = copy.deepcopy(matrix)
+        wrapper = next(
+            item for item in bad_topology["requested_cases"]
+            if item["disposition"] == "runnable"
+        )
+        old_id = wrapper["case"]["case_id"]
+        wrapper["case"]["transport"] = "incorrect-transport"
+        factors = {key: value for key, value in wrapper["case"].items() if key != "case_id"}
+        new_id = identity.case_id(
+            sku=wrapper["sku"], profile=identity.V1_CASE_PROFILE, case=factors
+        )
+        wrapper["case"]["case_id"] = new_id
+        for shard in bad_topology["include"]:
+            shard["case_ids"] = [new_id if value == old_id else value for value in shard["case_ids"]]
+        with self.assertRaisesRegex(sweep_matrix.MatrixError, "platform registry"):
+            sweep_matrix.validate_matrix_document(bad_topology)
+        # NOTE(review): this control has no qualification_index or digest; confirm which check fires.
+        shard_meta = matrix["include"][0]
+        requested_cases = {item["case"]["case_id"]: item["case"] for item in matrix["requested_cases"]}
+        shard = {
+            "schema_version": True,  # a bool in place of a version number
+            "id": shard_meta["id"],
+            "sku": shard_meta["sku"],
+            "backend": shard_meta["backend"],
+            "nodes": shard_meta["nodes"],
+            "n": shard_meta["n"],
+            "cases": [requested_cases[value] for value in shard_meta["case_ids"]],
+        }
+        with self.assertRaises(sweep_matrix.MatrixError):
+            sweep_matrix.validate_shard_control(
+                shard, sku=shard_meta["sku"], backend=shard_meta["backend"],
+                nodes=shard_meta["nodes"],
+            )
+
+    def test_matrix_yaml_and_config_validation_are_strict(self) -> None:
+        """Config validation exits on every malformed document in the table.
+
+        Each entry corrupts a fresh deep copy of the loaded suites/workloads
+        documents in one way and expects ``SystemExit``; a YAML file with a
+        repeated key must make ``_load`` exit as well.
+        """
+        suites = sweep_matrix._load("suites.yaml")
+        workloads = sweep_matrix._load("workloads.yaml")
+        declared_degrees = {
+            tuple(suite["ep_degrees"]) for suite in suites["suites"].values()
+        }
+        self.assertEqual(declared_degrees, {(8, 16)})
+        # label -> mutation applied to (suites copy, workloads copy).
+        corruptions = {
+            "unknown top": lambda s, _w: s.update({"typo": True}),
+            "unknown suite field": (
+                lambda s, _w: s["suites"]["ep-core-v1"].update({"modes": ["normal"]})
+            ),
+            "unknown workload field": (
+                lambda _s, w: w["model_derived"]["deepseek-v3-v1"].update({"unused": 1})
+            ),
+            "string phases": (
+                lambda s, _w: s["suites"]["ep-core-v1"].update({"phases": "decode"})
+            ),
+            "unknown routing": (
+                lambda s, _w: s["suites"]["ep-core-v1"].update({"routings": ["random"]})
+            ),
+            "integer EPLB": (
+                lambda s, _w: s["suites"]["ep-routing-v1"].update({"eplb": [0, 1]})
+            ),
+            "duplicate platform": (
+                lambda s, _w: s["suites"]["ep-core-v1"]["platforms"].append("h100-dgxc")
+            ),
+            "missing EP degrees": (
+                lambda s, _w: s["suites"]["ep-core-v1"].pop("ep_degrees")
+            ),
+            "non-v1 EP degrees": (
+                lambda s, _w: s["suites"]["ep-core-v1"].update({"ep_degrees": [8]})
+            ),
+            "missing top field": lambda s, _w: s.pop("schema_version"),
+            "string dimension": (
+                lambda _s, w: w["model_derived"]["deepseek-v3-v1"].update({"hidden": "7168"})
+            ),
+            "unreachable phase ladder": (
+                lambda s, _w: s["suites"]["ep-routing-v1"].update({"phases": ["prefill"]})
+            ),
+        }
+        # Copies are made per entry so every mutation starts from pristine documents.
+        for label, corrupt in corruptions.items():
+            with self.subTest(label=label):
+                with self.assertRaises(SystemExit):
+                    broken_suites = copy.deepcopy(suites)
+                    broken_workloads = copy.deepcopy(workloads)
+                    corrupt(broken_suites, broken_workloads)
+                    sweep_matrix.validate_config_documents(broken_suites, broken_workloads)
+
+        with tempfile.TemporaryDirectory() as scratch:
+            fake_root = Path(scratch)
+            config_dir = fake_root / "configs"
+            config_dir.mkdir()
+            duplicate = config_dir / "duplicate.yaml"
+            duplicate.write_text("schema_version: 1\nsuites:\n  same: 1\n  same: 2\n")
+            # NOTE(review): presumably _load resolves names under HERE/configs; confirm.
+            redirect = mock.patch.object(sweep_matrix, "HERE", fake_root)
+            with redirect, self.assertRaisesRegex(SystemExit, "duplicate YAML key"):
+                sweep_matrix._load("duplicate.yaml")
+
+    def test_semantically_duplicate_suite_points_are_rejected(self) -> None:
+        """Validation must report a duplicate when _semantic_points is stubbed constant."""
+        matrix = sweep_matrix.resolve_matrix()
+        # The stub hands back the same single point on every call.
+        stub = mock.patch.object(sweep_matrix, "_semantic_points", return_value=["duplicate"])
+        expected_error = "duplicates a semantic token point"
+        with stub, self.assertRaisesRegex(sweep_matrix.MatrixError, expected_error):
+            sweep_matrix.validate_matrix_document(matrix)
+
+    def test_only_three_shared_launchers_are_registered(self) -> None:
+        """Only three launcher scripts exist, each naming its steps in the required order."""
+        launcher_dir = ROOT / "launchers"
+        on_disk = {path.name for path in launcher_dir.glob("launch_*.sh")}
+        self.assertEqual(
+            on_disk, {"launch_single-slurm.sh", "launch_gb-nv.sh", "launch_mi-amds.sh"}
+        )
+        registered = {platform["launcher"] for platform in capability.PLATFORMS.values()}
+        self.assertEqual(registered, {"single-slurm", "gb-nv", "mi-amds"})
+        for platform in capability.PLATFORMS.values():
+            kind = platform["launcher"]
+            launcher = launcher_dir / f"launch_{kind}.sh"
+            self.assertTrue(launcher.is_file())
+            source = launcher.read_text()
+
+            def ordered(earlier: str, later: str) -> None:
+                # Assert that ``earlier`` first occurs before ``later`` in the script text.
+                self.assertLess(source.index(earlier), source.index(later))
+
+            self.assertNotIn("RUNNER_NAME", source)
+            self.assertIn("cx_preflight_allocation", source)
+            lock_environment = 'cx_lock_canonical_gha_env "$RUNNER"'
+            self.assertIn(lock_environment, source)
+            ordered("cx_load_operator_config", lock_environment)
+            validate = 'cx_validate_shard_control "$CX_DIR"'
+            stage = 'MOUNT_SRC="$(cx_stage_path '
+            self.assertIn(validate, source)
+            ordered(validate, stage)
+            ordered(stage, 'cx_stage_repo "$REPO_ROOT"')
+            ordered(validate, "cx_require_vars")
+            if kind not in {"single-slurm", "mi-amds"}:
+                continue
+            network = "cx_validate_network_profile_on_job"
+            self.assertIn(network, source)
+            ordered("cx_salloc_jobid", network)
+            ordered(network, "cx_preflight_allocation")
+            if kind == "single-slurm":
+                ordered(network, "CX_ENROOT_LOCAL_IMPORT=1 cx_ensure_squash")
+
+        common = (ROOT / "runtime" / "common.sh").read_text()
+        repository = ROOT.parent.parent
+        workflow = (repository / ".github" / "workflows" / "collectivex-sweep.yml").read_text()
+        self.assertNotIn("RUNNER_NAME", common)
+        self.assertNotIn("RUNNER_NAME:", workflow)
+        self.assertNotIn("flashinfer", capability.BACKENDS)
+        self.assertFalse((HERE / "ep_flashinfer.py").exists())
+
+    def test_canonical_operator_config_requires_a_private_audit_salt(self) -> None:
+        """Exercise audit-salt handling of ``cx_load_operator_config``.
+
+        An operator config is written to a mode-0600 file in a temporary
+        directory, ``runtime/common.sh`` is sourced in a bash subprocess and
+        the test asserts that:
+
+        * a canonical run with a lower-case 64-character salt succeeds and
+          ``CX_AUDIT_SALT`` equals that salt,
+        * a canonical run without a salt exits non-zero,
+        * a non-canonical run without a salt succeeds with ``CX_AUDIT_SALT``
+          unset,
+        * an upper-case salt exits non-zero,
+
+        and that no salt value is ever echoed on stdout or stderr.
+        """
+        salt = "a" * 64
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve()
+            config = root / "operator.json"
+            document = {
+                "schema_version": 1,
+                "audit_salt": salt,
+                "runners": {
+                    "h100-dgxc": {
+                        "partition": "test", "account": "test",
+                        "squash_dir": str(root), "stage_dir": str(root),
+                    },
+                },
+            }
+            command = (
+                'source "$1"; export COLLECTIVEX_EXECUTION_ID="audit-config-$$"; '
+                "trap 'cx_cleanup_private_logs 0' EXIT; cx_load_operator_config; "
+                'test "$CX_AUDIT_SALT" = "$EXPECTED_AUDIT_SALT"'
+            )
+
+            def invoke(
+                value: dict, *, canonical: bool, expect_salt: bool = True
+            ) -> subprocess.CompletedProcess[str]:
+                """Write ``value`` as the operator config and load it in bash.
+
+                ``canonical`` sets ``COLLECTIVEX_CANONICAL_GHA=1``; with
+                ``expect_salt`` false the script instead checks that
+                ``CX_AUDIT_SALT`` is unset after loading.
+                """
+                config.write_text(json.dumps(value))
+                config.chmod(0o600)
+                environment = {
+                    **os.environ,
+                    "CX_RUNNER": "h100-dgxc",
+                    "COLLECTIVEX_OPERATOR_CONFIG": str(config),
+                    # Compare against the salt actually present in the document.
+                    # With a fixed expectation, a malformed salt would fail the
+                    # final comparison even if the loader wrongly accepted it,
+                    # so the rejection assertion could never fail.
+                    "EXPECTED_AUDIT_SALT": value.get("audit_salt", salt),
+                }
+                if canonical:
+                    environment["COLLECTIVEX_CANONICAL_GHA"] = "1"
+                invocation = command if expect_salt else (
+                    'source "$1"; export COLLECTIVEX_EXECUTION_ID="audit-config-$$"; '
+                    "trap 'cx_cleanup_private_logs 0' EXIT; cx_load_operator_config; "
+                    'test -z "${CX_AUDIT_SALT+x}"'
+                )
+                return subprocess.run(
+                    ["bash", "-c", invocation, "_", str(ROOT / "runtime" / "common.sh")],
+                    text=True,
+                    capture_output=True,
+                    env=environment,
+                )
+
+            accepted = invoke(document, canonical=True)
+            self.assertEqual(accepted.returncode, 0, accepted.stderr)
+            self.assertNotIn(salt, accepted.stdout + accepted.stderr)
+
+            missing = copy.deepcopy(document)
+            del missing["audit_salt"]
+            rejected = invoke(missing, canonical=True)
+            self.assertNotEqual(rejected.returncode, 0)
+            self.assertNotIn(salt, rejected.stdout + rejected.stderr)
+
+            manual = invoke(missing, canonical=False, expect_salt=False)
+            self.assertEqual(manual.returncode, 0, manual.stderr)
+            self.assertNotIn(salt, manual.stdout + manual.stderr)
+
+            malformed = copy.deepcopy(document)
+            malformed["audit_salt"] = "A" * 64
+            rejected = invoke(malformed, canonical=False)
+            self.assertNotEqual(rejected.returncode, 0)
+            self.assertNotIn("A" * 64, rejected.stdout + rejected.stderr)
+
+    def test_scaleout_network_profile_is_explicit_and_allowlisted(self) -> None:
+        """Drive ``cx_apply_network_profile`` from ``runtime/common.sh`` in bash.
+
+        The script first expects the ``nvlink-rdma`` profile to be refused when
+        the socket interface and/or RDMA device settings are missing, then
+        checks that the ``nvlink`` and ``mnnvl`` profiles clear stale
+        ``NCCL_NET``/``NCCL_IB_HCA``/``NVSHMEM_HCA_LIST`` values, and finally
+        checks the variables exported for a fully configured ``nvlink-rdma``
+        profile. ``check=True`` fails the test on any non-zero exit.
+        """
+        # Under ``set -e`` a failing ``! command`` does NOT abort the script,
+        # so a bare negation would pass even if the profile were accepted.
+        # Expected rejections therefore use ``if ...; then exit 1; fi``.
+        command = r'''
+          set -euo pipefail
+          source "$1"
+          if (unset CX_SOCKET_IFNAME CX_RDMA_DEVICES; cx_apply_network_profile 2 nvlink-rdma); then
+            echo "nvlink-rdma accepted a profile without socket/RDMA settings" >&2; exit 1
+          fi
+          if (export CX_SOCKET_IFNAME=eth0; unset CX_RDMA_DEVICES; cx_apply_network_profile 2 nvlink-rdma); then
+            echo "nvlink-rdma accepted a profile without RDMA devices" >&2; exit 1
+          fi
+          export CX_SOCKET_IFNAME=ib0 CX_RDMA_DEVICES=mlx5_0:1,mlx5_1:1
+          export NCCL_NET=Socket NCCL_IB_HCA=stale NVSHMEM_HCA_LIST=stale
+          cx_apply_network_profile 1 nvlink
+          test -z "${NCCL_NET+x}${NCCL_IB_HCA+x}${NVSHMEM_HCA_LIST+x}"
+          cx_apply_network_profile 4 mnnvl
+          test -z "${NCCL_NET+x}${NCCL_IB_HCA+x}${NVSHMEM_HCA_LIST+x}"
+          export CX_IB_GID_INDEX=3 CX_RDMA_SERVICE_LEVEL=2
+          cx_apply_network_profile 2 nvlink-rdma
+          test "$NCCL_SOCKET_IFNAME:$GLOO_SOCKET_IFNAME:$UCCL_SOCKET_IFNAME" = ib0:ib0:ib0
+          test "$NCCL_NET:$NCCL_IB_HCA" = 'IB:=mlx5_0:1,mlx5_1:1'
+          test "$NVSHMEM_HCA_LIST" = mlx5_0:1,mlx5_1:1
+          test "$MORI_RDMA_DEVICES:$EP_NIC_NAME" = mlx5_0,mlx5_1:mlx5_0
+          test "$NCCL_IB_GID_INDEX:$NCCL_IB_SL" = 3:2
+          test "$NVSHMEM_IB_ENABLE_IBGDA:$NVSHMEM_IBGDA_NIC_HANDLER" = 1:gpu
+        '''
+        subprocess.run(
+            ["bash", "-c", command, "_", str(ROOT / "runtime" / "common.sh")],
+            check=True,
+            env={**os.environ, "COLLECTIVEX_OPERATOR_CONFIG": "/dev/null"},
+        )
+
+    def test_network_profile_validation_is_private_and_all_node(self) -> None:
+        """Check ``cx_validate_network_profile_on_job`` against a fake ``srun``.
+
+        A stub ``srun`` placed first on ``PATH`` records its arguments and its
+        standard input. The test asserts the arguments and script captured
+        for a two-node ``nvlink-rdma`` validation, that the configured interface
+        and HCA names never reach stdout/stderr (on success and on failure), and
+        that a one-node ``nvlink`` validation does not invoke ``srun`` at all.
+        """
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve()
+            binary = root / "srun"
+            arguments = root / "arguments"
+            script = root / "script"
+            # Stub srun: one argument per line into $CAPTURE_ARGS, stdin into
+            # $CAPTURE_SCRIPT, then exit with $SRUN_RC (0 when unset).
+            binary.write_text(
+                "#!/usr/bin/env bash\n"
+                "printf '%s\\n' \"$@\" > \"$CAPTURE_ARGS\"\n"
+                "cat > \"$CAPTURE_SCRIPT\"\n"
+                "exit \"${SRUN_RC:-0}\"\n"
+            )
+            binary.chmod(0o700)
+            command = (
+                'source "$1"; export COLLECTIVEX_EXECUTION_ID="network-test-$$"; '
+                "trap 'cx_cleanup_private_logs 0' EXIT; "
+                'cx_validate_network_profile_on_job 42 2 nvlink-rdma'
+            )
+            environment = {
+                **os.environ,
+                # The temporary directory goes first so the stub shadows any real srun.
+                "PATH": f"{root}:{os.environ['PATH']}",
+                "COLLECTIVEX_OPERATOR_CONFIG": "/dev/null",
+                "CAPTURE_ARGS": str(arguments),
+                "CAPTURE_SCRIPT": str(script),
+                # Distinctive values so any leak into the output is detectable.
+                "CX_SOCKET_IFNAME": "privateif0",
+                "CX_RDMA_DEVICES": "privatehca0:1",
+                "CX_IB_GID_INDEX": "3",
+            }
+            result = subprocess.run(
+                ["bash", "-c", command, "_", str(ROOT / "runtime" / "common.sh")],
+                text=True,
+                capture_output=True,
+                env=environment,
+            )
+            self.assertEqual(result.returncode, 0, result.stderr)
+            invoked = arguments.read_text()
+            self.assertIn("--nodes=2", invoked)
+            self.assertIn("--ntasks=2", invoked)
+            self.assertIn("--input=all", invoked)
+            self.assertIn("CX_SOCKET_IFNAME,CX_RDMA_DEVICES,CX_IB_GID_INDEX", invoked)
+            self.assertIn('/sys/class/infiniband/$device/ports', script.read_text())
+            self.assertNotIn("privateif0", result.stdout + result.stderr)
+            self.assertNotIn("privatehca0", result.stdout + result.stderr)
+
+            # A failing srun must fail the validation, still without leaking names.
+            failed = subprocess.run(
+                ["bash", "-c", command, "_", str(ROOT / "runtime" / "common.sh")],
+                text=True,
+                capture_output=True,
+                env={**environment, "SRUN_RC": "9"},
+            )
+            self.assertNotEqual(failed.returncode, 0)
+            self.assertNotIn("privateif0", failed.stdout + failed.stderr)
+            self.assertNotIn("privatehca0", failed.stdout + failed.stderr)
+
+            # Remove the capture file; if it is not recreated below, the stub
+            # srun was never called for the one-node nvlink case.
+            arguments.unlink()
+            subprocess.run(
+                [
+                    "bash", "-c",
+                    'source "$1"; cx_validate_network_profile_on_job 42 1 nvlink',
+                    "_", str(ROOT / "runtime" / "common.sh"),
+                ],
+                check=True,
+                env=environment,
+            )
+            self.assertFalse(arguments.exists())
+
+    def test_allocation_preflight_proves_shared_write_visibility(self) -> None:
+        """Run ``cx_preflight_allocation`` against stub ``srun``/``unsquashfs``.
+
+        The stub ``srun`` saves the worker script it receives on stdin and runs
+        it once per ``FAKE_TASKS`` with ``SLURM_NODEID`` set, optionally
+        sabotaging the probe directory first (``FAKE_MODE``). The bash driver
+        expects success for two tasks, and failure when only one task runs,
+        when the probe's ``source`` file is removed, and when the probe
+        directory is made read-only. After every attempt the
+        ``.collectivex-preflight`` entry under the mount must be gone.
+        """
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve()
+            mount = root / "mount"
+            runtime = mount / "experimental" / "CollectiveX" / "runtime"
+            runtime.mkdir(parents=True)
+            (runtime / "run_in_container.sh").write_text("#!/bin/sh\n")
+            squash = root / "image.sqsh"
+            squash.write_bytes(b"squash")
+            binary = root / "bin"
+            binary.mkdir()
+            (binary / "unsquashfs").write_text("#!/bin/sh\nexit 0\n")
+            (binary / "unsquashfs").chmod(0o700)
+            # Stub srun: insists on --input=all (exit 97 otherwise), stores stdin
+            # as the worker script, takes the arguments after the last "--" as
+            # the worker's arguments (the fifth one is the probe directory) and
+            # replays the worker once per simulated node.
+            (binary / "srun").write_text(
+                "#!/usr/bin/env bash\n"
+                "set -euo pipefail\n"
+                "case \" $* \" in *' --input=all '*) ;; *) exit 97 ;; esac\n"
+                "worker=\"$FAKE_ROOT/worker.sh\"\n"
+                "cat > \"$worker\"\n"
+                "args=(\"$@\")\n"
+                "start=0\n"
+                "for ((i=0; i<${#args[@]}; i++)); do\n"
+                "  [ \"${args[$i]}\" != -- ] || start=$((i + 1))\n"
+                "done\n"
+                "[ \"$start\" -gt 0 ]\n"
+                "worker_args=(\"${args[@]:$start}\")\n"
+                "probe=\"${worker_args[4]}\"\n"
+                "case \"${FAKE_MODE:-success}\" in\n"
+                "  missing-source) rm -f -- \"$probe/source\" ;;\n"
+                "  readonly) chmod 500 \"$probe\" ;;\n"
+                "esac\n"
+                "for ((node=0; node<FAKE_TASKS; node++)); do\n"
+                "  SLURM_NODEID=\"$node\" bash \"$worker\" \"${worker_args[@]}\"\n"
+                "done\n"
+            )
+            (binary / "srun").chmod(0o700)
+            # Under ``set -e`` a failing ``! command`` does NOT abort the script,
+            # so a bare negation would pass even if the preflight succeeded.
+            # Expected failures therefore use ``if ...; then exit 1; fi``.
+            command = r'''
+              set -euo pipefail
+              source "$1"
+              case "$(uname -m)" in
+                arm64|aarch64) export CX_IMAGE_PLATFORM=linux/arm64 ;;
+                *) export CX_IMAGE_PLATFORM=linux/amd64 ;;
+              esac
+              export COLLECTIVEX_EXECUTION_ID="preflight-success-$$"
+              cx_preflight_allocation 42 2 "$2" "$3" ""
+              test ! -e "$2/.collectivex-preflight"
+              cx_cleanup_private_logs 0
+              export COLLECTIVEX_EXECUTION_ID="preflight-node-$$" FAKE_TASKS=1
+              if cx_preflight_allocation 42 2 "$2" "$3" ""; then
+                echo "preflight passed although only one task ran" >&2; exit 1
+              fi
+              test ! -e "$2/.collectivex-preflight"
+              cx_cleanup_private_logs 0
+              export COLLECTIVEX_EXECUTION_ID="preflight-missing-$$"
+              export FAKE_TASKS=2 FAKE_MODE=missing-source
+              if cx_preflight_allocation 42 2 "$2" "$3" ""; then
+                echo "preflight passed although the probe source was removed" >&2; exit 1
+              fi
+              test ! -e "$2/.collectivex-preflight"
+              cx_cleanup_private_logs 0
+              export COLLECTIVEX_EXECUTION_ID="preflight-readonly-$$" FAKE_MODE=readonly
+              if cx_preflight_allocation 42 2 "$2" "$3" ""; then
+                echo "preflight passed although the probe was read-only" >&2; exit 1
+              fi
+              test ! -e "$2/.collectivex-preflight"
+              cx_cleanup_private_logs 0
+            '''
+            subprocess.run(
+                [
+                    "bash", "-c", command, "_",
+                    str(ROOT / "runtime" / "common.sh"), str(mount), str(squash),
+                ],
+                check=True,
+                env={
+                    **os.environ,
+                    "PATH": f"{binary}:{os.environ['PATH']}",
+                    "FAKE_ROOT": str(root),
+                    "FAKE_TASKS": "2",
+                    "COLLECTIVEX_OPERATOR_CONFIG": "/dev/null",
+                },
+            )
+
+    def test_image_pinned_deepep_and_input_integrity_order_are_explicit(self) -> None:
+        """Pin the DeepEP probe text and the pass-one ordering in the harness.
+
+        Both checks are purely textual: the first inspects the slice of
+        ``run_in_container.sh`` between ``cx_probe_deepep()`` and
+        ``cx_activate_deepep_v2()``; the second inspects ``ep_harness.py``.
+        """
+        container_script = (ROOT / "runtime" / "run_in_container.sh").read_text()
+        probe_start = container_script.index("cx_probe_deepep()")
+        probe_end = container_script.index("cx_activate_deepep_v2()")
+        probe_section = container_script[probe_start:probe_end]
+        for pinned_version in (
+            'expected_version="1.2.1"',
+            'expected_version="1.1.0+814e508"',
+        ):
+            self.assertIn(pinned_version, probe_section)
+        # The probe section must not install or fetch anything itself.
+        for forbidden in ("pip install", "cx_fetch_revision"):
+            self.assertNotIn(forbidden, probe_section)
+        for recorded_check in (
+            "Path(deep_ep.__file__).resolve() in recorded_files",
+            "Path(buffer_module.__file__).resolve() in recorded_files",
+        ):
+            self.assertIn(recorded_check, probe_section)
+
+        harness_source = (HERE / "ep_harness.py").read_text()
+        first_pass_start = harness_source.index("# ---- Pass 1")
+        first_pass_end = harness_source.index("# ---- Pass 2")
+        first_pass = harness_source[first_pass_start:first_pass_end]
+        # The input snapshot assignment has to appear before the oracle call.
+        snapshot_at = first_pass.index("input_snapshots[T] =")
+        oracle_at = first_pass.index("oracle = _run_expert_oracle")
+        self.assertLess(snapshot_at, oracle_at)
+        self.assertIn("pre_input_unchanged", first_pass)
+        synchronized_combine = (
+            "hh = prep_combine()\n                        torch.cuda.synchronize()"
+        )
+        self.assertIn(synchronized_combine, harness_source)
+
+    def test_squash_imports_are_reproducible_and_use_a_fresh_cache_key(self) -> None:
+        """Check squash-import text in the shell sources and ``cx_squash_path``.
+
+        The textual part asserts on fragments of ``common.sh`` and the
+        ``mi-amds`` launcher; the executed part calls ``cx_squash_path`` for
+        ``linux/amd64`` and ``linux/arm64`` and compares the printed paths.
+        """
+        common_path = ROOT / "runtime" / "common.sh"
+        common_text = common_path.read_text()
+        amd_text = (ROOT / "launchers" / "launch_mi-amds.sh").read_text()
+
+        self.assertIn('CX_SQUASH_FORMAT_VERSION="repro-v1"', common_text)
+        self.assertIn('SOURCE_DATE_EPOCH="$CX_SQUASH_SOURCE_DATE_EPOCH"', common_text)
+        self.assertIn("${COLLECTIVEX_IMAGE_DIGEST#sha256:}", common_text)
+        self.assertIn("cx_ensure_squash_on_job", amd_text)
+        self.assertIn('"${CX_LOCK_DIR:-}"', amd_text)
+        self.assertNotIn('"${CX_LOCK_DIR:-/tmp}"', amd_text)
+        self.assertIn('[ -n "$lock_dir" ] || lock_dir="$squash_dir/.locks"', common_text)
+        for shell_text in (common_text, amd_text):
+            self.assertGreaterEqual(shell_text.count("--chdir=/tmp"), 2)
+        for enroot_setting in (
+            'ENROOT_CACHE_PATH="$compute_home/enroot-cache"',
+            'ENROOT_RUNTIME_PATH="$compute_home/enroot-run"',
+        ):
+            self.assertIn(enroot_setting, common_text)
+        self.assertEqual(common_text.count('cx_reverify_registry_image "$image"'), 2)
+
+        # Print the squash path once per platform, separated by a newline.
+        shell_script = (
+            f'source "{common_path}"; '
+            'COLLECTIVEX_IMAGE_DIGEST="sha256:$(printf b%.0s {1..64})"; '
+            'CX_IMAGE_PLATFORM=linux/amd64; cx_squash_path /cache repo/image:tag; '
+            'printf "\\n"; CX_IMAGE_PLATFORM=linux/arm64; '
+            'cx_squash_path /cache repo/image:tag'
+        )
+        completed = subprocess.run(
+            ["bash", "-c", shell_script],
+            text=True,
+            capture_output=True,
+        )
+        self.assertEqual(completed.returncode, 0, completed.stderr)
+        digest = "b" * 64
+        expected_paths = [
+            f"/cache/repro-v1_{digest}_repo_image_tag.sqsh",
+            f"/cache/repro-v1_linux_arm64_{digest}_repo_image_tag.sqsh",
+        ]
+        self.assertEqual(completed.stdout.splitlines(), expected_paths)
+
+    def test_launchers_preserve_platform_specific_runtime_requirements(self) -> None:
+        """Assert, by source text, the per-platform launcher settings.
+
+        Covers the single-slurm, gb-nv and mi-amds launchers, two slices of
+        ``common.sh`` and two hashes expected in ``run_in_container.sh``.
+        """
+        launchers = ROOT / "launchers"
+        single_text = (launchers / "launch_single-slurm.sh").read_text()
+        gb_text = (launchers / "launch_gb-nv.sh").read_text()
+        amd_text = (launchers / "launch_mi-amds.sh").read_text()
+        common_text = (ROOT / "runtime" / "common.sh").read_text()
+
+        for fragment in (
+            "ALLOC_EXTRA=(--mem=0)",
+            "ALLOC_EXTRA=(-N 1 --mem=0)",
+            "SRUN_EXTRA=(--mpi=none --container-remap-root)",
+            "CX_ENROOT_LOCAL_IMPORT=1",
+        ):
+            self.assertIn(fragment, single_text)
+        for fragment in (
+            'PRODUCT="${CX_SHARD_SKU:-${CX_GB_PRODUCT:-',
+            "cx_ensure_squash_on_job",
+            "--mem=0 --cpus-per-task=35",
+            "--container-writable",
+            "--container-remap-root",
+        ):
+            self.assertIn(fragment, gb_text)
+
+        # Slice from "workload_args=(" up to the next "workload_log=".
+        stage_start = common_text.index("workload_args=(")
+        stage_end = common_text.index("workload_log=", stage_start)
+        self.assertNotIn("--workload", common_text[stage_start:stage_end])
+
+        for fragment in (
+            "mi325x) CPUS_PER_TASK=256",
+            "/dev/kfd:/dev/kfd,/dev/dri:/dev/dri",
+            "--container-writable --container-remap-root",
+            "CX_DISTRIBUTED_CONTAINER_ARGS=(--container-writable --container-remap-root)",
+        ):
+            self.assertIn(fragment, amd_text)
+
+        collect_start = common_text.index("cx_collect_results()")
+        collect_end = common_text.index("cx_cleanup_stage()")
+        collect_section = common_text[collect_start:collect_end]
+        cleanup_start = common_text.index("cx_launcher_cleanup()")
+        cleanup_end = common_text.index("cx_install_launcher_fail_safe()")
+        cleanup_section = common_text[cleanup_start:cleanup_end]
+        self.assertNotIn("cx_cleanup_stage", collect_section)
+        # In the cleanup slice, the job cancel has to precede the stage cleanup.
+        self.assertLess(
+            cleanup_section.index("cx_cancel_job"),
+            cleanup_section.index("cx_cleanup_stage"),
+        )
+
+        container_text = (ROOT / "runtime" / "run_in_container.sh").read_text()
+        for fragment in (
+            'distribution.read_text("direct_url.json")',
+            "6548e9c504a12b2471af4b7f4d9546321210a57a456b5dc55bd4a8dad0f932ac",
+            "2671cff7baf8c2c214ff4bac721af875d513130670bec57601998bd1aae82882",
+        ):
+            self.assertIn(fragment, container_text)
+
+    def test_deferred_backend_provenance_resolves_before_measurement(self) -> None:
+        """Check that three markers occur in a fixed order in ``ep_harness.py``.
+
+        The conditioning loop must come first, then the deferred-provenance
+        comment, then the "Pass 1" marker.
+        """
+        harness_text = (ROOT / "tests" / "ep_harness.py").read_text()
+        ordered_markers = (
+            "for wt in conditioning_ladder",
+            "# Setup may materialize deferred provenance",
+            "# ---- Pass 1: build each deterministic problem",
+        )
+        conditioning_at, provenance_at, measurement_at = map(
+            harness_text.index, ordered_markers
+        )
+        self.assertLess(conditioning_at, provenance_at)
+        self.assertLess(provenance_at, measurement_at)
+
+    def test_backend_specific_routing_contracts_are_explicit(self) -> None:
+        """Assert, by source text, routing fragments in the backend adapters.
+
+        Reads ``ep_deepep_hybrid.py``, ``ep_mori.py`` and ``ep_harness.py`` and
+        checks that specific statements are present (or absent) verbatim.
+        """
+        tests_dir = ROOT / "tests"
+
+        hybrid_text = (tests_dir / "ep_deepep_hybrid.py").read_text()
+        for fragment in (
+            "self.domain_rank = int(self.buffer.local_rank)",
+            "probability_columns = self.domain_rank * self.local_experts + local_expert_ids",
+            "h.recv_probs[:count][rows, probability_columns]",
+        ):
+            self.assertIn(fragment, hybrid_text)
+
+        mori_text = (tests_dir / "ep_mori.py").read_text()
+        for fragment in (
+            "topk_idx=indices",
+            "indices=indices",
+            "combine_indices = p.indices if self._async_ll else h.dispatch_indices",
+            "h.combine_input,\n            None,\n            combine_indices",
+            '"use_external_inp_buf": self._external_input',
+            "self.block_num = self._block_target = 64",
+            'config_kwargs["block_num"] = self.block_num',
+            'config_kwargs["warp_num_per_block"] = self.dispatch_warps',
+            "count > tensor.size(0)",
+            "return combined[:p.T]",
+        ):
+            self.assertIn(fragment, mori_text)
+        # An unsliced return of the combined tensor must not be present.
+        self.assertNotIn("return combined\n", mori_text)
+        self.assertIn("raw_expert_ids < local_start + experts_per_rank", mori_text)
+        self.assertNotIn("MoRI returned a non-local expert", mori_text)
+
+        harness_text = (tests_dir / "ep_harness.py").read_text()
+        self.assertIn("problem.recv_tokens = backend.recv_tokens(handle)", harness_text)
+
+    def test_mori_masks_global_topk_metadata_to_the_local_rank(self) -> None:
+        """Run two helpers from ``ep_mori.py`` in isolation and check a profile.
+
+        The helpers are pulled out of the module's AST and executed on their
+        own, so ``ep_mori.py`` itself is never imported. Three things are
+        verified: the output of ``_project_local_metadata`` on a small numpy
+        input, the behaviour of ``_mori_source_commit`` for a hash-valued and a
+        symbolic ``.git/HEAD``, and two ``None`` fields returned by
+        ``contracts.project_resource_profile``.
+        """
+        path = HERE / "ep_mori.py"
+        tree = ast.parse(path.read_text(), str(path))
+        # Locate the top-level function definition by name.
+        helper = next(
+            node
+            for node in tree.body
+            if isinstance(node, ast.FunctionDef) and node.name == "_project_local_metadata"
+        )
+        namespace: dict[str, object] = {}
+        # Compile only that function into an empty namespace.
+        exec(compile(ast.Module(body=[helper], type_ignores=[]), str(path), "exec"), namespace)
+        raw_ids = np.array([[0, 32, 63, -1], [64, 95, 7, 96]], dtype=np.int64)
+        raw_weights = np.arange(8, dtype=np.float32).reshape(2, 4)
+        # Stand-in for the torch module exposing only the three functions
+        # passed here, backed by their numpy equivalents.
+        torch_module = types.SimpleNamespace(
+            where=np.where,
+            full_like=np.full_like,
+            zeros_like=np.zeros_like,
+        )
+        # NOTE(review): the trailing 1 and 32 are presumably the rank index and
+        # the experts per rank; that matches the expectation below, where only
+        # ids 32 and 63 survive. Confirm against the helper's signature.
+        ids, weights, local_ids = namespace["_project_local_metadata"](
+            torch_module, raw_ids, raw_weights, 1, 32
+        )
+        np.testing.assert_array_equal(
+            ids,
+            np.array([[-1, 32, 63, -1], [-1, -1, -1, -1]], dtype=np.int64),
+        )
+        np.testing.assert_array_equal(
+            weights,
+            np.array([[0, 1, 2, 0], [0, 0, 0, 0]], dtype=np.float32),
+        )
+        # Exactly two local ids are expected: one at index 0 and one at index 31.
+        counts = np.bincount(local_ids, minlength=32)
+        self.assertEqual((counts[0], counts[31], int(counts.sum())), (1, 1, 2))
+        commit_helper = next(
+            node for node in tree.body
+            if isinstance(node, ast.FunctionDef) and node.name == "_mori_source_commit"
+        )
+        with tempfile.TemporaryDirectory() as temporary:
+            # Fake checkout: <root>/python/mori/__init__.py next to <root>/.git/HEAD.
+            root = Path(temporary)
+            module = root / "python" / "mori" / "__init__.py"
+            module.parent.mkdir(parents=True)
+            module.touch()
+            git = root / ".git"
+            git.mkdir()
+            (git / "HEAD").write_text("a" * 40 + "\n")
+            # Globals handed to the helper; ``mori`` is a stub whose __file__
+            # points into the fake checkout.
+            commit_namespace = {
+                "Path": Path,
+                "re": re,
+                "mori": types.SimpleNamespace(__file__=str(module)),
+            }
+            exec(
+                compile(ast.Module(body=[commit_helper], type_ignores=[]), str(path), "exec"),
+                commit_namespace,
+            )
+            self.assertEqual(commit_namespace["_mori_source_commit"](), "a" * 40)
+            # A symbolic ref in HEAD must raise instead of returning a value.
+            (git / "HEAD").write_text("ref: refs/heads/main\n")
+            with self.assertRaisesRegex(RuntimeError, "detached commit"):
+                commit_namespace["_mori_source_commit"]()
+
+        profile = contracts.project_resource_profile(
+            {
+                "block_num": 64,
+                "device_cus": 304,
+                "kernel_type": "AsyncLL",
+                "tuned_source": "upstream-asyncll-64x8-external-input",
+            }
+        )
+        self.assertIsNone(profile["comm_units_kind"])
+        self.assertIsNone(profile["configured_units"])
+
+    def test_squash_identity_rehashes_instead_of_trusting_a_sidecar(self) -> None:
+        """Check that the exported squash hash is that of the image bytes.
+
+        A ``.sha256`` sidecar holding a wrong digest, with a newer mtime than
+        the image, is placed beside it; ``cx_export_squash_identity`` must still
+        yield the SHA-256 of the image content.
+        """
+        shell_script = (
+            'source "$1"; COLLECTIVEX_EXECUTION_ID="squash-hash-$$"; '
+            'cx_export_squash_identity "$2"; cx_cleanup_private_logs 0; '
+            'printf "%s" "$COLLECTIVEX_SQUASH_SHA256"'
+        )
+        with tempfile.TemporaryDirectory() as temporary:
+            image = Path(temporary) / "image.sqsh"
+            image.write_bytes(b"current squash bytes")
+            stale_sidecar = Path(f"{image}.sha256")
+            stale_sidecar.write_text("a" * 64)
+            # Make the sidecar look ten seconds newer than the image.
+            newer = image.stat().st_mtime + 10
+            os.utime(stale_sidecar, (newer, newer))
+            completed = subprocess.run(
+                ["bash", "-c", shell_script, "_", str(ROOT / "runtime" / "common.sh"), str(image)],
+                text=True,
+                capture_output=True,
+            )
+            self.assertEqual(completed.returncode, 0, completed.stderr)
+            actual_digest = hashlib.sha256(image.read_bytes()).hexdigest()
+            self.assertEqual(completed.stdout, actual_digest)
+
+    def _run_salloc_scenario(
+        self, salloc_body: str, squeue_body: str, *, cleanup: bool
+    ) -> dict[str, object]:
+        """Run ``cx_salloc_jobid`` against stub scheduler commands.
+
+        Stub ``salloc``, ``squeue``, ``sleep`` and ``scancel`` scripts are
+        written to a private directory placed first on ``PATH``. Each records
+        how it was called; ``salloc`` and ``squeue`` then run the caller-supplied
+        shell bodies.
+
+        Args:
+            salloc_body: Shell text appended to the stub ``salloc``.
+            squeue_body: Shell text appended to the stub ``squeue``.
+            cleanup: When true, the driver also runs ``cx_launcher_cleanup``
+                with ``cx_write_cleanup_guard`` replaced by a marker-file writer.
+
+        Returns:
+            A dict with the completed process (``result``), the expected job
+            name (``job_name``), the recorded ``arguments``, ``squeue_calls``,
+            ``sleep_calls`` and ``scancel_calls`` as lists of lines, and the
+            booleans ``cleanup_safe`` / ``cleanup_unsafe`` telling which marker
+            file exists.
+        """
+        prefix = f"inferencex-collectivex-{os.getpid()}-1-"
+        with tempfile.TemporaryDirectory(prefix=prefix, dir="/tmp") as temporary:
+            root = Path(temporary)
+            command_dir = root / "bin"
+            repo = root / "repo"
+            command_dir.mkdir()
+            repo.mkdir()
+            # One capture file per stub command.
+            paths = {
+                name: root / name
+                for name in ("arguments", "squeue-calls", "sleep-calls", "scancel-calls")
+            }
+            # salloc overwrites its capture file (one argument per line); the
+            # other stubs append one line per call.
+            scripts = {
+                "salloc": (
+                    "printf '%s\\n' \"$@\" > \"$CX_TEST_SALLOC_ARGUMENTS\"\n"
+                    + salloc_body
+                ),
+                "squeue": (
+                    "printf '%s\\n' \"$*\" >> \"$CX_TEST_SQUEUE_CALLS\"\n"
+                    + squeue_body
+                ),
+                "sleep": "printf '%s\\n' \"$1\" >> \"$CX_TEST_SLEEP_CALLS\"\n",
+                "scancel": (
+                    "printf '%s\\n' \"$*\" >> \"$CX_TEST_SCANCEL_CALLS\"\n"
+                ),
+            }
+            for name, body in scripts.items():
+                path = command_dir / name
+                path.write_text(f"#!/usr/bin/env bash\n{body}\n")
+                path.chmod(0o700)
+            execution_id = f"scheduler-{root.name}"
+            # Job name the callers expect: "cx-" plus the first 24 hex digits of
+            # the SHA-256 of the execution id.
+            expected_name = "cx-" + hashlib.sha256(
+                execution_id.encode()
+            ).hexdigest()[:24]
+            # Driver: call cx_salloc_jobid with errexit off, print
+            # "<rc>:<JOB_ID>:<CX_ALLOCATION_UNCERTAIN>", optionally run the
+            # launcher cleanup, then exit with the salloc helper's status.
+            command = r'''
+              source "$1"
+              JOB_ID=""
+              set +e
+              cx_salloc_jobid --partition=compute
+              run_rc=$?
+              set -e
+              printf '%s:%s:%s\n' \
+                "$run_rc" "$JOB_ID" "$CX_ALLOCATION_UNCERTAIN"
+              cx_cleanup_private_logs 0
+              if [ "$3" = cleanup ]; then
+                export CX_JOB_ROOT="$2" REPO_ROOT="$2/repo" MOUNT_SRC="$2/repo"
+                export COLLECTIVEX_CANONICAL_GHA=1
+                cx_write_cleanup_guard() {
+                  rm -f -- "$CX_JOB_ROOT/cleanup-safe" "$CX_JOB_ROOT/cleanup-unsafe"
+                  : > "$CX_JOB_ROOT/cleanup-$1"
+                }
+                unset CX_BENCH
+                cx_launcher_cleanup "$run_rc"
+              fi
+              exit "$run_rc"
+            '''
+            result = subprocess.run(
+                [
+                    "bash", "-c", command, "_", str(ROOT / "runtime" / "common.sh"),
+                    str(root), "cleanup" if cleanup else "no-cleanup",
+                ],
+                text=True,
+                capture_output=True,
+                env={
+                    **os.environ,
+                    # Stub directory first so the stubs shadow real commands.
+                    "PATH": f"{command_dir}:{os.environ['PATH']}",
+                    "COLLECTIVEX_EXECUTION_ID": execution_id,
+                    "CX_TEST_SALLOC_ARGUMENTS": str(paths["arguments"]),
+                    "CX_TEST_SQUEUE_CALLS": str(paths["squeue-calls"]),
+                    "CX_TEST_SLEEP_CALLS": str(paths["sleep-calls"]),
+                    "CX_TEST_SCANCEL_CALLS": str(paths["scancel-calls"]),
+                },
+            )
+            # Everything is read before the temporary directory is removed. The
+            # salloc capture is read unconditionally; the other captures
+            # default to an empty list when the stub was never called.
+            return {
+                "result": result,
+                "job_name": expected_name,
+                "arguments": paths["arguments"].read_text().splitlines(),
+                "squeue_calls": (
+                    paths["squeue-calls"].read_text().splitlines()
+                    if paths["squeue-calls"].exists() else []
+                ),
+                "sleep_calls": (
+                    paths["sleep-calls"].read_text().splitlines()
+                    if paths["sleep-calls"].exists() else []
+                ),
+                "scancel_calls": (
+                    paths["scancel-calls"].read_text().splitlines()
+                    if paths["scancel-calls"].exists() else []
+                ),
+                "cleanup_safe": (root / "cleanup-safe").is_file(),
+                "cleanup_unsafe": (root / "cleanup-unsafe").is_file(),
+            }
+
+    def test_salloc_job_id_parser_uses_the_portable_grant_message(self) -> None:
+        """A "Granted job allocation" line on stderr yields the job id.
+
+        The stub ``salloc`` prints the grant message and succeeds, so the driver
+        must report ``0:4242:0``, pass the expected arguments to ``salloc``
+        and never call ``squeue``.
+        """
+        outcome = self._run_salloc_scenario(
+            salloc_body="printf 'salloc: Granted job allocation 4242\\n' >&2",
+            squeue_body="exit 2",
+            cleanup=False,
+        )
+        completed = outcome["result"]
+        self.assertIsInstance(completed, subprocess.CompletedProcess)
+        self.assertEqual(completed.returncode, 0, completed.stderr)
+        self.assertEqual(completed.stdout, "0:4242:0\n")
+        expected_arguments = [
+            "--partition=compute",
+            f"--job-name={outcome['job_name']}",
+            "--no-shell",
+        ]
+        self.assertEqual(outcome["arguments"], expected_arguments)
+        self.assertEqual(outcome["squeue_calls"], [])
+
+    def test_salloc_verified_rejection_is_cleanup_safe(self) -> None:
+        """A failed ``salloc`` with three empty lookups ends cleanup-safe.
+
+        ``squeue`` succeeds with no output; the test expects exactly three
+        lookups filtered by job name and user, sleeps of 1 and 2 between
+        them, and the ``cleanup-safe`` marker only.
+        """
+        outcome = self._run_salloc_scenario(
+            salloc_body="exit 1", squeue_body="exit 0", cleanup=True
+        )
+        completed = outcome["result"]
+        self.assertEqual(completed.returncode, 1)
+        self.assertEqual(completed.stdout, "1::0\n")
+        lookups = outcome["squeue_calls"]
+        self.assertEqual(len(lookups), 3)
+        scheduler_user = subprocess.check_output(["id", "-un"], text=True).strip()
+        name_filter = f"--name={outcome['job_name']}"
+        user_filter = f"--user={scheduler_user}"
+        self.assertTrue(
+            all(name_filter in call and user_filter in call for call in lookups)
+        )
+        self.assertEqual(outcome["sleep_calls"], ["1", "2"])
+        self.assertTrue(outcome["cleanup_safe"])
+        self.assertFalse(outcome["cleanup_unsafe"])
+
+    def test_salloc_recovers_and_cancels_one_matching_allocation(self) -> None:
+        """A single job found by name after a failed ``salloc`` is cancelled.
+
+        The stub ``squeue`` prints job 5151 for a ``--name=`` lookup and
+        succeeds silently for ``-j 5151``; the test expects that id to be
+        reported, cancelled once, and the run to end cleanup-safe.
+        """
+        outcome = self._run_salloc_scenario(
+            salloc_body="exit 1",
+            squeue_body=r'''
+              case " $* " in
+                *" --name="*) printf '5151\n' ;;
+                *" -j 5151 "*) exit 0 ;;
+                *) exit 2 ;;
+              esac
+            ''',
+            cleanup=True,
+        )
+        completed = outcome["result"]
+        self.assertEqual(completed.returncode, 1)
+        self.assertEqual(completed.stdout, "1:5151:0\n")
+        self.assertEqual(outcome["scancel_calls"], ["5151"])
+        self.assertTrue(outcome["cleanup_safe"])
+        self.assertFalse(outcome["cleanup_unsafe"])
+
+    def test_salloc_ambiguous_lookup_remains_cleanup_unsafe(self) -> None:
+        """Two job ids from the lookup leave the allocation uncertain.
+
+        Nothing may be cancelled and only the ``cleanup-unsafe`` marker may
+        be written.
+        """
+        outcome = self._run_salloc_scenario(
+            salloc_body="exit 1",
+            squeue_body="printf '5151\\n5152\\n'",
+            cleanup=True,
+        )
+        completed = outcome["result"]
+        self.assertEqual(completed.returncode, 1)
+        self.assertEqual(completed.stdout, "1::1\n")
+        self.assertEqual(outcome["scancel_calls"], [])
+        self.assertFalse(outcome["cleanup_safe"])
+        self.assertTrue(outcome["cleanup_unsafe"])
+
+    def test_salloc_query_failure_and_interruption_remain_cleanup_unsafe(self) -> None:
+        """A failing lookup or an interrupted ``salloc`` ends cleanup-unsafe.
+
+        With ``squeue`` exiting 2, exactly one lookup is expected; with
+        ``salloc`` exiting 130, no lookup is expected at all. Both runs must
+        exit 1 and write only the ``cleanup-unsafe`` marker.
+        """
+        failed_query = self._run_salloc_scenario(
+            salloc_body="exit 1", squeue_body="exit 2", cleanup=True
+        )
+        self.assertEqual(failed_query["result"].returncode, 1)
+        self.assertEqual(len(failed_query["squeue_calls"]), 1)
+        self.assertFalse(failed_query["cleanup_safe"])
+        self.assertTrue(failed_query["cleanup_unsafe"])
+
+        interruption = self._run_salloc_scenario(
+            salloc_body="exit 130", squeue_body="exit 0", cleanup=True
+        )
+        self.assertEqual(interruption["result"].returncode, 1)
+        self.assertEqual(interruption["squeue_calls"], [])
+        self.assertFalse(interruption["cleanup_safe"])
+        self.assertTrue(interruption["cleanup_unsafe"])
+
+    def test_allocation_cleanup_fails_closed_when_scheduler_queries_fail(self) -> None:  # cx_cancel_job must fail when squeue fails; workflow cleanup wiring is pinned by text
+        with tempfile.TemporaryDirectory() as temporary:
+            directory = Path(temporary)
+            for name, body in {  # stub scheduler commands, written as bash scripts into the temp directory
+                "scancel": "exit 0",
+                "squeue": "exit 2",  # every queue query fails
+                "sleep": "exit 0",  # stub exits at once
+            }.items():
+                command = directory / name
+                command.write_text(f"#!/usr/bin/env bash\n{body}\n")
+                command.chmod(0o700)
+            result = subprocess.run(
+                [
+                    "bash", "-c",
+                    'source "$1"; cx_cancel_job 4242',  # $1 is the common.sh path passed below
+                    "_", str(ROOT / "runtime" / "common.sh"),
+                ],
+                text=True,
+                capture_output=True,
+                env={**os.environ, "PATH": f"{directory}:{os.environ['PATH']}"},  # stubs shadow the real commands
+            )
+            self.assertNotEqual(result.returncode, 0)
+            self.assertIn("did not terminate", result.stderr)
+
+        workflow = (ROOT.parent.parent / ".github" / "workflows" / "collectivex-sweep.yml").read_text()  # remaining checks are substring assertions on the workflow text
+        self.assertIn("cleanup-unsafe", workflow)
+        self.assertIn("cleanup-safe", workflow)
+        self.assertIn("Confirm allocation cleanup", workflow)
+        self.assertIn("Prepare pinned backend source archive", workflow)
+        self.assertIn("Install pinned backend source seed", workflow)
+        self.assertIn("CX_BACKEND_SOURCE_SEED_ROOT", workflow)
+        self.assertIn("steps.gen.outputs.source_backends", workflow)
+        self.assertIn('python3 "$destination/source_archive.py"', workflow)
+        artifact_validation = workflow[workflow.index("- name: Validate shard artifact safety"):]  # from this step name to end of file
+        self.assertIn("steps.allocation_cleanup.outcome == 'success'", artifact_validation)
+        self.assertIn(
+            "inputs.operation != 'probe-precision' || steps.sweep_shard.outcome == 'success'",
+            artifact_validation,
+        )
+        cleanup_function = (ROOT / "runtime" / "common.sh").read_text()  # whole common.sh text, not a single function
+        self.assertIn('[ "${CX_PRECISION_PROBE:-0}" != 1 ]', cleanup_function)
+        sweep_workflow = workflow[workflow.index("  sweep:"):]  # from the "  sweep:" key to end of file
+        self.assertNotIn("GITHUB_WORKSPACE", sweep_workflow)
+        self.assertNotIn("RUNNER_WORKSPACE", sweep_workflow)
+        self.assertIn('CX_SOURCE_ROOT: /tmp/inferencex-collectivex-', sweep_workflow)
+        source_step = sweep_workflow[:sweep_workflow.index("- uses: actions/download-artifact")]  # text before the first download-artifact step
+        self.assertNotIn("unsafe_guards=", source_step)
+        self.assertIn("cutoff = time.time() - 86400", source_step)
+        self.assertIn("stat.S_IMODE(metadata.st_mode) != 0o700", source_step)
+        self.assertIn('for marker_name in ("cleanup-safe", "cleanup-unsafe")', source_step)
+        self.assertIn("stat.S_IMODE(marker.st_mode) == 0o600", source_step)
+        self.assertIn("shutil.rmtree(entry.path)", source_step)
+        self.assertLess(  # 'rev-parse HEAD' must appear before the prepared=true echo
+            source_step.index('rev-parse HEAD'),
+            source_step.index("echo 'prepared=true'"),
+        )
+        upload = workflow[workflow.index("- name: Stage shard artifact"):]
+        self.assertIn("id: stage_artifact", upload)
+        self.assertIn("id: upload_artifact", upload)
+        self.assertIn("steps.stage_artifact.outcome == 'success'", upload)
+        cleanup = workflow[workflow.index("- name: Cleanup isolated workspace"):]
+        for step in (  # the cleanup step text must reference the outcome of each of these step ids
+            "sweep_shard", "allocation_cleanup", "artifact_safety",
+            "delivery_contracts", "stage_artifact", "upload_artifact",
+        ):
+            self.assertIn(f"steps.{step}.outcome", cleanup)
+        self.assertLess(  # the cleanup-safe test text must precede the rm -rf of the job root
+            cleanup.index('cleanup-safe" ]'),
+            cleanup.index('rm -rf -- "$CX_JOB_ROOT"'),
+        )
+
+    def test_v1_publication_requires_explicit_release_markers(self) -> None:  # pins V1 release/publish markers as substrings of the sweep workflow text
+        workflows = ROOT.parent.parent / ".github" / "workflows"
+        sweep = (workflows / "collectivex-sweep.yml").read_text()
+        self.assertFalse((workflows / "collectivex-publish.yml").exists())  # no separate publish workflow file may exist
+
+        self.assertIn("options: [sweep, probe-precision, publish-v1, refresh-v1]", sweep)
+        self.assertIn("collectivex.precision-probe-plan.v1", (ROOT / "tests" / "probe_precision.py").read_text())
+        self.assertIn("cxprecision-probes-${{ github.run_id }}-${{ github.run_attempt }}", sweep)
+        self.assertIn("--validate-bundle", sweep)
+        self.assertIn("release_tag:", sweep)
+        self.assertIn("default: unversioned", sweep)
+        self.assertIn("options: [unversioned, v1]", sweep)
+        self.assertIn("qualification_index:", sweep)
+        self.assertIn("inputs.release_tag == 'v1'", sweep)
+        self.assertIn("collectivex.release-tag.v1", sweep)
+        self.assertIn("V1 release tag requires the locked full matrix", sweep)
+        self.assertIn("EXPECTED_MATRIX_SHA256", sweep)
+        self.assertIn("cxrelease-v1-${{ github.run_id }}-${{ github.run_attempt }}", sweep)
+
+        self.assertIn("publish_run_ids must contain exactly three IDs", sweep)  # error messages the publish path must contain
+        self.assertIn("source runs do not share one source SHA", sweep)
+        self.assertIn("cxrelease-v1-$run_id-$attempt/release.json", sweep)
+        self.assertIn("run $run_id is not tagged for V1 publication", sweep)
+        self.assertIn("ref: ${{ steps.runs.outputs.source_sha }}", sweep)
+        self.assertIn("[ \"$attempt\" = 1 ]", sweep)
+        self.assertIn("cxpublication-v1-${{ github.run_id }}-${{ github.run_attempt }}", sweep)
+        self.assertIn("refresh source bytes differ from their requested digest", sweep)
+        self.assertIn("retention-days: 90", sweep)
+        self.assertNotIn("workflow_run:", sweep)  # the workflow must not declare a workflow_run trigger
+
+    def test_source_archive_preserves_only_contained_leaf_symlinks(self) -> None:  # exercises extract_source_archive on one valid tar and many rejected variants
+        selected = "deepep-hybrid-pinned"  # root requested from the extractor
+        other = "deepep-v2-pinned"  # second root present in the archive but not requested
+
+        def directory(name: str) -> tarfile.TarInfo:  # build a directory member with mode 0o755
+            member = tarfile.TarInfo(name)
+            member.type = tarfile.DIRTYPE
+            member.mode = 0o755
+            return member
+
+        def regular(  # build a regular-file member plus the stream holding its payload
+            name: str, payload: bytes, mode: int = 0o644
+        ) -> tuple[tarfile.TarInfo, io.BytesIO]:
+            member = tarfile.TarInfo(name)
+            member.size = len(payload)
+            member.mode = mode
+            return member, io.BytesIO(payload)
+
+        def symbolic(name: str, target: str) -> tarfile.TarInfo:  # build a symlink member pointing at target
+            member = tarfile.TarInfo(name)
+            member.type = tarfile.SYMTYPE
+            member.linkname = target
+            member.mode = 0o777
+            return member
+
+        def write_archive(path: Path, extras: list[tarfile.TarInfo] | None = None) -> None:  # write the baseline tar, then append extras without payloads
+            root = f".cx_sources/{selected}"
+            with tarfile.open(path, "w") as archive:
+                for name in (
+                    ".cx_sources", root, f"{root}/third-party",
+                    f"{root}/third-party/nccl", f"{root}/third-party/nccl/pkg",
+                    f"{root}/third-party/nccl/pkg/debian",
+                    f".cx_sources/{other}",
+                ):
+                    archive.addfile(directory(name))
+                member, stream = regular(
+                    f"{root}/third-party/nccl/LICENSE.txt", b"license\n"
+                )
+                archive.addfile(member, stream)
+                member, stream = regular(f".cx_sources/{other}/sentinel", b"other\n")
+                archive.addfile(member, stream)
+                member, stream = regular(f"{root}/group-executable", b"exec\n", 0o010)  # archived with only the group-execute bit set
+                archive.addfile(member, stream)
+                archive.addfile(symbolic(  # relative link that resolves to LICENSE.txt inside the selected root
+                    f"{root}/third-party/nccl/pkg/debian/copyright",
+                    "../../LICENSE.txt",
+                ))
+                for member in extras or []:
+                    archive.addfile(member)
+            path.chmod(0o600)
+
+        with tempfile.TemporaryDirectory() as temporary:  # happy path: the baseline archive extracts
+            root = Path(temporary).resolve()
+            archive = root / "source.tar"
+            destination = root / "destination"
+            destination.mkdir(mode=0o700)
+            write_archive(archive)
+            source_archive.extract_source_archive(archive, destination, selected)
+            link = (
+                destination / ".cx_sources" / selected / "third-party" / "nccl"
+                / "pkg" / "debian" / "copyright"
+            )
+            self.assertTrue(link.is_symlink())
+            self.assertEqual(os.readlink(link), "../../LICENSE.txt")  # link target text is kept verbatim
+            self.assertEqual(link.read_text(), "license\n")
+            self.assertFalse((destination / ".cx_sources" / other).exists())  # the unselected root is not extracted
+            extracted = destination / ".cx_sources" / selected
+            self.assertEqual(  # the 0o010 member is expected on disk as 0o700
+                stat.S_IMODE((extracted / "group-executable").stat().st_mode), 0o700
+            )
+            self.assertEqual(  # the 0o644 member is expected on disk as 0o600
+                stat.S_IMODE(
+                    (extracted / "third-party" / "nccl" / "LICENSE.txt").stat().st_mode
+                ),
+                0o600,
+            )
+
+        invalid: dict[str, list[tarfile.TarInfo]] = {  # label -> extra members; each case must raise in the loop below
+            "absolute member": [directory("/outside")],
+            "traversal member": [directory(".cx_sources/../outside")],
+            "duplicate member": [directory(f".cx_sources/{selected}")],  # same name as a baseline directory
+            "absolute link": [symbolic(f".cx_sources/{selected}/absolute", "/tmp/x")],
+            "escaping link": [symbolic(f".cx_sources/{selected}/escape", "../x")],
+            "cross-root link": [
+                symbolic(f".cx_sources/{selected}/cross", f"../{other}/sentinel")
+            ],
+            "missing target": [symbolic(f".cx_sources/{selected}/missing", "none")],
+        }
+        hardlink = tarfile.TarInfo(f".cx_sources/{selected}/hard")
+        hardlink.type = tarfile.LNKTYPE
+        hardlink.linkname = f".cx_sources/{selected}/third-party/nccl/LICENSE.txt"
+        invalid["hardlink"] = [hardlink]
+        fifo = tarfile.TarInfo(f".cx_sources/{selected}/fifo")
+        fifo.type = tarfile.FIFOTYPE
+        invalid["fifo"] = [fifo]
+        character = tarfile.TarInfo(f".cx_sources/{selected}/character")
+        character.type = tarfile.CHRTYPE
+        invalid["character device"] = [character]
+        block = tarfile.TarInfo(f".cx_sources/{selected}/block")
+        block.type = tarfile.BLKTYPE
+        invalid["block device"] = [block]
+        unknown = tarfile.TarInfo(f".cx_sources/{selected}/unknown")
+        unknown.type = b"Z"  # a type byte that matches none of the tarfile constants used above
+        invalid["unknown type"] = [unknown]
+        invalid["unsafe unselected root"] = [  # bad link under the root that was not requested
+            symbolic(f".cx_sources/{other}/escape", f"../{selected}/group-executable")
+        ]
+        chain_target = symbolic(
+            f".cx_sources/{selected}/chain-target", "third-party/nccl/LICENSE.txt"
+        )
+        invalid["symlink chain"] = [  # a symlink whose target is itself a symlink
+            chain_target, symbolic(f".cx_sources/{selected}/chain", "chain-target")
+        ]
+        linked_child = tarfile.TarInfo(f".cx_sources/{selected}/linked-file/child")  # member whose parent path component is the symlink added with it
+        invalid["symlink parent"] = [
+            symbolic(
+                f".cx_sources/{selected}/linked-file",
+                "third-party/nccl/LICENSE.txt",
+            ),
+            linked_child,
+        ]
+        for label, extras in invalid.items():
+            with self.subTest(label=label), tempfile.TemporaryDirectory() as temporary:
+                root = Path(temporary).resolve()
+                archive = root / "source.tar"
+                destination = root / "destination"
+                destination.mkdir(mode=0o700)
+                write_archive(archive, extras)
+                with self.assertRaises(source_archive.SourceArchiveError):
+                    source_archive.extract_source_archive(archive, destination, selected)
+                self.assertFalse((destination / ".cx_sources").exists())  # rejection leaves nothing behind
+
+        with tempfile.TemporaryDirectory() as temporary:  # a pre-existing .cx_sources is rejected and left untouched
+            root = Path(temporary).resolve()
+            archive = root / "source.tar"
+            destination = root / "destination"
+            destination.mkdir(mode=0o700)
+            existing = destination / ".cx_sources"
+            existing.mkdir(mode=0o700)
+            marker = existing / "marker"
+            marker.write_text("existing\n")
+            write_archive(archive)
+            with self.assertRaises(source_archive.SourceArchiveError):
+                source_archive.extract_source_archive(archive, destination, selected)
+            self.assertEqual(marker.read_text(), "existing\n")
+
+        with tempfile.TemporaryDirectory() as temporary:  # destination given as a symlink, then one with mode 0o755
+            root = Path(temporary).resolve()
+            archive = root / "source.tar"
+            write_archive(archive)
+            real_destination = root / "real-destination"
+            real_destination.mkdir(mode=0o700)
+            linked_destination = root / "linked-destination"
+            linked_destination.symlink_to(real_destination, target_is_directory=True)
+            with self.assertRaises((OSError, source_archive.SourceArchiveError)):  # either exception type is accepted here
+                source_archive.extract_source_archive(archive, linked_destination, selected)
+            self.assertFalse((real_destination / ".cx_sources").exists())
+
+            unsafe_destination = root / "unsafe-destination"
+            unsafe_destination.mkdir(mode=0o700)
+            unsafe_destination.chmod(0o755)  # explicit chmod to a mode other than 0o700
+            with self.assertRaises(source_archive.SourceArchiveError):
+                source_archive.extract_source_archive(archive, unsafe_destination, selected)
+            self.assertFalse((unsafe_destination / ".cx_sources").exists())
+
+        for limit, value in (  # each module limit patched to 1 must make the baseline archive fail
+            ("MAX_ARCHIVE_MEMBERS", 1),
+            ("MAX_MEMBER_BYTES", 1),
+            ("MAX_EXPANDED_BYTES", 1),
+            ("MAX_ARCHIVE_BYTES", 1),
+            ("MAX_ARCHIVE_HEADERS", 1),
+        ):
+            with self.subTest(limit=limit), tempfile.TemporaryDirectory() as temporary:
+                root = Path(temporary).resolve()
+                archive = root / "source.tar"
+                destination = root / "destination"
+                destination.mkdir(mode=0o700)
+                write_archive(archive)
+                with mock.patch.object(source_archive, limit, value):
+                    with self.assertRaises(source_archive.SourceArchiveError):
+                        source_archive.extract_source_archive(archive, destination, selected)
+                self.assertFalse((destination / ".cx_sources").exists())
+
+        with tempfile.TemporaryDirectory() as temporary:  # extension-header limits, using appended GNU long-name headers
+            root = Path(temporary).resolve()
+            archive = root / "source.tar"
+            destination = root / "destination"
+            destination.mkdir(mode=0o700)
+            write_archive(archive)
+            long_name = f".cx_sources/{selected}/long-name-result\0".encode()  # NUL-terminated long-name payload
+            with tarfile.open(archive, "a") as handle:
+                for _ in range(3):  # three long-name headers in a row before one regular member
+                    extension = tarfile.TarInfo("././@LongLink")
+                    extension.type = tarfile.GNUTYPE_LONGNAME
+                    extension.size = len(long_name)
+                    handle.addfile(extension, io.BytesIO(long_name))
+                member, stream = regular("placeholder", b"payload\n")
+                handle.addfile(member, stream)
+            archive.chmod(0o600)
+            for limit, value in (
+                ("MAX_EXTENSION_CHAIN", 1),
+                ("MAX_EXTENSION_MEMBER_BYTES", 1),
+                ("MAX_EXTENSION_BYTES", len(long_name) * 2),  # two payloads allowed; the archive carries three
+            ):
+                with self.subTest(limit=limit), mock.patch.object(
+                    source_archive, limit, value
+                ):
+                    with self.assertRaises(source_archive.SourceArchiveError):
+                        source_archive.extract_source_archive(
+                            archive, destination, selected
+                        )
+                    self.assertFalse((destination / ".cx_sources").exists())
+
+        with tempfile.TemporaryDirectory() as temporary:  # member carrying GNU.sparse.* PAX headers must be rejected
+            root = Path(temporary).resolve()
+            archive = root / "source.tar"
+            destination = root / "destination"
+            destination.mkdir(mode=0o700)
+            write_archive(archive)
+            with tarfile.open(archive, "a", format=tarfile.PAX_FORMAT) as handle:
+                member, stream = regular(
+                    f".cx_sources/{selected}/sparse-v1", b"1\n0\n1\n"
+                )
+                member.pax_headers = {
+                    "GNU.sparse.major": "1",
+                    "GNU.sparse.minor": "0",
+                    "GNU.sparse.realsize": "1",
+                }
+                handle.addfile(member, stream)
+            archive.chmod(0o600)
+            with self.assertRaises(source_archive.SourceArchiveError):
+                source_archive.extract_source_archive(archive, destination, selected)
+            self.assertFalse((destination / ".cx_sources").exists())
+
+        with tempfile.TemporaryDirectory() as temporary:  # members reported with a sparse map must be rejected
+            root = Path(temporary).resolve()
+            archive = root / "source.tar"
+            destination = root / "destination"
+            destination.mkdir(mode=0o700)
+            write_archive(archive)
+            original_next = tarfile.TarFile.next
+
+            def sparse_next(handle: tarfile.TarFile) -> tarfile.TarInfo | None:  # wrap TarFile.next and set .sparse on every regular file
+                member = original_next(handle)
+                if member is not None and member.isfile():
+                    member.sparse = [(0, 1)]
+                return member
+
+            with mock.patch.object(tarfile.TarFile, "next", sparse_next):
+                with self.assertRaises(source_archive.SourceArchiveError):
+                    source_archive.extract_source_archive(archive, destination, selected)
+            self.assertFalse((destination / ".cx_sources").exists())
+
+    def test_runtime_identity_and_realized_placement_are_behavioral(self) -> None:  # checks runtime_identity_issues truthiness and the placement summary shape
+        self.assertFalse(capability.runtime_identity_issues(  # "mi325x" with these observed values reports nothing
+            "mi325x", vendor="amd", arch="gfx942", machine="amd64",
+            device_name="AMD Instinct MI325X", device_count=8, world_size=8,
+        ))
+        self.assertTrue(capability.runtime_identity_issues(  # same observed values under "mi355x" must report issues
+            "mi355x", vendor="amd", arch="gfx942", machine="amd64",
+            device_name="AMD Instinct MI325X", device_count=8, world_size=8,
+        ))
+        records = [("private-a", rank) for rank in range(4)] + [  # (host, rank) pairs: two hosts with ranks 0..3 each
+            ("private-b", rank) for rank in range(4)
+        ]
+        self.assertEqual(
+            run_ep._summarize_realized_placement(
+                records, expected_nodes=2, expected_gpus_per_node=4, expected_world_size=8
+            ),
+            {
+                "gpus_per_node": 4,
+                "nodes": 2,
+                "ranks_per_node": 4,
+                "unique_local_ranks": True,
+                "valid": True,
+            },
+        )
+        with self.assertRaises(ValueError):
+            run_ep._summarize_realized_placement(
+                records[:-1] + [("private-b", 2)],  # swaps ("private-b", 3) for a second ("private-b", 2)
+                expected_nodes=2,
+                expected_gpus_per_node=4,
+                expected_world_size=8,
+            )
+
+    def test_private_allocation_stratum_is_salted_ordered_and_rank_consistent(self) -> None:  # digest depends on hosts, salt and selectors, and exposes none of them
+        salt = "a" * 64
+        hosts = ["private-node-b", "private-node-a", "private-node-a"]  # unsorted, with one repeated host
+        selectors = {
+            "ib_gid_index": "3",
+            "rdma_devices": "private-hca0:1,private-hca1:1",
+            "rdma_service_level": "2",
+            "socket_ifname": "private-if0",
+        }
+        digest = run_ep._allocation_stratum_sha256(
+            hosts, audit_salt=salt, fabric_selectors=selectors, required=True
+        )
+        self.assertRegex(digest or "", r"^[0-9a-f]{64}$")  # 64 lowercase hex characters
+        self.assertEqual(  # reversing the host list must not change the digest
+            digest,
+            run_ep._allocation_stratum_sha256(
+                list(reversed(hosts)),
+                audit_salt=salt,
+                fabric_selectors=selectors,
+                required=True,
+            ),
+        )
+        for changed_hosts, changed_salt, changed_selectors in (  # changing any one input must change the digest
+            (hosts + ["private-node-c"], salt, selectors),
+            (hosts, "b" * 64, selectors),
+            (hosts, salt, {**selectors, "ib_gid_index": "4"}),
+        ):
+            self.assertNotEqual(
+                digest,
+                run_ep._allocation_stratum_sha256(
+                    changed_hosts,
+                    audit_salt=changed_salt,
+                    fabric_selectors=changed_selectors,
+                    required=True,
+                ),
+            )
+        serialized = json.dumps({"allocation_stratum_sha256": digest})  # JSON built here in the test, not by run_ep
+        private_literals = [
+            salt,
+            *hosts,
+            selectors["rdma_devices"],
+            selectors["socket_ifname"],
+        ]
+        self.assertFalse(any(value in serialized for value in private_literals))  # no input literal appears in the JSON
+        self.assertNotIn("physical_hosts", serialized)
+        self.assertNotIn("fabric_selectors", serialized)
+        self.assertEqual(  # identical per-rank digests collapse to that digest
+            run_ep._common_allocation_stratum([digest, digest, digest], required=True),
+            digest,
+        )
+        with self.assertRaisesRegex(ValueError, "differs across distributed ranks"):
+            run_ep._common_allocation_stratum([digest, "b" * 64], required=True)
+        with self.assertRaisesRegex(ValueError, "requires"):  # required=True with no salt must raise
+            run_ep._allocation_stratum_sha256(
+                hosts, audit_salt=None, fabric_selectors=selectors, required=True
+            )
+        self.assertIsNone(run_ep._allocation_stratum_sha256(  # required=False with no salt returns None
+            hosts, audit_salt=None, fabric_selectors=selectors, required=False
+        ))
+
+    def test_collective_version_and_rccl_fingerprint_are_normalized(self) -> None:  # covers version formatting, the runtime fingerprint and ep_nccl._runtime_collective
+        self.assertEqual(ep_harness.format_collective_version(23004), "2.30.4")  # integer-encoded versions
+        self.assertEqual(ep_harness.format_collective_version(21805), "2.18.5")
+        self.assertEqual(ep_harness.format_collective_version((2, 21, 5)), "2.21.5")  # tuple input
+
+        properties = types.SimpleNamespace(
+            multi_processor_count=304, total_memory=1024, warp_size=64
+        )
+        fake = types.SimpleNamespace(  # stand-in passed as the first argument of _runtime_fingerprint (presumably for torch; confirm)
+            __version__="2.9.0",
+            version=types.SimpleNamespace(cuda=None, hip="7.2"),
+            cuda=types.SimpleNamespace(
+                get_device_properties=lambda _device: properties,
+                get_device_name=lambda _device: "AMD Instinct MI325X",
+                nccl=types.SimpleNamespace(version=lambda: 21805),
+            ),
+        )
+        with mock.patch.object(
+            run_ep, "_loaded_collective_version", return_value="2.18.5"
+        ):
+            fingerprint = run_ep._runtime_fingerprint(
+                fake, "device", machine="amd64", vendor="amd", arch="gfx942"
+            )
+        self.assertEqual(fingerprint["collective_library"], {"kind": "rccl", "version": "2.18.5"})
+        self.assertEqual(fingerprint["accelerator_runtime"], {"kind": "hip", "version": "7.2"})
+
+        class FakeCollective:  # object returned by the patched ctypes.CDLL below
+            @staticmethod
+            def ncclGetVersion(pointer) -> int:
+                pointer._obj.value = 23004  # write the version through the pointer argument, then return 0
+                return 0
+
+        maps = "0-1 r-xp 0 00:00 0 /runtime/libnccl.so.2\n"  # text served by the patched open(); shaped like a /proc maps line
+        with (
+            mock.patch("builtins.open", return_value=io.StringIO(maps)),
+            mock.patch.object(run_ep.os.path, "isfile", return_value=True),
+            mock.patch.object(
+                run_ep.os.path, "realpath", return_value="/runtime/libnccl.so.2"
+            ),
+            mock.patch.object(run_ep.ctypes, "CDLL", return_value=FakeCollective()),
+        ):
+            self.assertEqual(run_ep._loaded_collective_version(), "2.30.4")
+
+        path = HERE / "ep_nccl.py"
+        tree = ast.parse(path.read_text(), str(path))
+        helper = next(  # pull out only the top-level function _runtime_collective
+            node for node in tree.body
+            if isinstance(node, ast.FunctionDef) and node.name == "_runtime_collective"
+        )
+        namespace = {"re": re}  # the only global supplied to the extracted helper
+        exec(compile(ast.Module(body=[helper], type_ignores=[]), str(path), "exec"), namespace)  # defines the helper without importing ep_nccl
+        args = types.SimpleNamespace(
+            runtime_fingerprint={
+                "collective_library": {"kind": "nccl", "version": "2.30.4"}
+            }
+        )
+        cuda = types.SimpleNamespace(version=types.SimpleNamespace(hip=None))
+        self.assertEqual(namespace["_runtime_collective"](args, cuda), ("nccl", "2.30.4"))
+        args.runtime_fingerprint["collective_library"]["version"] = None  # a missing version must raise
+        with self.assertRaisesRegex(RuntimeError, "runtime identity is unavailable"):
+            namespace["_runtime_collective"](args, cuda)
+        self.assertNotIn("torch.cuda.nccl.version", path.read_text())  # ep_nccl.py must not contain this text
+
+    def test_workloads_bind_generator_activation_and_trace(self) -> None:  # workload ids are typed, repeatable and sensitive to their inputs
+        args = ("uniform", 7168, 8, 256, 8, 64, 67)
+        first = workload.compute_workload_id(*args)
+        self.assertTrue(identity.is_typed_id(first, "workload"))
+        self.assertEqual(first, workload.compute_workload_id(*args))  # same inputs give the same id
+        self.assertNotEqual(first, workload.compute_workload_id(*args[:-1], 68))  # last positional argument changed from 67 to 68
+        self.assertNotEqual(  # supplying trace_checksum changes the id
+            first,
+            workload.compute_workload_id(*args, trace_checksum="a" * 64),
+        )
+        _, _, manifest = workload.build_workload(8, 2, 4, "uniform", 4, 67, 2)
+        member, checksums, _, _ = workload.canonical_member(
+            "uniform", 8, 2, 4, 2, 2, 67
+        )
+        self.assertEqual(member, manifest["workload_id"])  # both construction paths must agree on id and checksums
+        self.assertEqual(checksums, manifest["checksums"])
+
+    def test_eplb_calibration_window_is_disjoint_and_identity_bound(self) -> None:  # calibration member must differ from the evaluation member built with the same arguments
+        evaluation = workload.canonical_member("zipf", 8, 2, 8, 2, 4, 67)
+        calibration = workload.canonical_eplb_calibration_member(
+            "zipf", 8, 2, 8, 2, 4, 67
+        )
+        self.assertNotEqual(evaluation[0], calibration[0])  # first tuple elements differ
+        self.assertNotEqual(evaluation[1]["trace"], calibration[1]["trace"])  # "trace" entries of the second elements differ
+        self.assertGreater(  # the offset constant must exceed 2 * 4
+            workload.EPLB_CALIBRATION_TOKEN_OFFSET,
+            2 * 4,
+        )
+        repeated = workload.canonical_eplb_calibration_member(
+            "zipf", 8, 2, 8, 2, 4, 67
+        )
+        self.assertEqual(calibration, repeated)  # repeat call returns an equal result
+        with self.assertRaises(ValueError):  # a negative token_offset is rejected
+            workload.canonical_routing_rows(
+                8, 8, 2, "zipf", 67, token_offset=-1
+            )
+
+    def test_canonical_members_are_bound_to_each_scheduled_row(self) -> None:
+        case = {
+            "routing": "uniform", "hidden": 8, "topk": 2, "experts": 4, "ep": 2,
+            "mode": "normal",
+        }
+        eplb_record = {
+            "enabled": False, "mapping_hash": None, "num_physical_experts": 4,
+        }
+
+        def expected(
+            *, tokens: int = 1, hidden: int = 8
+        ) -> tuple[str, dict[str, str], str]:
+            member, checksums, row_hash, _, _ = contracts._expected_canonical_trace(
+                "uniform", hidden, 2, 4, 4, 2, tokens, 67, False, 2048
+            )
+            return member, checksums, row_hash
+
+        member, checksums, row_hash = expected()
+        rows = [{"tokens_per_rank": 1, "routing": {"hash": row_hash}}]
+        proof = {
+            "manifest_checksums": {member: checksums},
+            "members": [member],
+            "workload_id": identity.workload_id({
+                "members": [{"checksums": checksums, "workload_id": member}]
+            }),
+        }
+        contracts._validate_canonical_workload(proof, case, rows, eplb_record)
+
+        def replace_member(document: dict, replacement: tuple[str, dict[str, str], str]) -> None:
+            replacement_id, replacement_checksums, _ = replacement
+            document["members"] = [replacement_id]
+            document["manifest_checksums"] = {replacement_id: replacement_checksums}
+            document["workload_id"] = identity.workload_id({
+                "members": [{
+                    "checksums": replacement_checksums,
+                    "workload_id": replacement_id,
+                }]
+            })
+
+        mutations = {
+            "wrong member token": lambda document, mutated_rows: replace_member(
+                document, expected(tokens=2)
+            ),
+            "wrong member dimensions": lambda document, mutated_rows: replace_member(
+                document, expected(hidden=16)
+            ),
+            "wrong member checksum": lambda document, mutated_rows: replace_member(
+                document,
+                (
+                    member,
+                    {**checksums, "topk_idx": "0" * 64},
+                    row_hash,
+                ),
+            ),
+            "row hash unrelated to member": lambda document, mutated_rows: mutated_rows[0][
+                "routing"
+            ].update({"hash": "f" * 64}),
+        }
+        for label, mutate in mutations.items():
+            with self.subTest(label=label), self.assertRaises(contracts.ContractError):
+                bad_proof, bad_rows = copy.deepcopy(proof), copy.deepcopy(rows)
+                mutate(bad_proof, bad_rows)
+                contracts._validate_canonical_workload(
+                    bad_proof, case, bad_rows, eplb_record
+                )
+
+    def test_eplb_row_hash_is_bound_to_the_frozen_remap(self) -> None:  # with EPLB enabled, validation must fail when mapping_hash does not match
+        case = {
+            "routing": "zipf", "hidden": 8, "topk": 2, "experts": 4, "ep": 2,
+            "mode": "normal",
+        }
+        physical = eplb.physical_count(4, 32, 2)
+        plan = contracts._expected_eplb_plan("zipf", 2, 4, physical, 2, 67, 2048)
+        eplb_record = {
+            "enabled": True,
+            "mapping_hash": eplb.mapping_hash(plan),  # hash of the plan computed above
+            "num_physical_experts": physical,
+        }
+        member, checksums, row_hash, _, _ = contracts._expected_canonical_trace(
+            "zipf", 8, 2, 4, physical, 2, 1, 67, True, 2048
+        )
+        self.assertNotEqual(row_hash, checksums["trace"])  # here the row hash differs from the member's trace checksum
+        workload_proof = {
+            "manifest_checksums": {member: checksums},
+            "members": [member],
+            "workload_id": identity.workload_id({
+                "members": [{"checksums": checksums, "workload_id": member}]
+            }),
+        }
+        rows = [{"tokens_per_rank": 1, "routing": {"hash": row_hash}}]
+        contracts._validate_canonical_workload(workload_proof, case, rows, eplb_record)  # matching record validates
+        with self.assertRaisesRegex(contracts.ContractError, "EPLB mapping"):
+            contracts._validate_canonical_workload(
+                workload_proof, case, rows, {**eplb_record, "mapping_hash": "0" * 64}  # only mapping_hash is altered
+            )
+
+    def test_oracle_pass_cannot_ignore_combined_value_failure(self) -> None:  # _validate_oracle accepts the baseline record and rejects two tampered copies
+        oracle = {  # baseline record that must validate
+            "atol": ep_harness.ORACLE_ATOL,
+            "checks": {
+                "combine_values": True,
+                "counts": True,
+                "metadata": True,
+                "multiplicity": True,
+                "payload": True,
+                "source_set": True,
+                "weights": True,
+            },
+            "combine_weight_semantics": "unweighted-rank-sum",
+            "contract": ep_harness.ORACLE_CONTRACT,
+            "dispatch_sha256": "a" * 64,
+            "max_absolute_error": 0.0,
+            "max_elementwise_relative_error": 0.0,
+            "max_relative_error": 0.0,
+            "max_weight_error": 0.0,
+            "order_sha256": "b" * 64,
+            "ordering_contract": "stable-v1",
+            "passed": True,
+            "receive_count": 1,
+            "rtol": ep_harness.ORACLE_RTOL,
+        }
+        contracts._validate_oracle(oracle, "oracle")
+        weighted = copy.deepcopy(oracle)
+        weighted["combine_weight_semantics"] = "native-gate-weighted"  # a different semantics string must be rejected
+        with self.assertRaisesRegex(contracts.ContractError, "differs from v1"):
+            contracts._validate_oracle(weighted, "oracle")
+        tampered = copy.deepcopy(oracle)
+        tampered["checks"]["combine_values"] = False  # "passed" stays True while one check is False
+        with self.assertRaises(contracts.ContractError):
+            contracts._validate_oracle(tampered, "oracle")
+
+    def test_oracle_stability_canonicalizes_native_receive_order(self) -> None:  # source-text checks on ep_harness.py and ep_mori.py, plus kernel_generation passthrough
+        source = (HERE / "ep_harness.py").read_text()
+        begin = source.index("canonical_order = torch.argsort")
+        canonical = source[begin:source.index("problem.recv_tokens = receive_count", begin)]  # text between the two marker strings
+        self.assertIn("canonical_sources", canonical)
+        self.assertIn("canonical_ids", canonical)
+        self.assertIn("canonical_weights", canonical)
+        self.assertNotIn("_tensor_sha256(source_ids", canonical)  # this call text must not appear in the slice
+        mori = (HERE / "ep_mori.py").read_text()
+        self.assertIn('"inter-node-v1" if self._inter_node', mori)
+        self.assertIn('else "async-ll" if self._async_ll', mori)
+        backend = types.SimpleNamespace(name="mori", kernel_generation="async-ll")
+        self.assertEqual(ep_harness.kernel_generation(backend), "async-ll")  # returns the backend's attribute value
+        backend.kernel_generation = "inter-node-v1"
+        self.assertEqual(ep_harness.kernel_generation(backend), "inter-node-v1")
+
+    def test_terminal_fail_safe_fills_only_missing_shard_cases(self) -> None:
+        matrix = sweep_matrix.resolve_matrix(backends="all", max_cases=128)
+        shard = next(item for item in matrix["include"] if item["n"] >= 2)  # two cases are kept below
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary)
+            matrix_path = root / "matrix.json"
+            control_path = root / "control.json"
+            out_dir = root / "results"
+            matrix_path.write_text(json.dumps(matrix))
+            control = sweep_matrix.extract_shard(
+                matrix_path, shard["id"], control_path,
+                sku=shard["sku"], backend=shard["backend"], nodes=shard["nodes"],
+            )
+            control["cases"] = control["cases"][:2]  # trim the shard to its first two cases
+            control["n"] = 2
+            control_path.write_text(json.dumps(control))
+            first = {key: value for key, value in control["cases"][0].items() if key != "case_id"}
+            git_run = {
+                "artifact": "artifact", "job": "job", "ref": "collectivex",
+                "repo": "SemiAnalysisAI/InferenceX", "run_attempt": "1",
+                "run_id": "123", "source_sha": "a" * 40,
+                "qualification_index": 1,
+            }
+            allocation = {
+                "artifact": "artifact", "execution_id": "execution", "job": "job",
+                "repo": "SemiAnalysisAI/InferenceX", "run_attempt": "1", "run_id": "123",
+                "runner": shard["sku"], "source_sha": "a" * 40,
+                "qualification_index": 1,
+            }
+            out_dir.mkdir()
+            existing = contracts.make_terminal_document(
+                allocation_factors=allocation, attempt_ordinal=1, case=first,
+                case_factors={"case": first, "profile": identity.V1_CASE_PROFILE, "sku": shard["sku"]},
+                control_sha256=hashlib.sha256(control_path.read_bytes()).hexdigest(),
+                failure_mode="setup", generated_at="2026-07-04T00:00:00Z", git_run=git_run,
+                reason="launcher-setup-failed", return_code=7, source="runtime-emitter",
+                status="failed",
+                expected_case_id=control["cases"][0]["case_id"],
+            )
+            (out_dir / "existing.json").write_text(json.dumps(existing))  # case 0 already has a document
+            (out_dir / "partial.json").write_text(json.dumps({
+                "format": contracts.RAW_FORMAT,
+                "identity": {"case_id": control["cases"][1]["case_id"]},
+                "sample_artifact": {"path": "partial.samples.json"},
+            }))
+            (out_dir / "partial.samples.json").write_text("{broken")  # deliberately malformed JSON
+            environment = {
+                **os.environ,
+                "COLLECTIVEX_OPERATOR_CONFIG": "/dev/null",
+                "CX_SHARD_FILE": str(control_path),
+                "CX_SHARD_SKU": shard["sku"],
+                "CX_RUNNER": shard["sku"],
+                "CX_BENCH": shard["backend"],
+                "CX_NODES": str(shard["nodes"]),
+                "COLLECTIVEX_EXECUTION_ID": "execution",
+                "COLLECTIVEX_ARTIFACT_NAME": "artifact",
+                "GITHUB_JOB": "job", "GITHUB_REF_NAME": "collectivex",
+                "GITHUB_REPOSITORY": "SemiAnalysisAI/InferenceX",
+                "GITHUB_RUN_ATTEMPT": "1", "GITHUB_RUN_ID": "123",
+                "GITHUB_SHA": "a" * 40,
+                "CX_QUALIFICATION_INDEX": "1",
+            }
+            subprocess.run(
+                [
+                    "bash", "-c",
+                    'source "$1"; cx_emit_setup_failures "$2" "$3" "$4" 7',
+                    "_", str(ROOT / "runtime" / "common.sh"), str(ROOT),
+                    str(out_dir), shard["backend"],
+                ],
+                check=True,  # the emitter itself must exit 0
+                env=environment,
+            )
+            attempts = [contracts.strict_load(path) for path in out_dir.glob("*.json")]
+            self.assertEqual(len(attempts), 2)  # exactly two *.json documents remain afterwards
+            self.assertEqual(
+                contracts.validate_attempt_paths([str(path) for path in out_dir.glob("*.json")]),
+                2,
+            )
+            delivery = [str(path) for path in out_dir.glob("*.json")]
+            self.assertEqual(contracts.validate_delivery(delivery, str(control_path)), 2)
+            with self.assertRaises(contracts.ContractError):
+                contracts.validate_delivery(delivery[:1], str(control_path))  # one document only
+            self.assertEqual(
+                {attempt["identity"]["case_id"] for attempt in attempts},
+                {case["case_id"] for case in control["cases"]},
+            )
+            self.assertTrue((out_dir / "partial.json.quarantine").is_file())
+            self.assertTrue((out_dir / "partial.samples.json.quarantine").is_file())
+
+            preallocation = root / "preallocation"
+            preallocation_results = preallocation / "experimental" / "CollectiveX" / "results"
+            preallocation_results.mkdir(parents=True)
+            failed = subprocess.run(  # no check=True: a non-zero exit is asserted below
+                [
+                    "bash", "-c",
+                    'source "$1"; REPO_ROOT="$2"; export REPO_ROOT; '
+                    'cx_install_launcher_fail_safe; cx_load_operator_config',
+                    "_", str(ROOT / "runtime" / "common.sh"), str(preallocation),
+                ],
+                env={**environment, "COLLECTIVEX_OPERATOR_CONFIG_REQUIRED": "1"},
+            )
+            self.assertNotEqual(failed.returncode, 0)
+            preallocation_attempts = [
+                contracts.validate_terminal_document(contracts.strict_load(path))
+                for path in preallocation_results.glob("*.json")
+            ]
+            self.assertEqual(  # both shard cases must be covered by the emitted documents
+                {attempt["identity"]["case_id"] for attempt in preallocation_attempts},
+                {case["case_id"] for case in control["cases"]},
+            )
+
+    def test_runtime_identity_mismatch_is_failed_not_unsupported(self) -> None:
+        wrapper = next(  # first requested case whose disposition is "runnable"
+            item for item in sweep_matrix.resolve_matrix()["requested_cases"]
+            if item["disposition"] == "runnable"
+        )
+        case = wrapper["case"]
+        environment = {
+            "CX_RUNNER": wrapper["sku"], "CX_CASE_ID": case["case_id"],
+            "CX_SUITE": case["suite"], "CX_WORKLOAD_NAME": case["workload"],
+            "CX_REQUIRED_PUBLICATION": case["required_publication"],
+            "CX_ROUTING": case["routing"], "CX_EPLB": "1" if case["eplb"] else "",
+            "CX_EP": str(case["ep"]), "CX_NGPUS": str(case["ep"]),
+            "CX_HIDDEN": str(case["hidden"]), "CX_TOPK": str(case["topk"]),
+            "CX_EXPERTS": str(case["experts"]), "CX_NODES": str(case["nodes"]),
+            "CX_GPUS_PER_NODE": str(case["gpus_per_node"]),
+            "CX_SCALE_UP_DOMAIN": str(case["scale_up_domain"]),
+            "CX_MODE": case["mode"], "CX_SCOPE": case["scope"],
+            "CX_TOPO": case["topology_class"], "CX_TRANSPORT": case["transport"],
+            "CX_SCALE_UP_TRANSPORT": case["scale_up_transport"],
+            "CX_SCALE_OUT_TRANSPORT": case["scale_out_transport"] or "",
+            "CX_TOKENS_LADDER": case["ladder"], "CX_CANONICAL": "1",
+            "CX_ITERS": "8", "CX_TRIALS": "64", "CX_WARMUP": "32",
+            "CX_SAMPLES_PER_POINT": "512", "GITHUB_RUN_ID": "123",
+            "GITHUB_RUN_ATTEMPT": "1", "GITHUB_REF_NAME": "collectivex",
+            "GITHUB_SHA": "a" * 40, "GITHUB_REPOSITORY": "SemiAnalysisAI/InferenceX",
+            "GITHUB_JOB": "sweep", "COLLECTIVEX_ARTIFACT_NAME": "artifact",
+            "COLLECTIVEX_EXECUTION_ID": "execution",
+        }
+        with mock.patch.dict(os.environ, environment, clear=False):
+            terminal = contracts.make_terminal_from_environment(
+                backend=case["backend"], phase=case["phase"], return_code=5  # asserted below: runtime-identity
+            )
+        self.assertEqual(terminal["identity"]["case_id"], case["case_id"])
+        self.assertEqual(
+            terminal["outcome"],
+            {
+                "failure_mode": "runtime-identity",
+                "reason": "runtime-identity-mismatch",
+                "return_code": 5,
+                "status": "failed",
+            },
+        )
+        for mode, reason in contracts.RUNTIME_FAILURE_REASONS.items():
+            with self.subTest(mode=mode), mock.patch.dict(os.environ, environment, clear=False):
+                staged = contracts.make_terminal_from_environment(
+                    backend=case["backend"], phase=case["phase"], return_code=1,
+                    failure_mode=mode,
+                )
+                self.assertEqual(staged["outcome"]["reason"], reason)
+                mismatched = copy.deepcopy(staged)  # same document with a different reason swapped in
+                mismatched["outcome"]["reason"] = "distributed-command-failed"
+                if reason == "distributed-command-failed":
+                    mismatched["outcome"]["reason"] = "backend-setup-failed"
+                with self.assertRaisesRegex(
+                    contracts.ContractError, "source and outcome are not registered"
+                ):
+                    contracts.validate_terminal_document(mismatched)
+        with mock.patch.dict(os.environ, environment, clear=False):
+            with self.assertRaisesRegex(
+                contracts.ContractError, "runtime failure mode is not registered"
+            ) as raised:
+                contracts.make_terminal_from_environment(
+                    backend=case["backend"], phase=case["phase"], return_code=1,
+                    failure_mode="raw-private-error",
+                )
+        self.assertNotIn("raw-private-error", str(raised.exception))  # the rejected value is not echoed
+        with mock.patch.dict(os.environ, environment, clear=False):
+            generic = contracts.make_terminal_from_environment(
+                backend=case["backend"], phase=case["phase"], return_code=6,  # no failure_mode given
+            )
+        self.assertEqual(
+            generic["outcome"],
+            {
+                "failure_mode": "execution",
+                "reason": "distributed-command-failed",
+                "return_code": 6,
+                "status": "failed",
+            },
+        )
+        manual_environment = {
+            "CX_RUNNER": "manual-runner",
+            "COLLECTIVEX_EXECUTION_ID": "manual-execution",
+        }
+        with mock.patch.dict(os.environ, manual_environment, clear=True):  # clear=True drops all other variables
+            manual = contracts.make_terminal_from_environment(
+                backend="nccl-ep", phase="decode", return_code=6,
+            )
+        self.assertIsNone(manual["provenance"]["git_run"])
+        self.assertEqual(
+            {
+                field: manual["case"][field]
+                for field in ("suite", "workload", "canonical", "required_publication")
+            },
+            {
+                "suite": "manual", "workload": "manual", "canonical": False,
+                "required_publication": "diagnostic",
+            },
+        )
+        self.assertEqual(
+            manual["identity"]["allocation_factors"],
+            {
+                "artifact": None, "execution_id": "manual-execution", "job": None,
+                "qualification_index": 1, "repo": None,
+                "run_attempt": None, "run_id": None,
+                "runner": "manual-runner", "source_sha": None,
+            },
+        )
+        broken = copy.deepcopy(manual)
+        broken["identity"]["allocation_factors"]["artifact"] = "forged-artifact"  # tamper with one factor
+        allocation_id = identity.allocation_id(
+            broken["identity"]["allocation_factors"]
+        )
+        broken["identity"]["allocation_id"] = allocation_id  # ids are recomputed from the tampered factors
+        broken["identity"]["attempt_id"] = identity.attempt_id(
+            allocation=allocation_id,
+            case=broken["identity"]["case_id"],
+            ordinal=broken["identity"]["attempt_ordinal"],
+        )
+        with self.assertRaisesRegex(
+            contracts.ContractError, "allocation factors differ"
+        ):
+            contracts.validate_terminal_document(broken)
+
+    def test_launchers_use_private_logs_and_allowlisted_failure_stages(self) -> None:
+        # Static text inspection of the three launchers, runtime/common.sh and
+        # runtime/run_in_container.sh; nothing is executed here.
+        stages_by_launcher = {
+            "launch_single-slurm.sh": {
+                "setup", "registry-verification", "container-import", "container-hash",
+                "repository-stage", "scheduler-allocation", "container-launch",
+                "artifact-collection",
+            },
+            "launch_gb-nv.sh": {
+                "setup", "registry-verification", "container-import", "container-hash",
+                "repository-stage", "scheduler-allocation", "container-launch", "backend-setup",
+                "execution", "artifact-collection",
+            },
+            "launch_mi-amds.sh": {
+                "setup", "repository-stage", "registry-verification", "scheduler-allocation",
+                "container-import", "container-hash", "container-launch", "artifact-collection",
+            },
+        }
+        common = (ROOT / "runtime" / "common.sh").read_text()
+        for name, stages in stages_by_launcher.items():
+            script = (ROOT / "launchers" / name).read_text()
+            is_gb = name == "launch_gb-nv.sh"
+            searched = script + common if is_gb else script
+            self.assertNotIn("--export=ALL", script)
+            self.assertIn("cx_run_distributed_shard" if is_gb else "cx_container_exports", script)
+            self.assertIn("collect_rc=0", script)
+            for stage in stages:
+                with self.subTest(launcher=name, stage=stage):
+                    self.assertIn(f"cx_set_failure_stage {stage}", searched)
+        amd = (ROOT / "launchers" / "launch_mi-amds.sh").read_text()
+        self.assertIn("cx_ensure_squash_on_job", amd)
+        self.assertIn("cx_fail_stage container-hash", amd)
+        self.assertNotIn('cat "$import_log"', amd)
+        self.assertIn('bash -s -- "$sq" "$lock" "$image"', common)
+        self.assertIn("> \"$log\" 2>&1 <<'BASH'", common)
+        self.assertIn("cx_fail_stage container-import", common)
+        runtime = (ROOT / "runtime" / "run_in_container.sh").read_text()
+        # Gather the comma-separated names from every printf payload in cx_container_exports.
+        body_start = common.index("\ncx_container_exports() {")
+        body = common[body_start:common.index("\n}", body_start)]
+        exported = set()
+        for payload in re.findall(r"printf '%s' '([^']*)'", body):
+            exported.update(item for item in payload.split(",") if item)
+        for private_name in (
+            "COLLECTIVEX_OPERATOR_CONFIG", "GITHUB_TOKEN", "GITHUB_WORKSPACE", "HOME",
+            "CX_PARTITION", "CX_ACCOUNT", "CX_SQUASH_DIR", "CX_STAGE_DIR",
+        ):
+            self.assertNotIn(private_name, exported)
+        self.assertIn("CX_BACKEND_CACHE_ROOT", exported)
+        self.assertIn("CX_BACKEND_CACHE_SENTINEL_SHA256", exported)
+        self.assertNotIn("CX_PREPARED_BACKEND_CACHE", exported)
+        self.assertIn("MORI_COMMIT", exported)
+        self.assertIn("cx_write_runtime_stage backend-setup", runtime)
+        self.assertIn("cx_write_runtime_stage execution", runtime)
+        # Everything from the first "cx_run_distributed_shard()" occurrence to the end of common.sh.
+        distributed = common[common.index("cx_run_distributed_shard()") :]
+        self.assertIn("cx_private_log_path shard-summary", distributed)
+        self.assertIn("cx_fail_stage execution", distributed)
+        self.assertIn('cx_fail_stage execution "$runtime_log"', distributed)
+
+    def test_case_failure_diagnostic_precedes_normal_srun_footer(self) -> None:
+        # The log holds a per-case failure warning followed by the shard summary and an
+        # srun error footer; cx_fail_stage must exit 1 and report benchmark-case-failure.
+        transcript = "".join((
+            "WARN: deepep decode run failed rc=1 (CX_RUN_TIMEOUT=900s)\n",
+            "SHARD done: 6/6 case(s) failed\n",
+            "srun: error: task exited 1\n",
+        ))
+        script = 'source "$1"; cx_fail_stage execution "$2"'
+        with tempfile.TemporaryDirectory() as scratch:
+            runtime_log = Path(scratch) / "runtime.log"
+            runtime_log.write_text(transcript)
+            argv = [
+                "bash", "-c", script,
+                "_", str(ROOT / "runtime" / "common.sh"), str(runtime_log),
+            ]
+            completed = subprocess.run(argv, text=True, capture_output=True)
+            self.assertEqual(completed.returncode, 1)
+            self.assertIn("diagnostic=benchmark-case-failure", completed.stderr)
+
+    def test_non_timeout_failure_warning_is_classified_as_case_failure(self) -> None:
+        # A failure warning that carries no timeout marker must be reported as
+        # benchmark-case-failure and must not be reported as network-or-timeout.
+        script = 'source "$1"; cx_fail_stage execution "$2"'
+        with tempfile.TemporaryDirectory() as scratch:
+            runtime_log = Path(scratch) / "runtime.log"
+            runtime_log.write_text("WARN: deepep decode run failed rc=1\nsrun: task exited 1\n")
+            argv = [
+                "bash", "-c", script,
+                "_", str(ROOT / "runtime" / "common.sh"), str(runtime_log),
+            ]
+            completed = subprocess.run(argv, text=True, capture_output=True)
+            stderr = completed.stderr
+            self.assertEqual(completed.returncode, 1)
+            self.assertNotIn("diagnostic=network-or-timeout", stderr)
+            self.assertIn("diagnostic=benchmark-case-failure", stderr)
+
+    def test_private_runtime_failure_signatures_override_case_footer(self) -> None:
+        # Every signature is written ahead of the generic shard footer. Under
+        # CX_BENCH=deepep-v2 the mapped diagnostic must be reported; under CX_BENCH=deepep
+        # the nccl.cu:106 signature must report benchmark-case-failure instead.
+        diagnostic_by_signature = {
+            "DeepEP V2 no-GIN run is outside one realized LSA domain":
+                "accelerator-topology",
+            "NCCL exception (/src/nccl.cu:111): 3": "accelerator-topology",
+            "NCCL exception (/src/nccl.cu:112): 3": "accelerator-topology",
+            "CUDA error: call requires newer driver": "accelerator-driver",
+            "NCCL failure in ncclCommWindowRegister": "nccl-device-api",
+            "Communicator does not support symmetric memory": "nccl-device-api",
+            "NCCL exception (/src/nccl.cu:106): 5": "nccl-device-api",
+            "NCCL exception (/src/nccl.cu:127): 5": "nccl-device-api",
+            "NCCL exception (/src/nccl.cu:128): 5": "nccl-device-api",
+            "NCCL exception (/src/nccl.cu:129): 5": "nccl-device-api",
+            "NCCL exception (/src/nccl.cu:135): 5": "nccl-device-api",
+            "NVCC compilation failed": "jit-toolchain",
+            "CUDA out of memory": "accelerator-memory",
+            "torch rendezvous timed out": "network-or-timeout",
+        }
+        footer = "SHARD done: 6/6 case(s) failed\n"
+        script = 'source "$1"; cx_fail_stage execution "$2"'
+        with tempfile.TemporaryDirectory() as scratch:
+            runtime_log = Path(scratch) / "runtime.log"
+
+            # Rewrite the shared log with one signature line plus the footer, then run
+            # cx_fail_stage against it with the requested CX_BENCH value.
+            def classify(first_line, bench):
+                runtime_log.write_text(f"{first_line}\n{footer}")
+                return subprocess.run(
+                    [
+                        "bash", "-c", script,
+                        "_", str(ROOT / "runtime" / "common.sh"), str(runtime_log),
+                    ],
+                    text=True,
+                    capture_output=True,
+                    env={**os.environ, "CX_BENCH": bench},
+                )
+
+            for signature, diagnostic in diagnostic_by_signature.items():
+                completed = classify(signature, "deepep-v2")
+                self.assertEqual(completed.returncode, 1)
+                self.assertIn(f"diagnostic={diagnostic}", completed.stderr)
+
+            # Only stderr is checked for this final run; its return code is not asserted.
+            fallback = classify("NCCL exception (/src/nccl.cu:106): 5", "deepep")
+            self.assertIn("diagnostic=benchmark-case-failure", fallback.stderr)
+
+    def test_runtime_stage_marker_distinguishes_launch_from_execution(self) -> None:
+        # The script calls cx_write_runtime_stage for backend-setup and then execution from
+        # inside the mounted tree, checking CX_FAILSAFE_MODE after each cx_adopt_runtime_stage.
+        with tempfile.TemporaryDirectory() as scratch:
+            mount = Path(scratch)
+            (mount / "experimental" / "CollectiveX").mkdir(parents=True)
+            command = r'''
+              set -euo pipefail
+              source "$1"
+              export COLLECTIVEX_EXECUTION_ID=test_1_shard CX_TS=test
+              cx_set_failure_stage container-launch
+              cx_prepare_runtime_marker "$2"
+              (cd "$2/experimental/CollectiveX"; cx_write_runtime_stage backend-setup)
+              cx_adopt_runtime_stage "$2"
+              test "$CX_FAILSAFE_MODE" = backend-setup
+              (cd "$2/experimental/CollectiveX"; cx_write_runtime_stage execution)
+              cx_adopt_runtime_stage "$2"
+              test "$CX_FAILSAFE_MODE" = execution
+            '''
+            common_sh = str(ROOT / "runtime" / "common.sh")
+            subprocess.run(
+                ["bash", "-c", command, "_", common_sh, str(mount)], check=True,
+            )
+
+    def test_canonical_gha_environment_is_locked_but_manual_overrides_survive(self) -> None:
+        # Drives cx_lock_canonical_gha_env through mi325x, gb300 and mi355x canonical runs, then
+        # unsets COLLECTIVEX_CANONICAL_GHA and checks that hand-set CX_* values are left alone.
+        # The negated subshell ends in `|| exit 1` because errexit ignores `!`-inverted commands.
+        common = ROOT / "runtime" / "common.sh"
+        command = r'''
+          set -euo pipefail
+          source "$1"
+          export COLLECTIVEX_CANONICAL_GHA=1 GITHUB_ACTIONS=true
+          export GITHUB_RUN_ID=123 GITHUB_RUN_ATTEMPT=1
+          export COLLECTIVEX_SOURCE_SHA=aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+          export CX_SHARD_FILE=.shards/test.json CX_SHARD_SKU=mi325x
+          export CX_NODES=1 CX_GPUS_PER_NODE=8
+          export CX_IMAGE=untrusted CX_IMAGE_DIGEST=untrusted CX_NGPUS=99
+          export CX_NCCL_HOME=/untrusted CX_LOCK_DIR=/tmp CX_SQUASH_DIR=/shared/containers
+          export CX_STAGE_DIR=/private/stale-stage
+          export CX_MORI_KERNEL_TYPE=intranode MORI_ENABLE_SDMA=0
+          export NCCL_MNNVL_ENABLE=1 MC_FORCE_MNNVL=1 CX_DRYRUN=1
+          export CX_BACKEND_CACHE_ROOT=/untrusted CX_BACKEND_CACHE_SENTINEL_SHA256=bad
+          export CX_PREPARED_BACKEND_CACHE=/untrusted CX_BACKEND_SOURCE_ROOT=/untrusted
+          ! (cx_lock_canonical_gha_env mi325x) || exit 1
+          export COLLECTIVEX_OPERATOR_CONFIG_LOADED=$$
+          export CX_STAGE_DIR="$GITHUB_WORKSPACE" CX_AUDIT_SALT="$(printf 'a%.0s' {1..64})"
+          unset CX_LOCK_DIR
+          cx_lock_canonical_gha_env mi325x
+          test "$CX_IMAGE" = "$CX_IMAGE_AMD_MORI_MI325"
+          test "$CX_IMAGE_DIGEST" = "$CX_IMAGE_AMD_MORI_MI325_DIGEST"
+          test "$CX_NGPUS:$CX_SEED:$CX_RUN_TIMEOUT" = 8:67:1800
+          test "$CX_MORI_KERNEL_TYPE:$MORI_DISABLE_AUTO_XGMI:$MORI_ENABLE_SDMA" = asyncll:0:1
+          test "$MORI_COMMIT" = "$CX_MORI_COMMIT_MI325"
+          test "$MORI_APP_LOG_LEVEL:$MORI_SHMEM_LOG_LEVEL:$MORI_IO_LOG_LEVEL" = info:info:info
+          test "$CX_STAGE_DIR" = "$GITHUB_WORKSPACE"
+          test -z "${CX_NCCL_HOME+x}${CX_LOCK_DIR+x}${NCCL_MNNVL_ENABLE+x}${MC_FORCE_MNNVL+x}"
+          test -z "${CX_BACKEND_CACHE_ROOT+x}${CX_BACKEND_CACHE_SENTINEL_SHA256+x}"
+          test -z "${CX_PREPARED_BACKEND_CACHE+x}${CX_BACKEND_SOURCE_ROOT+x}"
+          test -z "${CX_DRYRUN+x}"
+
+          export CX_STAGE_DIR=/shared/gb-stage
+          export CX_SHARD_SKU=gb300 CX_NODES=2 CX_GPUS_PER_NODE=4
+          export CX_IMAGE=untrusted CX_NGPUS=1 CX_MORI_KERNEL_TYPE=untrusted
+          export MORI_ENABLE_SDMA=0 CX_NCCL_HOME=/untrusted CX_MASTER_PORT=1
+          cx_lock_canonical_gha_env gb300
+          test "$CX_IMAGE" = "$CX_IMAGE_MULTIARCH"
+          test "$CX_IMAGE_DIGEST" = "$CX_IMAGE_MULTIARCH_DIGEST"
+          test "$CX_NGPUS:$CX_SEED:$CX_RUN_TIMEOUT" = 8:67:900
+          test "$CX_NCCL_HOME:$CX_MASTER_PORT" = /usr:29551
+          test "$CX_STAGE_DIR" = /shared/gb-stage
+          test -z "${CX_MORI_KERNEL_TYPE+x}${MORI_ENABLE_SDMA+x}"
+
+          export COLLECTIVEX_OPERATOR_CONFIG_LOADED=$$
+          export CX_SHARD_SKU=mi355x CX_NODES=1 CX_GPUS_PER_NODE=8
+          export CX_LOCK_DIR=/validated/amd-locks CX_STAGE_DIR=/validated/amd-stage
+          export CX_AUDIT_SALT="$(printf 'a%.0s' {1..64})"
+          cx_lock_canonical_gha_env mi355x
+          test "$CX_LOCK_DIR" = /validated/amd-locks
+          test "$CX_STAGE_DIR" = /validated/amd-stage
+          test "$MORI_COMMIT" = "$CX_MORI_COMMIT_MI355"
+
+          unset COLLECTIVEX_CANONICAL_GHA
+          unset COLLECTIVEX_OPERATOR_CONFIG_LOADED
+          CX_IMAGE=manual CX_IMAGE_DIGEST=manual CX_NGPUS=3
+          CX_MORI_KERNEL_TYPE=manual
+          cx_lock_canonical_gha_env mi355x
+          test "$CX_IMAGE:$CX_IMAGE_DIGEST:$CX_NGPUS:$CX_MORI_KERNEL_TYPE" = manual:manual:3:manual
+        '''
+        with tempfile.TemporaryDirectory(dir=Path.home()) as workspace:
+            Path(workspace).chmod(0o720)
+            environment = {
+                **os.environ,
+                "COLLECTIVEX_OPERATOR_CONFIG": "/dev/null",
+                "GITHUB_WORKSPACE": workspace,
+            }
+            subprocess.run(["bash", "-c", command, "_", str(common)], check=True, env=environment)
+            self.assertEqual(list(Path(workspace).iterdir()), [])
+
+    def test_canonical_amd_stage_uses_config_not_world_writable_workspace(self) -> None:
+        # GITHUB_WORKSPACE is world-writable (mode 0o702); the script must still print the
+        # configured CX_STAGE_DIR, and the workspace path must not show up on stderr.
+        common_sh = str(ROOT / "runtime" / "common.sh")
+        command = r'''
+          source "$1"
+          export COLLECTIVEX_CANONICAL_GHA=1 GITHUB_ACTIONS=true
+          export GITHUB_RUN_ID=123 GITHUB_RUN_ATTEMPT=1
+          export COLLECTIVEX_SOURCE_SHA=aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+          export CX_SHARD_FILE=.shards/test.json CX_SHARD_SKU=mi325x
+          export CX_NODES=1 CX_GPUS_PER_NODE=8 CX_SQUASH_DIR=/shared/containers
+          export COLLECTIVEX_OPERATOR_CONFIG_LOADED=$$ CX_STAGE_DIR=/shared/amd-stage
+          export CX_AUDIT_SALT="$(printf 'a%.0s' {1..64})"
+          cx_lock_canonical_gha_env mi325x
+          printf '%s' "$CX_STAGE_DIR"
+        '''
+        with tempfile.TemporaryDirectory(dir=Path.home()) as workspace:
+            Path(workspace).chmod(0o702)
+            child_env = {
+                **os.environ,
+                "COLLECTIVEX_OPERATOR_CONFIG": "/dev/null",
+                "GITHUB_WORKSPACE": workspace,
+            }
+            completed = subprocess.run(
+                ["bash", "-c", command, "_", common_sh], text=True, capture_output=True, env=child_env,
+            )
+            self.assertEqual(completed.returncode, 0, completed.stderr)
+            self.assertEqual(completed.stdout, "/shared/amd-stage")
+            self.assertNotIn(workspace, completed.stderr)
+
+    def test_canonical_amd_stage_uses_config_not_symlinked_workspace(self) -> None:
+        # GITHUB_WORKSPACE is a symlink to a real directory; the script must still print the
+        # configured CX_STAGE_DIR, and the temporary root must not show up on stderr.
+        common_sh = str(ROOT / "runtime" / "common.sh")
+        command = r'''
+          source "$1"
+          export COLLECTIVEX_CANONICAL_GHA=1 GITHUB_ACTIONS=true
+          export GITHUB_RUN_ID=123 GITHUB_RUN_ATTEMPT=1
+          export COLLECTIVEX_SOURCE_SHA=aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+          export CX_SHARD_FILE=.shards/test.json CX_SHARD_SKU=mi325x
+          export CX_NODES=1 CX_GPUS_PER_NODE=8 CX_SQUASH_DIR=/shared/containers
+          export COLLECTIVEX_OPERATOR_CONFIG_LOADED=$$ CX_STAGE_DIR=/shared/amd-stage
+          export CX_AUDIT_SALT="$(printf 'a%.0s' {1..64})"
+          cx_lock_canonical_gha_env mi325x
+          printf '%s' "$CX_STAGE_DIR"
+        '''
+        with tempfile.TemporaryDirectory(dir=Path.home()) as scratch:
+            base = Path(scratch)
+            target = base / "real"
+            target.mkdir()
+            workspace_link = base / "workspace"
+            workspace_link.symlink_to(target, target_is_directory=True)
+            child_env = {
+                **os.environ,
+                "COLLECTIVEX_OPERATOR_CONFIG": "/dev/null",
+                "GITHUB_WORKSPACE": str(workspace_link),
+            }
+            completed = subprocess.run(
+                ["bash", "-c", command, "_", common_sh], text=True, capture_output=True, env=child_env,
+            )
+            self.assertEqual(completed.returncode, 0, completed.stderr)
+            self.assertEqual(completed.stdout, "/shared/amd-stage")
+            self.assertNotIn(str(base), completed.stderr)
+
+    def test_image_selection_and_registry_verification_are_fail_closed(self) -> None:
+        # Checks default image selection and registry digest handling with `curl` stubbed by shell
+        # functions. Negated subshells end in `|| exit 1`: a bare `! cmd` mid-script cannot fail it.
+        # NOTE(review): the script never runs `set -e` itself; confirm the `test` lines are enforced.
+        common = ROOT / "runtime" / "common.sh"
+        command = r'''
+          source "$1"
+          test "$(cx_default_image mi325x)" = "$CX_IMAGE_AMD_MORI_MI325"
+          test "$(cx_default_image mi355x)" = "$CX_IMAGE_AMD_MORI"
+          pinned="sha256:$(printf 'a%.0s' {1..64})"
+          curl() {
+            case "$*" in
+              *auth.docker.io*) printf '{"token":"test"}' ;;
+              *) printf 'Docker-Content-Digest: %s\r\n' "$pinned" ;;
+            esac
+          }
+          test "$(cx_resolve_registry_digest ubuntu:latest)" = "$pinned"
+          test "$(cx_resolve_registry_digest docker.io/library/ubuntu:latest)" = "$pinned"
+          ! (cx_resolve_registry_digest "ubuntu@$pinned") || exit 1
+          ! (cx_resolve_registry_digest ghcr.io/example/image:tag) || exit 1
+          ! (cx_resolve_registry_digest 'ubuntu@sha256:bad') || exit 1
+          curl() {
+            case "$*" in *auth.docker.io*) printf '{"token":"test"}';; esac
+          }
+          ! (cx_resolve_registry_digest ubuntu:latest) || exit 1
+          cx_resolve_registry_digest() { printf '%s' "$CX_IMAGE_MULTIARCH_DIGEST"; }
+          cx_verify_registry_image "$CX_IMAGE_MULTIARCH"
+          test "$COLLECTIVEX_IMAGE_DIGEST_VERIFIED" = 1
+          test "$COLLECTIVEX_IMAGE_DIGEST" = "$CX_IMAGE_MULTIARCH_DIGEST"
+          cx_reverify_registry_image "$CX_IMAGE_MULTIARCH"
+          cx_resolve_registry_digest() { printf 'sha256:%064d' 0; }
+          ! (cx_reverify_registry_image "$CX_IMAGE_MULTIARCH") || exit 1
+          ! (cx_verify_registry_image "$CX_IMAGE_MULTIARCH") || exit 1
+        '''
+        environment = {**os.environ, "COLLECTIVEX_OPERATOR_CONFIG": "/dev/null"}
+        subprocess.run(["bash", "-c", command, "_", str(common)], check=True, env=environment)
+
+    def test_canonical_gha_requires_compute_visible_staging(self) -> None:
+        # With CX_STAGE_DIR unset, cx_stage_path must fail when COLLECTIVEX_CANONICAL_GHA=1 and
+        # must return a stage distinct from the checkout when it is 0. The negated subshell
+        # ends in `|| exit 1` because errexit ignores commands inverted with `!`.
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve()
+            repo = root / "repo"
+            squash = root / "squash"
+            source = repo / "experimental" / "CollectiveX"
+            source.mkdir(parents=True)
+            squash.mkdir()
+            (source / "public.py").write_text("public\n")
+            (source / "private-infra.md").write_text("private\n")
+            command = r'''
+              set -euo pipefail
+              source "$1"
+              unset CX_SHARD_FILE CX_STAGE_DIR
+              ! (COLLECTIVEX_CANONICAL_GHA=1; cx_stage_path "$2" "") || exit 1
+              staged="$(COLLECTIVEX_CANONICAL_GHA=0; cx_stage_path "$2" "")"
+              cx_stage_repo "$2" "$staged"
+              test "$staged" != "$2"
+              test -f "$staged/experimental/CollectiveX/public.py"
+              test ! -e "$staged/experimental/CollectiveX/private-infra.md"
+              cx_cleanup_stage "$staged" "$2"
+              test ! -e "$staged"
+            '''
+            environment = {
+                **os.environ,
+                "COLLECTIVEX_OPERATOR_CONFIG": "/dev/null",
+                "CX_SQUASH_DIR": str(squash),
+            }
+            argv = ["bash", "-c", command, "_", str(ROOT / "runtime" / "common.sh"), str(repo)]
+            subprocess.run(argv, check=True, env=environment)
+            self.assertEqual(list(squash.iterdir()), [])
+
+    def test_manual_stage_does_not_write_to_checkout_parent(self) -> None:
+        # Stages a checkout whose parent directory is read-only. The script requires the
+        # stage path to match "$CX_SQUASH_DIR"/.collectivex-stage-* and requires that no
+        # .collectivex-stage entry exists in the checkout's parent.
+        with tempfile.TemporaryDirectory() as scratch:
+            parent = Path(scratch).resolve() / "readonly-parent"
+            repo = parent / "repo"
+            squash = parent / "squash"
+            checkout = repo / "experimental" / "CollectiveX"
+            checkout.mkdir(parents=True)
+            squash.mkdir(mode=0o700)
+            (checkout / "public.py").write_text("public\n")
+            saved_mode = parent.stat().st_mode & 0o777
+            # Read and execute only: new entries cannot be created directly in the parent.
+            parent.chmod(0o555)
+            try:
+                command = r'''
+                  set -euo pipefail
+                  source "$1"
+                  unset CX_STAGE_DIR
+                  staged="$(cx_stage_path "$2" "")"
+                  cx_stage_repo "$2" "$staged"
+                  case "$staged" in "$3"/.collectivex-stage-*) ;; *) exit 1 ;; esac
+                  test -f "$staged/experimental/CollectiveX/public.py"
+                  test ! -e "$4/.collectivex-stage"
+                  cx_cleanup_stage "$staged" "$2"
+                  test ! -e "$staged"
+                '''
+                argv = [
+                    "bash", "-c", command, "_",
+                    str(ROOT / "runtime" / "common.sh"), str(repo), str(squash), str(parent),
+                ]
+                child_env = {
+                    **os.environ,
+                    "COLLECTIVEX_OPERATOR_CONFIG": "/dev/null",
+                    "CX_SQUASH_DIR": str(squash),
+                }
+                subprocess.run(argv, check=True, env=child_env)
+            finally:
+                # Put the original mode back whether or not the script succeeded.
+                parent.chmod(saved_mode)
+            remaining = sorted(entry.name for entry in parent.iterdir())
+            self.assertEqual(remaining, ["repo", "squash"])
+            self.assertEqual(list(squash.iterdir()), [])
+
+    def test_stage_refuses_to_reuse_an_execution_child(self) -> None:
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve()
+            repo = root / "repo"
+            source = repo / "experimental" / "CollectiveX"
+            source.mkdir(parents=True)
+            (source / "public.py").write_text("public\n")
+            base = root / "stage"
+            child = base / "job_collision"  # pre-existing stage destination
+            child.mkdir(parents=True, mode=0o700)
+            sentinel = child / "keep"
+            sentinel.write_text("keep")  # must survive the refused staging
+            command = r'''
+              source "$1"
+              ! (cx_stage_repo "$2" "$3/job_collision")
+            '''
+            subprocess.run(
+                [
+                    "bash", "-c", command, "_",
+                    str(ROOT / "runtime" / "common.sh"), str(repo), str(base),
+                ],
+                check=True,  # script status is the negated cx_stage_repo one
+                env={
+                    **os.environ,
+                    "COLLECTIVEX_OPERATOR_CONFIG": "/dev/null",
+                    "COLLECTIVEX_CANONICAL_GHA": "1",
+                    "COLLECTIVEX_EXECUTION_ID": "collision",
+                    "CX_STAGE_DIR": str(base),
+                },
+            )
+            self.assertEqual(sentinel.read_text(), "keep")  # content untouched
+
+    def test_stage_removes_its_execution_child_when_rsync_fails(self) -> None:
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve()
+            repo = root / "repo"
+            source = repo / "experimental" / "CollectiveX"
+            source.mkdir(parents=True)
+            (source / "public.py").write_text("public\n")
+            base = root / "stage"
+            base.mkdir(mode=0o700)
+            sentinel = root / "rsync-called"  # created by the stubbed rsync
+            command = r'''
+              source "$1"
+              rsync() { : > "$RSYNC_CALLED"; return 1; }
+              staged="$(cx_stage_path "$2" "$3")"
+              ! cx_stage_repo "$2" "$staged"
+            '''
+            subprocess.run(
+                [
+                    "bash", "-c", command, "_",
+                    str(ROOT / "runtime" / "common.sh"), str(repo), str(base),
+                ],
+                check=True,  # script status is the negated cx_stage_repo one
+                env={
+                    **os.environ,
+                    "COLLECTIVEX_OPERATOR_CONFIG": "/dev/null",
+                    "COLLECTIVEX_CANONICAL_GHA": "1",
+                    "CX_STAGE_DIR": str(base),
+                    "RSYNC_CALLED": str(sentinel),
+                },
+            )
+            self.assertTrue(sentinel.is_file())  # the rsync stub really ran
+            self.assertEqual(list(base.iterdir()), [])  # nothing left behind
+
+    def test_interrupted_stage_is_cleanup_capable_before_copy(self) -> None:
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve()
+            repo = root / "repo"
+            source = repo / "experimental" / "CollectiveX"
+            source.mkdir(parents=True)
+            (source / "public.py").write_text("public\n")
+            base = root / "stage"
+            base.mkdir(mode=0o700)
+            sibling = base / "keep"
+            sibling.write_text("keep\n")  # unrelated entry that must survive
+            command = r'''
+              set -euo pipefail
+              source "$1"
+              export REPO_ROOT="$2" CX_BENCH=nccl-ep
+              MOUNT_SRC="$(cx_stage_path "$REPO_ROOT" "$3")"
+              cx_install_launcher_fail_safe
+              rsync() { kill -TERM $$; return 143; }
+              cx_stage_repo "$REPO_ROOT" "$MOUNT_SRC"
+            '''
+            result = subprocess.run(  # no check=True: failure is expected
+                [
+                    "bash", "-c", command, "_",
+                    str(ROOT / "runtime" / "common.sh"), str(repo), str(base),
+                ],
+                text=True,
+                capture_output=True,
+                env={
+                    **os.environ,
+                    "COLLECTIVEX_OPERATOR_CONFIG": "/dev/null",
+                    "COLLECTIVEX_CANONICAL_GHA": "1",
+                    "COLLECTIVEX_EXECUTION_ID": "interrupted",
+                    "CX_STAGE_DIR": str(base),
+                },
+            )
+            self.assertNotEqual(result.returncode, 0)  # rsync stub sent TERM
+            self.assertFalse((base / "job_interrupted").exists())
+            self.assertEqual(sibling.read_text(), "keep\n")
+
+    def test_stage_base_and_early_cleanup_are_isolated(self) -> None:
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary).resolve()
+            repo = root / "repo"
+            source = repo / "experimental" / "CollectiveX"
+            source.mkdir(parents=True)
+            (source / "public.py").write_text("public\n")
+            nested = repo / "stage"  # stage base located inside the checkout
+            nested.mkdir(mode=0o700)
+            group_writable = root / "group-stage"
+            group_writable.mkdir(mode=0o770)
+            group_writable.chmod(0o770)  # mkdir's mode is masked by the umask
+            setgid = root / "setgid-stage"
+            setgid.mkdir(mode=0o750)
+            setgid.chmod(0o2750)  # add the setgid bit
+            command = r'''
+              set -euo pipefail
+              source "$1"
+              ! (CX_STAGE_DIR="$3"; cx_stage_path "$2" "$3") || exit 1
+              ! (CX_STAGE_DIR="$4"; cx_stage_path "$2" "$4") || exit 1
+              export CX_STAGE_DIR="$5" COLLECTIVEX_EXECUTION_ID="setgid-$$"
+              trap 'cx_cleanup_private_logs 0' EXIT
+              staged="$(cx_stage_path "$2" "$CX_STAGE_DIR")"
+              cx_stage_repo "$2" "$staged"
+              chmod 2700 "$staged"
+              cx_cleanup_stage "$staged" "$2"
+              test ! -e "$staged"
+            '''  # "|| exit 1": set -e never fires on "!"-negated commands
+            subprocess.run(
+                [
+                    "bash", "-c", command, "_",
+                    str(ROOT / "runtime" / "common.sh"), str(repo), str(nested),
+                    str(group_writable), str(setgid),
+                ],
+                check=True,
+                env={**os.environ, "COLLECTIVEX_OPERATOR_CONFIG": "/dev/null"},
+            )
+
+            early = r'''
+              set -euo pipefail
+              source "$1"
+              export REPO_ROOT="$2" CX_STAGE_DIR="$3" CX_BENCH=nccl-ep
+              export COLLECTIVEX_EXECUTION_ID="pre-marker-$$"
+              MOUNT_SRC="$(cx_stage_path "$REPO_ROOT" "$CX_STAGE_DIR")"
+              cx_install_launcher_fail_safe
+              mkdir -m 700 "$MOUNT_SRC"
+              exit 17
+            '''
+            result = subprocess.run(  # no check=True: exit 17 is expected
+                [
+                    "bash", "-c", early, "_",
+                    str(ROOT / "runtime" / "common.sh"), str(repo), str(setgid),
+                ],
+                text=True,
+                capture_output=True,
+                env={**os.environ, "COLLECTIVEX_OPERATOR_CONFIG": "/dev/null"},
+            )
+            self.assertEqual(result.returncode, 17, result.stderr)
+            self.assertEqual(list(setgid.iterdir()), [])  # early dir removed
+
+    def test_backend_cache_reuses_v3_and_falls_back_once_without_repair(self) -> None:
+        common = ROOT / "runtime" / "common.sh"
+        with tempfile.TemporaryDirectory() as temporary:
+            parent = Path(temporary) / "stage"
+            parent.mkdir(mode=0o700)
+            concurrent = Path(temporary) / "concurrent"  # one file per worker
+            concurrent.mkdir(mode=0o700)
+            command = r'''
+              set -euo pipefail
+              source "$1"
+              for worker in 1 2 3; do
+                (
+                  cx_prepare_backend_cache "$2"
+                  printf '%s %s\n' "$CX_BACKEND_CACHE_SENTINEL_SHA256" \
+                    "$CX_PREPARED_BACKEND_CACHE" > "$3/$worker"
+                ) &
+              done
+              wait
+              cmp "$3/1" "$3/2"
+              cmp "$3/1" "$3/3"
+              cx_prepare_backend_cache "$2"
+              first="$CX_PREPARED_BACKEND_CACHE"
+              first_digest="$CX_BACKEND_CACHE_SENTINEL_SHA256"
+              chmod 2700 "$first"
+              cx_prepare_backend_cache "$2"
+              second="$CX_PREPARED_BACKEND_CACHE"
+              test "$first" = "$second"
+              test "$first_digest" = "$CX_BACKEND_CACHE_SENTINEL_SHA256"
+              test "$first" = "$(cd "$2" && pwd -P)/.collectivex-backend-cache-v3-$(id -u)"
+              export CX_BACKEND_CACHE_ROOT="$first"
+              cx_verify_backend_cache_mount
+              export CX_BACKEND_CACHE_SENTINEL_SHA256="$(printf '0%.0s' {1..64})"
+              ! cx_verify_backend_cache_mount
+            '''
+            subprocess.run(
+                [
+                    "bash", "-c", command, "_", str(common), str(parent),
+                    str(concurrent),
+                ],
+                check=True,  # final "!" line decides the script status
+            )
+            cache = parent / f".collectivex-backend-cache-v3-{os.getuid()}"
+            self.assertTrue(cache.is_dir())
+            self.assertEqual(cache.stat().st_mode & 0o777, 0o700)
+            self.assertEqual(  # no temporary sentinel files remain
+                list(cache.glob(".collectivex-mount-sentinel-v1.tmp.*")), []
+            )
+            alias = Path(temporary) / "stage-alias"  # symlink to the parent
+            alias.symlink_to(parent, target_is_directory=True)
+            canonical = subprocess.run(
+                [
+                    "bash", "-c",
+                    'source "$1"; cx_prepare_backend_cache "$2"; '
+                    'printf "%s\\n%s\\n" "$CX_PREPARED_BACKEND_CACHE" '
+                    '"$CX_BACKEND_CACHE_SENTINEL_SHA256"',
+                    "_", str(common), str(alias),
+                ],
+                text=True,
+                capture_output=True,
+                check=True,
+            )
+            cache_path, digest = canonical.stdout.splitlines()
+            self.assertEqual(cache_path, str(cache.resolve()))
+            self.assertRegex(digest, r"^[0-9a-f]{64}$")  # 64 lowercase hex
+            saved = parent / "saved-cache"
+            cache.rename(saved)  # park the prepared cache directory
+            cache.mkdir(mode=0o700)
+            replacement = cache / ".collectivex-mount-sentinel-v1"
+            replacement.write_bytes(b"replacement".ljust(32, b"!"))
+            replacement.chmod(0o600)
+            replaced = subprocess.run(
+                [
+                    "bash", "-c",
+                    'source "$1"; export CX_BACKEND_CACHE_ROOT="$2" '
+                    'CX_BACKEND_CACHE_SENTINEL_SHA256="$3"; '
+                    'cx_verify_backend_cache_mount',
+                    "_", str(common), str(cache), digest,
+                ]
+            )
+            self.assertNotEqual(replaced.returncode, 0)  # old digest rejected
+            replacement.unlink()
+            cache.rmdir()
+            saved.rename(cache)  # put the prepared cache back
+            (cache / ".collectivex-mount-sentinel-v1").unlink()
+            cache.rmdir()
+            target = Path(temporary) / "target"
+            target.mkdir(mode=0o700)
+            cache.symlink_to(target, target_is_directory=True)  # v3 is a link
+            fallback = subprocess.run(
+                [
+                    "bash", "-c",
+                    'source "$1"; cx_prepare_backend_cache "$2"; '
+                    'printf "%s\\n" "$CX_PREPARED_BACKEND_CACHE"',
+                    "_", str(common), str(parent),
+                ],
+                text=True,
+                capture_output=True,
+                check=True,
+            )
+            v4 = parent / f".collectivex-backend-cache-v4-{os.getuid()}"
+            self.assertEqual(fallback.stdout.strip(), str(v4.resolve()))
+            self.assertTrue(cache.is_symlink())  # v3 link left in place
+            self.assertTrue(v4.is_dir())
+            (v4 / ".collectivex-mount-sentinel-v1").unlink()
+            v4.rmdir()
+            v4.symlink_to(target, target_is_directory=True)  # v4 is a link too
+            result = subprocess.run(
+                [
+                    "bash", "-c", 'source "$1"; cx_prepare_backend_cache "$2"',
+                    "_", str(common), str(parent),
+                ],
+                text=True,
+                capture_output=True,
+            )
+            self.assertNotEqual(result.returncode, 0)
+            self.assertNotIn(str(parent), result.stderr)  # path not printed
+            self.assertTrue(cache.is_symlink())
+            self.assertTrue(v4.is_symlink())
+
+        source = common.read_text().split("cx_prepare_backend_cache() {", 1)[1]
+        program = source.split("<<'PY'\n", 1)[1].split("\nPY\n", 1)[0]
+        with tempfile.TemporaryDirectory() as temporary:
+            parent = Path(temporary) / "stage"
+            parent.mkdir(mode=0o700)
+            fake_os = types.ModuleType("os")  # copy of os with failing fsync
+            fake_os.__dict__.update(os.__dict__)
+            fake_os.fsync = mock.Mock(side_effect=OSError("forced fsync failure"))
+            with (
+                mock.patch.dict(sys.modules, {"os": fake_os}),
+                mock.patch.object(sys, "argv", ["-", str(parent)]),
+                mock.patch.object(sys, "stdout", io.StringIO()),
+                self.assertRaises(SystemExit) as failure,
+            ):
+                exec(compile(program, "<cache-preparation>", "exec"), {})
+            self.assertEqual(failure.exception.code, 1)
+            self.assertEqual(  # failed write leaves no temporary sentinel
+                list(parent.rglob(".collectivex-mount-sentinel-v1.tmp.*")), []
+            )
+
+    def test_nvidia_namespace_package_roots_come_from_distribution_files(self) -> None:
+        runtime = ROOT / "runtime" / "run_in_container.sh"
+        with tempfile.TemporaryDirectory() as temporary:
+            site = Path(temporary) / "site"  # fake site dir, put on PYTHONPATH
+            package = site / "nvidia" / "nccl"
+            (package / "include").mkdir(parents=True)
+            (package / "lib").mkdir()
+            (package / "include" / "nccl.h").write_text("header\n")
+            (package / "lib" / "libnccl.so.2").write_text("library\n")
+            info = site / "nvidia_nccl_cu13-2.30.4.dist-info"
+            info.mkdir()
+            (info / "METADATA").write_text(
+                "Metadata-Version: 2.1\nName: nvidia-nccl-cu13\nVersion: 2.30.4\n"
+            )
+            (info / "RECORD").write_text(  # lists only nvidia/nccl files
+                "nvidia/nccl/include/nccl.h,,\n"
+                "nvidia/nccl/lib/libnccl.so.2,,\n"
+                "nvidia_nccl_cu13-2.30.4.dist-info/METADATA,,\n"
+                "nvidia_nccl_cu13-2.30.4.dist-info/RECORD,,\n"
+            )
+            command = r'''
+              set -euo pipefail
+              eval "$(sed -n '/^cx_nvidia_package_root()/,/^}/p' "$1")"
+              root="$(cx_nvidia_package_root nvidia-nccl-cu13 nccl)"
+              test "$root" = "$2/nvidia/nccl"
+              ! cx_nvidia_package_root nvidia-nccl-cu13 nvshmem
+            '''
+            subprocess.run(
+                ["bash", "-c", command, "_", str(runtime), str(site.resolve())],
+                check=True,  # final "!" line decides the script status
+                env={**os.environ, "PYTHONPATH": str(site)},
+            )
+
+    def test_cuda_cccl_exports_the_resolved_jit_toolchain_root(self) -> None:
+        runtime = ROOT / "runtime" / "run_in_container.sh"
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary)
+            toolkit = root / "cuda-13.0"  # fake toolkit tree
+            (toolkit / "bin").mkdir(parents=True)
+            (toolkit / "include").mkdir()
+            (toolkit / "lib64").mkdir()
+            cccl = toolkit / "targets" / "x86_64-linux" / "include" / "cccl"
+            cccl.mkdir(parents=True)
+            nvcc = toolkit / "bin" / "nvcc"
+            nvcc.write_text("#!/bin/sh\nexit 0\n")  # stub that always succeeds
+            nvcc.chmod(0o755)
+            alias = root / "cuda"  # symlink whose bin/ is prepended to PATH
+            alias.symlink_to(toolkit, target_is_directory=True)
+            command = r'''
+              set -euo pipefail
+              eval "$(sed -n '/^cx_prepare_cuda_cccl()/,/^}/p' "$1")"
+              cx_prepare_cuda_cccl
+              test "$CUDA_HOME" = "$2"
+              test "$CX_CUDA_CCCL" = "$2/targets/x86_64-linux/include/cccl"
+              test "$CPATH" = "$2/targets/x86_64-linux/include/cccl:"
+              test "$NVCC_PREPEND_FLAGS" = "-I$2/targets/x86_64-linux/include/cccl "
+            '''
+            subprocess.run(
+                ["bash", "-c", command, "_", str(runtime), str(toolkit.resolve())],
+                check=True,
+                env={
+                    **os.environ,
+                    "PATH": f"{alias / 'bin'}:{os.environ['PATH']}",
+                    "CPATH": "",  # start empty so the expected value is exact
+                    "NVCC_PREPEND_FLAGS": "",
+                },
+            )
+
+    def test_deepep_v2_toolchain_rejects_overlay_lock_failure(self) -> None:
+        runtime = ROOT / "runtime" / "run_in_container.sh"
+        with tempfile.TemporaryDirectory() as temporary:
+            command = r'''
+              set -euo pipefail
+              eval "$(sed -n '/^cx_prepare_deepep_toolchain()/,/^}/p' "$1")"
+              cache_root="$2"
+              cx_nvidia_package_root() { printf '%s' /unused; }
+              cx_deepep_v2_root() { printf '%s' "$cache_root"; }
+              cx_log() { :; }
+              flock() { return 1; }
+              ! cx_prepare_deepep_toolchain
+            '''
+            subprocess.run(
+                ["bash", "-c", command, "_", str(runtime), temporary],
+                check=True,  # passes only if the toolchain step is non-zero
+            )
+
+    def test_pinned_source_fetch_retries_transient_failures(self) -> None:
+        common = ROOT / "runtime" / "common.sh"
+        with tempfile.TemporaryDirectory() as temporary:
+            command = r'''
+              set -euo pipefail
+              eval "$(sed -n '/^cx_git()/,/^}/p' "$1")"
+              eval "$(sed -n '/^cx_git_in_tree()/,/^}/p' "$1")"
+              eval "$(sed -n '/^cx_fetch_revision()/,/^}/p' "$1")"
+              attempts=0
+              expected_directory="$(cd -P -- "$3" && pwd -P)"
+              sleep() { :; }
+              git() {
+                local argument has_directory=0 has_trust=0
+                if [ "$1" = '-c' ] && [ "$3" = init ]; then
+                  mkdir -p "${@: -1}"
+                  return 0
+                fi
+                for argument in "$@"; do
+                  [ "$argument" != '-C' ] || has_directory=1
+                  [ "$argument" != "safe.directory=$expected_directory" ] || has_trust=1
+                  [ "$argument" != 'safe.directory=*' ] || return 1
+                done
+                [ "$has_directory" = 0 ] || [ "$has_trust" = 1 ] || return 1
+                case " $* " in
+                  *' fetch '*)
+                    attempts=$((attempts + 1))
+                    [ "$attempts" = 3 ]
+                    ;;
+                  *' rev-parse HEAD '*) printf '%s\n' "$revision" ;;
+                  *) return 0 ;;
+                esac
+              }
+              cx_fetch_revision https://example.invalid/repo "$2" "$3"
+              test "$attempts" = 3
+            '''  # the git stub fails "fetch" until its third invocation
+            revision = "a" * 40  # 40-character placeholder revision, passed as $2
+            subprocess.run(
+                ["bash", "-c", command, "_", str(common), revision, temporary],
+                check=True,
+            )
+
+    def test_git_tree_trust_is_exact_and_command_scoped(self) -> None:
+        common = ROOT / "runtime" / "common.sh"
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary)
+            repository = root / "repo"
+            repository.mkdir()
+            alias = root / "alias"  # symlink to the repository, passed as $3
+            alias.symlink_to(repository, target_is_directory=True)
+            wildcard = root / "*"  # directory literally named "*", passed as $5
+            wildcard.mkdir()
+            arguments = root / "arguments"  # git stub records its argv here
+            command = r'''
+              set -euo pipefail
+              eval "$(sed -n '/^cx_git()/,/^}/p' "$1")"
+              eval "$(sed -n '/^cx_git_in_tree()/,/^}/p' "$1")"
+              arguments="$4"
+              git() { printf '%s\n' "$@" > "$arguments"; }
+              cx_git_in_tree "$2" status --porcelain
+              ! cx_git_in_tree relative status || exit 1
+              ! cx_git_in_tree "$3" status || exit 1
+              ! cx_git_in_tree "$5" status
+            '''  # "|| exit 1": set -e never fires on "!"-negated commands
+            subprocess.run(
+                [
+                    "bash",
+                    "-c",
+                    command,
+                    "_",
+                    str(common),
+                    str(repository),
+                    str(alias),
+                    str(arguments),
+                    str(wildcard),
+                ],
+                check=True,
+            )
+            self.assertEqual(  # only the first, accepted call reached git
+                arguments.read_text().splitlines(),
+                [
+                    "-c",
+                    "credential.helper=",
+                    "-c",
+                    f"safe.directory={repository.resolve()}",
+                    "-C",
+                    str(repository.resolve()),
+                    "status",
+                    "--porcelain",
+                ],
+            )
+            self.assertNotIn("safe.directory=*", arguments.read_text())
+
+    def test_runtime_materializes_the_verified_host_source_without_network(self) -> None:
+        common = ROOT / "runtime" / "common.sh"
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary)
+            seed = root / "seed"  # returned by the cx_backend_source_path stub
+            seed.mkdir()
+            (seed / "pinned").write_text("source\n")
+            destination = root / "build"
+            fetched = root / "network-fetch"  # created only by the fetch stub
+            command = r'''
+              set -euo pipefail
+              source "$1"
+              export CX_BACKEND_SOURCE_ROOT="$2/source"
+              SEED="$3" FETCHED="$5"
+              copy_mode=
+              cx_backend_source_path() { printf '%s' "$SEED"; }
+              cx_backend_source_is_valid() { test -f "$2/pinned"; }
+              cx_fetch_revision() { : > "$FETCHED"; return 1; }
+              cp() {
+                test "$1" = -R
+                copy_mode=recursive
+                command cp "$@"
+              }
+              cx_materialize_backend_source deepep-hybrid "$4"
+              test -f "$4/pinned"
+              test "$copy_mode" = recursive
+              python3 - "$4" <<'PY'
+import os
+import stat
+import sys
+assert stat.S_IMODE(os.stat(sys.argv[1]).st_mode) == 0o700
+PY
+              test ! -e "$FETCHED"
+            '''
+            subprocess.run(
+                [
+                    "bash", "-c", command, "_", str(common), str(root),
+                    str(seed), str(destination), str(fetched),
+                ],
+                check=True,  # any failing shell assertion fails the test
+            )
+
+    def test_backend_source_validation_rejects_status_errors_and_ignored_files(self) -> None:
+        common = ROOT / "runtime" / "common.sh"
+        with tempfile.TemporaryDirectory() as temporary:
+            command = r'''
+              set -euo pipefail
+              source "$1"
+              cx_backend_source_pin() { printf '%s|%s|' revision tree; }
+              git() {
+                case " $* " in
+                  *' rev-parse HEAD '*) printf '%s\n' revision ;;
+                  *' rev-parse HEAD^{tree} '*) printf '%s\n' tree ;;
+                  *' status --porcelain '*) [ "$mode" != status-error ] ;;
+                  *' ls-files --others --ignored '*)
+                    [ "$mode" != ignored ] || printf '%s\n' ignored.bin
+                    ;;
+                  *) return 1 ;;
+                esac
+              }
+              mode=status-error
+              ! cx_backend_source_is_valid backend "$2" || exit 1
+              mode=ignored
+              ! cx_backend_source_is_valid backend "$2" || exit 1
+              mode=clean
+              cx_backend_source_is_valid backend "$2"
+            '''  # "|| exit 1": set -e never fires on "!"-negated commands
+            subprocess.run(
+                ["bash", "-c", command, "_", str(common), temporary],
+                check=True,  # both rejections and the clean pass are enforced
+            )
+
+    def test_backend_source_root_normalizes_inherited_special_mode(self) -> None:
+        common = ROOT / "runtime" / "common.sh"
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary)  # passed as $2 (expected_mount)
+            source_root = root / "experimental" / "CollectiveX" / ".cx_sources"
+            source = source_root / "backend-revision"  # $3 (expected_source)
+            source.mkdir(parents=True)
+            command = r'''
+              set -euo pipefail
+              source "$1"
+              export COLLECTIVEX_EXECUTION_ID="source-mode-$$"
+              trap 'cx_cleanup_private_logs 0' EXIT
+              expected_mount="$2"
+              expected_source="$3"
+              expected_root="${expected_source%/*}"
+              observed_mode=2700
+              mock_stage_owner=4200
+              mock_root_owner=4200
+              chmod_calls=0
+              chmod() {
+                test "$1" = 700 && test "$2" = "$expected_root"
+                chmod_calls=$((chmod_calls + 1))
+                [ "$chmod_calls" = 2 ] || return 1
+                observed_mode=700
+              }
+              stat() {
+                case "$2" in
+                  %u)
+                    case "$3" in
+                      "$expected_mount") printf '%s\n' "$mock_stage_owner" ;;
+                      "$expected_root") printf '%s\n' "$mock_root_owner" ;;
+                      *) return 1 ;;
+                    esac
+                    ;;
+                  %a)
+                    case "$3" in
+                      "$expected_mount") printf '2700\n' ;;
+                      "$expected_root") printf '%s\n' "$observed_mode" ;;
+                      *) return 1 ;;
+                    esac
+                    ;;
+                  *) return 1 ;;
+                esac
+              }
+              cx_backend_source_path() { printf '%s' "$expected_source"; }
+              cx_backend_source_is_valid() {
+                test "$1" = backend && test "$2" = "$expected_source"
+              }
+              cx_prepare_backend_source "$2" backend
+              test "$observed_mode" = 2700
+              test "$chmod_calls" = 0
+              observed_mode=2750
+              ! _cx_prepare_backend_source "$2" backend || exit 1
+              test "$chmod_calls" = 1
+              _cx_prepare_backend_source "$2" backend
+              test "$observed_mode" = 700
+              mock_root_owner=4300
+              ! _cx_prepare_backend_source "$2" backend
+            '''  # "|| exit 1": set -e never fires on "!"-negated commands
+            subprocess.run(
+                ["bash", "-c", command, "_", str(common), str(root), str(source)],
+                check=True,  # final "!" line decides the script status
+            )
+
+    def test_canonical_backend_sources_use_verified_seed_without_network(self) -> None:
+        common = ROOT / "runtime" / "common.sh"
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary)
+            mount = root / "mount"
+            source_root = mount / "experimental" / "CollectiveX" / ".cx_sources"
+            seed_root = root / "seed"  # exported as CX_BACKEND_SOURCE_SEED_ROOT
+            seeds = [
+                seed_root / f"{backend}-revision"
+                for backend in ("backend-one", "backend-two")
+            ]
+            mount.mkdir(mode=0o700)
+            source_root.parent.mkdir(parents=True, mode=0o700)
+            for seed in seeds:
+                seed.mkdir(parents=True, mode=0o700)
+                (seed / "pinned").write_text("source\n")
+            network = root / "network"  # created only by the fetch stub
+            command = r'''
+              set -euo pipefail
+              source "$1"
+              export COLLECTIVEX_CANONICAL_GHA=1
+              export CX_BACKEND_SOURCE_SEED_ROOT="$4"
+              export COLLECTIVEX_EXECUTION_ID="source-seed-$$"
+              trap 'cx_cleanup_private_logs 0' EXIT
+              NETWORK="$5"
+              stat() {
+                case "$2" in
+                  %u) printf '4200\n' ;;
+                  %a) printf '700\n' ;;
+                  *) return 1 ;;
+                esac
+              }
+              cx_backend_source_path() { printf '%s/%s-revision' "$1" "$2"; }
+              cx_backend_source_is_valid() { test -f "$2/pinned"; }
+              cx_fetch_revision() { : > "$NETWORK"; return 1; }
+              cx_prepare_backend_source "$2" backend-one
+              cx_prepare_backend_source "$2" backend-two
+              test -f "$3/backend-one-revision/pinned"
+              test -f "$3/backend-two-revision/pinned"
+              test ! -e "$NETWORK"
+              rm -rf -- "$3/backend-one-revision" "$3/backend-two-revision"
+              unset CX_BACKEND_SOURCE_SEED_ROOT
+              ! _cx_prepare_backend_source "$2" backend-one || exit 1
+              test ! -e "$NETWORK"
+            '''  # "|| exit 1": set -e never fires on "!"-negated commands
+            subprocess.run(
+                [
+                    "bash", "-c", command, "_", str(common), str(mount),
+                    str(source_root), str(seed_root), str(network),
+                ],
+                check=True,
+            )
+
+    def test_deepep_hybrid_cache_reuse_revalidates_extensions(self) -> None:
+        common = ROOT / "runtime" / "common.sh"
+        runtime = ROOT / "runtime" / "run_in_container.sh"
+        with tempfile.TemporaryDirectory() as temporary:
+            root = Path(temporary)
+            (root / "deep_ep_cpp.so").write_bytes(b"deep")  # fake extensions
+            (root / "hybrid_ep_cpp.so").write_bytes(b"hybrid")
+            command = r'''
+              set -euo pipefail
+              chmod 700 "$3"
+              source "$1"
+              eval "$(sed -n '/^cx_deepep_hybrid_marker_content_sha256()/,/^}/p' "$2")"
+              eval "$(sed -n '/^cx_deepep_hybrid_cache_is_valid()/,/^}/p' "$2")"
+              revision=revision tree=tree
+              cx_git() {
+                case " $* " in
+                  *' rev-parse HEAD '*) printf '%s\n' "$revision" ;;
+                  *' rev-parse HEAD^{tree} '*) printf '%s\n' "$tree" ;;
+                  *' status --porcelain '*|*' ls-files --others '*) return 0 ;;
+                  *) return 1 ;;
+                esac
+              }
+              cx_git_in_tree() { shift; cx_git "$@"; }
+              marker="$3/.collectivex-complete"
+              digest="$(cx_extension_pair_sha256 "$3" 'deep_ep_cpp*.so' 'hybrid_ep_cpp*.so')"
+              (umask 077; printf '%s\n%s\n%s\n' "$revision" "$tree" "$digest" > "$marker")
+              cx_deepep_hybrid_cache_is_valid "$3" "$marker" "$revision" "$tree"
+              printf changed > "$3/hybrid_ep_cpp.so"
+              ! cx_deepep_hybrid_cache_is_valid "$3" "$marker" "$revision" "$tree" || exit 1
+              printf hybrid > "$3/hybrid_ep_cpp.so"
+              cp "$3/deep_ep_cpp.so" "$3/deep_ep_cpp-extra.so"
+              ! cx_deepep_hybrid_cache_is_valid "$3" "$marker" "$revision" "$tree"
+            '''  # "|| exit 1": set -e never fires on "!"-negated commands
+            subprocess.run(
+                ["bash", "-c", command, "_", str(common), str(runtime), temporary],
+                check=True,  # final "!" line decides the script status
+            )
+
+    def test_rack_backend_environment_is_shared_per_node_and_required(self) -> None:
+        """Check the launcher's SOURCE_BACKEND_ENV guard and cx_persist_backend_env's node file."""
+        runtime = ROOT / "runtime" / "run_in_container.sh"
+        launcher = (ROOT / "launchers" / "launch_gb-nv.sh").read_text()
+        assignment = next(
+            line for line in launcher.splitlines()
+            if line.startswith("SOURCE_BACKEND_ENV=")
+        )
+        # Static checks on the launcher text: no shared /tmp file, and the guard clauses exist.
+        self.assertNotIn("/tmp/.cx_backend_env", launcher)
+        self.assertIn('[ -f "$env_file" ] && [ -r "$env_file" ]', launcher)
+        self.assertIn('[ ! -L "$env_file" ]', launcher)
+        self.assertIn('$(stat -c "%u" "$env_root"):600', launcher)
+        self.assertIn('case "$(stat -c "%a" "$env_root")" in 700|[1-7]700)', launcher)
+        self.assertIn("node-${SLURM_NODEID}.sh", launcher)
+        self.assertIn("HybridEPBuffer", launcher)
+        self.assertIn('. "$env_file" || exit 66', launcher)
+        with tempfile.TemporaryDirectory() as temporary:
+            # No `set -e` here, so only the last status reaches check=True: every step exits on failure.
+            consumer = r'''
+              eval "$1"
+              env_root="$2/env"
+              SOURCE_BACKEND_ENV="${SOURCE_BACKEND_ENV//\/ix\/experimental\/CollectiveX\/.cx_backend\/env/$env_root}"
+              mkdir -p "$env_root"
+              env_file="$env_root/node-1.sh"
+              printf 'printf sourced > "$CX_SENTINEL"\n' > "$env_file"
+              chmod 600 "$env_file"
+              export CX_SENTINEL="$2/sentinel"
+              stat() {
+                [ "${STAT_FAIL:-0}" = 0 ] || return 1
+                case "$2" in
+                  %a) printf '%s\n' "$ROOT_MODE" ;;
+                  %u) printf '1000\n' ;;
+                  %u:%a) printf '%s\n' "$FILE_OWNER_MODE" ;;
+                  *) return 2 ;;
+                esac
+              }
+              run_case() {
+                rm -f "$CX_SENTINEL"
+                ROOT_MODE="$1" FILE_OWNER_MODE="$2" STAT_FAIL="$3" SLURM_NODEID="$4"
+                ( eval "$SOURCE_BACKEND_ENV" )
+                rc=$?
+                [ "$rc" = "$5" ] || return 1
+                if [ "$5" = 0 ]; then
+                  [ -f "$CX_SENTINEL" ]
+                else
+                  [ ! -e "$CX_SENTINEL" ]
+                fi
+              }
+              run_case 700 1000:600 0 1 0 || exit 1
+              run_case 2700 1000:600 0 1 0 || exit 1
+              run_case 755 1000:600 0 1 66 || exit 1
+              run_case 700 1000:600 1 1 66 || exit 1
+              run_case 700 2000:600 0 1 66 || exit 1
+              mv "$env_file" "$env_file.real" || exit 1
+              ln -s "$env_file.real" "$env_file" || exit 1
+              run_case 700 1000:600 0 1 66 || exit 1
+              rm "$env_file" || exit 1
+              mv "$env_file.real" "$env_file" || exit 1
+              run_case 700 1000:600 0 invalid 66 || exit 1
+            '''
+            consumer_argv = ["bash", "-c", consumer, "_", assignment, temporary]
+            subprocess.run(consumer_argv, check=True)
+            # Producer side: the persisted node file must be mode 600 and restore the exported values.
+            command = r'''
+              set -euo pipefail
+              cd "$2"
+              eval "$(sed -n '/^cx_persist_backend_env()/,/^}/p' "$1")"
+              export SLURM_NODEID=1 PYTHONPATH=/ix/pinned DEEPEP_COMMIT=abc
+              cx_persist_backend_env
+              env_file="$PWD/.cx_backend/env/node-1.sh"
+              test -f "$env_file"
+              test "$(stat -f %Lp "$env_file" 2>/dev/null || stat -c %a "$env_file")" = 600
+              unset PYTHONPATH DEEPEP_COMMIT
+              . "$env_file"
+              test "$PYTHONPATH" = /ix/pinned
+              test "$DEEPEP_COMMIT" = abc
+              SLURM_NODEID=invalid && ! cx_persist_backend_env
+            '''
+            persist_argv = ["bash", "-c", command, "_", str(runtime), temporary]
+            subprocess.run(persist_argv, check=True)
+
+    def test_stage_cleanup_failure_fails_job_but_marks_allocation_safe(self) -> None:
+        """A failing stage cleanup must exit 1, still write cleanup-safe, and keep private logs."""
+        with tempfile.TemporaryDirectory() as temporary:
+            job_root = Path(temporary)
+            for child in ("repo", "stage"):
+                (job_root / child).mkdir()
+            script = r'''
+              source "$1"
+              cx_write_cleanup_guard() {
+                rm -f -- "$CX_JOB_ROOT/cleanup-safe" "$CX_JOB_ROOT/cleanup-unsafe"
+                : > "$CX_JOB_ROOT/cleanup-$1"
+              }
+              cx_cleanup_stage() { return 1; }
+              cx_cleanup_private_logs() { : > "$CX_JOB_ROOT/logs-deleted"; }
+              export CX_JOB_ROOT="$2" REPO_ROOT="$2/repo" MOUNT_SRC="$2/stage"
+              export COLLECTIVEX_CANONICAL_GHA=1 CX_ALLOCATION_UNCERTAIN=0
+              unset CX_BENCH JOB_ID
+              cx_launcher_cleanup 0
+            '''
+            common = str(ROOT / "runtime" / "common.sh")
+            completed = subprocess.run(
+                ["bash", "-c", script, "_", common, str(job_root)],
+                env={**os.environ, "COLLECTIVEX_OPERATOR_CONFIG": "/dev/null"},
+                capture_output=True, text=True,
+            )
+            self.assertEqual(completed.returncode, 1, completed.stderr)
+            self.assertTrue((job_root / "cleanup-safe").is_file())
+            for absent in ("cleanup-unsafe", "logs-deleted"):
+                self.assertFalse((job_root / absent).exists())
+
+    def test_generated_stage_cleanup_never_removes_configured_base(self) -> None:
+        """cx_cleanup_stage removes the marked generated directory but must refuse the base itself."""
+        with tempfile.TemporaryDirectory() as temporary:
+            base = Path(temporary) / "stage"
+            repo = Path(temporary) / "repo"
+            generated = base / "job_execution"
+            generated.mkdir(parents=True, mode=0o700)
+            repo.mkdir()
+            marker = generated / ".collectivex-stage-v1"
+            marker.write_text("collectivex-stage-v1\nexecution\n")
+            marker.chmod(0o600)
+            (generated / "payload").write_text("temporary")
+            script = (
+                'source "$1"; cx_cleanup_stage "$2" "$3"; '
+                '! cx_cleanup_stage "$4" "$3"'
+            )
+            environment = dict(
+                os.environ,
+                COLLECTIVEX_OPERATOR_CONFIG="/dev/null",
+                COLLECTIVEX_EXECUTION_ID="execution",
+                CX_STAGE_DIR=str(base),
+            )
+            common = str(ROOT / "runtime" / "common.sh")
+            argv = [
+                "bash", "-c", script, "_", common,
+                str(generated), str(repo), str(base),
+            ]
+            subprocess.run(argv, check=True, env=environment)
+            self.assertFalse(generated.exists())
+            self.assertTrue(base.is_dir())
+            self.assertTrue(repo.is_dir())
+
+    def test_adapters_do_not_retain_dead_expected_methods(self) -> None:
+        """No ``ep_*.py`` adapter may define a function or method named ``expected``."""
+        function_nodes = (ast.FunctionDef, ast.AsyncFunctionDef)
+        for adapter in HERE.glob("ep_*.py"):
+            module = ast.parse(adapter.read_text(), str(adapter))
+            walked = ast.walk(module)
+            defined = {item.name for item in walked if isinstance(item, function_nodes)}
+            self.assertNotIn("expected", defined, adapter.name)
+
+    def test_artifact_safety_rejects_sensitive_material(self) -> None:
+        """Each sensitive document must be rejected without the error echoing the offending text."""
+        private_address = ".".join(str(octet) for octet in (10, 0, 0, 1))
+        secret = "github_pat_" + "A" * 24
+        sensitive = {
+            "ipv4": ({"note": private_address}, private_address),
+            "ipv6": ({"note": "[2001:db8::1]:29500"}, "2001:db8::1"),
+            "user-at-host": ({"note": "ssh admin@private-host"}, "admin@private-host"),
+            "hostname": ({"note": "host=compute-17"}, "compute-17"),
+            "private-dns": ({"note": "worker-7.cluster.local"}, "worker-7.cluster.local"),
+            "suffixed-host": ({"worker_hostname": "relative"}, "worker_hostname"),
+            "suffixed-address": ({"control_address": "relative"}, "control_address"),
+            "suffixed-path": ({"scheduler_path": "relative"}, "scheduler_path"),
+            "exact-address": ({"address": "relative"}, "address"),
+            "exact-ip": ({"ip": "relative"}, "ip"),
+            "camel-host": ({"workerHost": "relative"}, "workerHost"),
+            "camel-path": ({"schedulerPath": "relative"}, "schedulerPath"),
+            "acronym-gpu-uuid": ({"gpuUUID": "relative"}, "gpuUUID"),
+            "acronym-device-uuid": ({"deviceUUID": "relative"}, "deviceUUID"),
+            "acronym-pci-bus": ({"pciBusID": "relative"}, "pciBusID"),
+            "mac-address": ({"note": "00:11:22:33:44:55"}, "00:11:22:33:44:55"),
+            "ib-guid": ({"note": "00:11:22:33:44:55:66:77"}, "00:11:22:33:44:55:66:77"),
+            "dgx-host": ({"note": "dgx-b300-001"}, "dgx-b300-001"),
+            "cloud-host": ({"note": "ip-10-20-30-40"}, "ip-10-20-30-40"),
+            "credential-field": ({"service_token": "short"}, "service_token"),
+            "prefixed-token": ({"note": secret}, secret),
+            "hf-token": ({"note": "hf_" + "A" * 24}, "hf_" + "A" * 24),
+            "payment-token": ({"note": "sk_live_" + "A" * 24}, "sk_live_" + "A" * 24),
+            "generic-secret": ({"note": "password=not-a-real-secret"}, "not-a-real-secret"),
+        }
+        for root in ("data", "it-share", "lustre", "raid", "nvme_home", "scratch", "gpfs", "fsx"):
+            value = f"/{root}/collectivex/run"
+            sensitive[f"private-root-{root}"] = ({"note": value}, value)
+        for name, (document, offending) in sensitive.items():
+            with self.subTest(name=name):  # leak check stays inside so a failure names its case
+                with self.assertRaises(artifact_safety.ArtifactSafetyError) as caught:
+                    artifact_safety.assert_publication_safe([document])
+                self.assertNotIn(offending, str(caught.exception))
+
+        artifact_safety.assert_publication_safe([{
+            "runner": "b300",
+            "redaction": "sanitized-v1",
+            "path": "datasets/" + "a" * 64 + "/dataset.json",
+            "timing": "8:64:32",
+            "image_digest": "sha256:" + "b" * 64,
+            "source": "github.com",
+        }])
+        for ref in ("release@candidate", "worker1-feature", "sk-refactor-long-component-name"):
+            artifact_safety.assert_publication_safe([{"ref": ref}])
+
+    def test_artifact_safety_cli_does_not_echo_sensitive_values(self) -> None:
+        """The CLI must fail on an IPv4 value and name the rule without printing the value."""
+        address = ".".join(map(str, (10, 24, 68, 12)))
+        checker = str(ROOT / "artifact_safety.py")
+        with tempfile.TemporaryDirectory() as temporary:
+            artifact = Path(temporary) / "artifact.json"
+            artifact.write_text(json.dumps({"note": address}))
+            argv = [sys.executable, checker, str(artifact)]
+            completed = subprocess.run(argv, capture_output=True, text=True)
+        stderr = completed.stderr
+        self.assertNotEqual(completed.returncode, 0)
+        self.assertIn("forbidden ipv4-address value", stderr)
+        self.assertNotIn(address, stderr)
+
+    def test_artifact_safety_rejects_linked_and_special_inputs(self) -> None:
+        """``load_documents`` must raise ArtifactSafetyError for a symlink and for a FIFO."""
+        rejected = artifact_safety.ArtifactSafetyError
+        with tempfile.TemporaryDirectory() as temporary:
+            target = Path(temporary, "source.json")
+            target.write_text("{}")
+            symlink = Path(temporary, "linked.json")
+            symlink.symlink_to(target)
+            pipe = Path(temporary, "fifo.json")
+            os.mkfifo(pipe)
+            for candidate in (symlink, pipe):
+                with self.subTest(path=candidate.name):
+                    with self.assertRaises(rejected):
+                        artifact_safety.load_documents([str(candidate)])
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/experimental/CollectiveX/tests/test_schema_v1_contract.py b/experimental/CollectiveX/tests/test_schema_v1_contract.py
new file mode 100644
index 000000000..a517cd5d5
--- /dev/null
+++ b/experimental/CollectiveX/tests/test_schema_v1_contract.py
@@ -0,0 +1,436 @@
+#!/usr/bin/env python3
+"""Focused structural tests for the fail-closed CollectiveX V1 schemas."""
+from __future__ import annotations
+
+import copy
+import json
+import sys
+import unittest
+from pathlib import Path
+
+import jsonschema
+
+ROOT = Path(__file__).resolve().parents[1]
+if str(ROOT) not in sys.path:
+    sys.path.insert(0, str(ROOT))
+
+import identity  # noqa: E402
+
+
+def _load(name: str) -> dict:  # parse ROOT/schemas/<name>; JSON is UTF-8, so do not use the locale default
+    return json.loads((ROOT / "schemas" / name).read_text(encoding="utf-8"))
+
+
+def _definition_validator(schema: dict, name: str) -> jsonschema.Validator:
+    # Wrap one ``$defs`` entry in a standalone schema so it can be validated on its own.
+    wrapper = {
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "$defs": schema["$defs"],
+        "$ref": f"#/$defs/{name}",
+    }
+    return jsonschema.Draft202012Validator(wrapper)
+
+
+class CollectiveXV1SchemaContractTest(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls) -> None:  # parse each V1 schema file once and share it across tests
+        cls.raw = _load("raw-case-v1.schema.json")
+        cls.samples = _load("samples-v1.schema.json")
+        cls.public = _load("public-dataset-v1.schema.json")
+        cls.bundle = _load("private-bundle-v1.schema.json")
+        cls.terminal = _load("terminal-outcome-v1.schema.json")
+
+    def test_all_checked_in_schemas_are_draft_2020_12_valid(self) -> None:
+        for path in sorted((ROOT / "schemas").glob("*.schema.json")):
+            with self.subTest(path=path.name):
+                schema = json.loads(path.read_text(encoding="utf-8"))
+                jsonschema.Draft202012Validator.check_schema(schema)
+                self.assertIs(schema.get("additionalProperties"), False)  # `{}` is falsy yet permissive
+
+    def test_precision_catalog_and_axes_are_exact_and_strict(self) -> None:  # schemas vs identity catalog
+        expected = set(identity.V1_PRECISION_PROFILES)
+        self.assertEqual(set(self.raw["$defs"]["precision_profile"]["enum"]), expected)
+        self.assertEqual(set(self.public["$defs"]["precisionProfile"]["enum"]), expected)
+        self.assertEqual(set(self.terminal["$defs"]["precisionProfile"]["enum"]), expected)
+
+        axis_keys = {
+            "alignment_contract",
+            "api_input_dtype",
+            "api_output_dtype",
+            "communication_format",
+            "conversion_boundary",
+            "padding_contract",
+            "quantization_origin",
+            "scale_dtype",
+            "scale_group_size",
+            "scale_layout",
+        }
+        for schema, name in (  # the raw schema uses snake_case definition names, the others camelCase
+            (self.raw, "communication_axis"),
+            (self.public, "communicationAxis"),
+            (self.terminal, "communicationAxis"),
+        ):
+            with self.subTest(schema=schema["$id"]):
+                axis = schema["$defs"][name]
+                self.assertFalse(axis["additionalProperties"])
+                self.assertEqual(set(axis["required"]), axis_keys)
+                self.assertEqual(set(axis["properties"]), axis_keys)
+
+        axis_validator = _definition_validator(self.raw, "communication_axis")
+        profile_validator = _definition_validator(self.raw, "communication_precision")
+        for name in sorted(expected):  # every catalog profile must validate against the raw schema
+            profile = identity.precision_profile(name)
+            with self.subTest(profile=name):
+                axis_validator.validate(profile["dispatch"])
+                axis_validator.validate(profile["combine"])
+                profile_validator.validate(profile)
+        raw_case_profile = _definition_validator(self.raw, "case_profile")
+        terminal_case_profile = _definition_validator(self.terminal, "caseProfile")
+        for case in (
+            {"mode": "normal"},
+            {
+                "mode": "normal",
+                "precision_profile": "d-fp8-e4m3fn-b128-f32-prequantized.c-bf16",
+            },
+            {
+                "mode": "low-latency",
+                "precision_profile": "d-fp8-e4m3fn-b128-f32-fused.c-bf16",
+            },
+        ):
+            resolved = identity.profile_for_case(case)
+            raw_case_profile.validate(resolved)
+            terminal_case_profile.validate(resolved)
+
+        shape = self.raw["properties"]["case"]["properties"]["shape"]
+        self.assertIn("precision_profile", shape["required"])
+        self.assertIn("dispatch_precision", shape["required"])
+        self.assertIn("combine_precision", shape["required"])
+        self.assertNotIn("dispatch_dtype", shape["properties"])
+        self.assertNotIn("quant", shape["properties"])
+
+        workload = self.public["$defs"]["series"]["properties"]["workload"]
+        self.assertIn("precision_profile", workload["required"])
+        self.assertIn("dispatch_precision", workload["required"])
+        self.assertIn("combine_precision", workload["required"])
+        self.assertNotIn("dispatch_dtype", workload["properties"])
+        self.assertNotIn("combine_dtype", workload["properties"])
+
+        profile = self.raw["$defs"]["case_profile"]  # schema constants must equal the identity module's
+        self.assertEqual(
+            profile["properties"]["activation_generator"]["const"],
+            identity.V1_NORMAL_CASE_PROFILE["activation_generator"],
+        )
+        self.assertEqual(
+            profile["properties"]["activation_profile"]["const"],
+            identity.V1_NORMAL_CASE_PROFILE["activation_profile"],
+        )
+        self.assertEqual(
+            profile["properties"]["source_identity_contract"]["const"],
+            identity.V1_NORMAL_CASE_PROFILE["source_identity_contract"],
+        )
+
+    def test_qualification_index_is_bound_across_private_and_public_records(self) -> None:  # index is 1..3
+        paths = (
+            self.raw["properties"]["measurement"]["properties"]["qualification_index"],
+            self.raw["properties"]["identity"]["properties"]["allocation_factors"]["properties"]["qualification_index"],
+            self.raw["$defs"]["git_run"]["properties"]["qualification_index"],
+            self.samples["properties"]["qualification_index"],
+            self.bundle["properties"]["run"]["properties"]["qualification_index"],
+            self.terminal["$defs"]["allocationFactors"]["properties"]["qualification_index"],
+            self.terminal["$defs"]["gitRun"]["properties"]["qualification_index"],
+            self.public["$defs"]["attempt"]["properties"]["qualification_index"],
+        )
+        for value in paths:
+            self.assertEqual((value["minimum"], value["maximum"]), (1, 3))
+        promotion_indices = self.public["properties"]["promotion"]["properties"]["qualification_indices"]
+        series_indices = self.public["$defs"]["series"]["properties"]["measurement"]["properties"]["qualification_indices"]
+        for schema, valid_values in (
+            (promotion_indices, ([], [1], [1, 2, 3])),
+            (series_indices, ([1], [2, 3], [1, 2, 3])),
+        ):
+            validator = jsonschema.Draft202012Validator(schema)
+            for value in valid_values:
+                validator.validate(value)
+            for value in ([0], [4], [1, 1], [1, 2, 3, 1]):  # out-of-range values and lists with a repeated index
+                with self.assertRaises(jsonschema.ValidationError):
+                    validator.validate(value)
+        measurement = self.raw["properties"]["measurement"]
+        self.assertIn("execution_order_sha256", measurement["required"])
+        self.assertEqual(
+            measurement["properties"]["execution_order_sha256"]["pattern"],
+            "^[0-9a-f]{64}$",
+        )
+
+    def test_private_allocation_stratum_is_required_only_in_raw_canonical_evidence(self) -> None:
+        """The stratum digest is a raw-only field that canonical-serialized cases may not null."""
+        field = "allocation_stratum_sha256"
+        provenance = self.raw["properties"]["provenance"]
+        self.assertIn(field, provenance["required"])
+        base_validator = jsonschema.Draft202012Validator(provenance["properties"][field])
+        # The base field accepts null and a lowercase 64-character value, not the uppercase one.
+        for accepted in (None, "a" * 64):
+            base_validator.validate(accepted)
+        with self.assertRaises(jsonschema.ValidationError):
+            base_validator.validate("A" * 64)
+        rule = self.raw["allOf"][0]
+        source = rule["if"]["properties"]["workload"]["properties"]["source"]
+        self.assertEqual(source, {"const": "canonical-serialized"})
+        strict = rule["then"]["properties"]["provenance"]["properties"][field]
+        strict_validator = jsonschema.Draft202012Validator(strict)
+        with self.assertRaises(jsonschema.ValidationError):
+            strict_validator.validate(None)
+        self.assertNotIn(field, json.dumps(self.public))
+
+    def test_stage_samples_are_absent_or_exactly_512(self) -> None:  # measured 512 samples, or none at all
+        validator = _definition_validator(self.samples, "component")
+        measured = {
+            "availability": "measured",
+            "sample_count": 512,
+            "trials": [[1.0] * 8 for _ in range(64)],  # 64 trials x 8 samples = 512
+        }
+        validator.validate(measured)
+        unavailable = {
+            "availability": "unavailable",
+            "sample_count": 0,
+            "trials": None,
+        }
+        validator.validate(unavailable)
+        for mutate in (  # each mutation breaks one constraint: the count, the trial count, a trial length
+            lambda value: value.update(sample_count=511),
+            lambda value: value["trials"].pop(),
+            lambda value: value["trials"][0].pop(),
+        ):
+            broken = copy.deepcopy(measured)
+            mutate(broken)
+            with self.assertRaises(jsonschema.ValidationError):
+                validator.validate(broken)
+
+        raw_components = self.raw["properties"]["measurement"]["properties"]["rows"]["items"]["properties"]["components"]
+        public_components = self.public["$defs"]["point"]["properties"]["components"]
+        self.assertIn("stage", raw_components["required"])
+        self.assertIn("stage", public_components["required"])
+
+    def test_byte_provenance_supports_precision_aware_rates(self) -> None:
+        """Byte-accounting definitions expose exactly four fields; components require the rates."""
+        fields = {
+            "accounting_contract",
+            "activation_data_bytes",
+            "scale_bytes",
+            "total_logical_bytes",
+        }
+        # The raw and public schemas name the same definition differently.
+        for schema, key in ((self.raw, "byte_accounting"), (self.public, "byteAccounting")):
+            entry = schema["$defs"][key]
+            self.assertFalse(entry["additionalProperties"])
+            for section in ("required", "properties"):
+                self.assertEqual(set(entry[section]), fields)
+        # A public component must require the byte provenance and both derived rates.
+        required = self.public["$defs"]["component"]["required"]
+        for key in ("byte_provenance", "activation_data_rate_gbps_at_latency_percentile",
+                    "total_logical_data_rate_gbps_at_latency_percentile"):
+            self.assertIn(key, required)
+
+    def test_raw_correctness_carries_directional_precision_evidence(self) -> None:  # closed required sets
+        correctness = self.raw["properties"]["measurement"]["properties"]["rows"]["items"]["properties"]["correctness"]
+        self.assertIn("precision", correctness["required"])
+        evidence = self.raw["$defs"]["precision_evidence"]
+        self.assertFalse(evidence["additionalProperties"])
+        self.assertEqual(
+            set(evidence["required"]),
+            {"profile_id", "dispatch", "combine", "passed"},
+        )
+        axis = self.raw["$defs"]["precision_axis_evidence"]  # per-direction (dispatch/combine) evidence record
+        self.assertFalse(axis["additionalProperties"])
+        self.assertEqual(
+            set(axis["required"]),
+            {
+                "encoded_payload_valid",
+                "scales_finite",
+                "scales_positive",
+                "dequantized_semantics",
+                "saturation_count",
+                "saturation_rate",
+                "max_abs_error",
+                "max_rel_error",
+                "passed",
+            },
+        )
+
+    def test_eplb_calibration_provenance_is_explicit(self) -> None:  # raw and public EPLB descriptors
+        calibration = frozenset((
+            "calibration_workload_id",
+            "calibration_trace_sha256",
+            "calibration_window",
+            "calibration_token_offset",
+        ))
+        raw_eplb = self.raw["properties"]["case"]["properties"]["eplb"]
+        public_eplb = self.public["$defs"]["series"]["properties"]["eplb"]
+        for eplb in (raw_eplb, public_eplb):
+            self.assertTrue(calibration.issubset(eplb["required"]))
+            self.assertTrue(calibration.issubset(eplb["properties"]))
+            self.assertFalse(eplb["additionalProperties"])
+
+    def test_public_coverage_is_a_full_case_and_point_inventory(self) -> None:  # coverage + promotion counts
+        coverage = self.public["$defs"]["coverage"]
+        dimensions = {
+            "sku",
+            "suite",
+            "workload",
+            "publication_tier",
+            "backend",
+            "backend_generation",
+            "resource",
+            "topology",
+            "phase",
+            "mode",
+            "routing",
+            "eplb",
+            "precision_profile",
+            "dispatch_precision",
+            "combine_precision",
+            "points",
+        }
+        self.assertTrue(dimensions <= set(coverage["required"]))  # subset check: more required keys are allowed
+        self.assertFalse(coverage["additionalProperties"])
+        point = self.public["$defs"]["coveragePoint"]
+        self.assertEqual(
+            set(point["properties"]["terminal_status"]["enum"]),
+            {"measured", "unsupported", "failed", "invalid", "diagnostic"},
+        )
+        self.assertIn("tokens_per_rank", point["required"])
+        self.assertIn("global_tokens", point["required"])
+        self.assertIn("reason", point["required"])
+
+        promotion = self.public["properties"]["promotion"]
+        counts = {
+            "requested_cases",
+            "terminal_cases",
+            "measured_cases",
+            "unsupported_cases",
+            "requested_points",
+            "terminal_points",
+            "measured_points",
+            "unsupported_points",
+        }
+        self.assertTrue(counts <= set(promotion["required"]))
+        self.assertFalse(promotion["additionalProperties"])
+
+    def test_public_coverage_point_reason_tracks_terminal_status(self) -> None:  # reason is null iff measured
+        validator = _definition_validator(self.public, "coveragePoint")
+        point_id = "cxpoint-v1-" + "1" * 64
+        series_id = "cxseries-v1-" + "2" * 64
+        measured = {
+            "point_id": point_id,
+            "series_id": series_id,
+            "tokens_per_rank": 8,
+            "global_tokens": 64,
+            "terminal_status": "measured",
+            "reason": None,
+        }
+        validator.validate(measured)
+        unsupported = {
+            **measured,
+            "point_id": None,
+            "series_id": None,
+            "terminal_status": "unsupported",
+            "reason": "backend-platform-unsupported",
+        }
+        validator.validate(unsupported)
+        failed = {**unsupported, "terminal_status": "failed", "reason": "execution-failed"}
+        validator.validate(failed)
+
+        for broken in (  # a reason on a measured point, a missing reason elsewhere, a reason with spaces
+            {**measured, "reason": "unexpected-reason"},
+            {**unsupported, "reason": None},
+            {**failed, "reason": None},
+            {**unsupported, "reason": "contains spaces"},
+        ):
+            with self.assertRaises(jsonschema.ValidationError):
+                validator.validate(broken)
+
+    def test_public_measured_point_has_bounded_detail_and_three_run_stability(self) -> None:  # point defs
+        point = self.public["$defs"]["point"]
+        self.assertFalse(point["additionalProperties"])
+        self.assertTrue({"anomalies", "correctness", "stability"} <= set(point["required"]))
+        self.assertNotIn("correct", point["properties"])
+
+        axis = {
+            "encoded_payload_valid": True,
+            "scales_finite": None,
+            "scales_positive": None,
+            "dequantized_semantics": True,
+            "saturation_count": 0,
+            "saturation_rate": 0.0,
+            "max_abs_error": 0.0,
+            "max_rel_error": 0.0,
+            "passed": True,
+        }
+        correctness = {
+            "semantic_pass": True,
+            "precision": {
+                "profile_id": identity.V1_CONTROL_PRECISION_PROFILE,
+                "dispatch": axis,
+                "combine": copy.deepcopy(axis),
+                "passed": True,
+            },
+        }
+        correctness_validator = _definition_validator(self.public, "pointCorrectness")
+        correctness_validator.validate(correctness)
+        broken_correctness = copy.deepcopy(correctness)
+        broken_correctness["precision"]["dispatch"]["unexpected"] = True  # unknown key must be rejected
+        with self.assertRaises(jsonschema.ValidationError):
+            correctness_validator.validate(broken_correctness)
+
+        stability_validator = _definition_validator(self.public, "pointStability")
+        stability_validator.validate({
+            "complete": True,
+            "qualification_indices": [1, 2, 3],
+            "p50_max_min_ratio": 1.02,
+            "p99_max_min_ratio": 1.04,
+            "stable_p50": True,
+            "stable_p99": True,
+        })
+        stability_validator.validate({
+            "complete": False,
+            "qualification_indices": [2],
+            "p50_max_min_ratio": None,
+            "p99_max_min_ratio": None,
+            "stable_p50": False,
+            "stable_p99": False,
+        })
+        for broken in (  # "complete" with only two runs, and "incomplete" that still carries a ratio
+            {
+                "complete": True,
+                "qualification_indices": [1, 2],
+                "p50_max_min_ratio": 1.0,
+                "p99_max_min_ratio": 1.0,
+                "stable_p50": True,
+                "stable_p99": True,
+            },
+            {
+                "complete": False,
+                "qualification_indices": [1],
+                "p50_max_min_ratio": 1.0,
+                "p99_max_min_ratio": None,
+                "stable_p50": False,
+                "stable_p99": False,
+            },
+        ):
+            with self.assertRaises(jsonschema.ValidationError):
+                stability_validator.validate(broken)
+
+        anomalies = point["properties"]["anomalies"]
+        self.assertEqual(anomalies["maxItems"], 16)
+        self.assertTrue(anomalies["uniqueItems"])
+        anomaly_validator = jsonschema.Draft202012Validator({
+            "$schema": "https://json-schema.org/draft/2020-12/schema",
+            "$defs": self.public["$defs"],
+            "$ref": "#/$defs/point/properties/anomalies",
+        })
+        anomaly_validator.validate(["roundtrip-gt-isolated-sum"])
+        with self.assertRaises(jsonschema.ValidationError):
+            anomaly_validator.validate([f"anomaly-{index}" for index in range(17)])  # one over maxItems
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/experimental/CollectiveX/tests/workload.py b/experimental/CollectiveX/tests/workload.py
new file mode 100644
index 000000000..74860be80
--- /dev/null
+++ b/experimental/CollectiveX/tests/workload.py
@@ -0,0 +1,416 @@
+#!/usr/bin/env python3
+"""Canonical, byte-stable CollectiveX routing workloads.
+
+A *canonical workload* is a routing trace generated ONCE, serialized to a platform-independent
+file, and referenced by an immutable `workload_id`. Every promoted benchmark point consumes the
+SAME serialized bytes, so "did NVIDIA and AMD run the identical workload?" is answered by a
+checksum match, not by trusting that two machines re-ran the same seeded generator.
+
+Layout on disk (one workload = two files, basename = workload_id):
+  <dir>/<workload_id>.npz            topk_idx [gt,topk] int32, topk_weights [gt,topk] float32
+  <dir>/<workload_id>.manifest.json  dims, routing profile, generator version, seed, SHA-256s
+
+Routing and gate weights come from a stdlib integer counter, not a framework RNG. The same
+parameters therefore produce the same int32/float32 bytes across PyTorch and accelerator images.
+"""
+from __future__ import annotations
+
+from array import array
+import bisect
+import hashlib
+import json
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+import identity  # noqa: E402
+
+WORKLOAD_SCHEMA_VERSION = 1
+# Bump when the counter or byte encoding changes. The workload ID binds parameters and trace bytes.
+GENERATOR_VERSION = "collectivex-routing-counter-v3"
+GATE_WEIGHT_FORMAT = "counter-u16-normalized-f32"
+ACTIVATION_GENERATOR = "collectivex-activation-counter-v4"
+EPLB_CALIBRATION_WINDOW = "collectivex-eplb-calibration-window-v1"
+EPLB_CALIBRATION_TOKEN_OFFSET = 1 << 32
+_MASK64 = (1 << 64) - 1
+
+
+def _sha256(b: bytes) -> str:
+    """Return the lowercase hexadecimal SHA-256 digest of ``b``."""
+    hasher = hashlib.sha256()
+    hasher.update(b)
+    return hasher.hexdigest()
+
+
+def _mix64(value: int) -> int:
+    """Scramble ``value`` into a 64-bit integer with add, xor-shift and multiply rounds.
+
+    The additive constant, both multipliers and the shifts (30, 27, 31) match the
+    SplitMix64 output function. Every intermediate is masked with ``_MASK64`` so
+    the arithmetic stays within 64 bits.
+    """
+    state = (value + 0x9E3779B97F4A7C15) & _MASK64
+    state ^= state >> 30
+    state = (state * 0xBF58476D1CE4E5B9) & _MASK64
+    state ^= state >> 27
+    state = (state * 0x94D049BB133111EB) & _MASK64
+    return state ^ (state >> 31)
+
+
+def _counter(seed: int, token: int, slot: int, attempt: int, stream: int) -> int:
+    """Hash one (seed, token, slot, attempt, stream) coordinate to a 64-bit integer.
+
+    Each coordinate is shifted by one, multiplied by its own constant and masked
+    to 64 bits. The products are XOR-ed into the masked seed and the result is
+    passed through ``_mix64``.
+    """
+    folded = seed & _MASK64
+    folded ^= ((token + 1) * 0xD2B74407B1CE6E93) & _MASK64
+    folded ^= ((slot + 1) * 0xCA5A826395121157) & _MASK64
+    folded ^= ((attempt + 1) * 0x9E3779B185EBCA87) & _MASK64
+    folded ^= ((stream + 1) * 0xA24BAED4963EE407) & _MASK64
+    return _mix64(folded)
+
+
+def canonical_routing_rows(
+    global_tokens: int,
+    experts: int,
+    topk: int,
+    routing: str,
+    seed: int,
+    *,
+    token_offset: int = 0,
+) -> tuple[list[list[int]], list[list[float]]]:
+    """Build the deterministic routing rows for one window of tokens.
+
+    For every token in ``token_offset .. token_offset + global_tokens - 1`` this
+    picks ``topk`` distinct experts and ``topk`` gate weights that are normalized
+    by their sum. Only ``_counter`` values are used: stream 0 drives the expert
+    choice and stream 1 drives the weights.
+
+    Returns:
+        ``(indices, weights)``: two lists holding one ``topk``-long row per token.
+
+    Raises:
+        ValueError: unknown ``routing``, a non-positive dimension, ``topk`` larger
+            than ``experts``, or a ``token_offset`` that is not a non-negative int.
+        RuntimeError: a slot found no unused expert within ``experts * 16 + 1`` draws.
+    """
+    if routing not in {"uniform", "zipf"}:
+        raise ValueError(f"unknown routing {routing!r} (uniform|zipf)")
+    if global_tokens <= 0 or experts <= 0 or topk <= 0 or topk > experts:
+        raise ValueError("global_tokens/experts/topk must be positive and topk <= experts")
+    if type(token_offset) is not int or token_offset < 0:
+        raise ValueError("token_offset must be a non-negative integer")
+
+    # Zipf draws use the integer weights floor(2**32 / rank); uniform needs no table.
+    cumulative: list[int] | None = None
+    if routing == "zipf":
+        cumulative = []
+        running = 0
+        for rank in range(1, experts + 1):
+            running += (1 << 32) // rank
+            cumulative.append(running)
+
+    # Attempts 0 .. experts * 16 inclusive are tried before giving up on a slot.
+    max_draws = experts * 16 + 1
+    indices: list[list[int]] = []
+    weights: list[list[float]] = []
+    for token in range(token_offset, token_offset + global_tokens):
+        chosen: list[int] = []
+        taken: set[int] = set()
+        for slot in range(topk):
+            for attempt in range(max_draws):
+                draw = _counter(seed, token, slot, attempt, 0)
+                if cumulative is None:
+                    expert = draw % experts
+                else:
+                    expert = bisect.bisect_right(cumulative, draw % cumulative[-1])
+                if expert in taken:
+                    continue
+                taken.add(expert)
+                chosen.append(expert)
+                break
+            else:
+                raise RuntimeError("counter routing could not select distinct experts")
+        numerators = [1 + _counter(seed, token, slot, 0, 1) % 65535 for slot in range(topk)]
+        scale = float(sum(numerators))
+        indices.append(chosen)
+        weights.append([numerator / scale for numerator in numerators])
+    return indices, weights
+
+
+def _canonical_bytes(
+    indices: list[list[int]], weights: list[list[float]]
+) -> tuple[bytes, bytes]:
+    """Flatten routing rows row-major into little-endian 32-bit byte strings.
+
+    Returns:
+        ``(index_bytes, weight_bytes)``: ``indices`` encoded with the ``array``
+        typecode "i" and ``weights`` with typecode "f".
+
+    Raises:
+        RuntimeError: the platform's "i" or "f" array items are not 4 bytes wide.
+    """
+    flat_indices = array("i")
+    for row in indices:
+        for value in row:
+            flat_indices.append(value)
+    flat_weights = array("f")
+    for row in weights:
+        for value in row:
+            flat_weights.append(value)
+    if flat_indices.itemsize != 4 or flat_weights.itemsize != 4:
+        raise RuntimeError("canonical workload requires 32-bit int and float arrays")
+    # array keeps native byte order; swap on big-endian hosts so the output is little-endian.
+    if sys.byteorder != "little":
+        flat_indices.byteswap()
+        flat_weights.byteswap()
+    return flat_indices.tobytes(), flat_weights.tobytes()
+
+
+def trace_checksums(
+    indices: list[list[int]], weights: list[list[float]]
+) -> dict[str, str]:
+    """Hash routing rows into the three manifest checksums.
+
+    Returns a dict of SHA-256 hex digests over the ``_canonical_bytes`` encoding:
+    ``"topk_idx"`` (index bytes), ``"topk_weights"`` (weight bytes) and
+    ``"trace"`` (index bytes followed by weight bytes).
+    """
+    encoded_indices, encoded_weights = _canonical_bytes(indices, weights)
+    digests: dict[str, str] = {}
+    digests["topk_idx"] = _sha256(encoded_indices)
+    digests["topk_weights"] = _sha256(encoded_weights)
+    digests["trace"] = _sha256(encoded_indices + encoded_weights)
+    return digests
+
+
+def canonical_member(
+    routing: str,
+    hidden: int,
+    topk: int,
+    experts: int,
+    ep_size: int,
+    tokens_per_rank: int,
+    seed: int,
+    *,
+    token_offset: int = 0,
+) -> tuple[str, dict[str, str], list[list[int]], list[list[float]]]:
+    """Generate one canonical trace and derive its workload ID.
+
+    The window covers ``ep_size * tokens_per_rank`` tokens starting at
+    ``token_offset``.
+
+    Returns:
+        ``(workload_id, checksums, indices, weights)``, where ``checksums`` is the
+        ``trace_checksums`` dict and the rows are handed back so callers can
+        inspect the exact trace that was hashed.
+    """
+    total_tokens = ep_size * tokens_per_rank
+    rows = canonical_routing_rows(
+        total_tokens, experts, topk, routing, seed, token_offset=token_offset
+    )
+    indices, weights = rows
+    checksums = trace_checksums(indices, weights)
+    # The ID binds the parameters to the trace hash computed just above.
+    member = compute_workload_id(
+        routing=routing,
+        hidden=hidden,
+        topk=topk,
+        experts=experts,
+        ep_size=ep_size,
+        global_tokens=total_tokens,
+        seed=seed,
+        trace_checksum=checksums["trace"],
+        token_offset=token_offset,
+    )
+    return (member, checksums, indices, weights)
+
+
+def canonical_eplb_calibration_member(
+    routing: str,
+    hidden: int,
+    topk: int,
+    experts: int,
+    ep_size: int,
+    tokens_per_rank: int,
+    seed: int,
+) -> tuple[str, dict[str, str], list[list[int]], list[list[float]]]:
+    """Build the EPLB calibration member for the given parameters.
+
+    This is ``canonical_member`` with the token window shifted to start at
+    ``EPLB_CALIBRATION_TOKEN_OFFSET``; the return value has the same shape.
+    """
+    return canonical_member(
+        routing=routing,
+        hidden=hidden,
+        topk=topk,
+        experts=experts,
+        ep_size=ep_size,
+        tokens_per_rank=tokens_per_rank,
+        seed=seed,
+        token_offset=EPLB_CALIBRATION_TOKEN_OFFSET,
+    )
+
+
+def compute_workload_id(routing: str, hidden: int, topk: int, experts: int,
+                        ep_size: int, global_tokens: int, seed: int,
+                        generator: str = GENERATOR_VERSION,
+                        trace_checksum: str | None = None,
+                        token_offset: int = 0) -> str:
+    """Derive the workload ID from the parameters and the trace hash.
+
+    When ``trace_checksum`` is omitted the trace is regenerated with
+    ``canonical_routing_rows`` and hashed here. A non-zero ``token_offset`` adds
+    the ``routing_window`` and ``token_offset`` entries to the hashed key.
+
+    Raises:
+        ValueError: ``generator`` differs from ``GENERATOR_VERSION``, or
+            ``token_offset`` is not a non-negative int.
+    """
+    if generator != GENERATOR_VERSION:
+        raise ValueError(f"unsupported workload generator {generator!r}")
+    if type(token_offset) is not int or token_offset < 0:
+        raise ValueError("token_offset must be a non-negative integer")
+
+    if trace_checksum is None:
+        rows = canonical_routing_rows(
+            global_tokens, experts, topk, routing, seed, token_offset=token_offset
+        )
+        # Same bytes as trace_checksums()["trace"]: index bytes followed by weight bytes.
+        trace_checksum = _sha256(b"".join(_canonical_bytes(*rows)))
+
+    key = {
+        "generator": generator,
+        "routing": routing,
+        "hidden": hidden,
+        "topk": topk,
+        "experts": experts,
+        "ep_size": ep_size,
+        "global_tokens": global_tokens,
+        "seed": seed,
+        "trace_sha256": trace_checksum,
+        "activation_generator": ACTIVATION_GENERATOR,
+        "activation_identity": compute_activation_identity(seed, hidden),
+    }
+    if token_offset != 0:
+        key["routing_window"] = EPLB_CALIBRATION_WINDOW
+        key["token_offset"] = token_offset
+    return identity.workload_id(key)
+
+
+def compute_activation_identity(seed, hidden, generator=ACTIVATION_GENERATOR) -> str:
+    """Return the SHA-256 hex digest naming the activation generator settings.
+
+    The hashed text is ``counter|seed=<seed>|hidden=<hidden>|gen=<generator>``.
+    """
+    fields = ("counter", f"seed={seed}", f"hidden={hidden}", f"gen={generator}")
+    return _sha256("|".join(fields).encode())
+
+
+def build_manifest(routing, hidden, topk, experts, global_tokens, seed, experts_per_rank,
+                   idx_np, weights_np):
+    """Assemble the manifest dict from the (numpy) trace arrays. Pure numpy/stdlib.
+
+    Args:
+        routing, hidden, topk, experts, global_tokens, seed: workload parameters
+            recorded in the manifest and bound into the workload ID.
+        experts_per_rank: experts hosted by each rank; ``ep_size`` is derived as
+            ``experts // experts_per_rank``.
+        idx_np, weights_np: trace arrays; they are hashed as little-endian
+            int32 / float32 bytes (presumably shaped [global_tokens, topk], per
+            the module docstring; verify against caller).
+
+    Returns:
+        The manifest dict (schema version, workload ID, dims, checksums,
+        activation identity).
+
+    Raises:
+        ValueError: ``experts_per_rank`` is not positive or does not divide
+            ``experts``, ``experts`` is not positive, or ``global_tokens`` is not
+            a multiple of the derived ``ep_size``.
+    """
+    if experts_per_rank <= 0:
+        raise ValueError("experts_per_rank must be positive")
+    if experts % experts_per_rank:
+        raise ValueError("experts must be divisible by experts_per_rank")
+    ep_size = experts // experts_per_rank
+    if ep_size <= 0:
+        raise ValueError("experts must be positive")
+    # Normalize once so the ID is derived from the same integer that
+    # verify_workload later reads back from dims["global_tokens"].
+    total_tokens = int(global_tokens)
+    # verify_workload requires global_tokens == ep_size * tokens_per_rank; flooring
+    # here would emit a manifest that can never pass verification.
+    if total_tokens % ep_size:
+        raise ValueError("global_tokens must be divisible by ep_size")
+    idx_bytes = idx_np.astype("<i4", copy=False).tobytes()
+    w_bytes = weights_np.astype("<f4", copy=False).tobytes()
+    trace_checksum = _sha256(idx_bytes + w_bytes)
+    wid = compute_workload_id(
+        routing, hidden, topk, experts, ep_size, total_tokens, seed,
+        trace_checksum=trace_checksum,
+    )
+    return {
+        "schema_version": WORKLOAD_SCHEMA_VERSION,
+        "workload_id": wid,
+        "generator_version": GENERATOR_VERSION,
+        "gate_weight_format": GATE_WEIGHT_FORMAT,
+        "dims": {"hidden": hidden, "topk": topk, "experts": experts, "ep_size": ep_size,
+                 "tokens_per_rank": total_tokens // ep_size,
+                 "global_tokens": total_tokens, "experts_per_rank": experts_per_rank},
+        "routing_profile": routing,
+        "seed": seed,
+        "checksums": {  # SHA-256 over the raw little-endian array bytes (int32 / float32)
+            "topk_idx": _sha256(idx_bytes),
+            "topk_weights": _sha256(w_bytes),   # gate-weight (value) distribution identity
+            "trace": trace_checksum,
+        },
+        "activation_profile": "canonical-counter-source-v3",
+        "activation_generator": ACTIVATION_GENERATOR,
+        "activation_identity": compute_activation_identity(seed, hidden),
+    }
+
+
+def build_workload(hidden, topk, experts, routing, global_tokens, seed, experts_per_rank):
+    """Generate a canonical trace and its manifest.
+
+    Returns:
+        ``(idx_np, weights_np, manifest)``: the routing rows as int32 and float32
+        numpy arrays, plus the dict produced by ``build_manifest``.
+    """
+    import numpy as np
+    rows, gates = canonical_routing_rows(global_tokens, experts, topk, routing, seed)
+    idx_np = np.array(rows, dtype=np.int32)
+    w_np = np.array(gates, dtype=np.float32)
+    manifest = build_manifest(
+        routing, hidden, topk, experts, global_tokens, seed, experts_per_rank, idx_np, w_np
+    )
+    return (idx_np, w_np, manifest)
+
+
+def save_workload(out_dir, idx_np, weights_np, manifest) -> str:
+    """Write ``<workload_id>.npz`` and ``<workload_id>.manifest.json`` into ``out_dir``.
+
+    The directory is created if missing. The arrays are stored compressed as
+    int32 ``topk_idx`` and float32 ``topk_weights``; the manifest is written as
+    indented JSON with sorted keys. Returns the workload ID taken from the manifest.
+    """
+    import numpy as np
+    os.makedirs(out_dir, exist_ok=True)
+    wid = manifest["workload_id"]
+    archive_path = os.path.join(out_dir, f"{wid}.npz")
+    np.savez_compressed(
+        archive_path,
+        topk_idx=idx_np.astype(np.int32),
+        topk_weights=weights_np.astype(np.float32),
+    )
+    manifest_path = os.path.join(out_dir, f"{wid}.manifest.json")
+    with open(manifest_path, "w") as fh:
+        json.dump(manifest, fh, indent=2, sort_keys=True)
+    return wid
+
+
+def load_workload(npz_path, verify=True):
+    """Load a canonical trace and its manifest (numpy + stdlib only).
+
+    ``npz_path`` may be given with or without the ``.npz`` suffix; the manifest
+    is read from the same basename with ``.manifest.json`` appended.
+
+    Returns:
+        ``(idx_np, weights_np, manifest)`` with both arrays made C-contiguous.
+
+    Raises:
+        ValueError: the manifest's ``workload_id`` differs from the file basename,
+            the archive does not hold exactly ``topk_idx`` and ``topk_weights``,
+            or ``verify`` is true and ``verify_workload`` rejects the data.
+    """
+    import numpy as np
+    stem = npz_path
+    if stem.endswith(".npz"):
+        stem = stem[:-len(".npz")]
+    with open(stem + ".manifest.json") as fh:
+        manifest = json.load(fh)
+    if manifest.get("workload_id") != os.path.basename(stem):
+        raise ValueError(f"workload manifest ID does not match filename for {stem}")
+    # allow_pickle=False: refuse pickled object arrays inside the archive.
+    with np.load(stem + ".npz", allow_pickle=False) as archive:
+        if set(archive.files) != {"topk_idx", "topk_weights"}:
+            raise ValueError(f"workload archive fields differ for {stem}")
+        idx_np = np.ascontiguousarray(archive["topk_idx"])
+        w_np = np.ascontiguousarray(archive["topk_weights"])
+    if verify:
+        outcome = verify_workload(manifest, idx_np, w_np)
+        if not outcome[0]:
+            raise ValueError(f"workload checksum mismatch for {stem}: {outcome[1]}")
+    return (idx_np, w_np, manifest)
+
+
+def verify_workload(manifest, idx_np, weights_np):
+    """Validate a manifest against its trace arrays. Returns (ok, reason).
+
+    Checks, in order: the manifest field set, versions and routing profile, seed
+    and workload ID, the ``dims`` mapping and its EP consistency, array
+    dtype/shape/layout, expert-index range and uniqueness per row, gate-weight
+    validity, activation identity, the three SHA-256 checksums, and finally the
+    recomputed workload ID.
+
+    Malformed manifest values (an unhashable ``routing_profile``, a non-dict
+    ``checksums``) and non-ndarray inputs are reported as ``(False, reason)``
+    rather than raising, so callers can rely on the tuple contract.
+
+    Returns:
+        ``(True, "ok")`` on success, otherwise ``(False, <reason>)`` for the first
+        failing check.
+    """
+    import numpy as np
+    expected_fields = {
+        "schema_version", "workload_id", "generator_version", "gate_weight_format", "dims",
+        "routing_profile", "seed", "checksums", "activation_profile", "activation_generator",
+        "activation_identity",
+    }
+    if not isinstance(manifest, dict) or set(manifest) != expected_fields:
+        return False, "manifest fields differ from the v1 contract"
+    routing = manifest["routing_profile"]
+    # isinstance comes first: a list/dict value would raise TypeError in the set lookup.
+    if (manifest["schema_version"] != WORKLOAD_SCHEMA_VERSION
+            or manifest["generator_version"] != GENERATOR_VERSION
+            or manifest["gate_weight_format"] != GATE_WEIGHT_FORMAT
+            or not isinstance(routing, str)
+            or routing not in {"uniform", "zipf"}):
+        return False, "manifest version or generator is unsupported"
+    # bool is a subclass of int, so it is rejected explicitly.
+    if (isinstance(manifest["seed"], bool) or not isinstance(manifest["seed"], int)
+            or not identity.is_typed_id(manifest["workload_id"], "workload")):
+        return False, "manifest seed or workload ID is invalid"
+    dims = manifest["dims"]
+    dim_fields = {"hidden", "topk", "experts", "ep_size", "tokens_per_rank",
+                  "global_tokens", "experts_per_rank"}
+    if not isinstance(dims, dict) or set(dims) != dim_fields:
+        return False, "manifest dimensions are invalid"
+    if any(isinstance(dims[key], bool) or not isinstance(dims[key], int) or dims[key] <= 0
+           for key in dim_fields):
+        return False, "manifest dimensions must be positive integers"
+    if (dims["experts"] != dims["ep_size"] * dims["experts_per_rank"]
+            or dims["global_tokens"] != dims["ep_size"] * dims["tokens_per_rank"]):
+        return False, "manifest EP dimensions are inconsistent"
+    shape = (dims["global_tokens"], dims["topk"])
+    # Non-ndarray inputs have no dtype/flags; report them instead of raising AttributeError.
+    if (not isinstance(idx_np, np.ndarray) or not isinstance(weights_np, np.ndarray)
+            or idx_np.dtype != np.int32 or weights_np.dtype != np.float32
+            or idx_np.shape != shape or weights_np.shape != shape
+            or not idx_np.flags.c_contiguous or not weights_np.flags.c_contiguous):
+        return False, "workload array dtype, shape, or layout is invalid"
+    # After sorting each row, a zero difference between neighbours means a repeated expert.
+    if (np.any(idx_np < 0) or np.any(idx_np >= dims["experts"])
+            or np.any(np.diff(np.sort(idx_np, axis=1), axis=1) == 0)):
+        return False, "expert indices are out of range or repeated"
+    if (not np.isfinite(weights_np).all() or np.any(weights_np < 0)
+            or not np.allclose(weights_np.sum(axis=1), 1.0, rtol=1e-5, atol=1e-6)):
+        return False, "gate weights are invalid"
+    if (manifest["activation_profile"] != "canonical-counter-source-v3"
+            or manifest["activation_generator"] != ACTIVATION_GENERATOR
+            or manifest["activation_identity"]
+            != compute_activation_identity(
+                manifest["seed"], dims["hidden"], manifest["activation_generator"]
+            )):
+        return False, "activation identity is invalid"
+    ib = idx_np.astype("<i4", copy=False).tobytes()
+    wb = weights_np.astype("<f4", copy=False).tobytes()
+    cs = manifest["checksums"]
+    # A list of the three names would pass the set comparison, so require a dict.
+    if not isinstance(cs, dict) or set(cs) != {"topk_idx", "topk_weights", "trace"}:
+        return False, "checksum fields are invalid"
+    if _sha256(ib) != cs["topk_idx"]:
+        return False, "topk_idx hash differs"
+    if _sha256(wb) != cs["topk_weights"]:
+        return False, "topk_weights hash differs"
+    if _sha256(ib + wb) != cs["trace"]:
+        return False, "trace hash differs"
+    # The ID is recomputed from the manifest's own dims and the verified trace hash.
+    wid = compute_workload_id(
+        routing, dims["hidden"], dims["topk"], dims["experts"],
+        dims["ep_size"], dims["global_tokens"], manifest["seed"],
+        manifest["generator_version"], trace_checksum=cs["trace"],
+    )
+    if wid != manifest["workload_id"]:
+        return False, f"workload_id mismatch (recomputed {wid} != {manifest['workload_id']})"
+    return True, "ok"
+
+
+# --------------------------------------------------------------------------- self-test
+# Smoke test run when the module is executed directly; exits 0 after printing PASS.
+if __name__ == "__main__":
+    import sys
+    import tempfile
+    # (1) workload_id determinism + sensitivity — pure stdlib, always runs.
+    a = compute_workload_id("zipf", 7168, 8, 256, 8, 4096, 67)
+    b = compute_workload_id("zipf", 7168, 8, 256, 8, 4096, 67)
+    c = compute_workload_id("uniform", 7168, 8, 256, 8, 4096, 67)
+    assert a == b, "workload_id must be deterministic"
+    assert a != c, "workload_id must depend on routing"
+    print(f"workload_id determinism OK (zipf={a} uniform={c})")
+    # (2) build/save/load/verify roundtrip — needs numpy (nothing below imports torch).
+    try:
+        import numpy as np  # noqa: F401
+        idx, w, man = build_workload(7168, 8, 256, "zipf", 512, 67, 32)
+        with tempfile.TemporaryDirectory() as d:
+            wid = save_workload(d, idx, w, man)
+            idx2, w2, man2 = load_workload(os.path.join(d, f"{wid}.npz"), verify=True)
+            assert (idx2 == idx).all() and (w2 == w).all(), "roundtrip array mismatch"
+            ok, reason = verify_workload(man2, idx2, w2)
+            assert ok, reason
+            # tamper -> must fail
+            idx2[0, 0] = (int(idx2[0, 0]) + 1) % 256
+            # `bad` is the ok-flag returned for the tampered arrays; it must be False.
+            bad, _ = verify_workload(man2, idx2, w2)
+            assert not bad, "verify must catch tampering"
+        print(f"save/load/verify roundtrip OK (workload_id={wid})")
+    except ImportError:
+        print("(numpy unavailable — skipped serialization roundtrip; id logic passed)")
+    print("workload self-test: PASS")
+    sys.exit(0)