From c8f82b941a339384b1d56c368cabcc122789e798 Mon Sep 17 00:00:00 2001
From: igerber <isaac.gerber@gmail.com>
Date: Mon, 29 Jun 2026 08:50:25 -0400
Subject: [PATCH 1/8] feat(staggered): materialize non-estimable (g,t) cells as
 NaN in CallawaySantAnna

Uniformly materialize a NaN entry (with a machine-readable skip_reason) for every
non-estimable (g,t) group-time cell across all CS estimation paths (no-covariate
regression, covariate regression, IPW/DR, repeated cross-section, survey-weighted)
instead of omitting it. Previously only the covariate-singular case materialized
NaN; the other paths dropped the cell silently from the grid.

Cells carry no influence-function entry, so they are excluded from every
aggregation (simple/group/calendar/event-study), balance_e, and bootstrap -- all
aggregate point estimates and SEs, plus event-study n_groups / by-group n_periods,
are numerically unchanged and continue to match R did's aggte(). A fit where no
cell is estimable still raises ValueError. to_dataframe("group_time") now includes
the NaN rows and a skip_reason column.

Documented per-cell surface deviation from R's att_gt (which omits the rows).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                       |  15 +++
 diff_diff/staggered.py             | 157 ++++++++++++++++++++++----
 diff_diff/staggered_aggregation.py |   9 +-
 diff_diff/staggered_results.py     |   9 ++
 docs/methodology/REGISTRY.md       |   5 +-
 tests/test_staggered.py            | 174 +++++++++++++++++++++++++++++
 6 files changed, 342 insertions(+), 27 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f8df061a..7666f021 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -43,6 +43,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   Korn-Graubard (1990), and Solon-Haider-Wooldridge (2015) to `docs/references.rst`.
 
 ### Changed
+- **CallawaySantAnna now materializes non-estimable `(g,t)` cells as NaN entries instead of
+  omitting them.** Cells that cannot be estimated (missing base/post period, zero
+  treated/control, zero survey-weight mass, or a non-finite regression solve) are stored in
+  `group_time_effects` as NaN entries carrying a machine-readable `skip_reason`
+  (`"missing_period"` / `"zero_treated_control"` / `"zero_weight_mass"` /
+  `"non_finite_regression"`; estimable cells carry `None`), uniformly across all estimation paths
+  (no-covariate regression, covariate regression, IPW/DR, repeated cross-section, survey-weighted)
+  — previously only the covariate-regression singular case did this and the other paths dropped
+  the cell silently from the grid. The cells are excluded from every aggregation
+  (simple/group/calendar/event-study), from `balance_e`, and from the bootstrap, so all aggregate
+  point estimates and standard errors — and the event-study `n_groups` / by-group `n_periods`
+  metadata — are numerically **unchanged** and continue to match R `did`'s `aggte()`; a fit where
+  no cell is estimable still raises `ValueError`. `to_dataframe("group_time")` now includes these
+  NaN rows and a `skip_reason` column. This is a documented per-cell surface **deviation from R**'s
+  `att_gt` (which omits the rows). See REGISTRY.md "CallawaySantAnna" edge cases.
 - **CallawaySantAnna multiplier bootstrap now tiles weight generation over draws, cutting
   peak memory at large `n_units`.** The dense `(n_bootstrap × n_units)` multiplier-weight
   matrix (the dominant allocation for the default unit-level bootstrap — `cluster=None`,
diff --git a/diff_diff/staggered.py b/diff_diff/staggered.py
index bba513c0..d91d2abc 100644
--- a/diff_diff/staggered.py
+++ b/diff_diff/staggered.py
@@ -222,6 +222,44 @@ def _safe_inv(
     return inv
 
 
+def _nan_gt_entry(
+    n_treated: int = 0,
+    n_control: int = 0,
+    skip_reason: Optional[str] = None,
+    survey_weight_sum: Optional[float] = None,
+) -> Dict[str, Any]:
+    """Build a materialized NaN group-time entry for a non-estimable (g, t) cell.
+
+    Non-estimable cells (missing base/post period, zero treated/control, zero
+    survey-weight mass, or a non-finite regression solve) are stored as a NaN
+    entry in ``group_time_effects`` rather than omitted, so the (g, t) grid is
+    inspectable (``to_dataframe`` / direct dict access) and the reason is
+    machine-readable via ``skip_reason`` (one of ``"missing_period"``,
+    ``"zero_treated_control"``, ``"zero_weight_mass"``,
+    ``"non_finite_regression"``; estimable cells carry ``None``).
+
+    The cell carries NO ``influence_func_info`` entry: every aggregation and
+    bootstrap consumer finite-masks (``np.isfinite(effect)``) or filters to IF
+    members before use, so the NaN cell contributes nothing to any aggregate or
+    SE — aggregates stay numerically identical to the prior omit behavior, which
+    matches R ``did``'s ``aggte()``. See REGISTRY.md "CallawaySantAnna" edge
+    cases for the documented contract.
+    """
+    entry: Dict[str, Any] = {
+        "effect": np.nan,
+        "se": np.nan,
+        "t_stat": np.nan,
+        "p_value": np.nan,
+        "conf_int": (np.nan, np.nan),
+        "n_treated": int(n_treated),
+        "n_control": int(n_control),
+        "skip_reason": skip_reason,
+    }
+    if survey_weight_sum is not None:
+        entry["survey_weight_sum"] = survey_weight_sum
+    return entry
+
+
 class CallawaySantAnna(
     CallawaySantAnnaBootstrapMixin,
     CallawaySantAnnaAggregationMixin,
@@ -786,7 +824,9 @@ def _compute_att_gt_fast(
         covariates: Optional[List[str]],
         pscore_cache: Optional[Dict] = None,
         epv_diagnostics: Optional[Dict] = None,
-    ) -> Tuple[Optional[float], float, int, int, Optional[Dict[str, Any]], Optional[float]]:
+    ) -> Tuple[
+        Optional[float], float, int, int, Optional[Dict[str, Any]], Optional[float], Optional[str]
+    ]:
         """
         Compute ATT(g,t) using pre-computed data structures (fast version).
 
@@ -802,6 +842,11 @@ def _compute_att_gt_fast(
         inf_func_info : dict or None
         survey_weight_sum : float or None
             Sum of survey weights for treated units (for aggregation weighting).
+        skip_reason : str or None
+            When ``att_gt is None`` (non-estimable cell), the machine-readable
+            reason (``"missing_period"`` / ``"zero_treated_control"`` /
+            ``"zero_weight_mass"``) so the caller can materialize a NaN cell with
+            a ``skip_reason``. ``None`` on a successful return.
         """
         period_to_col = precomputed["period_to_col"]
         outcome_matrix = precomputed["outcome_matrix"]
@@ -824,11 +869,11 @@ def _compute_att_gt_fast(
 
         if base_period_val not in period_to_col:
             # Base period must exist; no fallback to maintain methodological consistency
-            return None, 0.0, 0, 0, None, None
+            return None, 0.0, 0, 0, None, None, "missing_period"
 
         # Check if periods exist in the data
         if base_period_val not in period_to_col or t not in period_to_col:
-            return None, 0.0, 0, 0, None, None
+            return None, 0.0, 0, 0, None, None, "missing_period"
 
         base_col = period_to_col[base_period_val]
         post_col = period_to_col[t]
@@ -866,7 +911,7 @@ def _compute_att_gt_fast(
         n_control = np.sum(control_valid)
 
         if n_treated == 0 or n_control == 0:
-            return None, 0.0, 0, 0, None, None
+            return None, 0.0, 0, 0, None, None, "zero_treated_control"
 
         # Extract outcome changes for treated and control
         treated_change = outcome_change[treated_valid]
@@ -879,9 +924,9 @@ def _compute_att_gt_fast(
 
         # Guard against zero effective mass after subpopulation filtering
         if sw_treated is not None and np.sum(sw_treated) <= 0:
-            return None, 0.0, 0, 0, None, None
+            return None, 0.0, 0, 0, None, None, "zero_weight_mass"
         if sw_control is not None and np.sum(sw_control) <= 0:
-            return None, 0.0, 0, 0, None, None
+            return None, 0.0, 0, 0, None, None, "zero_weight_mass"
 
         # Get covariates if specified (from the base period)
         X_treated = None
@@ -977,7 +1022,7 @@ def _compute_att_gt_fast(
         }
 
         sw_sum = float(np.sum(sw_treated)) if sw_treated is not None else None
-        return att_gt, se_gt, int(n_treated), int(n_control), inf_func_info, sw_sum
+        return att_gt, se_gt, int(n_treated), int(n_control), inf_func_info, sw_sum, None
 
     def _compute_all_att_gt_vectorized(
         self,
@@ -1034,6 +1079,7 @@ def _compute_all_att_gt_vectorized(
 
                 if base_period_val not in period_to_col or t not in period_to_col:
                     skipped_missing_period.append((g, t))
+                    group_time_effects[(g, t)] = _nan_gt_entry(skip_reason="missing_period")
                     continue
 
                 tasks.append(
@@ -1070,6 +1116,11 @@ def _compute_all_att_gt_vectorized(
 
             if n_treated == 0 or n_control == 0:
                 skipped_empty_cell.append((g, t))
+                group_time_effects[(g, t)] = _nan_gt_entry(
+                    n_treated=int(n_treated),
+                    n_control=int(n_control),
+                    skip_reason="zero_treated_control",
+                )
                 continue
 
             treated_change = outcome_change[treated_valid]
@@ -1085,6 +1136,11 @@ def _compute_all_att_gt_vectorized(
                 # Guard against zero effective mass
                 if np.sum(sw_t) <= 0 or np.sum(sw_c) <= 0:
                     skipped_empty_cell.append((g, t))
+                    group_time_effects[(g, t)] = _nan_gt_entry(
+                        n_treated=n_t,
+                        n_control=n_c,
+                        skip_reason="zero_weight_mass",
+                    )
                     continue
                 sw_t_norm = sw_t / np.sum(sw_t)
                 sw_c_norm = sw_c / np.sum(sw_c)
@@ -1123,6 +1179,7 @@ def _compute_all_att_gt_vectorized(
                 "conf_int": (np.nan, np.nan),
                 "n_treated": n_t,
                 "n_control": n_c,
+                "skip_reason": None if np.isfinite(att) else "non_finite_regression",
             }
             if sw_sum is not None:
                 gte_entry["survey_weight_sum"] = sw_sum
@@ -1251,6 +1308,7 @@ def _compute_all_att_gt_covariate_reg(
 
                 if base_period_val not in period_to_col or t not in period_to_col:
                     skipped_missing_period.append((g, t))
+                    group_time_effects[(g, t)] = _nan_gt_entry(skip_reason="missing_period")
                     continue
 
                 # Determine control regression grouping key.
@@ -1301,6 +1359,10 @@ def _compute_all_att_gt_covariate_reg(
             n_c_base = int(np.sum(control_valid_base))
             if n_c_base == 0:
                 skipped_empty_cell.extend((g, t) for g, t, *_ in tasks)
+                for g, t, *_ in tasks:
+                    group_time_effects[(g, t)] = _nan_gt_entry(
+                        n_control=0, skip_reason="zero_treated_control"
+                    )
                 continue
 
             X_ctrl = None
@@ -1358,6 +1420,9 @@ def _compute_all_att_gt_covariate_reg(
 
                 if n_t == 0 or n_c == 0:
                     skipped_empty_cell.append((g, t))
+                    group_time_effects[(g, t)] = _nan_gt_entry(
+                        n_treated=n_t, n_control=n_c, skip_reason="zero_treated_control"
+                    )
                     continue
 
                 treated_change = outcome_change[treated_valid]
@@ -1475,6 +1540,7 @@ def _compute_all_att_gt_covariate_reg(
                     "conf_int": (np.nan, np.nan),
                     "n_treated": n_t,
                     "n_control": n_c,
+                    "skip_reason": None if np.isfinite(att) else "non_finite_regression",
                 }
 
                 all_units = precomputed["all_units"]
@@ -1930,8 +1996,17 @@ def fit(
                         covariates,
                         epv_diagnostics=epv_diagnostics,
                     )
-                    att_gt, se_gt, n_treat, n_ctrl, inf_info, sw_sum = rc_result[:6]
-                    agg_w = rc_result[6] if len(rc_result) > 6 else n_treat
+                    (
+                        att_gt,
+                        se_gt,
+                        n_treat,
+                        n_ctrl,
+                        inf_info,
+                        sw_sum,
+                        cohort_mass,
+                        skip_reason,
+                    ) = rc_result
+                    agg_w = cohort_mass if cohort_mass is not None else n_treat
 
                     if att_gt is not None:
                         # Cluster-aware per-(g,t) SE on the RCS path. RC
@@ -1970,6 +2045,9 @@ def fit(
                             "n_treated": n_treat,
                             "n_control": n_ctrl,
                             "agg_weight": agg_w,
+                            "skip_reason": (
+                                None if np.isfinite(att_gt) else "non_finite_regression"
+                            ),
                         }
                         if sw_sum is not None:
                             gte_entry["survey_weight_sum"] = sw_sum
@@ -1979,6 +2057,11 @@ def fit(
                             influence_func_info[(g, t)] = inf_info
                     else:
                         _n_skipped_other += 1
+                        group_time_effects[(g, t)] = _nan_gt_entry(
+                            n_treated=n_treat,
+                            n_control=n_ctrl,
+                            skip_reason=skip_reason,
+                        )
 
         elif covariates is None and self.estimation_method == "reg":
             # Fast vectorized path for the common no-covariates regression case
@@ -2024,13 +2107,15 @@ def fit(
                     ]
 
                 for t in valid_periods:
-                    att_gt, se_gt, n_treat, n_ctrl, inf_info, sw_sum = self._compute_att_gt_fast(
-                        precomputed,
-                        g,
-                        t,
-                        covariates,
-                        pscore_cache=pscore_cache,
-                        epv_diagnostics=epv_diagnostics,
+                    att_gt, se_gt, n_treat, n_ctrl, inf_info, sw_sum, skip_reason = (
+                        self._compute_att_gt_fast(
+                            precomputed,
+                            g,
+                            t,
+                            covariates,
+                            pscore_cache=pscore_cache,
+                            epv_diagnostics=epv_diagnostics,
+                        )
                     )
 
                     if att_gt is not None:
@@ -2069,6 +2154,9 @@ def fit(
                             "conf_int": ci,
                             "n_treated": n_treat,
                             "n_control": n_ctrl,
+                            "skip_reason": (
+                                None if np.isfinite(att_gt) else "non_finite_regression"
+                            ),
                         }
                         if sw_sum is not None:
                             gte_entry["survey_weight_sum"] = sw_sum
@@ -2078,8 +2166,15 @@ def fit(
                             influence_func_info[(g, t)] = inf_info
                     else:
                         _n_skipped_other += 1
+                        group_time_effects[(g, t)] = _nan_gt_entry(
+                            n_treated=n_treat,
+                            n_control=n_ctrl,
+                            skip_reason=skip_reason,
+                        )
 
-        if not group_time_effects:
+        if not group_time_effects or not any(
+            np.isfinite(v["effect"]) for v in group_time_effects.values()
+        ):
             raise ValueError(
                 "Could not estimate any group-time effects. "
                 "Check that data has sufficient observations."
@@ -3175,7 +3270,16 @@ def _compute_att_gt_rc(
         t: Any,
         covariates: Optional[List[str]],
         epv_diagnostics: Optional[Dict] = None,
-    ) -> Tuple[Optional[float], float, int, int, Optional[Dict[str, Any]], Optional[float]]:
+    ) -> Tuple[
+        Optional[float],
+        float,
+        int,
+        int,
+        Optional[Dict[str, Any]],
+        Optional[float],
+        Optional[float],
+        Optional[str],
+    ]:
         """
         Compute ATT(g,t) for repeated cross-section data.
 
@@ -3191,6 +3295,13 @@ def _compute_att_gt_rc(
         n_control : int (control obs at period t)
         inf_func_info : dict or None
         survey_weight_sum : float or None
+        cohort_mass : float or None
+            Aggregation weight (survey-weighted cohort mass); ``None`` on a
+            non-estimable return.
+        skip_reason : str or None
+            When ``att_gt is None`` (non-estimable cell), the machine-readable
+            reason (``"missing_period"`` / ``"zero_treated_control"`` /
+            ``"zero_weight_mass"``); ``None`` on a successful return.
         """
         cohort_masks = precomputed["cohort_masks"]
         never_treated_mask = precomputed["never_treated_mask"]
@@ -3209,7 +3320,7 @@ def _compute_att_gt_rc(
                 base_period_val = g - 1 - self.anticipation
 
         if base_period_val not in period_to_col or t not in period_to_col:
-            return None, 0.0, 0, 0, None, None
+            return None, 0.0, 0, 0, None, None, None, "missing_period"
 
         # Treated mask = cohort g
         treated_mask = cohort_masks[g]
@@ -3239,7 +3350,7 @@ def _compute_att_gt_rc(
         n_cs = int(np.sum(control_s))
 
         if n_gt == 0 or n_ct == 0 or n_gs == 0 or n_cs == 0:
-            return None, 0.0, 0, 0, None, None
+            return None, 0.0, 0, 0, None, None, None, "zero_treated_control"
 
         # Extract outcomes for each group
         y_gt = obs_outcome[treated_t]
@@ -3257,9 +3368,9 @@ def _compute_att_gt_rc(
         # Guard against zero effective mass
         if sw_gt is not None:
             if np.sum(sw_gt) <= 0 or np.sum(sw_gs) <= 0:
-                return None, 0.0, 0, 0, None, None
+                return None, 0.0, 0, 0, None, None, None, "zero_weight_mass"
             if np.sum(sw_ct) <= 0 or np.sum(sw_cs) <= 0:
-                return None, 0.0, 0, 0, None, None
+                return None, 0.0, 0, 0, None, None, None, "zero_weight_mass"
 
         # Get covariates if specified
         obs_covariates = precomputed.get("obs_covariates")
@@ -3377,7 +3488,7 @@ def _compute_att_gt_rc(
         # n_treated = per-cell treated count at period t (for display).
         # cohort_mass = total treated across all periods (for aggregation weights).
         cohort_mass = precomputed.get("rcs_cohort_masses", {}).get(g, n_gt)
-        return att, se, n_gt, n_ct, inf_func_info, sw_sum, cohort_mass
+        return att, se, n_gt, n_ct, inf_func_info, sw_sum, cohort_mass, None
 
     def _rc_2x2_did(
         self,
diff --git a/diff_diff/staggered_aggregation.py b/diff_diff/staggered_aggregation.py
index dee32c34..5e574c04 100644
--- a/diff_diff/staggered_aggregation.py
+++ b/diff_diff/staggered_aggregation.py
@@ -690,7 +690,10 @@ def _aggregate_event_study(
 
             agg_effects_list.append(agg_effect)
             agg_ses_list.append(agg_se)
-            agg_n_groups.append(len(effect_list))
+            # Count only finite-contributing cells (gt_pairs is finite-filtered
+            # above) so materialized NaN cells don't inflate n_groups — matches
+            # the all-NaN early-return which already reports 0.
+            agg_n_groups.append(len(gt_pairs))
             agg_effective_dfs.append(eff_df)
             _psi_vectors.append(psi_e)
             _psi_event_times.append(e)
@@ -887,7 +890,9 @@ def _aggregate_by_group(
             agg_se, eff_df = self._compute_aggregated_se_with_wif(
                 gt_pairs, weights, effs, groups_for_gt, influence_func_info, df, unit, precomputed
             )
-            group_data_list.append((g, agg_effect, agg_se, len(g_effects), eff_df))
+            # Count only finite-contributing cells (gt_pairs is finite-filtered
+            # above) so materialized NaN cells don't inflate n_periods.
+            group_data_list.append((g, agg_effect, agg_se, len(gt_pairs), eff_df))
 
         if not group_data_list:
             return {}
diff --git a/diff_diff/staggered_results.py b/diff_diff/staggered_results.py
index 886fdc92..4bb45f37 100644
--- a/diff_diff/staggered_results.py
+++ b/diff_diff/staggered_results.py
@@ -36,6 +36,11 @@ class GroupTimeEffect:
         Number of treated observations.
     n_control : int
         Number of control observations.
+    skip_reason : str or None
+        ``None`` for an estimable cell; otherwise a machine-readable reason the
+        cell is non-estimable (``"missing_period"``, ``"zero_treated_control"``,
+        ``"zero_weight_mass"``, ``"non_finite_regression"``) and ``effect``/``se``
+        are NaN. Non-estimable cells are excluded from all aggregation.
     """
 
     group: Any
@@ -47,6 +52,7 @@ class GroupTimeEffect:
     conf_int: Tuple[float, float]
     n_treated: int
     n_control: int
+    skip_reason: Optional[str] = None
 
     @property
     def is_significant(self) -> bool:
@@ -433,6 +439,9 @@ def to_dataframe(self, level: str = "group_time") -> pd.DataFrame:
                     "p_value": data["p_value"],
                     "conf_int_lower": data["conf_int"][0],
                     "conf_int_upper": data["conf_int"][1],
+                    # None for estimable cells; a reason code for non-estimable
+                    # (NaN) cells materialized in group_time_effects.
+                    "skip_reason": data.get("skip_reason"),
                 }
                 if self.epv_diagnostics and (g, t) in self.epv_diagnostics:
                     row["epv"] = self.epv_diagnostics[(g, t)].get("epv")
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
index b801bddb..6fb66dc7 100644
--- a/docs/methodology/REGISTRY.md
+++ b/docs/methodology/REGISTRY.md
@@ -525,8 +525,9 @@ The multiplier bootstrap uses random weights w_i with E[w]=0 and Var(w)=1:
 
 *Edge cases:*
 - Groups with single observation: included but may have high variance
-- Missing group-time cells: omitted from `group_time_effects` with a consolidated warning listing skip reasons and counts
-  - **Note:** Non-estimable cells (missing base/post period, zero treated/control, insufficient data) are omitted rather than stored as NaN. A consolidated UserWarning is emitted from `fit()` across all estimation paths. R's `did` package also omits these cells from `aggte()` results.
+- Non-estimable group-time cells: materialized as NaN entries in `group_time_effects` with a consolidated warning listing skip reasons and counts
+  - **Note:** Non-estimable cells (missing base/post period, zero treated/control, zero survey-weight mass, non-finite regression solve) are stored as NaN entries — `effect`/`se`/`t_stat`/`p_value`/`conf_int` all NaN — carrying a machine-readable `skip_reason` code (`"missing_period"`, `"zero_treated_control"`, `"zero_weight_mass"`, `"non_finite_regression"`; estimable cells carry `None`). This is uniform across ALL estimation paths (no-covariate regression, covariate regression, IPW/DR, repeated cross-section, survey-weighted). A consolidated `UserWarning` is still emitted from `fit()`. The NaN cells are **excluded from every aggregation** (simple/overall, group, calendar, event-study), from `balance_e`, and from the bootstrap (they carry no influence-function entry, and all consumers finite-mask on `np.isfinite(effect)` or filter to IF members), so all aggregate point estimates and SEs — and `n_groups`/`n_periods` metadata — are **unchanged** from the prior omit behavior and match R `did`'s `aggte()` exactly. A fit where no cell is estimable (no finite effect) still raises a `ValueError`.
+  - **Deviation from R:** R's `did::att_gt` omits non-estimable cells from its result table entirely; diff-diff materializes them as NaN rows (with `skip_reason`) so the `(g,t)` grid is inspectable via `group_time_effects` / `to_dataframe("group_time")`. This is a per-cell *surface* difference only — R's `aggte()` aggregation behavior is matched exactly (non-estimable cells contribute nothing to any aggregate).
   - **Note:** When `balance_e` is specified, cohorts with NaN effects at the anchor horizon are excluded from the balanced panel
 - Anticipation: `anticipation` parameter shifts reference period
   - Group aggregation includes periods t >= g - anticipation (not just t >= g)
diff --git a/tests/test_staggered.py b/tests/test_staggered.py
index a09e6f40..edd9eea3 100644
--- a/tests/test_staggered.py
+++ b/tests/test_staggered.py
@@ -1064,6 +1064,9 @@ def mock_solve_ols(*args, **kwargs):
             assert np.isnan(
                 results.group_time_effects[(g, t)]["se"]
             ), f"NaN cell ({g},{t}) must have NaN SE"
+            assert (
+                results.group_time_effects[(g, t)]["skip_reason"] == "non_finite_regression"
+            ), f"NaN cell ({g},{t}) must carry skip_reason='non_finite_regression'"
 
         # Overall ATT should still be finite (NaN cells excluded from aggregation)
         assert np.isfinite(results.overall_att)
@@ -1280,6 +1283,177 @@ def test_reg_underdetermined_control_cell_no_crash(self, action):
         )
 
 
+def _cs_nonestimable_data(panel: bool, n: int = 25, seed: int = 0) -> pd.DataFrame:
+    """Staggered data (cohorts 2,3,4; periods 1-4; NO never-treated) where, with
+    ``control_group="not_yet_treated"``, every post cell at the final period t=4
+    has no not-yet-treated controls (no cohort treated after 4) -> deterministic
+    non-estimable cells (e.g. (4, 4)). Earlier-period post cells (g=2/3 at t=2/3)
+    stay estimable, so aggregates remain finite.
+
+    panel=True  -> each unit observed in all 4 periods (true panel).
+    panel=False -> each row is a distinct unit (true repeated cross-section).
+    """
+    rng = np.random.default_rng(seed)
+    rows = []
+    uid = 0
+    for g in (2, 3, 4):
+        for _ in range(n):
+            if panel:
+                fe = rng.normal(0, 1)
+                for t in range(1, 5):
+                    post = 1.0 if t >= g else 0.0
+                    rows.append(
+                        {
+                            "unit": uid,
+                            "time": t,
+                            "outcome": fe + 0.3 * t + 1.5 * post + rng.normal(0, 0.5),
+                            "first_treat": g,
+                        }
+                    )
+                uid += 1
+            else:
+                for t in range(1, 5):
+                    post = 1.0 if t >= g else 0.0
+                    rows.append(
+                        {
+                            "unit": uid,
+                            "time": t,
+                            "outcome": rng.normal(0, 1) + 0.3 * t + 1.5 * post,
+                            "first_treat": g,
+                        }
+                    )
+                    uid += 1
+    return pd.DataFrame(rows)
+
+
+class TestCallawaySantAnnaNonEstimableMaterialization:
+    """Non-estimable (g,t) cells are materialized as NaN entries carrying a
+    machine-readable ``skip_reason``, uniformly across estimation paths, and are
+    excluded from every aggregation so aggregates/SEs stay finite (the prior
+    omit behavior; exact aggregate values are pinned by test_methodology_callaway).
+    """
+
+    _KNOWN_REASONS = {
+        "missing_period",
+        "zero_treated_control",
+        "zero_weight_mass",
+        "non_finite_regression",
+    }
+
+    @pytest.mark.parametrize(
+        "method,panel",
+        [("reg", True), ("ipw", True), ("dr", True), ("reg", False)],
+    )
+    def test_materializes_nan_cell_with_skip_reason(self, method, panel):
+        """Each previously-omitting path now stores the non-estimable cell as NaN."""
+        data = _cs_nonestimable_data(panel=panel, seed=0)
+        cs = CallawaySantAnna(
+            n_bootstrap=0,
+            control_group="not_yet_treated",
+            estimation_method=method,
+            panel=panel,
+        )
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            results = cs.fit(
+                data, outcome="outcome", unit="unit", time="time", first_treat="first_treat"
+            )
+
+        # The (g=4, t=4) cell has no not-yet-treated controls -> materialized, not omitted.
+        key = (4, 4)
+        assert (
+            key in results.group_time_effects
+        ), f"non-estimable cell must be materialized (path={method}, panel={panel}), not omitted"
+        cell = results.group_time_effects[key]
+        assert np.isnan(cell["effect"]) and np.isnan(cell["se"])
+        assert np.isnan(cell["t_stat"]) and np.isnan(cell["p_value"])
+        assert all(np.isnan(b) for b in cell["conf_int"])
+        assert cell["skip_reason"] == "zero_treated_control"
+
+        # Every NaN cell carries a known reason + NaN SE; every estimable cell None.
+        n_finite = 0
+        for (g, t), v in results.group_time_effects.items():
+            if np.isnan(v["effect"]):
+                assert v["skip_reason"] in self._KNOWN_REASONS, (g, t, v["skip_reason"])
+                assert np.isnan(v["se"])
+            else:
+                assert v["skip_reason"] is None
+                n_finite += 1
+
+        # Non-empty dict mixing NaN + finite cells fits without raising, and the
+        # NaN cells are excluded from aggregation (the aggregation invariant).
+        assert n_finite > 0, "expected some estimable cells"
+        assert np.isfinite(results.overall_att), "NaN cells must be excluded from aggregation"
+        assert np.isfinite(results.overall_se)
+
+    def test_estimable_cells_have_skip_reason_none(self):
+        """A well-posed fit carries skip_reason=None on every cell."""
+        data = generate_staggered_data(n_units=200, seed=42)
+        cs = CallawaySantAnna(n_bootstrap=0)
+        results = cs.fit(
+            data, outcome="outcome", unit="unit", time="time", first_treat="first_treat"
+        )
+        assert len(results.group_time_effects) > 0
+        assert all(np.isfinite(v["effect"]) for v in results.group_time_effects.values())
+        assert all(v["skip_reason"] is None for v in results.group_time_effects.values())
+
+    def test_to_dataframe_includes_nan_row_and_skip_reason_column(self):
+        """to_dataframe('group_time') surfaces the NaN cell + a skip_reason column."""
+        data = _cs_nonestimable_data(panel=True, seed=0)
+        cs = CallawaySantAnna(n_bootstrap=0, control_group="not_yet_treated")
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            results = cs.fit(
+                data, outcome="outcome", unit="unit", time="time", first_treat="first_treat"
+            )
+        df = results.to_dataframe("group_time")
+        assert "skip_reason" in df.columns
+
+        nan_row = df[(df["group"] == 4) & (df["time"] == 4)]
+        assert len(nan_row) == 1, "the non-estimable cell must appear as a row"
+        assert np.isnan(nan_row["effect"].iloc[0])
+        assert nan_row["skip_reason"].iloc[0] == "zero_treated_control"
+
+        # Estimable rows carry a null skip_reason.
+        estimable = df[df["effect"].notna()]
+        assert len(estimable) > 0
+        assert estimable["skip_reason"].isna().all()
+
+    def test_all_nonestimable_raises_with_materialized_cells(self):
+        """All cells non-estimable (dict non-empty, all NaN) -> ValueError via the
+        no-finite-effect guard (distinct from the empty-dict case)."""
+        rng = np.random.default_rng(3)
+        rows = []
+        uid = 0
+        # Two treated cohorts (passes the not_yet_treated >=2-cohort upfront check
+        # is N/A here since control_group is never_treated) ...
+        for g in (2, 3):
+            for _ in range(20):
+                fe = rng.normal(0, 1)
+                for t in range(1, 5):
+                    rows.append(
+                        {
+                            "unit": uid,
+                            "time": t,
+                            "outcome": fe + 0.3 * t + (1.5 if t >= g else 0.0),
+                            "first_treat": g,
+                        }
+                    )
+                uid += 1
+        # Never-treated controls present (passes the upfront control check) but with
+        # all-NaN outcomes -> every cell has zero VALID controls -> all cells NaN.
+        for _ in range(20):
+            for t in range(1, 5):
+                rows.append({"unit": uid, "time": t, "outcome": np.nan, "first_treat": np.inf})
+                uid += 1
+        data = pd.DataFrame(rows)
+        cs = CallawaySantAnna(n_bootstrap=0, control_group="never_treated")
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            with pytest.raises(ValueError, match="Could not estimate any group-time effects"):
+                cs.fit(data, outcome="outcome", unit="unit", time="time", first_treat="first_treat")
+
+
 class TestRankGuardedAnalyticalSE:
     """Rank-guarded influence-function SE (constant/collinear covariate).
 

From fa149d55eabcc831551a2338af2fb5b0500d248c Mon Sep 17 00:00:00 2001
From: igerber <isaac.gerber@gmail.com>
Date: Mon, 29 Jun 2026 09:12:35 -0400
Subject: [PATCH 2/8] fix(staggered): uniform no-IF NaN cells + cover covariate
 paths (review #582)

- The covariate-regression non-finite cell now materializes via _nan_gt_entry
  with NO influence-function entry, matching the other paths and the documented
  REGISTRY/helper contract (previously it wrote a zero-IF entry and ran batch
  inference). Aggregates and SEs are unchanged -- the cell was finite-masked /
  IF-membership-filtered out either way; now the "NaN cells carry no IF entry"
  invariant holds uniformly across all paths.
- Extend the materialization test to cover covariate IPW/DR (panel + RCS) paths.
- Remove the now-implemented CallawaySantAnna NaN-cell row from TODO.md.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 TODO.md                 |  1 -
 diff_diff/staggered.py  | 18 ++++++++++++++----
 tests/test_staggered.py | 30 ++++++++++++++++++++++++------
 3 files changed, 38 insertions(+), 11 deletions(-)

diff --git a/TODO.md b/TODO.md
index bcaea156..97b6902b 100644
--- a/TODO.md
+++ b/TODO.md
@@ -29,7 +29,6 @@ The `Origin` column (Actionable tables) and the `PR` column (Deferred tables) bo
 | Issue | Location | Origin | Effort | Priority |
 |-------|----------|--------|--------|----------|
 | `SyntheticControl` cv: thread an `"infeasible"` reason-code from `_outer_solve_V_cv()` / `_placebo_fit_unit()` so `in_space_placebo()` / `leave_one_out()` distinguish a structural cv-refit exclusion (donor-indistinguishable re-aggregated window) from a genuine inner-solver non-convergence — mirror the split `in_time_placebo()` already emits. Warnings already distinguish the two causes; only the machine-readable status/count is missing. | `synthetic_control.py`, `synthetic_control_results.py` | follow-up | Mid | Low |
-| `CallawaySantAnna`: materialize NaN entries for non-estimable `(g,t)` cells in `group_time_effects` (currently omitted with a consolidated warning); requires updating downstream consumers (event study, `balance_e`, aggregation). | `staggered.py` | #256 | Mid | Low |
 | Survey-design resolution / collapse patterns are inconsistent across panel estimators — `ContinuousDiD` rebuilds unit-level design in SE code, `EfficientDiD` builds once in `fit()`, `StackedDiD` re-resolves on stacked data. Extract shared helpers for panel-to-unit collapse, post-filter re-resolution, and metadata recomputation. | `continuous_did.py`, `efficient_did.py`, `stacked_did.py` | #226 | Mid | Low |
 | `SyntheticControl` remaining ADH-2015 §4 items: the regression-weight `W^reg = X_0'(X_0 X_0')^{-1} X_1` extrapolation diagnostic (flag implied OLS weights outside `[0,1]`) and sparse-SC subset search (`l < J`, holding `V` fixed). LOO, in-time placebo, CV `V`-selection, and inverse-variance `V` have landed; these two are the deferred tail. | `synthetic_control.py`, `synthetic_control_results.py` | ADH-2015 | Mid | Low |
 | `SyntheticControl` conformal (CWZ 2021) extensions: (a) one-sided / signed-`t` variants (§7); (b) covariates in the conformal proxy (`X_jt`, eqs 4/6 — current proxy is outcomes-only); (c) AR / innovation-permutation path (Lemmas 5-7) for time-series proxies. The joint test, pointwise CIs, and average-effect CI have landed. | `conformal.py`, `synthetic_control_results.py` | CWZ-2021 | Heavy | Low |
diff --git a/diff_diff/staggered.py b/diff_diff/staggered.py
index d91d2abc..55b56208 100644
--- a/diff_diff/staggered.py
+++ b/diff_diff/staggered.py
@@ -1522,9 +1522,6 @@ def _compute_all_att_gt_covariate_reg(
 
                     if nan_cell:
                         att = np.nan
-                        se = np.nan
-                        inf_treated = np.zeros(n_t)
-                        inf_control = np.zeros(n_c)
                     else:
                         var_t = float(np.var(treated_residuals, ddof=1)) if n_t > 1 else 0.0
                         var_c = float(np.var(residuals, ddof=1)) if pair_n_c > 1 else 0.0
@@ -1532,6 +1529,19 @@ def _compute_all_att_gt_covariate_reg(
                         inf_treated = (treated_residuals - np.mean(treated_residuals)) / n_t
                         inf_control = -residuals / pair_n_c
 
+                # Non-estimable (non-finite regression) cell: materialize NaN with
+                # NO influence-function entry — uniform with the missing-period /
+                # zero-control / zero-weight paths, so every NaN cell is excluded
+                # from aggregation and the bootstrap (the IF-membership filter and
+                # the finite-mask both drop it). Skip batch inference (already NaN).
+                if not np.isfinite(att):
+                    group_time_effects[(g, t)] = _nan_gt_entry(
+                        n_treated=n_t,
+                        n_control=n_c,
+                        skip_reason="non_finite_regression",
+                    )
+                    continue
+
                 group_time_effects[(g, t)] = {
                     "effect": att,
                     "se": se,
@@ -1540,7 +1550,7 @@ def _compute_all_att_gt_covariate_reg(
                     "conf_int": (np.nan, np.nan),
                     "n_treated": n_t,
                     "n_control": n_c,
-                    "skip_reason": None if np.isfinite(att) else "non_finite_regression",
+                    "skip_reason": None,
                 }
 
                 all_units = precomputed["all_units"]
diff --git a/tests/test_staggered.py b/tests/test_staggered.py
index edd9eea3..7747e156 100644
--- a/tests/test_staggered.py
+++ b/tests/test_staggered.py
@@ -1298,6 +1298,7 @@ def _cs_nonestimable_data(panel: bool, n: int = 25, seed: int = 0) -> pd.DataFra
     uid = 0
     for g in (2, 3, 4):
         for _ in range(n):
+            x = rng.normal(0, 1)  # unit-level covariate (for IPW/DR/reg covariate paths)
             if panel:
                 fe = rng.normal(0, 1)
                 for t in range(1, 5):
@@ -1306,20 +1307,23 @@ def _cs_nonestimable_data(panel: bool, n: int = 25, seed: int = 0) -> pd.DataFra
                         {
                             "unit": uid,
                             "time": t,
-                            "outcome": fe + 0.3 * t + 1.5 * post + rng.normal(0, 0.5),
+                            "outcome": fe + 0.3 * t + 1.5 * post + 0.5 * x + rng.normal(0, 0.5),
                             "first_treat": g,
+                            "x": x,
                         }
                     )
                 uid += 1
             else:
                 for t in range(1, 5):
                     post = 1.0 if t >= g else 0.0
+                    x_t = rng.normal(0, 1)
                     rows.append(
                         {
                             "unit": uid,
                             "time": t,
-                            "outcome": rng.normal(0, 1) + 0.3 * t + 1.5 * post,
+                            "outcome": rng.normal(0, 1) + 0.3 * t + 1.5 * post + 0.5 * x_t,
                             "first_treat": g,
+                            "x": x_t,
                         }
                     )
                     uid += 1
@@ -1341,10 +1345,19 @@ class TestCallawaySantAnnaNonEstimableMaterialization:
     }
 
     @pytest.mark.parametrize(
-        "method,panel",
-        [("reg", True), ("ipw", True), ("dr", True), ("reg", False)],
+        "method,panel,covariates",
+        [
+            ("reg", True, None),  # no-covariate vectorized path
+            ("ipw", True, None),  # general path, no covariates
+            ("dr", True, None),  # general path, no covariates
+            ("reg", False, None),  # repeated cross-section path
+            ("reg", True, ["x"]),  # covariate-regression vectorized path
+            ("ipw", True, ["x"]),  # general path, covariate IPW
+            ("dr", True, ["x"]),  # general path, covariate DR
+            ("dr", False, ["x"]),  # repeated cross-section, covariate DR
+        ],
     )
-    def test_materializes_nan_cell_with_skip_reason(self, method, panel):
+    def test_materializes_nan_cell_with_skip_reason(self, method, panel, covariates):
         """Each previously-omitting path now stores the non-estimable cell as NaN."""
         data = _cs_nonestimable_data(panel=panel, seed=0)
         cs = CallawaySantAnna(
@@ -1356,7 +1369,12 @@ def test_materializes_nan_cell_with_skip_reason(self, method, panel):
         with warnings.catch_warnings():
             warnings.simplefilter("ignore")
             results = cs.fit(
-                data, outcome="outcome", unit="unit", time="time", first_treat="first_treat"
+                data,
+                outcome="outcome",
+                unit="unit",
+                time="time",
+                first_treat="first_treat",
+                covariates=covariates,
             )
 
         # The (g=4, t=4) cell has no not-yet-treated controls -> materialized, not omitted.

From 6910a57a846bd2af1455daa739a7c185c4c9bccf Mon Sep 17 00:00:00 2001
From: igerber <isaac.gerber@gmail.com>
Date: Mon, 29 Jun 2026 09:21:41 -0400
Subject: [PATCH 3/8] fix(staggered): treat non-finite ATT(g,t) as
 non-estimable in general/RCS paths (review #582 P1)

The general (IPW/DR) and RCS estimable builders treated `att_gt is not None` as
estimable even when att_gt was non-finite (NaN/inf): they stored effect=att_gt
(which could surface inf), kept the influence-function entry, and did not count
the cell in the consolidated skip total. Both now branch on
`att_gt is None or not np.isfinite(att_gt)` first and materialize via
_nan_gt_entry(skip_reason="non_finite_regression") with NO IF entry, so the
documented contract (non-estimable cells are NaN entries, carry no IF, excluded
from the bootstrap) holds uniformly across every path. Aggregate estimates and
SEs are unchanged (these cells were finite-masked / IF-filtered out either way).
Adds a regression test mimicking the inf-ATT-with-IF leak.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 diff_diff/staggered.py  | 52 ++++++++++++++++++++++++-----------------
 tests/test_staggered.py | 38 ++++++++++++++++++++++++++++++
 2 files changed, 68 insertions(+), 22 deletions(-)

diff --git a/diff_diff/staggered.py b/diff_diff/staggered.py
index 55b56208..c7f0e8dd 100644
--- a/diff_diff/staggered.py
+++ b/diff_diff/staggered.py
@@ -2018,7 +2018,20 @@ def fit(
                     ) = rc_result
                     agg_w = cohort_mass if cohort_mass is not None else n_treat
 
-                    if att_gt is not None:
+                    if att_gt is None or not np.isfinite(att_gt):
+                        # Non-estimable cell: materialize NaN with NO IF entry,
+                        # uniform across paths. att_gt is None -> the helper
+                        # reported the reason; a non-finite att_gt (solve produced
+                        # NaN/inf) -> "non_finite_regression".
+                        _n_skipped_other += 1
+                        group_time_effects[(g, t)] = _nan_gt_entry(
+                            n_treated=n_treat,
+                            n_control=n_ctrl,
+                            skip_reason=(
+                                skip_reason if att_gt is None else "non_finite_regression"
+                            ),
+                        )
+                    else:
                         # Cluster-aware per-(g,t) SE on the RCS path. RC
                         # IF indices are per-obs (vs per-unit on the panel
                         # path); the corresponding PSU array is
@@ -2055,9 +2068,7 @@ def fit(
                             "n_treated": n_treat,
                             "n_control": n_ctrl,
                             "agg_weight": agg_w,
-                            "skip_reason": (
-                                None if np.isfinite(att_gt) else "non_finite_regression"
-                            ),
+                            "skip_reason": None,
                         }
                         if sw_sum is not None:
                             gte_entry["survey_weight_sum"] = sw_sum
@@ -2065,13 +2076,6 @@ def fit(
 
                         if inf_info is not None:
                             influence_func_info[(g, t)] = inf_info
-                    else:
-                        _n_skipped_other += 1
-                        group_time_effects[(g, t)] = _nan_gt_entry(
-                            n_treated=n_treat,
-                            n_control=n_ctrl,
-                            skip_reason=skip_reason,
-                        )
 
         elif covariates is None and self.estimation_method == "reg":
             # Fast vectorized path for the common no-covariates regression case
@@ -2128,7 +2132,20 @@ def fit(
                         )
                     )
 
-                    if att_gt is not None:
+                    if att_gt is None or not np.isfinite(att_gt):
+                        # Non-estimable cell: materialize NaN with NO IF entry,
+                        # uniform across paths. att_gt is None -> the helper
+                        # reported the reason; a non-finite att_gt (solve produced
+                        # NaN/inf) -> "non_finite_regression".
+                        _n_skipped_other += 1
+                        group_time_effects[(g, t)] = _nan_gt_entry(
+                            n_treated=n_treat,
+                            n_control=n_ctrl,
+                            skip_reason=(
+                                skip_reason if att_gt is None else "non_finite_regression"
+                            ),
+                        )
+                    else:
                         # Cluster-aware per-(g,t) SE: when a survey PSU is
                         # in play (explicit OR synthesized from bare
                         # cluster=), aggregate the per-(g,t) IF by PSU
@@ -2164,9 +2181,7 @@ def fit(
                             "conf_int": ci,
                             "n_treated": n_treat,
                             "n_control": n_ctrl,
-                            "skip_reason": (
-                                None if np.isfinite(att_gt) else "non_finite_regression"
-                            ),
+                            "skip_reason": None,
                         }
                         if sw_sum is not None:
                             gte_entry["survey_weight_sum"] = sw_sum
@@ -2174,13 +2189,6 @@ def fit(
 
                         if inf_info is not None:
                             influence_func_info[(g, t)] = inf_info
-                    else:
-                        _n_skipped_other += 1
-                        group_time_effects[(g, t)] = _nan_gt_entry(
-                            n_treated=n_treat,
-                            n_control=n_ctrl,
-                            skip_reason=skip_reason,
-                        )
 
         if not group_time_effects or not any(
             np.isfinite(v["effect"]) for v in group_time_effects.values()
diff --git a/tests/test_staggered.py b/tests/test_staggered.py
index 7747e156..2aeb5db9 100644
--- a/tests/test_staggered.py
+++ b/tests/test_staggered.py
@@ -1437,6 +1437,44 @@ def test_to_dataframe_includes_nan_row_and_skip_reason_column(self):
         assert len(estimable) > 0
         assert estimable["skip_reason"].isna().all()
 
+    def test_general_path_nonfinite_att_materialized_as_nan_no_inf(self):
+        """A non-finite ATT(g,t) in the general (IPW/DR) path must surface as a NaN
+        cell with skip_reason, NOT a finite-but-non-finite (inf) effect carrying an
+        IF entry. Regression guard for the per-cell contract."""
+        from unittest.mock import patch
+
+        data = generate_staggered_data(n_units=120, seed=7)
+        real = CallawaySantAnna._compute_att_gt_fast
+        state = {"poisoned": None}
+
+        def wrapped(self, precomputed, g, t, covariates, **kw):
+            res = real(self, precomputed, g, t, covariates, **kw)
+            att = res[0]
+            # Poison the first estimable post cell: return inf ATT WITH a real IF
+            # entry (res[4]), mimicking a degenerate IPW/DR solve that returns a
+            # non-finite point estimate without a None sentinel.
+            if state["poisoned"] is None and att is not None and np.isfinite(att) and t >= g:
+                state["poisoned"] = (g, t)
+                return (np.inf,) + res[1:6] + (None,)
+            return res
+
+        cs = CallawaySantAnna(n_bootstrap=0, estimation_method="ipw")
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            with patch.object(CallawaySantAnna, "_compute_att_gt_fast", wrapped):
+                results = cs.fit(
+                    data, outcome="outcome", unit="unit", time="time", first_treat="first_treat"
+                )
+
+        assert state["poisoned"] is not None, "poison hook never fired (test is vacuous)"
+        cell = results.group_time_effects[state["poisoned"]]
+        # The non-finite ATT must be materialized as NaN (not inf) with the reason.
+        assert np.isnan(cell["effect"]), "non-finite ATT must surface as NaN, not inf"
+        assert not np.isinf(cell["effect"])
+        assert cell["skip_reason"] == "non_finite_regression"
+        # Excluded from aggregation -> overall ATT still finite.
+        assert np.isfinite(results.overall_att)
+
     def test_all_nonestimable_raises_with_materialized_cells(self):
         """All cells non-estimable (dict non-empty, all NaN) -> ValueError via the
         no-finite-effect guard (distinct from the empty-dict case)."""

From 5e499d2e1f95d1c263bbcaf1c42c8cde12d08cc2 Mon Sep 17 00:00:00 2001
From: igerber <isaac.gerber@gmail.com>
Date: Mon, 29 Jun 2026 09:29:17 -0400
Subject: [PATCH 4/8] fix(staggered): guard non-finite ATT in the no-covariate
 path too (review #582 P0)

Completes the uniform invariant: every CallawaySantAnna estimable-cell builder
(no-covariate vectorized, covariate regression, general IPW/DR, RCS) now routes a
non-finite ATT(g,t) to _nan_gt_entry(skip_reason="non_finite_regression") with NO
influence-function entry, skipping batch inference and bootstrap membership. The
no-covariate diff-in-means ATT is finite given n_t,n_c>0, but a non-finite outcome
(inf survives the NaN-only valid mask) could otherwise store inf as the effect and
produce t_stat=inf / p=0 / infinite CI via safe_inference_batch. Aggregate
estimates and SEs are unchanged. Adds a regression test injecting an inf outcome
through the no-covariate path.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 diff_diff/staggered.py  | 15 ++++++++++++++-
 tests/test_staggered.py | 30 ++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/diff_diff/staggered.py b/diff_diff/staggered.py
index c7f0e8dd..98274fbd 100644
--- a/diff_diff/staggered.py
+++ b/diff_diff/staggered.py
@@ -1170,6 +1170,19 @@ def _compute_all_att_gt_vectorized(
                 inf_control = -(control_change - np.mean(control_change)) / n_c
                 sw_sum = None
 
+            # A difference-in-means ATT is finite given n_t, n_c > 0, but enforce
+            # the uniform contract anyway: a non-estimable cell is a NaN entry with
+            # NO influence-function entry, skipped from batch inference and the
+            # bootstrap (matches the covariate / general / RCS paths).
+            if not np.isfinite(att):
+                group_time_effects[(g, t)] = _nan_gt_entry(
+                    n_treated=n_t,
+                    n_control=n_c,
+                    skip_reason="non_finite_regression",
+                    survey_weight_sum=sw_sum,
+                )
+                continue
+
             gte_entry = {
                 "effect": att,
                 "se": se,
@@ -1179,7 +1192,7 @@ def _compute_all_att_gt_vectorized(
                 "conf_int": (np.nan, np.nan),
                 "n_treated": n_t,
                 "n_control": n_c,
-                "skip_reason": None if np.isfinite(att) else "non_finite_regression",
+                "skip_reason": None,
             }
             if sw_sum is not None:
                 gte_entry["survey_weight_sum"] = sw_sum
diff --git a/tests/test_staggered.py b/tests/test_staggered.py
index 2aeb5db9..aa21f36e 100644
--- a/tests/test_staggered.py
+++ b/tests/test_staggered.py
@@ -1475,6 +1475,36 @@ def wrapped(self, precomputed, g, t, covariates, **kw):
         # Excluded from aggregation -> overall ATT still finite.
         assert np.isfinite(results.overall_att)
 
+    def test_no_covariate_path_nonfinite_att_materialized_as_nan(self):
+        """No-covariate vectorized path: an inf outcome (passes the NaN-only valid
+        mask) yields a non-finite ATT, which must be materialized as a NaN cell
+        with skip_reason -- not stored as inf with an IF entry / batch inference."""
+        data = generate_staggered_data(n_units=200, seed=11)
+        # Inject inf into one treated unit's outcome at its treatment period; inf is
+        # not NaN so it survives the valid mask and makes that cohort's cell ATT inf.
+        fin = data[np.isfinite(data["first_treat"]) & (data["first_treat"] > 0)]
+        u = fin["unit"].iloc[0]
+        g = int(data.loc[data["unit"] == u, "first_treat"].iloc[0])
+        data.loc[(data["unit"] == u) & (data["time"] == g), "outcome"] = np.inf
+
+        cs = CallawaySantAnna(n_bootstrap=0, estimation_method="reg")
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            results = cs.fit(
+                data, outcome="outcome", unit="unit", time="time", first_treat="first_treat"
+            )
+
+        nf = [
+            (k, v)
+            for k, v in results.group_time_effects.items()
+            if v["skip_reason"] == "non_finite_regression"
+        ]
+        assert nf, "an inf outcome must yield a non_finite_regression cell"
+        for _, v in nf:
+            assert np.isnan(v["effect"]) and not np.isinf(v["effect"])
+            assert np.isnan(v["se"]) and np.isnan(v["t_stat"]) and np.isnan(v["p_value"])
+        assert np.isfinite(results.overall_att)
+
     def test_all_nonestimable_raises_with_materialized_cells(self):
         """All cells non-estimable (dict non-empty, all NaN) -> ValueError via the
         no-finite-effect guard (distinct from the empty-dict case)."""

From 785773d34d965871e4fb3fb13b9ed7a76df79916 Mon Sep 17 00:00:00 2001
From: igerber <isaac.gerber@gmail.com>
Date: Mon, 29 Jun 2026 09:37:44 -0400
Subject: [PATCH 5/8] fix(staggered): omit all-non-estimable relative-time
 buckets from event study (review #582 P1)

_aggregate_event_study() appended an all-NaN row (effect=NaN, se=NaN, n_groups=0)
for a relative-time bucket whose cells are all non-estimable, instead of omitting
the bucket. With NaN-cell materialization this surfaced new all-NaN event-study
rows where the bucket previously had no cells (and thus no row) -- an aggregate-
surface change vs the prior omit behavior and R did::aggte(). The bucket is now
dropped when finite-filtering leaves no cell (tracked via a kept-periods list so
the result lists stay aligned), matching _aggregate_by_group, which already omits
all-NaN groups. Adds a test asserting an all-non-estimable relative time is absent
from event_study_effects.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 diff_diff/staggered_aggregation.py | 14 +++++++++-----
 tests/test_staggered.py            | 25 +++++++++++++++++++++++++
 2 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/diff_diff/staggered_aggregation.py b/diff_diff/staggered_aggregation.py
index 5e574c04..567a64bd 100644
--- a/diff_diff/staggered_aggregation.py
+++ b/diff_diff/staggered_aggregation.py
@@ -651,6 +651,7 @@ def _aggregate_event_study(
         agg_ses_list = []
         agg_n_groups = []
         agg_effective_dfs = []  # Per-horizon effective df (replicate designs)
+        agg_periods = []  # Relative times that yielded an estimable aggregate row
         _psi_vectors = []  # Per-event-time combined IF vectors for VCV
         _psi_event_times = []  # Event times that contributed a psi column
         for e, effect_list in sorted_periods:
@@ -665,10 +666,12 @@ def _aggregate_event_study(
                 ns = ns[finite_mask]
                 gt_pairs = [gt for gt, m in zip(gt_pairs, finite_mask) if m]
                 if len(effs) == 0:
-                    agg_effects_list.append(np.nan)
-                    agg_ses_list.append(np.nan)
-                    agg_n_groups.append(0)
-                    agg_effective_dfs.append(None)
+                    # Every cell in this relative-time bucket is non-estimable
+                    # (materialized NaN). Omit the bucket entirely so the
+                    # event-study surface matches the prior omit behavior and R
+                    # did::aggte() (a relative time with no estimable cell yields
+                    # no row), and stays consistent with _aggregate_by_group,
+                    # which already drops all-NaN groups.
                     continue
 
             weights = ns / np.sum(ns)
@@ -695,6 +698,7 @@ def _aggregate_event_study(
             # the all-NaN early-return which already reports 0.
             agg_n_groups.append(len(gt_pairs))
             agg_effective_dfs.append(eff_df)
+            agg_periods.append(e)
             _psi_vectors.append(psi_e)
             _psi_event_times.append(e)
 
@@ -730,7 +734,7 @@ def _aggregate_event_study(
         )
 
         event_study_effects = {}
-        for idx, (e, _) in enumerate(sorted_periods):
+        for idx, e in enumerate(agg_periods):
             event_study_effects[e] = {
                 "effect": agg_effects_list[idx],
                 "se": agg_ses_list[idx],
diff --git a/tests/test_staggered.py b/tests/test_staggered.py
index aa21f36e..92024062 100644
--- a/tests/test_staggered.py
+++ b/tests/test_staggered.py
@@ -1505,6 +1505,31 @@ def test_no_covariate_path_nonfinite_att_materialized_as_nan(self):
             assert np.isnan(v["se"]) and np.isnan(v["t_stat"]) and np.isnan(v["p_value"])
         assert np.isfinite(results.overall_att)
 
+    def test_event_study_omits_all_nonestimable_relative_time(self):
+        """An event-time bucket whose cells are ALL non-estimable is omitted from
+        event_study_effects (matches the prior omit behavior / R did::aggte),
+        not emitted as an all-NaN row."""
+        data = _cs_nonestimable_data(panel=True, seed=0)
+        cs = CallawaySantAnna(n_bootstrap=0, control_group="not_yet_treated")
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            results = cs.fit(
+                data,
+                outcome="outcome",
+                unit="unit",
+                time="time",
+                first_treat="first_treat",
+                aggregate="event_study",
+            )
+        es = results.event_study_effects
+        # Relative time e=2 contains only (g=2, t=4), which is non-estimable (no
+        # not-yet-treated controls at t=4) -> the whole bucket must be omitted.
+        assert 2 not in es, "all-non-estimable relative time must be omitted, not a NaN row"
+        # Every emitted relative time has a finite effect and >=1 contributing group.
+        for e, d in es.items():
+            assert np.isfinite(d["effect"]), f"e={e} effect should be finite"
+            assert d["n_groups"] >= 1, f"e={e} should have >=1 contributing group"
+
     def test_all_nonestimable_raises_with_materialized_cells(self):
         """All cells non-estimable (dict non-empty, all NaN) -> ValueError via the
         no-finite-effect guard (distinct from the empty-dict case)."""

From 4291a25358e5a3d89e70b48fbb50f5fb65b16737 Mon Sep 17 00:00:00 2001
From: igerber <isaac.gerber@gmail.com>
Date: Mon, 29 Jun 2026 09:44:20 -0400
Subject: [PATCH 6/8] docs(staggered): drop nonexistent "calendar" aggregation
 from CS NaN-cell notes (review #582 P3)

CallawaySantAnna's aggregation options are simple / event_study / group / all
(there is no calendar aggregation). Remove "calendar" from the REGISTRY edge-case
Note and the CHANGELOG entry listing which aggregations exclude materialized NaN
cells.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                 | 2 +-
 docs/methodology/REGISTRY.md | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7666f021..71c01a7c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -52,7 +52,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   (no-covariate regression, covariate regression, IPW/DR, repeated cross-section, survey-weighted)
   — previously only the covariate-regression singular case did this and the other paths dropped
   the cell silently from the grid. The cells are excluded from every aggregation
-  (simple/group/calendar/event-study), from `balance_e`, and from the bootstrap, so all aggregate
+  (simple/group/event-study), from `balance_e`, and from the bootstrap, so all aggregate
   point estimates and standard errors — and the event-study `n_groups` / by-group `n_periods`
   metadata — are numerically **unchanged** and continue to match R `did`'s `aggte()`; a fit where
   no cell is estimable still raises `ValueError`. `to_dataframe("group_time")` now includes these
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
index 6fb66dc7..8d9341a4 100644
--- a/docs/methodology/REGISTRY.md
+++ b/docs/methodology/REGISTRY.md
@@ -526,7 +526,7 @@ The multiplier bootstrap uses random weights w_i with E[w]=0 and Var(w)=1:
 *Edge cases:*
 - Groups with single observation: included but may have high variance
 - Non-estimable group-time cells: materialized as NaN entries in `group_time_effects` with a consolidated warning listing skip reasons and counts
-  - **Note:** Non-estimable cells (missing base/post period, zero treated/control, zero survey-weight mass, non-finite regression solve) are stored as NaN entries — `effect`/`se`/`t_stat`/`p_value`/`conf_int` all NaN — carrying a machine-readable `skip_reason` code (`"missing_period"`, `"zero_treated_control"`, `"zero_weight_mass"`, `"non_finite_regression"`; estimable cells carry `None`). This is uniform across ALL estimation paths (no-covariate regression, covariate regression, IPW/DR, repeated cross-section, survey-weighted). A consolidated `UserWarning` is still emitted from `fit()`. The NaN cells are **excluded from every aggregation** (simple/overall, group, calendar, event-study), from `balance_e`, and from the bootstrap (they carry no influence-function entry, and all consumers finite-mask on `np.isfinite(effect)` or filter to IF members), so all aggregate point estimates and SEs — and `n_groups`/`n_periods` metadata — are **unchanged** from the prior omit behavior and match R `did`'s `aggte()` exactly. A fit where no cell is estimable (no finite effect) still raises a `ValueError`.
+  - **Note:** Non-estimable cells (missing base/post period, zero treated/control, zero survey-weight mass, non-finite regression solve) are stored as NaN entries — `effect`/`se`/`t_stat`/`p_value`/`conf_int` all NaN — carrying a machine-readable `skip_reason` code (`"missing_period"`, `"zero_treated_control"`, `"zero_weight_mass"`, `"non_finite_regression"`; estimable cells carry `None`). This is uniform across ALL estimation paths (no-covariate regression, covariate regression, IPW/DR, repeated cross-section, survey-weighted). A consolidated `UserWarning` is still emitted from `fit()`. The NaN cells are **excluded from every aggregation** (simple/overall, group, event-study), from `balance_e`, and from the bootstrap (they carry no influence-function entry, and all consumers finite-mask on `np.isfinite(effect)` or filter to IF members), so all aggregate point estimates and SEs — and `n_groups`/`n_periods` metadata — are **unchanged** from the prior omit behavior and match R `did`'s `aggte()` exactly. A fit where no cell is estimable (no finite effect) still raises a `ValueError`.
   - **Deviation from R:** R's `did::att_gt` omits non-estimable cells from its result table entirely; diff-diff materializes them as NaN rows (with `skip_reason`) so the `(g,t)` grid is inspectable via `group_time_effects` / `to_dataframe("group_time")`. This is a per-cell *surface* difference only — R's `aggte()` aggregation behavior is matched exactly (non-estimable cells contribute nothing to any aggregate).
   - **Note:** When `balance_e` is specified, cohorts with NaN effects at the anchor horizon are excluded from the balanced panel
 - Anticipation: `anticipation` parameter shifts reference period

From 9d57e7678721ea4bc5b4e839f4ff944aa174c97b Mon Sep 17 00:00:00 2001
From: igerber <isaac.gerber@gmail.com>
Date: Mon, 29 Jun 2026 09:53:05 -0400
Subject: [PATCH 7/8] fix(staggered): report real treated/control counts on
 non-estimable cells (review #582 P2)

The per-cell helper sentinel returns (and the covariate-reg empty-control batch
site) hardcoded n_treated=n_control=0 for materialized NaN cells even after the
observation masks had been computed, so group_time_effects / to_dataframe could
show zero counts for a cell that actually had treated (or control) observations.
The zero-control / zero-weight exits in _compute_att_gt_fast and _compute_att_gt_rc,
and the covariate-reg empty-control batch site, now return the observed counts;
missing-period exits (masks not yet built) keep 0. Display-only metadata --
estimates, SEs, and aggregation are unchanged.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 diff_diff/staggered.py  | 26 ++++++++++++++++++--------
 tests/test_staggered.py |  4 ++++
 2 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/diff_diff/staggered.py b/diff_diff/staggered.py
index 98274fbd..cb256354 100644
--- a/diff_diff/staggered.py
+++ b/diff_diff/staggered.py
@@ -911,7 +911,7 @@ def _compute_att_gt_fast(
         n_control = np.sum(control_valid)
 
         if n_treated == 0 or n_control == 0:
-            return None, 0.0, 0, 0, None, None, "zero_treated_control"
+            return None, 0.0, int(n_treated), int(n_control), None, None, "zero_treated_control"
 
         # Extract outcome changes for treated and control
         treated_change = outcome_change[treated_valid]
@@ -924,9 +924,9 @@ def _compute_att_gt_fast(
 
         # Guard against zero effective mass after subpopulation filtering
         if sw_treated is not None and np.sum(sw_treated) <= 0:
-            return None, 0.0, 0, 0, None, None, "zero_weight_mass"
+            return None, 0.0, int(n_treated), int(n_control), None, None, "zero_weight_mass"
         if sw_control is not None and np.sum(sw_control) <= 0:
-            return None, 0.0, 0, 0, None, None, "zero_weight_mass"
+            return None, 0.0, int(n_treated), int(n_control), None, None, "zero_weight_mass"
 
         # Get covariates if specified (from the base period)
         X_treated = None
@@ -1372,9 +1372,19 @@ def _compute_all_att_gt_covariate_reg(
             n_c_base = int(np.sum(control_valid_base))
             if n_c_base == 0:
                 skipped_empty_cell.extend((g, t) for g, t, *_ in tasks)
-                for g, t, *_ in tasks:
+                # Controls are empty for this whole group; report each cell's actual
+                # treated count (computed as in the per-pair loop), not a hardcoded 0.
+                for g, t, _bp, base_col, post_col in tasks:
+                    if is_balanced:
+                        n_t_task = int(np.sum(cohort_masks[g]))
+                    else:
+                        valid_pair = ~(
+                            np.isnan(outcome_matrix[:, base_col])
+                            | np.isnan(outcome_matrix[:, post_col])
+                        )
+                        n_t_task = int(np.sum(cohort_masks[g] & valid_pair))
                     group_time_effects[(g, t)] = _nan_gt_entry(
-                        n_control=0, skip_reason="zero_treated_control"
+                        n_treated=n_t_task, n_control=0, skip_reason="zero_treated_control"
                     )
                 continue
 
@@ -3381,7 +3391,7 @@ def _compute_att_gt_rc(
         n_cs = int(np.sum(control_s))
 
         if n_gt == 0 or n_ct == 0 or n_gs == 0 or n_cs == 0:
-            return None, 0.0, 0, 0, None, None, None, "zero_treated_control"
+            return None, 0.0, n_gt, n_ct, None, None, None, "zero_treated_control"
 
         # Extract outcomes for each group
         y_gt = obs_outcome[treated_t]
@@ -3399,9 +3409,9 @@ def _compute_att_gt_rc(
         # Guard against zero effective mass
         if sw_gt is not None:
             if np.sum(sw_gt) <= 0 or np.sum(sw_gs) <= 0:
-                return None, 0.0, 0, 0, None, None, None, "zero_weight_mass"
+                return None, 0.0, n_gt, n_ct, None, None, None, "zero_weight_mass"
             if np.sum(sw_ct) <= 0 or np.sum(sw_cs) <= 0:
-                return None, 0.0, 0, 0, None, None, None, "zero_weight_mass"
+                return None, 0.0, n_gt, n_ct, None, None, None, "zero_weight_mass"
 
         # Get covariates if specified
         obs_covariates = precomputed.get("obs_covariates")
diff --git a/tests/test_staggered.py b/tests/test_staggered.py
index 92024062..1bed85d5 100644
--- a/tests/test_staggered.py
+++ b/tests/test_staggered.py
@@ -1387,6 +1387,10 @@ def test_materializes_nan_cell_with_skip_reason(self, method, panel, covariates)
         assert np.isnan(cell["t_stat"]) and np.isnan(cell["p_value"])
         assert all(np.isnan(b) for b in cell["conf_int"])
         assert cell["skip_reason"] == "zero_treated_control"
+        # The cell genuinely has treated observations but no controls -> the
+        # materialized counts must reflect that, not a hardcoded (0, 0).
+        assert cell["n_treated"] > 0
+        assert cell["n_control"] == 0
 
         # Every NaN cell carries a known reason + NaN SE; every estimable cell None.
         n_finite = 0

From d1f2bb44105185a2c43875f9782ea444c1639235 Mon Sep 17 00:00:00 2001
From: igerber <isaac.gerber@gmail.com>
Date: Mon, 29 Jun 2026 10:41:22 -0400
Subject: [PATCH 8/8] test(staggered): handle materialized NaN cells in ported
 CS tests

Three test_csdid_ported.py tests relied on non-estimable (g,t) cells being
OMITTED from group_time_effects; with the new NaN-cell materialization those
cells are present as NaN, so the tests' membership / golden-iteration guards no
longer skip them. Update them to preserve their intent on the FINITE cells:
- test_some_units_treated_first_period: a first-period cohort (no base period) is
  now all-NaN (missing_period) rather than absent -> assert it is all-NaN.
- test_zero_pretreatment_outcomes: skip NaN pre-cells (the last cohort under
  not_yet_treated has no controls); finite pre-cells are still ~0.
- test_golden_fewer_periods: skip NaN cells (gapped panel where base g-1 is
  unobserved -> missing_period; R falls back to an available base) -> R-parity on
  the finite cells.

No source change; the cells are correctly non-estimable, only now visible.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 tests/test_csdid_ported.py | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/tests/test_csdid_ported.py b/tests/test_csdid_ported.py
index 87556979..52cf9d83 100644
--- a/tests/test_csdid_ported.py
+++ b/tests/test_csdid_ported.py
@@ -633,9 +633,16 @@ def test_some_units_treated_first_period(self):
                 time="period",
                 first_treat="first_treat",
             )
-        # G=2 should be excluded (no pre-treatment period available)
-        groups_in_results = set(k[0] for k in results.group_time_effects.keys())
-        assert 2 not in groups_in_results, "G=2 treated in first period should be excluded"
+        # G=2 is treated in the first observed period, so it has no valid base
+        # period -> all its (g,t) cells are non-estimable. They are now materialized
+        # as NaN entries (skip_reason="missing_period") rather than omitted, so G=2
+        # contributes no FINITE estimate (the prior "excluded from the analysis"
+        # intent: it is not silently dropped, but it is never estimated).
+        g2_cells = [v for (g, t), v in results.group_time_effects.items() if g == 2]
+        assert g2_cells, "G=2 cells should be materialized (as NaN), not silently dropped"
+        assert all(
+            np.isnan(v["effect"]) and v["skip_reason"] == "missing_period" for v in g2_cells
+        ), "G=2 (no pre-treatment period) must be all-NaN (missing_period), never estimated"
 
 
 class TestCSDIDBugFixRegressions:
@@ -774,6 +781,11 @@ def test_zero_pretreatment_outcomes(self):
         gt = results.group_time_effects
         pre_effects = {k: v for k, v in gt.items() if k[1] < k[0]}
         for (g, t), eff in pre_effects.items():
+            # Non-estimable pre-cells are now materialized as NaN (e.g. the last
+            # cohort under not_yet_treated has no controls); skip them. Finite
+            # pre-treatment cells (DiD of 0-0 vs 0-0) must still be ~0.
+            if np.isnan(eff["effect"]):
+                continue
             assert abs(eff["effect"]) < 0.01, (
                 f"Pre-treatment ATT(g={g}, t={t}) = {eff['effect']:.4f}, " "expected 0"
             )
@@ -1199,6 +1211,13 @@ def test_golden_fewer_periods(self, golden_values):
             g, t = int(g), int(t)
             if (g, t) in results.group_time_effects:
                 py_att = results.group_time_effects[(g, t)]["effect"]
+                # Skip cells we materialize as non-estimable (e.g. a gapped panel
+                # where the base period g-1 is not observed -> missing_period). R
+                # falls back to an available base and reports a value where our
+                # impl does not; compare only cells both estimate (R-parity on the
+                # finite cells, which is what this golden test pins).
+                if not np.isfinite(py_att):
+                    continue
                 r_att = r_gt["att"][i]
                 assert abs(py_att - r_att) < 0.05, (
                     f"Fewer periods ATT(g={g},t={t}): " f"Py={py_att:.4f}, R={r_att:.4f}"