Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 16 additions & 2 deletions econml/dml/dml.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,22 @@ def train(self, is_selecting, folds, X, W, Target, sample_weight=None, groups=No
"don't contain all treatments")
Target = inverse_onehot(Target)

self._model.train(is_selecting, folds, _combine(X, W, Target.shape[0]), Target,
**filter_none_kwargs(groups=groups, sample_weight=sample_weight))
try:
self._model.train(is_selecting, folds, _combine(X, W, Target.shape[0]), Target,
**filter_none_kwargs(groups=groups, sample_weight=sample_weight))
except ValueError as exc:
if (not self._discrete_target
and np.ndim(Target) == 2 and np.shape(Target)[1] > 1):
raise ValueError(
f"First-stage model failed to fit a {np.shape(Target)[1]}-column target. "
"This typically happens when treatment_featurizer (or a multi-dimensional "
"outcome) produces a multi-column target but the supplied model does not "
"support multi-output regression. Wrap your model with "
"sklearn.multioutput.MultiOutputRegressor, or use a model with native "
"multi-output support (e.g. LinearRegression, RandomForestRegressor, "
f"GradientBoostingRegressor). Original error: {exc}"
) from exc
raise
return self

@property
Expand Down
54 changes: 54 additions & 0 deletions econml/tests/test_treatment_featurization.py
Original file line number Diff line number Diff line change
Expand Up @@ -626,3 +626,57 @@ def test_score_nuisances_applies_treatment_featurizer(self):
for s in scores[t_key]:
assert np.isfinite(s), \
f"score_nuisances T score {s} is not finite (#1006/#1029)"

def test_single_output_model_t_with_featurizer_raises_helpful_error(self):
    # Regression test for #1012.
    #
    # A treatment_featurizer can turn the treatment into a multi-column
    # target. If model_t cannot do multi-output regression (e.g. CatBoost,
    # older XGBoost), the underlying estimator fails with an opaque shape
    # error. EconML is expected to re-raise that failure as a ValueError
    # that points the user at MultiOutputRegressor.
    from sklearn.multioutput import MultiOutputRegressor
    from sklearn.base import BaseEstimator, RegressorMixin

    class _SingleOutputOnlyRegressor(BaseEstimator, RegressorMixin):
        # Least-squares stub that rejects any target with more than one column.

        def fit(self, X, y, **kwargs):
            target = np.asarray(y)
            has_extra_columns = target.ndim > 1 and target.shape[1] > 1
            if has_extra_columns:
                raise ValueError(
                    f"single-output regressor expected 1D target, got shape {target.shape}"
                )
            solution, *_ = np.linalg.lstsq(X, target.ravel(), rcond=None)
            self.coef_ = solution
            return self

        def predict(self, X):
            return X @ self.coef_

    def build_estimator(first_stage_t):
        # Both estimators below differ only in the treatment model they use.
        return LinearDML(
            model_y=LinearRegression(),
            model_t=first_stage_t,
            treatment_featurizer=polynomial_treatment_featurizer,
            cv=2,
            random_state=0,
        )

    # Synthetic data; the draws are made in the order X, T, noise.
    n = 600
    rng = np.random.default_rng(0)
    X = rng.normal(size=(n, 3))
    T = rng.normal(size=n)
    noise = rng.normal(size=n)
    Y = 2 * T + T**2 + X[:, 0] + noise

    # Step 1: the bare single-output model must fail with the guidance text.
    est = build_estimator(_SingleOutputOnlyRegressor())
    with self.assertRaises(ValueError) as ctx:
        est.fit(Y, T, X=X)
    msg = str(ctx.exception)

    assert "MultiOutputRegressor" in msg, \
        f"expected MultiOutputRegressor guidance in error, got: {msg}"
    mentions_multi_output = any(
        phrase in msg for phrase in ("multi-column target", "multi-output")
    )
    assert mentions_multi_output, \
        f"expected multi-output framing in error, got: {msg}"

    # Step 2: following the advice in the message must make the fit succeed.
    est_fixed = build_estimator(MultiOutputRegressor(_SingleOutputOnlyRegressor()))
    est_fixed.fit(Y, T, X=X)
    effects = est_fixed.effect(X[:5])
    assert np.isfinite(effects).all(), \
        "wrapping single-output model_t with MultiOutputRegressor should make fit succeed"
Loading