diff --git a/doc/release_notes.rst b/doc/release_notes.rst index 7883db82..7dd6dd3c 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -52,6 +52,9 @@ Most users should keep calling ``model.solve(...)``. If you want more control, y **Bug Fixes** +* ``Model.add_variables``: 0.7.0 made ``coords`` (dims, order, and values) the source of truth for ``DataArray`` bounds; this release closes the two remaining gaps. Pandas ``Series`` / ``DataFrame`` bounds missing a dimension are broadcast to ``coords`` instead of being silently dropped (`#709 <https://github.com/PyPSA/linopy/issues/709>`__), and the variable's dimension order always follows ``coords`` regardless of bound type (`#706 <https://github.com/PyPSA/linopy/issues/706>`__). +* ``add_variables`` / ``add_constraints``: the same rule now applies to ``mask`` — pandas ``Series`` / ``DataFrame`` masks missing a dimension are broadcast to the variable/constraint shape. As previously announced via ``FutureWarning``, masks whose coordinates are a sparse subset of the data's coordinates now raise ``ValueError`` rather than silently filling missing entries with ``False``; masks with dimensions not in the data raise ``ValueError`` instead of ``AssertionError``. +* ``add_piecewise_formulation`` now produces a reproducible dimension order in the broadcast breakpoint array. The previous set-based expansion gave a hash-randomized order that varied between processes. * SOS constraints on masked variables no longer cause solver-specific failures (Gurobi ``IndexError``, Xpress ``?404 Invalid column number``, LP parse errors, silent set corruption). ``Model.solve()`` and ``Model.to_file()`` now raise a clear ``NotImplementedError`` referring users to `#688 <https://github.com/PyPSA/linopy/issues/688>`__; pass ``reformulate_sos=True`` as a workaround. * ``Model.solve(..., reformulate_sos=True)`` now actually reformulates SOS constraints even when the solver supports them natively. Previously it was silently ignored with a warning. @@ -64,6 +67,14 @@ Most users should keep calling ``model.solve(...)``. 
If you want more control, y **Internal** +* ``linopy.common.as_dataarray`` is now the single broadcasting primitive; + strict subset-dim / coord-value checks live in + ``validate_alignment`` (via ``align_to_coords`` in + ``add_variables`` / ``add_constraints``). Validation errors name the + argument (``lower bound``, ``upper bound``, ``mask``) and explain whether + dimensions or coordinate values disagree with ``coords``. When ``coords`` is + a mapping, extra keys beyond the positional ``dims`` are broadcast in rather + than dropped. * Each ``Solver`` subclass now overrides at most three hooks: ``_build_direct`` (build the native model), ``_run_direct`` (run it), and ``_run_file`` (run the solver on an LP/MPS file). File-only solvers (CBC, GLPK, CPLEX, SCIP, Knitro, COPT, MindOpt) only override ``_run_file``. * New ``ConstraintLabelIndex`` cached on ``Model.constraints`` (mirrors the existing ``Variables.label_index``); ``ConstraintBase`` gains ``active_labels()`` and a ``range`` property; ``CSRConstraint`` exposes ``coords``. * ``linopy.common`` gains ``values_to_lookup_array``; the legacy pandas-based helpers ``series_to_lookup_array`` and ``lookup_vals`` are removed. 
diff --git a/linopy/common.py b/linopy/common.py index e9a38d29..58b34ed7 100644 --- a/linopy/common.py +++ b/linopy/common.py @@ -9,7 +9,7 @@ import operator import os -from collections.abc import Callable, Generator, Hashable, Iterable, Sequence +from collections.abc import Callable, Generator, Hashable, Iterable, Mapping, Sequence from functools import cached_property, partial, reduce, wraps from pathlib import Path from typing import TYPE_CHECKING, Any, Generic, TypeVar, overload @@ -23,6 +23,7 @@ from xarray import Coordinates, DataArray, Dataset, apply_ufunc, broadcast from xarray import align as xr_align from xarray.core import dtypes, indexing +from xarray.core.coordinates import CoordinateValidationError from xarray.core.types import JoinOptions, T_Alignable from xarray.namedarray.utils import is_dict_like @@ -213,30 +214,20 @@ def numpy_to_dataarray( return DataArray(arr, coords=coords, dims=dims, **kwargs) -def as_dataarray( +def _as_dataarray_lax( arr: Any, coords: CoordsLike | None = None, dims: DimsLike | None = None, **kwargs: Any, ) -> DataArray: """ - Convert an object to a DataArray. + Type-dispatched DataArray conversion without any coords validation. - Parameters - ---------- - arr: - The input object. - coords (Union[dict, list, None]): - The coordinates for the DataArray. If None, default coordinates will be used. - dims (Union[list, None]): - The dimensions for the DataArray. If None, the dimensions will be automatically generated. - **kwargs: - Additional keyword arguments to be passed to the DataArray constructor. - - Returns - ------- - DataArray: - The converted DataArray. + This is the conversion primitive used by ``as_dataarray``: it picks the + right constructor for each supported input type but does not check the + result against ``coords``. Callers that need ``coords`` to govern the + output (dim order, shared-dim values, missing-dim expansion) should use + ``as_dataarray`` instead. 
""" if isinstance(arr, pd.Series | pd.DataFrame): arr = pandas_to_dataarray(arr, coords=coords, dims=dims, **kwargs) @@ -275,6 +266,317 @@ def as_dataarray( return arr +def as_dataarray( + arr: Any, + coords: CoordsLike | None = None, + dims: DimsLike | None = None, + **kwargs: Any, +) -> DataArray: + """ + Convert ``arr`` to a DataArray and broadcast it against ``coords``. + + When ``coords`` carries named dimensions, the result is aligned with + those coords: + + - positional inputs (numpy, polars, unnamed pandas, scalar) are labeled + with the coord dim names by position; + - for every dim shared between ``arr`` and ``coords``, same-values- + different-order coordinates are reindexed to ``coords`` order; + - dims present in ``coords`` but not in ``arr`` are expanded to the + ``coords`` shape; + - the result is transposed to ``coords`` order. + + Dimensions present in ``arr`` but not in ``coords`` are preserved so + standard xarray broadcasting keeps working. Disagreeing coord values + on a shared dim (i.e. value sets that are not equal as sets) are + passed through unchanged: downstream xarray alignment decides how to + combine them. To enforce that ``arr.dims`` ⊆ ``coords.dims`` and that + shared coord values match, use ``validate_alignment`` (called + automatically for ``lower``, ``upper``, and ``mask`` in + :meth:`~linopy.model.Model.add_variables` and for ``mask`` in + :meth:`~linopy.model.Model.add_constraints`). + + Parameters + ---------- + arr + Input scalar / list / numpy / polars / pandas / DataArray. + coords + Mapping of dim name → coord values, or a sequence of ``pd.Index`` + / unnamed sequences. ``None`` falls back to xarray's default + labeling (no broadcasting). + dims + Optional dim-names hint, used for positional inputs and to bias + pandas-axis interpretation. + **kwargs + Forwarded to the underlying DataArray construction. + + Returns + ------- + DataArray + Broadcast against ``coords`` (extra dims preserved). 
+ """ + if coords is None: + return _as_dataarray_lax(arr, coords, dims, **kwargs) + + expected = _coords_to_dict(coords, dims=dims) + if not expected: + return _as_dataarray_lax(arr, coords, dims, **kwargs) + + if isinstance(arr, pd.Series | pd.DataFrame): + converted = _named_pandas_to_dataarray(arr) + if converted is not None: + arr = converted + + if not isinstance(arr, DataArray): + # numpy/polars/unnamed-pandas inputs are positional — their only + # meaningful information is the values; any axis labels are + # auto-generated. Default dims to coords' keys so the lax conversion + # labels axes correctly (instead of dim_0/dim_1), then re-assign + # coords from expected so positional inputs align to coords by + # position. A shape mismatch surfaces here as a clear xarray + # "conflicting sizes" error rather than a confusing + # "coordinates do not match" further down. + if dims is None: + dims = list(expected) + arr = _as_dataarray_lax(arr, coords, dims=dims, **kwargs) + # Skip MultiIndex dims — re-assigning a PandasMultiIndex coord emits + # a FutureWarning and isn't needed (the lax pass already used it). + arr = arr.assign_coords( + { + d: expected[d] + for d in arr.dims + if d in expected and not isinstance(arr.indexes.get(d), pd.MultiIndex) + } + ) + + for dim, coord_values in expected.items(): + if dim not in arr.dims: + continue + if isinstance(arr.indexes.get(dim), pd.MultiIndex): + continue + expected_idx = ( + coord_values + if isinstance(coord_values, pd.Index) + else pd.Index(coord_values) + ) + actual_idx = arr.coords[dim].to_index() + if actual_idx.equals(expected_idx): + continue + # Same values, different order → reindex to match expected order. + # Different value sets are left alone: downstream xarray alignment + # (e.g. xr.align in arithmetic) handles them. Callers needing strict + # value matching (add_variables / add_constraints) should use + # ``validate_alignment`` after this call. 
+ if len(actual_idx) == len(expected_idx) and set(actual_idx) == set( + expected_idx + ): + arr = arr.reindex({dim: expected_idx}) + + # expand_dims prepends new dimensions and their coordinate variables; + # the subsequent transpose restores coords order. Both are no-ops when + # the array already matches. Reconstruct so the DataArray's coords + # iteration order also follows coords (a Dataset built from this picks + # up its dim order from coord insertion). + expand = {k: v for k, v in expected.items() if k not in arr.dims} + if expand: + arr = arr.expand_dims(expand) + + target_dims = tuple(d for d in expected if d in arr.dims) + tuple( + d for d in arr.dims if d not in expected + ) + arr = arr.transpose(*target_dims) + + coord_order = [c for c in target_dims if c in arr.coords] + [ + c for c in arr.coords if c not in target_dims + ] + if list(arr.coords) != coord_order: + arr = DataArray( + arr.variable, + coords={c: arr.coords[c] for c in coord_order}, + name=arr.name, + ) + + return arr + + +def validate_alignment( + arr: DataArray, + coords: CoordsLike | None, + dims: DimsLike | None = None, + *, + label: str | None = None, +) -> None: + """ + Raise ``ValueError`` if ``arr`` is incompatible with ``coords``. + + ``arr`` is compatible with ``coords`` when both of the following hold: + + - every dim in ``arr.dims`` is also a dim in ``coords`` (no extras); + - for every dim shared between ``arr`` and ``coords``, the coord + values are equal. + + ``dims`` mirrors the ``dims`` argument of ``as_dataarray``: it names + unnamed entries in a sequence-form ``coords`` by position, so + ``coords=[[1, 2, 3]], dims=["x"]`` is enforced the same way as + ``coords={"x": [1, 2, 3]}``. + + ``label`` names the argument in error messages (e.g. ``"lower bound"``). + + No-op when ``coords`` is ``None`` or carries no named dimensions. 
+ """ + if coords is None: + return + expected = _coords_to_dict(coords, dims=dims) + if not expected: + return + subject = label or "Value" + expected_dims = set(expected) + extra = set(arr.dims) - expected_dims + if extra: + raise ValueError( + f"{subject} has dimension(s) {sorted(extra)} not declared in coords " + f"({sorted(expected_dims)}). Add them to coords or remove them from " + f"{subject.lower()}." + ) + for dim, coord_values in expected.items(): + if dim not in arr.dims: + continue + expected_is_mi = isinstance(coord_values, pd.MultiIndex) + actual_is_mi = isinstance(arr.indexes.get(dim), pd.MultiIndex) + if expected_is_mi or actual_is_mi: + if expected_is_mi and actual_is_mi: + if not arr.indexes[dim].equals(coord_values): + raise ValueError( + f"{subject}: MultiIndex for dimension {dim!r} does not " + f"match coords." + ) + continue + expected_idx = ( + coord_values + if isinstance(coord_values, pd.Index) + else pd.Index(coord_values) + ) + actual_idx = arr.coords[dim].to_index() + if not actual_idx.equals(expected_idx): + raise ValueError( + f"{subject}: coordinate values for dimension {dim!r} do not match " + f"coords — expected {expected_idx.tolist()}, got " + f"{actual_idx.tolist()}." + ) + + +def align_to_coords( + value: Any, + coords: CoordsLike | None, + *, + label: str, + **kwargs: Any, +) -> DataArray: + """ + Convert ``value`` with :func:`as_dataarray` and enforce the coords contract. + + Used by :meth:`~linopy.model.Model.add_variables` for ``lower``, ``upper``, + and ``mask``, and by :meth:`~linopy.model.Model.add_constraints` for + ``mask``. Raises :class:`ValueError` with a message that names ``label`` + when conversion or validation fails. 
+ """ + try: + da = as_dataarray(value, coords, **kwargs) + except TypeError as err: + raise TypeError(f"{label} could not be aligned to coords: {err}") from err + except (ValueError, CoordinateValidationError) as err: + raise ValueError(f"{label} could not be aligned to coords: {err}") from err + validate_alignment(da, coords, dims=kwargs.get("dims"), label=label) + return da + + +def _coords_to_dict( + coords: Sequence[Sequence | pd.Index] | Mapping, + dims: DimsLike | None = None, +) -> dict[str, Any]: + """ + Normalize coords to a dict mapping dim names to coordinate values. + + For ``xarray.Coordinates`` (and ``DataArray.coords``), only entries + that are actual dimensions are kept; derived MultiIndex level coords + are dropped here and re-attached by xarray downstream. Plain mappings + are returned as-is. For sequence inputs, entries must be ``pd.Index`` + (named or not) or unnamed sequences (``list`` / ``tuple`` / ``range`` + / ``np.ndarray``). A ``pd.MultiIndex`` must have ``.name`` set — + xarray requires a single dimension name for the flattened index. + Other types — notably ``xarray.DataArray`` — raise ``TypeError`` + rather than being silently dropped: callers should convert via + ``variable.indexes[]`` (or ``pd.Index(...)``) first. + + Unnamed sequence entries (or unnamed ``pd.Index``) gain a dim name + from ``dims`` by position when ``dims`` is provided, so callers that + pass ``coords=[[1, 2, 3]], dims=["x"]`` get the same strict + enforcement as ``coords={"x": [1, 2, 3]}``. + """ + if isinstance(coords, Coordinates): + # Coordinates iterates over every coord variable, including + # MultiIndex level coords. Keep only the entries that are dims. 
+ return {d: coords[d] for d in coords.dims if d in coords} + if isinstance(coords, Mapping): + return dict(coords) + dim_names: list[Any] | None = None + if dims is not None: + dim_names = list(dims) if isinstance(dims, list | tuple) else [dims] + result: dict[str, Any] = {} + for i, c in enumerate(coords): + if isinstance(c, pd.MultiIndex): + if not c.name: + raise TypeError( + "MultiIndex coords entries must have .name set so " + "xarray can use it as the dimension name. Set it via " + "`idx.name = 'my_dim'` before passing to coords." + ) + result[c.name] = c + elif isinstance(c, pd.Index): + name = ( + c.name + if c.name + else (dim_names[i] if dim_names and i < len(dim_names) else None) + ) + if name is not None: + result[name] = c + elif isinstance(c, list | tuple | range | np.ndarray): + if dim_names and i < len(dim_names): + result[dim_names[i]] = pd.Index(c, name=dim_names[i]) + else: + raise TypeError( + f"coords entries must be pd.Index or an unnamed sequence " + f"(list / tuple / range / numpy.ndarray); got " + f"{type(c).__name__}. For an xarray DataArray coord, pass " + f"`variable.indexes[]` (a pd.Index) instead." + ) + return result + + +def _named_pandas_to_dataarray(arr: pd.Series | pd.DataFrame) -> DataArray | None: + """ + Convert a pandas Series or DataFrame with fully named axes to a DataArray. + + Returns ``None`` if any axis (or MultiIndex level) is unnamed or + non-string, so the caller can fall back to ``as_dataarray``. 
+ """ + names = list(arr.index.names) + if isinstance(arr, pd.DataFrame): + names += list(arr.columns.names) + if any(not isinstance(n, str) for n in names): + return None + + if isinstance(arr, pd.DataFrame): + if isinstance(arr.index, pd.MultiIndex) or isinstance( + arr.columns, pd.MultiIndex + ): + arr = arr.stack(list(range(arr.columns.nlevels)), future_stack=True) + return arr.to_xarray() + return DataArray(arr) + + return arr.to_xarray() + + def broadcast_mask(mask: DataArray, labels: DataArray) -> DataArray: """ Broadcast a boolean mask to match the shape of labels. diff --git a/linopy/expressions.py b/linopy/expressions.py index 2ab0b8d3..674c987c 100644 --- a/linopy/expressions.py +++ b/linopy/expressions.py @@ -1844,7 +1844,7 @@ def from_rule( cls, model: Model, rule: Callable, - coords: Sequence[Sequence | pd.Index | DataArray] | Mapping | None = None, + coords: Sequence[Sequence | pd.Index] | Mapping | None = None, ) -> LinearExpression: """ Create a linear expression from a rule and a set of coordinates. 
diff --git a/linopy/model.py b/linopy/model.py index 48a8200b..01708f49 100644 --- a/linopy/model.py +++ b/linopy/model.py @@ -20,7 +20,7 @@ import pandas as pd import xarray as xr from deprecation import deprecated -from numpy import inf, ndarray +from numpy import inf from pandas.core.frame import DataFrame from pandas.core.series import Series from xarray import DataArray, Dataset @@ -28,10 +28,10 @@ from linopy import solvers from linopy.common import ( + align_to_coords, as_dataarray, assign_multiindex_safe, best_int, - broadcast_mask, maybe_replace_signs, replace_by_map, to_path, @@ -112,73 +112,6 @@ logger = logging.getLogger(__name__) -def _coords_to_dict( - coords: Sequence[Sequence | pd.Index | DataArray] | Mapping, -) -> dict[str, Any]: - """Normalize coords to a dict mapping dim names to coordinate values.""" - if isinstance(coords, Mapping): - return dict(coords) - # Sequence of indexes - result: dict[str, Any] = {} - for c in coords: - if isinstance(c, pd.Index) and c.name: - result[c.name] = c - return result - - -def _validate_dataarray_bounds(arr: Any, coords: Any) -> Any: - """ - Validate and expand DataArray bounds against explicit coords. - - If ``arr`` is not a DataArray, return it unchanged (``as_dataarray`` - will handle conversion). For DataArray inputs: - - - Raises ``ValueError`` if the array has dimensions not in coords. - - Raises ``ValueError`` if shared dimension coordinates don't match. - - Expands missing dimensions via ``expand_dims``. 
- """ - if not isinstance(arr, DataArray): - return arr - - expected = _coords_to_dict(coords) - if not expected: - return arr - - extra = set(arr.dims) - set(expected) - if extra: - raise ValueError(f"DataArray has extra dimensions not in coords: {extra}") - - for dim, coord_values in expected.items(): - if dim not in arr.dims: - continue - if isinstance(arr.indexes.get(dim), pd.MultiIndex): - continue - expected_idx = ( - coord_values - if isinstance(coord_values, pd.Index) - else pd.Index(coord_values) - ) - actual_idx = arr.coords[dim].to_index() - if not actual_idx.equals(expected_idx): - # Same values, different order → reindex to match expected order - if len(actual_idx) == len(expected_idx) and set(actual_idx) == set( - expected_idx - ): - arr = arr.reindex({dim: expected_idx}) - else: - raise ValueError( - f"Coordinates for dimension '{dim}' do not match: " - f"expected {expected_idx.tolist()}, got {actual_idx.tolist()}" - ) - - # Expand missing dimensions - expand = {k: v for k, v in expected.items() if k not in arr.dims} - if expand: - arr = arr.expand_dims(expand) - - return arr - - class Model: """ Linear optimization model. @@ -657,9 +590,9 @@ def add_variables( self, lower: Any = -inf, upper: Any = inf, - coords: Sequence[Sequence | pd.Index | DataArray] | Mapping | None = None, + coords: Sequence[Sequence | pd.Index] | Mapping | None = None, name: str | None = None, - mask: DataArray | ndarray | Series | None = None, + mask: MaskLike | None = None, binary: bool = False, integer: bool = False, semi_continuous: bool = False, @@ -682,12 +615,27 @@ def add_variables( upper : TYPE, optional Upper bound of the variable(s). Ignored if `binary` is True. The default is inf. - coords : list/xarray.Coordinates, optional - The coords of the variable array. - These are directly passed to the DataArray creation of - `lower` and `upper`. For every single combination of - coordinates a optimization variable is added to the model. - The default is None. 
+ coords : list/dict/xarray.Coordinates, optional + The coords of the variable array. When provided with **named + dimensions** (a ``Mapping``, ``xarray.Coordinates``, a + sequence of named ``pd.Index`` objects, or an unnamed + sequence paired with ``dims=`` in ``**kwargs``), ``coords`` + is the source of truth for the variable's dimensions, + order, and values. ``lower``, ``upper`` and ``mask`` are + aligned to this contract: + + - dims of every bound must be a subset of ``coords.dims``; + extra dims raise ``ValueError``; + - dim order in the variable always follows ``coords``; + - shared-dim coordinate values must equal ``coords``; same + values in a different order are auto-reindexed, different + value sets raise ``ValueError``; + - dims listed in ``coords`` but missing from a bound are + broadcast to ``coords`` shape. + + One optimization variable is added per combination of + coordinates. The default is ``None``, in which case the + shape is inferred from the bounds. name : str, optional Reference name of the added variables. The default None results in a name like "var1", "var2" etc. @@ -740,6 +688,67 @@ def add_variables( [7]: x[7] ∈ [0, inf] [8]: x[8] ∈ [0, inf] [9]: x[9] ∈ [0, inf] + + Strict coords-as-truth: a bound with an extra dim raises. + + >>> import xarray as xr + >>> m = Model() + >>> bad = xr.DataArray( + ... [[1.0, 2.0, 3.0]] * 2, + ... dims=["extra", "x"], + ... coords={"x": [0, 1, 2]}, + ... ) + >>> m.add_variables(lower=bad, coords=[pd.Index([0, 1, 2], name="x")], name="v") + Traceback (most recent call last): + ... + ValueError: lower bound has dimension(s) ['extra'] not declared in coords ... + + Strict coords-as-truth: a bound whose shared-dim values don't + match raises. + + >>> m = Model() + >>> wrong = xr.DataArray( + ... [1.0, 2.0, 3.0], dims=["x"], coords={"x": [10, 20, 30]} + ... ) + >>> m.add_variables( + ... lower=wrong, coords=[pd.Index([0, 1, 2], name="x")], name="v" + ... ) + Traceback (most recent call last): + ... 
+ ValueError: lower bound: coordinate values for dimension 'x' do not match coords ... + + Strict coords-as-truth, helpful side: a bound whose coord values + match ``coords`` only in a different order is auto-reindexed. + + >>> m = Model() + >>> reordered = xr.DataArray( + ... [3.0, 1.0, 2.0], dims=["x"], coords={"x": ["c", "a", "b"]} + ... ) + >>> v = m.add_variables( + ... lower=reordered, + ... coords=[pd.Index(["a", "b", "c"], name="x")], + ... name="r", + ... ) + >>> list(v.data.lower.values) + [1.0, 2.0, 3.0] + + Unnamed-coords sequence + ``dims=`` opts into the same strict + enforcement as a named index — extra dims still raise. + + >>> m = Model() + >>> m.add_variables(lower=bad, coords=[[0, 1, 2]], dims=["x"], name="w") + Traceback (most recent call last): + ... + ValueError: lower bound has dimension(s) ['extra'] not declared in coords ... + + The same strict contract applies to ``mask`` (including with + ``coords=[[...]], dims=[...]``). + + >>> m = Model() + >>> m.add_variables(mask=bad, coords=[[0, 1, 2]], dims=["x"], name="wm") + Traceback (most recent call last): + ... + ValueError: mask has dimension(s) ['extra'] not declared in coords ... """ if name is None: name = f"var{self._varnameCounter}" @@ -765,14 +774,12 @@ def add_variables( "Semi-continuous variables require a positive scalar lower bound." 
) - if coords is not None: - lower = _validate_dataarray_bounds(lower, coords) - upper = _validate_dataarray_bounds(upper, coords) - + lower_da = align_to_coords(lower, coords, label="lower bound", **kwargs) + upper_da = align_to_coords(upper, coords, label="upper bound", **kwargs) data = Dataset( { - "lower": as_dataarray(lower, coords, **kwargs), - "upper": as_dataarray(upper, coords, **kwargs), + "lower": lower_da, + "upper": upper_da, "labels": -1, } ) @@ -781,8 +788,12 @@ def add_variables( self._check_valid_dim_names(data) if mask is not None: - mask = as_dataarray(mask, coords=data.coords, dims=data.dims).astype(bool) - mask = broadcast_mask(mask, data.labels) + mask = align_to_coords( + mask, + coords if coords is not None else data.coords, + label="mask", + **kwargs, + ).astype(bool) # Auto-mask based on NaN in bounds (use numpy for speed) if self.auto_mask: @@ -891,7 +902,7 @@ def add_constraints( sign: SignLike | None = ..., rhs: ConstantLike | VariableLike | ExpressionLike | None = ..., name: str | None = ..., - coords: Sequence[Sequence | pd.Index | DataArray] | Mapping | None = ..., + coords: Sequence[Sequence | pd.Index] | Mapping | None = ..., mask: MaskLike | None = ..., freeze: Literal[False] = ..., ) -> Constraint: ... @@ -907,7 +918,7 @@ def add_constraints( sign: SignLike | None = ..., rhs: ConstantLike | VariableLike | ExpressionLike | None = ..., name: str | None = ..., - coords: Sequence[Sequence | pd.Index | DataArray] | Mapping | None = ..., + coords: Sequence[Sequence | pd.Index] | Mapping | None = ..., mask: MaskLike | None = ..., freeze: Literal[True] = ..., ) -> CSRConstraint: ... 
@@ -922,7 +933,7 @@ def add_constraints( sign: SignLike | None = None, rhs: ConstantLike | VariableLike | ExpressionLike | None = None, name: str | None = None, - coords: Sequence[Sequence | pd.Index | DataArray] | Mapping | None = None, + coords: Sequence[Sequence | pd.Index] | Mapping | None = None, mask: MaskLike | None = None, freeze: bool | None = None, ) -> ConstraintBase: @@ -1046,8 +1057,7 @@ def add_constraints( (data,) = xr.broadcast(data, exclude=[TERM_DIM]) if mask is not None: - mask = as_dataarray(mask, coords=data.coords, dims=data.dims).astype(bool) - mask = broadcast_mask(mask, data.labels) + mask = align_to_coords(mask, data.coords, label="mask").astype(bool) # Auto-mask based on null expressions or NaN RHS (use numpy for speed) if self.auto_mask: @@ -1428,7 +1438,7 @@ def calculate_block_maps(self) -> None: @overload def linexpr( - self, *args: Sequence[Sequence | pd.Index | DataArray] | Mapping + self, *args: Sequence[Sequence | pd.Index] | Mapping ) -> LinearExpression: ... @overload @@ -1441,7 +1451,7 @@ def linexpr( *args: tuple[ConstantLike, str | Variable | ScalarVariable] | ConstantLike | Callable - | Sequence[Sequence | pd.Index | DataArray] + | Sequence[Sequence | pd.Index] | Mapping, ) -> LinearExpression: """ diff --git a/linopy/piecewise.py b/linopy/piecewise.py index ccc265a7..25a0ce17 100644 --- a/linopy/piecewise.py +++ b/linopy/piecewise.py @@ -1006,20 +1006,18 @@ def _broadcast_points( lin_exprs = [_to_linexpr(e) for e in exprs] - target_dims: set[str] = set() - for le in lin_exprs: - target_dims.update(str(d) for d in le.coord_dims) - - missing = target_dims - skip - {str(d) for d in points.dims} - if not missing: - return points + point_dims = {str(d) for d in points.dims} + # Iterate exprs/dims in order; a set would give a hash-dependent, + # run-varying expanded dimension order. 
expand_map: dict[str, list] = {} - for d in missing: - for le in lin_exprs: + for le in lin_exprs: + for dim in le.coord_dims: + d = str(dim) + if d in skip or d in point_dims or d in expand_map: + continue if d in le.coords: - expand_map[str(d)] = list(le.coords[d].values) - break + expand_map[d] = list(le.coords[d].values) if expand_map: points = points.expand_dims(expand_map) diff --git a/linopy/sos_reformulation.py b/linopy/sos_reformulation.py index 1f17ee92..4abfb755 100644 --- a/linopy/sos_reformulation.py +++ b/linopy/sos_reformulation.py @@ -119,7 +119,7 @@ def reformulate_sos1( upper_name = f"{prefix}{name}_upper" card_name = f"{prefix}{name}_card" - coords = [var.coords[d] for d in var.dims] + coords = [var.indexes[d] for d in var.dims] y = model.add_variables(coords=coords, name=y_name, binary=True) model.add_constraints(var <= M * y, name=upper_name) @@ -173,9 +173,9 @@ def reformulate_sos2( card_name = f"{prefix}{name}_card" z_coords = [ - pd.Index(var.coords[sos_dim].values[:-1], name=sos_dim) + pd.Index(var.indexes[sos_dim][:-1], name=sos_dim) if d == sos_dim - else var.coords[d] + else var.indexes[d] for d in var.dims ] z = model.add_variables(coords=z_coords, name=z_name, binary=True) diff --git a/test/test_common.py b/test/test_common.py index 0c379a0b..a9e84bf3 100644 --- a/test/test_common.py +++ b/test/test_common.py @@ -18,6 +18,7 @@ from linopy import LinearExpression, Model, Variable from linopy.common import ( align, + align_to_coords, as_dataarray, assign_multiindex_safe, best_int, @@ -25,6 +26,7 @@ is_constant, iterate_slices, maybe_group_terms_polars, + validate_alignment, ) from linopy.testing import assert_linequal, assert_varequal from linopy.types import CoordsLike @@ -345,13 +347,15 @@ def test_as_dataarray_with_ndarray_coords_dict_dims_aligned() -> None: def test_as_dataarray_with_ndarray_coords_dict_set_dims_not_aligned() -> None: + """Coords is source of truth: extra coord entries broadcast into the result.""" target_dims = 
("dim_0", "dim_1") target_coords = {"dim_0": ["a", "b"], "dim_2": ["A", "B"]} arr = np.array([[1, 2], [3, 4]]) da = as_dataarray(arr, coords=target_coords, dims=target_dims) - assert da.dims == target_dims + # dims labels the positional axes; coords adds dim_2 by broadcast. + assert set(da.dims) == {"dim_0", "dim_1", "dim_2"} assert list(da.coords["dim_0"].values) == ["a", "b"] - assert "dim_2" not in da.coords + assert list(da.coords["dim_2"].values) == ["A", "B"] def test_as_dataarray_with_number() -> None: @@ -483,6 +487,79 @@ def test_as_dataarray_with_unsupported_type() -> None: as_dataarray(lambda x: 1, dims=["dim1"], coords=[["a"]]) +def test_as_dataarray_preserves_extra_dims_for_broadcasting() -> None: + """Extra dims in the input are not rejected — they broadcast downstream.""" + arr = DataArray( + [[1, 2], [3, 4], [5, 6]], + dims=["a", "t"], + coords={"a": [0, 1, 2], "t": [10, 20]}, + ) + coords = {"a": [0, 1, 2]} + da = as_dataarray(arr, coords=coords) + assert set(da.dims) == {"a", "t"} + assert list(da.coords["t"].values) == [10, 20] + + +def test_as_dataarray_keeps_disjoint_shared_dim_values() -> None: + """Different value sets on a shared dim are passed through (xr.align handles).""" + arr = DataArray([1, 2, 3, 4, 5], dims=["a"], coords={"a": [0, 1, 2, 3, 4]}) + coords = {"a": [2, 3]} + da = as_dataarray(arr, coords=coords) + # No exception, no reindex; downstream alignment intersects. 
+ assert list(da.coords["a"].values) == [0, 1, 2, 3, 4] + + +def test_validate_alignment_rejects_extra_dims() -> None: + arr = DataArray( + [[1, 2], [3, 4]], dims=["a", "b"], coords={"a": [0, 1], "b": [0, 1]} + ) + with pytest.raises(ValueError, match=r"not declared in coords"): + validate_alignment(arr, {"a": [0, 1]}) + + +def test_validate_alignment_rejects_value_mismatch() -> None: + arr = DataArray([1, 2, 3], dims=["a"], coords={"a": [0, 1, 2]}) + with pytest.raises(ValueError, match="do not match coords"): + validate_alignment(arr, {"a": [10, 20, 30]}) + + +def test_validate_alignment_allows_subset_dims() -> None: + """arr.dims ⊂ coords.dims is fine (broadcasting fills the missing dim).""" + arr = DataArray([1, 2, 3], dims=["a"], coords={"a": [0, 1, 2]}) + validate_alignment(arr, {"a": [0, 1, 2], "b": [10, 20]}) # no raise + + +def test_validate_alignment_unnamed_coords_and_dims() -> None: + """coords=[[...]], dims=[...] enforces the same contract as a named mapping.""" + arr = DataArray([1, 2, 3], dims=["x"], coords={"x": [0, 1, 2]}) + validate_alignment(arr, [[0, 1, 2]], dims=["x"]) # no raise + + bad = DataArray( + [[1, 2], [3, 4]], dims=["x", "y"], coords={"x": [0, 1], "y": [0, 1]} + ) + with pytest.raises(ValueError, match=r"not declared in coords"): + validate_alignment(bad, [[0, 1]], dims=["x"]) + + +def test_validate_alignment_label_in_error() -> None: + arr = DataArray( + [[1, 2], [3, 4]], dims=["a", "b"], coords={"a": [0, 1], "b": [0, 1]} + ) + with pytest.raises(ValueError, match=r"lower bound has dimension\(s\) \['b'\]"): + validate_alignment(arr, {"a": [0, 1]}, label="lower bound") + + +def test_align_to_coords_wraps_conversion_errors() -> None: + with pytest.raises(ValueError, match=r"lower bound could not be aligned"): + align_to_coords(np.array([1, 2]), {"x": [0, 1, 2]}, label="lower bound") + + +def test_align_to_coords_preserves_type_errors() -> None: + """Unsupported input types stay TypeError (don't become ValueError).""" + with 
pytest.raises(TypeError, match=r"lower bound could not be aligned"): + align_to_coords(lambda x: x, {"x": [0, 1, 2]}, label="lower bound") + + def test_best_int() -> None: # Test for int8 assert best_int(127) == np.int8 diff --git a/test/test_constraints.py b/test/test_constraints.py index 1667bfec..acc41b2e 100644 --- a/test/test_constraints.py +++ b/test/test_constraints.py @@ -258,20 +258,29 @@ def test_masked_constraints_broadcast() -> None: assert (m.constraints.labels.bc2[:, 0:5] != -1).all() assert (m.constraints.labels.bc2[:, 5:10] == -1).all() + # Pandas Series with named index missing a dim is broadcast to data.coords. + mask_pd = pd.Series( + [True, False, True] + [False] * 7, index=pd.RangeIndex(10, name="dim_0") + ) + m.add_constraints(1 * x + 10 * y, EQUAL, 0, name="bc_pd", mask=mask_pd) + assert (m.constraints.labels.bc_pd[[0, 2], :] != -1).all() + assert (m.constraints.labels.bc_pd[[1, 3, 4, 5, 6, 7, 8, 9], :] == -1).all() + + # Mask with sparse coords (subset of data's coords) now raises instead of + # emitting a FutureWarning — the rule from the bounds path applies here too. 
mask3 = xr.DataArray( [True, True, False, False, False], dims=["dim_0"], coords={"dim_0": range(5)}, ) - with pytest.warns(FutureWarning, match="Missing values will be filled"): + with pytest.raises( + ValueError, match=r"mask: coordinate values for dimension 'dim_0'" + ): m.add_constraints(1 * x + 10 * y, EQUAL, 0, name="bc3", mask=mask3) - assert (m.constraints.labels.bc3[0:2, :] != -1).all() - assert (m.constraints.labels.bc3[2:5, :] == -1).all() - assert (m.constraints.labels.bc3[5:10, :] == -1).all() # Mask with extra dimension not in data should raise mask4 = xr.DataArray([True, False], dims=["extra_dim"]) - with pytest.raises(AssertionError, match="not a subset"): + with pytest.raises(ValueError, match=r"mask has dimension\(s\) \['extra_dim'\]"): m.add_constraints(1 * x + 10 * y, EQUAL, 0, name="bc4", mask=mask4) diff --git a/test/test_piecewise_constraints.py b/test/test_piecewise_constraints.py index c44af394..72b57265 100644 --- a/test/test_piecewise_constraints.py +++ b/test/test_piecewise_constraints.py @@ -1383,6 +1383,23 @@ def test_broadcast_over_extra_dims(self) -> None: assert "generator" in delta.dims assert "time" in delta.dims + def test_broadcast_points_dim_order_follows_exprs(self) -> None: + """Expanded dims follow the expression dim order, not set ordering.""" + import xarray as xr + + from linopy.piecewise import BREAKPOINT_DIM, _broadcast_points + + m = Model() + coords = [ + pd.Index(["v0", "v1"], name="alpha"), + pd.Index(["w0", "w1"], name="beta"), + pd.Index([0, 1], name="gamma"), + ] + x = m.add_variables(coords=coords, name="x") + points = xr.DataArray([0, 1, 2, 3], dims=[BREAKPOINT_DIM]) + out = _broadcast_points(points, 1 * x) + assert out.dims == ("alpha", "beta", "gamma", BREAKPOINT_DIM) + # =========================================================================== # NaN masking diff --git a/test/test_variable.py b/test/test_variable.py index b14b746e..47c25986 100644 --- a/test/test_variable.py +++ b/test/test_variable.py @@ 
-419,42 +419,190 @@ def test_bound_types_with_coords( ) def test_dataarray_coord_mismatch(self, model: "Model", coords: Any) -> None: lower = DataArray([0, 0, 0], dims=["x"], coords={"x": [0, 1, 2]}) - with pytest.raises(ValueError, match="do not match"): + with pytest.raises(ValueError, match="lower bound.*do not match coords"): model.add_variables(lower=lower, coords=coords, name="x") def test_dataarray_coord_mismatch_upper(self, model: "Model") -> None: upper = DataArray([1, 2, 3], dims=["x"], coords={"x": [10, 20, 30]}) - with pytest.raises(ValueError, match="do not match"): + with pytest.raises(ValueError, match="upper bound.*do not match coords"): model.add_variables(upper=upper, coords=self.SEQ_COORDS, name="x") def test_dataarray_extra_dims(self, model: "Model") -> None: - lower = DataArray([[1, 2], [3, 4]], dims=["x", "y"]) - with pytest.raises(ValueError, match="extra dimensions"): + lower = DataArray( + [[1, 2], [3, 4], [5, 6]], dims=["x", "y"], coords={"x": [0, 1, 2]} + ) + with pytest.raises(ValueError, match=r"lower bound has dimension\(s\) \['y'\]"): model.add_variables(lower=lower, coords=self.DICT_COORDS, name="x") + def test_mask_extra_dims_with_unnamed_coords_and_dims(self, model: "Model") -> None: + """Mask is validated against coords + dims= like lower/upper.""" + mask = DataArray( + [[True, False], [True, False], [False, True]], + dims=["x", "extra"], + coords={"x": [0, 1, 2]}, + ) + with pytest.raises(ValueError, match=r"mask has dimension\(s\) \['extra'\]"): + model.add_variables( + mask=mask, + coords=[[0, 1, 2]], + dims=["x"], + name="m", + ) + + def test_dataarray_coord_reorder(self, model: "Model") -> None: + """A bound whose coords differ only in order is reindexed to coords.""" + lower = DataArray([3, 1, 2], dims=["x"], coords={"x": ["c", "a", "b"]}) + var = model.add_variables( + lower=lower, coords=[pd.Index(["a", "b", "c"], name="x")], name="x" + ) + assert (var.data.lower == [1, 2, 3]).all() + + def 
test_positional_bound_aligns_to_coords(self, model: "Model") -> None: + """ + Numpy / unnamed-pandas bounds align to coords positionally, + even when the input's auto-generated coord values would not match. + """ + coords = [pd.Index(list("abc"), name="x")] + # numpy array — no labels at all, positional alignment. + v_np = model.add_variables(upper=np.array([1, 2, 3]), coords=coords, name="np") + assert v_np.dims == ("x",) + assert (v_np.data.upper.sel(x="a") == 1).all() + assert (v_np.data.upper.sel(x="c") == 3).all() + # Unnamed Series — pandas index is auto-generated, ignored in favour + # of coords (positional alignment, principle: coords is source of truth). + v_s = model.add_variables( + upper=pd.Series([10, 20, 30]), coords=coords, name="s" + ) + assert v_s.dims == ("x",) + assert (v_s.data.upper.sel(x="a") == 10).all() + assert (v_s.data.upper.sel(x="c") == 30).all() + # Unnamed DataFrame — both axes positional. + v_df = model.add_variables( + upper=pd.DataFrame([[1, 2], [3, 4], [5, 6]]), + coords=[pd.Index(list("abc"), name="x"), pd.Index(list("xy"), name="y")], + name="df", + ) + assert v_df.dims == ("x", "y") + assert (v_df.data.upper.sel(x="a", y="x") == 1).all() + assert (v_df.data.upper.sel(x="c", y="y") == 6).all() + + def test_positional_bound_wrong_size_raises_clear_error( + self, model: "Model" + ) -> None: + """ + Shape mismatch on positional inputs surfaces as a size error, + not a 'coordinates do not match' error. 
+ """ + coords = [pd.Index(list("abc"), name="x")] + with pytest.raises(ValueError, match=r"upper bound could not be aligned"): + model.add_variables(upper=np.array([1, 2]), coords=coords, name="np_bad") + with pytest.raises(ValueError, match=r"upper bound could not be aligned"): + model.add_variables(upper=pd.Series([1, 2]), coords=coords, name="s_bad") + + def test_unnamed_coords_short_circuit(self, model: "Model") -> None: + """Coords as a list of unnamed indexes leaves the bound unchanged.""" + bound = DataArray([1, 2, 3], dims=["dim_0"]) + var = model.add_variables(upper=bound, coords=[pd.Index([0, 1, 2])], name="x") + assert (var.data.upper == [1, 2, 3]).all() + # -- Broadcasting missing dims ----------------------------------------- - def test_dataarray_broadcast_missing_dim(self, model: "Model") -> None: + @pytest.mark.parametrize( + "bound", + [ + pytest.param( + DataArray([1, 2, 3], dims=["time"], coords={"time": range(3)}), + id="DataArray", + ), + pytest.param( + pd.Series(index=pd.RangeIndex(3, name="time"), data=[1, 2, 3]), + id="Series", + ), + pytest.param( + pd.DataFrame( + index=pd.RangeIndex(3, name="time"), + columns=pd.Index(["red"], name="colour"), + data=[[1], [2], [3]], + ), + id="DataFrame", + ), + pytest.param( + pd.Series( + index=pd.MultiIndex.from_product( + [pd.RangeIndex(3), ["red"]], names=("time", "colour") + ), + data=[1, 2, 3], + ), + id="Series-multiindex", + ), + pytest.param( + pd.DataFrame( + index=pd.RangeIndex(3, name="time"), + columns=pd.MultiIndex.from_product( + [["a", "b"], ["red"]], names=("space", "colour") + ), + data=[[1, 1], [2, 2], [3, 3]], + ), + id="DataFrame-multicolumns", + ), + pytest.param( + pd.DataFrame( + index=pd.MultiIndex.from_product( + [pd.RangeIndex(3), ["a", "b"]], names=("time", "space") + ), + columns=pd.Index(["red"], name="colour"), + data=[[1], [1], [2], [2], [3], [3]], + ), + id="DataFrame-multiindex", + ), + ], + ) + def test_bound_broadcast_missing_dim( + self, model: "Model", bound: 
DataArray | pd.Series | pd.DataFrame + ) -> None: + """Pandas / DataArray bounds missing dims are broadcast to coords.""" time = pd.RangeIndex(3, name="time") space = pd.Index(["a", "b"], name="space") - lower = DataArray([1, 2, 3], dims=["time"], coords={"time": range(3)}) - var = model.add_variables(lower=lower, coords=[time, space], name="x") - assert set(var.data.dims) == {"time", "space"} - assert var.data.sizes == {"time": 3, "space": 2} - # Verify broadcast filled with actual values, not NaN + colour = pd.Index(["red"], name="colour") + var = model.add_variables( + lower=-bound, upper=bound, coords=[time, space, colour], name="x" + ) + assert var.dims == ("time", "space", "colour") + assert var.data.lower.dims == ("time", "space", "colour") + assert var.data.upper.dims == ("time", "space", "colour") + assert var.data.sizes == {"time": 3, "space": 2, "colour": 1} assert not var.data.lower.isnull().any() - assert (var.data.lower.sel(space="a") == [1, 2, 3]).all() - assert (var.data.lower.sel(space="b") == [1, 2, 3]).all() - - # -- Special coord formats --------------------------------------------- + assert (var.data.lower.sel(space="a", colour="red") == [-1, -2, -3]).all() + assert (var.data.lower.sel(space="b", colour="red") == [-1, -2, -3]).all() + assert (var.data.upper.sel(space="a", colour="red") == [1, 2, 3]).all() - def test_multiindex_coords(self, model: "Model") -> None: - idx = pd.MultiIndex.from_product( - [[1, 2], ["a", "b"]], names=("level1", "level2") + @pytest.mark.parametrize( + "lower, upper", + [ + pytest.param(0, "da", id="scalar-lower+da-upper"), + pytest.param("da", 1, id="da-lower+scalar-upper"), + pytest.param("da", "da", id="da-lower+da-upper"), + ], + ) + def test_dataarray_broadcast_missing_dim_order( + self, model: "Model", lower: Any, upper: Any + ) -> None: + """Dimension order follows coords, not the type of the bounds (#706).""" + x = pd.Index(["a", "b", "c"], name="x") + y = pd.Index(["X", "Y"], name="y") + full = DataArray( + 
np.arange(6).reshape(3, 2), coords={"x": x, "y": y}, dims=["x", "y"] ) - idx.name = "multi" - var = model.add_variables(lower=0, upper=1, coords=[idx], name="x") - assert var.shape == (4,) + # bounds are DataArrays missing the 'y' dimension + da = full.sum("y") + lower = da if lower == "da" else lower + upper = da if upper == "da" else upper + var = model.add_variables(lower=lower, upper=upper, coords=[x, y], name="x") + assert var.dims == ("x", "y") + assert var.data.lower.dims == ("x", "y") + assert var.data.upper.dims == ("x", "y") + + # -- Special coord formats --------------------------------------------- def test_xarray_coordinates_object(self, model: "Model") -> None: time = pd.RangeIndex(3, name="time") @@ -527,7 +675,7 @@ def test_one_dataarray_mismatches_other_ok(self, model: "Model") -> None: """Only the mismatched bound should raise, regardless of the other.""" lower = DataArray([0, 0, 0], dims=["x"], coords={"x": [0, 1, 2]}) upper = DataArray([1, 1], dims=["x"], coords={"x": [10, 20]}) - with pytest.raises(ValueError, match="do not match"): + with pytest.raises(ValueError, match=r"upper bound.*do not match coords"): model.add_variables( lower=lower, upper=upper, coords=self.SEQ_COORDS, name="x" ) @@ -629,7 +777,7 @@ def test_reordered_coords_reindexed(self, model: "Model") -> None: def test_reordered_coords_different_values_raises(self, model: "Model") -> None: """Overlapping but not identical coord sets must still raise.""" lower = DataArray([10, 20], dims=["x"], coords={"x": ["a", "b"]}) - with pytest.raises(ValueError, match="do not match"): + with pytest.raises(ValueError, match=r"lower bound.*do not match coords"): model.add_variables(lower=lower, coords={"x": ["a", "c"]}, name="x") # -- String and datetime coordinates ----------------------------------- @@ -657,9 +805,60 @@ def test_string_coords_mismatch(self, model: "Model") -> None: lower = DataArray( [0, 0], dims=["region"], coords={"region": ["north", "south"]} ) - with 
pytest.raises(ValueError, match="do not match"): + with pytest.raises(ValueError, match=r"lower bound.*do not match coords"): model.add_variables( lower=lower, coords={"region": ["north", "south", "east"]}, name="x", ) + + +class TestAddVariablesMultiIndexCoords: + """MultiIndex-specific coord handling in add_variables.""" + + @pytest.fixture + def model(self) -> "Model": + return Model() + + @pytest.fixture + def midx(self) -> pd.MultiIndex: + mi = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=("l1", "l2")) + mi.name = "multi" + return mi + + def test_scalar_bounds(self, model: "Model", midx: pd.MultiIndex) -> None: + var = model.add_variables(lower=0, upper=1, coords=[midx], name="x") + assert var.shape == (4,) + assert var.dims == ("multi",) + + def test_dataarray_bound(self, model: "Model", midx: pd.MultiIndex) -> None: + bound = DataArray([1, 2, 3, 4], dims=["multi"], coords={"multi": midx}) + var = model.add_variables(upper=bound, coords=[midx], name="x") + assert var.shape == (4,) + assert (var.data.upper == [1, 2, 3, 4]).all() + + def test_dataarray_bound_broadcast( + self, model: "Model", midx: pd.MultiIndex + ) -> None: + time = pd.Index([10, 20, 30], name="time") + bound = DataArray([1, 2, 3, 4], dims=["multi"], coords={"multi": midx}) + var = model.add_variables( + lower=-bound, upper=bound, coords=[midx, time], name="x" + ) + assert var.dims == ("multi", "time") + assert var.shape == (4, 3) + assert (var.data.upper.sel(time=10) == [1, 2, 3, 4]).all() + + def test_without_name_raises(self, model: "Model") -> None: + midx = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=("l1", "l2")) + with pytest.raises(TypeError, match="MultiIndex.*must have .name set"): + model.add_variables(lower=0, upper=1, coords=[midx], name="x") + + def test_mismatched_multiindex_raises( + self, model: "Model", midx: pd.MultiIndex + ) -> None: + other = pd.MultiIndex.from_product([[0, 1], ["x", "y"]], names=("l1", "l2")) + other.name = "multi" + bound = 
DataArray([1, 2, 3, 4], dims=["multi"], coords={"multi": other}) + with pytest.raises(ValueError, match="MultiIndex.*does not match"): + model.add_variables(upper=bound, coords=[midx], name="x") diff --git a/test/test_variables.py b/test/test_variables.py index 37de6aff..e55ca680 100644 --- a/test/test_variables.py +++ b/test/test_variables.py @@ -123,20 +123,29 @@ def test_variables_mask_broadcast() -> None: assert (y.labels[:, 0:5] != -1).all() assert (y.labels[:, 5:10] == -1).all() + # Pandas Series with named index missing a dim is broadcast to data.coords. + mask_pd = pd.Series( + [True, False, True] + [False] * 7, index=pd.RangeIndex(10, name="dim_0") + ) + v = m.add_variables(lower, upper, name="v", mask=mask_pd) + assert (v.labels[[0, 2], :] != -1).all() + assert (v.labels[[1, 3, 4, 5, 6, 7, 8, 9], :] == -1).all() + + # Mask with sparse coords (subset of data's coords) now raises instead of + # emitting a FutureWarning — the rule from the bounds path applies here too. mask3 = xr.DataArray( [True, True, False, False, False], dims=["dim_0"], coords={"dim_0": range(5)}, ) - with pytest.warns(FutureWarning, match="Missing values will be filled"): - z = m.add_variables(lower, upper, name="z", mask=mask3) - assert (z.labels[0:2, :] != -1).all() - assert (z.labels[2:5, :] == -1).all() - assert (z.labels[5:10, :] == -1).all() + with pytest.raises( + ValueError, match=r"mask: coordinate values for dimension 'dim_0'" + ): + m.add_variables(lower, upper, name="z", mask=mask3) # Mask with extra dimension not in data should raise mask4 = xr.DataArray([True, False], dims=["extra_dim"]) - with pytest.raises(AssertionError, match="not a subset"): + with pytest.raises(ValueError, match=r"mask has dimension\(s\) \['extra_dim'\]"): m.add_variables(lower, upper, name="w", mask=mask4)