Skip to content
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/release_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ Most users should keep calling ``model.solve(...)``. If you want more control, y

**Bug Fixes**

* ``Model.add_variables``: 0.7.0 made ``coords`` (dims, order, and values) the source of truth for ``DataArray`` bounds; this release closes the two remaining gaps. Pandas ``Series`` / ``DataFrame`` bounds missing a dimension are broadcast to ``coords`` instead of being silently dropped (`#709 <https://github.com/PyPSA/linopy/issues/709>`__), and the variable's dimension order always follows ``coords`` regardless of bound type (`#706 <https://github.com/PyPSA/linopy/issues/706>`__).
* ``add_piecewise_formulation`` now produces a reproducible dimension order in the broadcast breakpoint array. The previous set-based expansion gave a hash-randomized order that varied between processes.
* SOS constraints on masked variables no longer cause solver-specific failures (Gurobi ``IndexError``, Xpress ``?404 Invalid column number``, LP parse errors, silent set corruption). ``Model.solve()`` and ``Model.to_file()`` now raise a clear ``NotImplementedError`` referring users to `#688 <https://github.com/PyPSA/linopy/issues/688>`__; pass ``reformulate_sos=True`` as a workaround.
* ``Model.solve(..., reformulate_sos=True)`` now actually reformulates SOS constraints even when the solver supports them natively. Previously the flag was ignored in that case and only a warning was emitted.

Expand Down
106 changes: 83 additions & 23 deletions linopy/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,27 +126,70 @@ def _coords_to_dict(
return result


def _validate_dataarray_bounds(arr: Any, coords: Any) -> Any:
def _named_pandas_to_dataarray(arr: pd.Series | pd.DataFrame) -> DataArray | None:
Comment thread
FBumann marked this conversation as resolved.
Outdated
"""
Validate and expand DataArray bounds against explicit coords.
Convert a pandas Series or DataFrame with fully named axes to a DataArray.

Multi-level columns are unstacked so each level becomes its own dimension.
Returns ``None`` if any axis (or MultiIndex level) is unnamed, signalling
that the caller should fall back to ``as_dataarray``.
"""
if isinstance(arr, pd.DataFrame):
while isinstance(arr, pd.DataFrame):
arr = arr.unstack()
if not isinstance(arr, pd.Series):
return None

index = arr.index
if isinstance(index, pd.MultiIndex):
if any(n is None for n in index.names):
return None
elif index.name is None:
return None

return arr.to_xarray()

If ``arr`` is not a DataArray, return it unchanged (``as_dataarray``
will handle conversion). For DataArray inputs:

- Raises ``ValueError`` if the array has dimensions not in coords.
- Raises ``ValueError`` if shared dimension coordinates don't match.
- Expands missing dimensions via ``expand_dims``.
def _as_dataarray_in_coords(arr: Any, coords: Any, **kwargs: Any) -> DataArray:
"""
if not isinstance(arr, DataArray):
return arr
Coerce ``arr`` into a DataArray that matches the model ``coords``.

``coords`` is the source of truth: the returned DataArray has the
dimensions, dimension order, and coordinate values of ``coords``,
regardless of the input type. Pandas inputs with fully named axes
are converted via ``to_xarray`` so their axis names map to
dimensions; scalars, numpy arrays, and unnamed pandas go through
``as_dataarray``. The result is then validated, expanded over
missing dims, and transposed; ``expand_dims`` and ``transpose``
are no-ops when the array already matches.

- Raises ``ValueError`` if the input has dimensions not in
``coords``.
- Raises ``ValueError`` if shared dimension coordinates differ in
values. Same-values-different-order coordinates are reindexed.
"""
if coords is None:
return as_dataarray(arr, coords, **kwargs)

expected = _coords_to_dict(coords)
if not expected:
return arr
return as_dataarray(arr, coords, **kwargs)

orig_type_name = type(arr).__name__

if isinstance(arr, pd.Series | pd.DataFrame):
converted = _named_pandas_to_dataarray(arr)
if converted is not None:
arr = converted

if not isinstance(arr, DataArray):
return as_dataarray(arr, coords, **kwargs)

extra = set(arr.dims) - set(expected)
if extra:
raise ValueError(f"DataArray has extra dimensions not in coords: {extra}")
raise ValueError(
f"{orig_type_name} has extra dimensions not in coords: {extra}"
)

for dim, coord_values in expected.items():
if dim not in arr.dims:
Expand All @@ -171,11 +214,30 @@ def _validate_dataarray_bounds(arr: Any, coords: Any) -> Any:
f"expected {expected_idx.tolist()}, got {actual_idx.tolist()}"
)

# Expand missing dimensions
# expand_dims prepends new dimensions and their coordinate variables;
# the subsequent transpose restores coords order. Both are no-ops when
# the array already matches. Reconstruct so the DataArray's coords
# iteration order also follows coords (a Dataset built from this picks
# up its dim order from coord insertion).
expand = {k: v for k, v in expected.items() if k not in arr.dims}
if expand:
arr = arr.expand_dims(expand)

target_dims = tuple(d for d in expected if d in arr.dims) + tuple(
d for d in arr.dims if d not in expected
)
arr = arr.transpose(*target_dims)

coord_order = [c for c in target_dims if c in arr.coords] + [
c for c in arr.coords if c not in target_dims
]
if list(arr.coords) != coord_order:
arr = DataArray(
arr.variable,
coords={c: arr.coords[c] for c in coord_order},
name=arr.name,
)

return arr


Expand Down Expand Up @@ -683,11 +745,13 @@ def add_variables(
Upper bound of the variable(s). Ignored if `binary` is True.
The default is inf.
coords : list/xarray.Coordinates, optional
The coords of the variable array.
These are directly passed to the DataArray creation of
`lower` and `upper`. For every single combination of
coordinates an optimization variable is added to the model.
The default is None.
The coords of the variable array. When provided, ``coords``
is the source of truth for the variable's dimensions,
dimension order, and coordinate values; ``lower`` and
``upper`` are broadcast and aligned to match. One
optimization variable is added per combination of
coordinates. The default is None, in which case the shape
is inferred from the bounds.
name : str, optional
Reference name of the added variables. The default None results in
a name like "var1", "var2" etc.
Expand Down Expand Up @@ -765,14 +829,10 @@ def add_variables(
"Semi-continuous variables require a positive scalar lower bound."
)

if coords is not None:
lower = _validate_dataarray_bounds(lower, coords)
upper = _validate_dataarray_bounds(upper, coords)

data = Dataset(
{
"lower": as_dataarray(lower, coords, **kwargs),
"upper": as_dataarray(upper, coords, **kwargs),
"lower": _as_dataarray_in_coords(lower, coords, **kwargs),
"upper": _as_dataarray_in_coords(upper, coords, **kwargs),
"labels": -1,
}
)
Expand Down
20 changes: 9 additions & 11 deletions linopy/piecewise.py
Original file line number Diff line number Diff line change
Expand Up @@ -1006,20 +1006,18 @@ def _broadcast_points(

lin_exprs = [_to_linexpr(e) for e in exprs]

target_dims: set[str] = set()
for le in lin_exprs:
target_dims.update(str(d) for d in le.coord_dims)

missing = target_dims - skip - {str(d) for d in points.dims}
if not missing:
return points
point_dims = {str(d) for d in points.dims}

# Iterate exprs/dims in order; a set would give a hash-dependent,
# run-varying expanded dimension order.
expand_map: dict[str, list] = {}
for d in missing:
for le in lin_exprs:
for le in lin_exprs:
for dim in le.coord_dims:
d = str(dim)
if d in skip or d in point_dims or d in expand_map:
continue
if d in le.coords:
expand_map[str(d)] = list(le.coords[d].values)
break
expand_map[d] = list(le.coords[d].values)

if expand_map:
points = points.expand_dims(expand_map)
Expand Down
17 changes: 17 additions & 0 deletions test/test_piecewise_constraints.py
Original file line number Diff line number Diff line change
Expand Up @@ -1383,6 +1383,23 @@ def test_broadcast_over_extra_dims(self) -> None:
assert "generator" in delta.dims
assert "time" in delta.dims

def test_broadcast_points_dim_order_follows_exprs(self) -> None:
    """Broadcast breakpoints gain their new dims in the expression's order.

    The variable is declared over ``alpha``, ``beta`` and ``gamma`` (in
    that order); after broadcasting, the points must carry exactly those
    dims in that order, followed by the breakpoint dimension.
    """
    import xarray as xr

    from linopy.piecewise import BREAKPOINT_DIM, _broadcast_points

    # Breakpoints start out with the breakpoint dimension only.
    breakpoints = xr.DataArray([0, 1, 2, 3], dims=[BREAKPOINT_DIM])

    alpha = pd.Index(["v0", "v1"], name="alpha")
    beta = pd.Index(["w0", "w1"], name="beta")
    gamma = pd.Index([0, 1], name="gamma")

    model = Model()
    variable = model.add_variables(coords=[alpha, beta, gamma], name="x")

    broadcast = _broadcast_points(breakpoints, 1 * variable)

    expected_dims = ("alpha", "beta", "gamma", BREAKPOINT_DIM)
    assert broadcast.dims == expected_dims


# ===========================================================================
# NaN masking
Expand Down
107 changes: 99 additions & 8 deletions test/test_variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,19 +432,110 @@ def test_dataarray_extra_dims(self, model: "Model") -> None:
with pytest.raises(ValueError, match="extra dimensions"):
model.add_variables(lower=lower, coords=self.DICT_COORDS, name="x")

def test_dataarray_coord_reorder(self, model: "Model") -> None:
    """Bounds labelled in a permuted order are realigned to ``coords``."""
    target_index = pd.Index(["a", "b", "c"], name="x")
    # Same labels as the target index, but listed as c, a, b.
    shuffled_lower = DataArray(
        [3, 1, 2], dims=["x"], coords={"x": ["c", "a", "b"]}
    )

    var = model.add_variables(
        lower=shuffled_lower, coords=[target_index], name="x"
    )

    realigned = var.data.lower
    assert (realigned == [1, 2, 3]).all()

# -- Broadcasting missing dims -----------------------------------------

def test_dataarray_broadcast_missing_dim(self, model: "Model") -> None:
@pytest.mark.parametrize(
"bound",
[
pytest.param(
DataArray([1, 2, 3], dims=["time"], coords={"time": range(3)}),
id="DataArray",
),
pytest.param(
pd.Series(index=pd.RangeIndex(3, name="time"), data=[1, 2, 3]),
id="Series",
),
pytest.param(
pd.DataFrame(
index=pd.RangeIndex(3, name="time"),
columns=pd.Index(["red"], name="colour"),
data=[[1], [2], [3]],
),
id="DataFrame",
),
pytest.param(
pd.Series(
index=pd.MultiIndex.from_product(
[pd.RangeIndex(3), ["red"]], names=("time", "colour")
),
data=[1, 2, 3],
),
id="Series-multiindex",
),
pytest.param(
pd.DataFrame(
index=pd.RangeIndex(3, name="time"),
columns=pd.MultiIndex.from_product(
[["a", "b"], ["red"]], names=("space", "colour")
),
data=[[1, 1], [2, 2], [3, 3]],
),
id="DataFrame-multicolumns",
),
pytest.param(
pd.DataFrame(
index=pd.MultiIndex.from_product(
[pd.RangeIndex(3), ["a", "b"]], names=("time", "space")
),
columns=pd.Index(["red"], name="colour"),
data=[[1], [1], [2], [2], [3], [3]],
),
id="DataFrame-multiindex",
),
],
)
def test_bound_broadcast_missing_dim(
self, model: "Model", bound: DataArray | pd.Series | pd.DataFrame
) -> None:
"""Pandas / DataArray bounds missing dims are broadcast to coords."""
time = pd.RangeIndex(3, name="time")
space = pd.Index(["a", "b"], name="space")
lower = DataArray([1, 2, 3], dims=["time"], coords={"time": range(3)})
var = model.add_variables(lower=lower, coords=[time, space], name="x")
assert set(var.data.dims) == {"time", "space"}
assert var.data.sizes == {"time": 3, "space": 2}
# Verify broadcast filled with actual values, not NaN
colour = pd.Index(["red"], name="colour")
var = model.add_variables(
lower=-bound, upper=bound, coords=[time, space, colour], name="x"
)
assert var.dims == ("time", "space", "colour")
assert var.data.lower.dims == ("time", "space", "colour")
assert var.data.upper.dims == ("time", "space", "colour")
assert var.data.sizes == {"time": 3, "space": 2, "colour": 1}
assert not var.data.lower.isnull().any()
assert (var.data.lower.sel(space="a") == [1, 2, 3]).all()
assert (var.data.lower.sel(space="b") == [1, 2, 3]).all()
assert (var.data.lower.sel(space="a", colour="red") == [-1, -2, -3]).all()
assert (var.data.lower.sel(space="b", colour="red") == [-1, -2, -3]).all()
assert (var.data.upper.sel(space="a", colour="red") == [1, 2, 3]).all()

@pytest.mark.parametrize(
    "lower, upper",
    [
        pytest.param(0, "da", id="scalar-lower+da-upper"),
        pytest.param("da", 1, id="da-lower+scalar-upper"),
        pytest.param("da", "da", id="da-lower+da-upper"),
    ],
)
def test_dataarray_broadcast_missing_dim_order(
    self, model: "Model", lower: Any, upper: Any
) -> None:
    """Variable dims follow ``coords`` whichever bound is a DataArray (#706).

    The string ``"da"`` in the parametrization is a placeholder that is
    swapped for a DataArray bound lacking the ``y`` dimension.
    """
    expected_dims = ("x", "y")
    x_index = pd.Index(["a", "b", "c"], name="x")
    y_index = pd.Index(["X", "Y"], name="y")
    grid = DataArray(
        np.arange(6).reshape(3, 2),
        coords={"x": x_index, "y": y_index},
        dims=["x", "y"],
    )
    # Summing out "y" leaves a bound that only carries the "x" dimension.
    partial = grid.sum("y")
    if lower == "da":
        lower = partial
    if upper == "da":
        upper = partial

    var = model.add_variables(
        lower=lower, upper=upper, coords=[x_index, y_index], name="x"
    )

    assert var.dims == expected_dims
    assert var.data.lower.dims == expected_dims
    assert var.data.upper.dims == expected_dims

# -- Special coord formats ---------------------------------------------

Expand Down
Loading