Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .claude/sweep-metadata-state.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
module,last_inspected,issue,severity_max,categories_found,notes
geotiff,2026-05-12,1753,HIGH,2,"read_geotiff_gpu stripped-fallback windowed read on a non-georef TIFF emitted float64 [-0.5, -1.5, ...] coords via the default unit GeoTransform placeholder, while the eager numpy and dask paths emit int64 file-relative pixel coords. Regression of #1710 (fix missed the stripped GPU branch). The tiled-GPU helper _gpu_apply_window_band already gates on has_georef correctly; the stripped fallback only checked t is None. Fixed by routing both non-georef and t-is-None cases through the integer pixel-coord branch and using file-relative offsets to match every other backend. Verified 4-backend parity (numpy / cupy / dask+numpy / dask+cupy)."
geotiff,2026-05-13,1812,HIGH,3,"Re-audit 2026-05-13 (af588e03 worktree, branch deep-sweep-metadata-geotiff-2026-05-13-af588e03): 4-backend (numpy/cupy/dask+numpy/dask+cupy) parity verified for full reads, windowed reads, multi-band, miniswhite, nodata, extra_tags, gdal_metadata, x/y_resolution, image_description. All three writers (eager to_geotiff, dask streaming, write_geotiff_gpu) preserve rich tags through round-trip. VRT read attrs agree across open_geotiff / read_geotiff_dask / read_vrt (all 3 drop x/y_resolution -- VRT XML format limitation, LOW). NEW HIGH finding #1812: to_geotiff silently corrupted data when 3D dims[0] was not in _BAND_DIM_NAMES (e.g. dims=('time','y','x')); writer skipped moveaxis and treated leading axis as y -- round-trip produced shape (2,4,5) instead of (4,5,2) with swapped per-slice contents. Fix raises ValueError at all 3 writer entry points with an actionable message (transpose to (y,x,band) or rename to band/bands/channel). Existing LOW (int+nodata dtype dask always-promotes-to-float64 while eager/gpu keep int when sentinel absent) is documented and intentional per #1597."
reproject,2026-05-10,1572;1573,HIGH,1;3;4,geoid_height_raster dropped input attrs and used dims[-2:] for 3D inputs (#1572). reproject/merge ignored nodatavals (rasterio convention) when rioxarray absent (#1573). Fixed in same branch.
89 changes: 89 additions & 0 deletions xrspatial/geotiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,58 @@
# a GeoTransform from coords (see ``_coords_to_transform`` and issue #1643).
_BAND_DIM_NAMES = ('band', 'bands', 'channel')

# Spatial dim names recognised on 3D writer inputs. ``y``/``x`` are the
# canonical TIFF axes; aliases are accepted so a user who happens to use
# ``lat``/``lon`` or ``row``/``col`` is not bounced by the validator below.
_Y_DIM_NAMES = ('y', 'lat', 'latitude', 'row')
_X_DIM_NAMES = ('x', 'lon', 'longitude', 'col')


def _validate_3d_writer_dims(dims) -> None:
"""Reject ambiguous 3D writer inputs (issue #1812).

The writer interprets a 3D DataArray as either ``(band, y, x)`` or
``(y, x, band)``. ``data.dims[0] in _BAND_DIM_NAMES`` decides which
branch fires the ``moveaxis``. Anything else (e.g. ``('time', 'y', 'x')``)
used to fall through silently: the writer kept the leading axis as
the spatial ``y`` axis and the result was a TIFF with the leading
axis values laid out along ``y`` (silent data corruption -- on
read-back the array round-tripped with a swapped shape).

Refuse the ambiguous case at the entry point. The message tells the
caller exactly how to fix the input (rename to one of
``_BAND_DIM_NAMES`` or transpose to ``(y, x, band)``).
"""
if len(dims) != 3:
return
d0, d1, d2 = dims
band_layout = (d0 in _BAND_DIM_NAMES
and d1 in _Y_DIM_NAMES
and d2 in _X_DIM_NAMES)
yxb_layout = (d0 in _Y_DIM_NAMES
and d1 in _X_DIM_NAMES
and d2 in _BAND_DIM_NAMES)
if band_layout or yxb_layout:
return
# Bare (y, x, *) or (*, y, x) where the third dim is unnamed but
# spatial -- the writer's old behaviour treats the non-spatial axis
# as bands. Accept that only when the unknown dim is in the band
# position (last), which matches how raw numpy callers typically
# build a band-last array.
if d0 in _Y_DIM_NAMES and d1 in _X_DIM_NAMES:
return
raise ValueError(
f"3D writer input has ambiguous dims {dims!r}. Expected "
f"(band, y, x) or (y, x, band); accepted band-dim aliases are "
f"{_BAND_DIM_NAMES} and spatial aliases are y={_Y_DIM_NAMES} / "
f"x={_X_DIM_NAMES}. Rename the non-spatial dim to 'band' or "
f"transpose the array so spatial dims come first (e.g. "
f"``da.transpose('y', 'x', {dims[0]!r})``). The writer cannot "
f"infer which axis is the band axis from arbitrary dim names "
f"and would otherwise silently treat the leading axis as the "
f"spatial y axis (issue #1812)."
)


class GeoTIFFFallbackWarning(UserWarning):
"""Warning emitted when a geotiff helper falls back to a slower path.
Expand Down Expand Up @@ -1388,6 +1440,15 @@ def to_geotiff(data: xr.DataArray | np.ndarray,
(``b != 0`` or ``d != 0`` in rasterio ``Affine`` ordering). The
on-disk GeoTIFF is axis-aligned; reproject onto an axis-aligned
grid first.
ValueError
If ``data`` is a 3D DataArray whose ``dims`` is not
``(band, y, x)`` or ``(y, x, band)`` (accepting the band-name
aliases ``bands`` / ``channel`` and spatial-name aliases
``lat`` / ``lon`` / ``latitude`` / ``longitude`` / ``row`` /
``col``). A leading non-band dim such as ``time`` is rejected
because the writer cannot infer the band axis from arbitrary
names and used to silently treat the leading axis as ``y``
(issue #1812).
"""
from ._reader import _coerce_path

Expand Down Expand Up @@ -1632,6 +1693,12 @@ def to_geotiff(data: xr.DataArray | np.ndarray,
# destinations we materialise eagerly and assemble in-memory.
if hasattr(raw, 'dask') and not cog and not _path_is_file_like:
dask_arr = raw
# Reject ambiguous 3D layouts at the entry point so a leading
# non-band dim like ``('time', 'y', 'x')`` cannot silently
# round-trip as a TIFF whose ``y`` axis carries the time
# values (issue #1812).
if raw.ndim == 3:
_validate_3d_writer_dims(data.dims)
# Handle band-first dimension order (band, y, x) -> (y, x, band)
if raw.ndim == 3 and data.dims[0] in _BAND_DIM_NAMES:
import dask.array as da
Expand Down Expand Up @@ -1682,6 +1749,12 @@ def to_geotiff(data: xr.DataArray | np.ndarray,
arr = arr.get() # Dask+CuPy -> numpy
else:
arr = np.asarray(raw)
# Reject ambiguous 3D layouts (issue #1812). The validator runs
# on ``data.dims`` (the original DataArray's dim names) rather
# than on ``arr`` so the error fires before the move-axis even
# for COG and file-like destinations that fall through here.
if arr.ndim == 3:
_validate_3d_writer_dims(data.dims)
# Handle band-first dimension order (band, y, x) -> (y, x, band)
if arr.ndim == 3 and data.dims[0] in _BAND_DIM_NAMES:
arr = np.moveaxis(arr, 0, -1)
Expand Down Expand Up @@ -3522,6 +3595,16 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray,
GPU writer forwards this kwarg unchanged. Default ``'auto'``
writes MinIsBlack for any band count, so a 4-band raster is
not silently tagged as RGB+alpha (issue #1769).

Raises
------
ValueError
If ``data`` is a 3D DataArray whose ``dims`` is not
``(band, y, x)`` or ``(y, x, band)`` (accepting band-name
aliases ``bands`` / ``channel`` and spatial-name aliases
``lat`` / ``lon`` / ``row`` / ``col``). A leading non-band
dim such as ``time`` would otherwise silently round-trip with
the leading axis treated as ``y`` (issue #1812).
"""
if not tiled:
raise ValueError(
Expand Down Expand Up @@ -3603,6 +3686,12 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray,
else:
arr = cupy.asarray(np.asarray(arr)) # numpy -> GPU

# Reject ambiguous 3D layouts (issue #1812). Mirrors the gate
# in ``to_geotiff``: a leading non-band dim like ``time`` would
# otherwise round-trip with the leading axis silently treated
# as ``y``.
if arr.ndim == 3:
_validate_3d_writer_dims(data.dims)
# Handle band-first dimension order (band, y, x) -> (y, x, band).
# rioxarray and CF-style multi-band rasters land here; without
# this remap the writer treats arr.shape[2] as the band axis and
Expand Down
232 changes: 232 additions & 0 deletions xrspatial/geotiff/tests/test_to_geotiff_3d_dim_validation_1812.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,232 @@
"""Regression tests for issue #1812.

``to_geotiff`` / ``write_geotiff_gpu`` used to silently mishandle 3D
DataArrays whose leading dim was not in
``_BAND_DIM_NAMES = ('band', 'bands', 'channel')``. The
``moveaxis(arr, 0, -1)`` that puts ``(band, y, x)`` into the on-disk
``(y, x, band)`` layout was skipped, the writer kept the leading axis
as the spatial ``y`` axis, and the result was a TIFF with silently
swapped axes -- on read-back, ``out[:, :, 0].sum() != arr[0].sum()``.

The fix raises ``ValueError`` at the entry point of all three writers
(eager, dask streaming, and GPU) when ``dims`` is not unambiguously
one of ``(band, y, x)`` or ``(y, x, band)``.
"""
from __future__ import annotations

import importlib.util

import dask.array as dsk
import numpy as np
import pytest
import xarray as xr

from xrspatial.geotiff import open_geotiff, to_geotiff


def _gpu_available() -> bool:
if importlib.util.find_spec("cupy") is None:
return False
try:
import cupy

return bool(cupy.cuda.is_available())
except Exception:
return False


_HAS_GPU = _gpu_available()
_gpu_only = pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required")


# Inputs that *must* raise. Each tuple is (dims, shape).
_AMBIGUOUS_3D_INPUTS = [
pytest.param(("time", "y", "x"), (2, 4, 5), id="time-y-x"),
pytest.param(("z", "y", "x"), (2, 4, 5), id="z-y-x"),
pytest.param(("band", "lat", "lon"), (2, 4, 5), id="band-lat-lon"), # ok via alias
pytest.param(("y", "x", "depth"), (4, 5, 2), id="y-x-depth"), # accepted: spatial-first
pytest.param(("foo", "bar", "baz"), (2, 4, 5), id="foo-bar-baz"),
]


# Inputs that must be accepted (round-trip cleanly).
_HAPPY_3D_INPUTS = [
pytest.param(("band", "y", "x"), (3, 4, 5), id="band-y-x"),
pytest.param(("bands", "y", "x"), (3, 4, 5), id="bands-y-x"),
pytest.param(("channel", "y", "x"), (3, 4, 5), id="channel-y-x"),
pytest.param(("y", "x", "band"), (4, 5, 3), id="y-x-band"),
pytest.param(("lat", "lon", "band"), (4, 5, 3), id="lat-lon-band"),
pytest.param(("row", "col", "channel"), (4, 5, 3), id="row-col-channel"),
pytest.param(("band", "lat", "lon"), (3, 4, 5), id="band-lat-lon-alias"),
]


def _make_da(dims, shape, dtype=np.uint8, backend="numpy"):
if backend == "numpy":
arr = np.arange(int(np.prod(shape)), dtype=dtype).reshape(shape)
elif backend == "dask":
arr_np = np.arange(int(np.prod(shape)), dtype=dtype).reshape(shape)
arr = dsk.from_array(arr_np, chunks=2)
elif backend == "cupy":
import cupy

arr = cupy.arange(int(np.prod(shape)),
dtype=cupy.dtype(dtype)).reshape(shape)
else:
raise ValueError(backend)
return xr.DataArray(arr, dims=dims, attrs={"crs": "EPSG:4326"})


def test_repro_silent_corruption_now_raises(tmp_path):
"""The original repro now raises a clear ValueError."""
arr = np.zeros((2, 4, 5), dtype=np.uint8)
arr[0] = 1
arr[1] = 2
da = xr.DataArray(arr, dims=("time", "y", "x"),
attrs={"crs": "EPSG:4326"})
out_path = tmp_path / "tmp_1812_time_y_x.tif"
with pytest.raises(ValueError, match="ambiguous dims"):
to_geotiff(da, str(out_path), crs=4326)


@pytest.mark.parametrize("dims, shape", [
pytest.param(("time", "y", "x"), (2, 4, 5), id="time-y-x"),
pytest.param(("z", "y", "x"), (2, 4, 5), id="z-y-x"),
pytest.param(("foo", "bar", "baz"), (2, 4, 5), id="foo-bar-baz"),
])
def test_eager_rejects_ambiguous_3d(tmp_path, dims, shape):
"""Eager numpy path raises ValueError on ambiguous 3D dim names."""
da = _make_da(dims, shape, backend="numpy")
out_path = tmp_path / f"tmp_1812_eager_{'_'.join(dims)}.tif"
with pytest.raises(ValueError, match="ambiguous dims"):
to_geotiff(da, str(out_path), crs=4326)


@pytest.mark.parametrize("dims, shape", [
pytest.param(("time", "y", "x"), (2, 4, 5), id="time-y-x"),
pytest.param(("z", "y", "x"), (2, 4, 5), id="z-y-x"),
pytest.param(("foo", "bar", "baz"), (2, 4, 5), id="foo-bar-baz"),
])
def test_dask_streaming_rejects_ambiguous_3d(tmp_path, dims, shape):
"""Dask-streaming branch raises ValueError on ambiguous 3D dim names."""
da = _make_da(dims, shape, backend="dask")
out_path = tmp_path / f"tmp_1812_dask_{'_'.join(dims)}.tif"
with pytest.raises(ValueError, match="ambiguous dims"):
to_geotiff(da, str(out_path), crs=4326)


@_gpu_only
@pytest.mark.parametrize("dims, shape", [
pytest.param(("time", "y", "x"), (2, 4, 5), id="time-y-x"),
pytest.param(("foo", "bar", "baz"), (2, 4, 5), id="foo-bar-baz"),
])
def test_gpu_writer_rejects_ambiguous_3d(tmp_path, dims, shape):
"""GPU writer raises ValueError on ambiguous 3D dim names."""
from xrspatial.geotiff import write_geotiff_gpu

da = _make_da(dims, shape, backend="cupy")
out_path = tmp_path / f"tmp_1812_gpu_{'_'.join(dims)}.tif"
with pytest.raises(ValueError, match="ambiguous dims"):
write_geotiff_gpu(da, str(out_path), crs=4326)


@pytest.mark.parametrize("dims, shape", _HAPPY_3D_INPUTS)
def test_happy_3d_round_trip(tmp_path, dims, shape):
"""Accepted 3D dim layouts still round-trip cleanly (eager + dask).

Each slice along the band axis is filled with a distinct constant
so a silent axis swap would change the per-slice sums.
"""
# Build a per-slice-distinguishable array. ``arr_full[k]`` along the
# band axis is filled with ``k + 1``.
band_pos = next(i for i, d in enumerate(dims)
if d in ("band", "bands", "channel"))
n_bands = shape[band_pos]
spatial_shape = tuple(s for i, s in enumerate(shape) if i != band_pos)
arr_np = np.empty(shape, dtype=np.uint8)
for k in range(n_bands):
slicer = [slice(None)] * 3
slicer[band_pos] = k
arr_np[tuple(slicer)] = k + 1

# Eager round-trip
da_eager = xr.DataArray(arr_np, dims=dims,
attrs={"crs": "EPSG:4326"})
p_eager = tmp_path / f"tmp_1812_happy_eager_{'_'.join(dims)}.tif"
to_geotiff(da_eager, str(p_eager), crs=4326)
out_eager = open_geotiff(str(p_eager))
# On-disk layout is always (y, x, band). Compare per-band sums.
assert out_eager.shape == spatial_shape + (n_bands,)
for k in range(n_bands):
expected = (k + 1) * (spatial_shape[0] * spatial_shape[1])
assert int(out_eager.values[:, :, k].sum()) == expected, (
f"band {k} sum mismatch on eager round-trip of dims={dims}"
)

# Dask streaming round-trip
da_dask = xr.DataArray(dsk.from_array(arr_np, chunks=2), dims=dims,
attrs={"crs": "EPSG:4326"})
p_dask = tmp_path / f"tmp_1812_happy_dask_{'_'.join(dims)}.tif"
to_geotiff(da_dask, str(p_dask), crs=4326)
out_dask = open_geotiff(str(p_dask))
assert out_dask.shape == spatial_shape + (n_bands,)
for k in range(n_bands):
expected = (k + 1) * (spatial_shape[0] * spatial_shape[1])
assert int(out_dask.values[:, :, k].sum()) == expected, (
f"band {k} sum mismatch on dask round-trip of dims={dims}"
)


def test_2d_still_works(tmp_path):
"""2D inputs are unaffected by the new 3D validator."""
arr = np.arange(20, dtype=np.uint8).reshape(4, 5)
da = xr.DataArray(arr, dims=("y", "x"), attrs={"crs": "EPSG:4326"})
p = tmp_path / "tmp_1812_2d.tif"
to_geotiff(da, str(p), crs=4326)
out = open_geotiff(str(p))
assert out.shape == (4, 5)
assert np.array_equal(out.values, arr)


def test_error_message_actionable(tmp_path):
"""The ValueError message tells the caller how to fix the input."""
arr = np.zeros((2, 4, 5), dtype=np.uint8)
da = xr.DataArray(arr, dims=("time", "y", "x"),
attrs={"crs": "EPSG:4326"})
p = tmp_path / "tmp_1812_msg.tif"
with pytest.raises(ValueError) as excinfo:
to_geotiff(da, str(p), crs=4326)
msg = str(excinfo.value)
# Mentions the offending dim layout
assert "('time', 'y', 'x')" in msg
# Mentions the accepted alternatives
assert "(band, y, x)" in msg
assert "(y, x, band)" in msg
# Points the user at a concrete remediation
assert "transpose" in msg.lower() or "rename" in msg.lower()
# References the issue
assert "#1812" in msg


@_gpu_only
def test_gpu_writer_happy_path_still_works(tmp_path):
"""GPU writer's existing happy paths (band-first and band-last) survive."""
import cupy

from xrspatial.geotiff import write_geotiff_gpu

arr_bf = cupy.arange(3 * 4 * 5, dtype=cupy.uint8).reshape(3, 4, 5)
da_bf = xr.DataArray(arr_bf, dims=("band", "y", "x"),
attrs={"crs": "EPSG:4326"})
p_bf = tmp_path / "tmp_1812_gpu_bf.tif"
write_geotiff_gpu(da_bf, str(p_bf), crs=4326)
out_bf = open_geotiff(str(p_bf))
assert out_bf.shape == (4, 5, 3)

arr_bl = cupy.arange(4 * 5 * 3, dtype=cupy.uint8).reshape(4, 5, 3)
da_bl = xr.DataArray(arr_bl, dims=("y", "x", "band"),
attrs={"crs": "EPSG:4326"})
p_bl = tmp_path / "tmp_1812_gpu_bl.tif"
write_geotiff_gpu(da_bl, str(p_bl), crs=4326)
out_bl = open_geotiff(str(p_bl))
assert out_bl.shape == (4, 5, 3)
Loading