Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .claude/sweep-api-consistency-state.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
module,last_inspected,issue,severity_max,categories_found,notes
geotiff,2026-05-13,1775,MEDIUM,3,"Sweep v4 (deep-sweep-api-consistency-geotiff-2026-05-13). 1 MEDIUM finding filed and fixed: #1775 reader trio dtype kwarg lacks str|np.dtype|None annotation (Cat 3). Annotation added on open_geotiff, read_geotiff_dask, read_geotiff_gpu, and read_vrt; pinning tests extended in test_signature_annotations_1654.py. Cross-sibling return-type drift surfaced for follow-up but not fixed (Cat 2): write_vrt returns str while to_geotiff and write_geotiff_gpu return None -- behaviour change deferred. Prior sweep findings (#1654 #1683 #1684 #1685 #1705 #1754) all confirmed fixed. cuda-validated."
geotiff,2026-05-13,1810,MEDIUM,5,"Sweep v5 (deep-sweep-api-consistency-geotiff-2026-05-13). 1 MEDIUM finding filed and fixed: #1810 open_geotiff dispatcher dropped missing_sources kwarg when routing to read_vrt (Cat 5, same class as #1561/#1605/#1685/#1795). Fix mirrors the on_gpu_failure pattern: sentinel default, forward to read_vrt for .vrt sources, reject for non-VRT sources. Regression test in test_open_geotiff_missing_sources_1810.py. Prior sweep findings (#1654 #1683 #1684 #1685 #1705 #1715 #1754 #1775) all confirmed fixed. Cross-sibling return-type drift (Cat 2): write_vrt returns str while to_geotiff and write_geotiff_gpu return None -- still deferred (LOW, callers do not substitute these writers). cuda-validated."
reproject,2026-05-10,1570,HIGH,2;5,"Filed cross-module attrs['vertical_crs'] type collision (string vs EPSG int) vs xrspatial.geotiff. Fixed in PR (TBD): reproject now writes EPSG int and preserves friendly token under vertical_datum. MEDIUM kwarg-order drift (transform_precision vs chunk_size) and missing type hints vs geotiff documented but not fixed (cosmetic, kwarg-only)."
38 changes: 36 additions & 2 deletions xrspatial/geotiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,12 @@
# None is itself a value a caller could supply alongside crs=. See
# issue #1715.
_CRS_WKT_DEPRECATED_SENTINEL = object()
# ``open_geotiff`` needs to tell "caller never set missing_sources" (default
# sentinel: skip forwarding so the read_vrt default applies, and reject the
# kwarg up front for non-VRT sources) from "caller set missing_sources=<value>"
# (forward verbatim to read_vrt). Mirrors the on_gpu_failure pattern. See
# issue #1810.
_MISSING_SOURCES_SENTINEL = object()

# Names of dims that ``to_geotiff`` / ``write_geotiff_gpu`` treat as the
# non-spatial band axis. Used both to remap ``(band, y, x)`` inputs to
Expand Down Expand Up @@ -754,6 +760,7 @@ def open_geotiff(source: str | BinaryIO, *,
gpu: bool = False,
max_pixels: int | None = None,
on_gpu_failure: str = _ON_GPU_FAILURE_SENTINEL,
missing_sources: str = _MISSING_SOURCES_SENTINEL,
) -> xr.DataArray:
"""Read a GeoTIFF, COG, or VRT file into an xarray.DataArray.

Expand Down Expand Up @@ -799,6 +806,14 @@ def open_geotiff(source: str | BinaryIO, *,
Passing this kwarg with ``gpu=False`` raises ``ValueError``
because the policy only applies to the GPU pipeline. See
``read_geotiff_gpu`` for the full description.
missing_sources : {'warn', 'raise'}, optional
Forwarded to ``read_vrt`` when the source is a ``.vrt`` file.
``'warn'`` preserves the historical behavior: emit
``GeoTIFFFallbackWarning``, record ``attrs['vrt_holes']``, and
return a partial mosaic. ``'raise'`` fails immediately. Passing
this kwarg with a non-VRT source raises ``ValueError`` because
the policy only applies to the VRT pipeline. See ``read_vrt``
for the full description.

Returns
-------
Expand Down Expand Up @@ -844,8 +859,24 @@ def open_geotiff(source: str | BinaryIO, *,
"Pass gpu=True to enable the GPU pipeline, or drop "
"on_gpu_failure to keep the default CPU path.")

# ``missing_sources`` is VRT-only. Reject it up front when the source
# is not a ``.vrt`` file so callers learn the policy is being ignored
# instead of getting a silent drop -- same pattern ``on_gpu_failure``
# uses above for the GPU-only kwarg, and the same class of dispatcher
# silently-drops-backend-kwarg bug #1561 / #1605 / #1685 / #1795 fixed
# for the other VRT/GPU kwargs. See issue #1810.
missing_sources_passed = (
missing_sources is not _MISSING_SOURCES_SENTINEL)
_is_vrt_source = (
isinstance(source, str) and source.lower().endswith('.vrt'))
if missing_sources_passed and not _is_vrt_source:
raise ValueError(
"missing_sources only applies to VRT sources. "
"Pass a .vrt path to enable the VRT pipeline, or drop "
"missing_sources to keep the default GeoTIFF path.")

# VRT files (string paths only -- VRT XML references other files on disk)
if isinstance(source, str) and source.lower().endswith('.vrt'):
if _is_vrt_source:
# ``read_vrt`` does not accept ``overview_level`` (the VRT XML
# references its own source files; overview selection would need
# to apply to each one). Silently dropping the kwarg was the same
Expand All @@ -872,9 +903,12 @@ def open_geotiff(source: str | BinaryIO, *,
"VRT reads do not go through the GPU decoder pipeline; "
"drop the kwarg or call read_geotiff_gpu directly on a "
".tif source.")
vrt_kwargs = {}
if missing_sources_passed:
vrt_kwargs['missing_sources'] = missing_sources
return read_vrt(source, dtype=dtype, window=window, band=band,
name=name, chunks=chunks, gpu=gpu,
max_pixels=max_pixels)
max_pixels=max_pixels, **vrt_kwargs)

# File-like buffers don't support the GPU or dask code paths because
# those re-open the source by path from worker tasks or device-side
Expand Down
146 changes: 146 additions & 0 deletions xrspatial/geotiff/tests/test_open_geotiff_missing_sources_1810.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
"""Regression test for #1810: ``open_geotiff`` did not accept
``missing_sources`` and did not forward it to ``read_vrt`` when the
source was a VRT.

The api-consistency sweep on 2026-05-13 flagged that ``read_vrt`` had
gained a ``missing_sources='warn'|'raise'`` policy kwarg (#1806) but
the documented dispatcher entry point ``open_geotiff`` did not expose
it. Callers wanting strict failure had to call ``read_vrt`` directly.
Same class of dispatcher-drops-backend-kwarg bug as #1561, #1605,
#1685, #1795.
"""
from __future__ import annotations

import inspect

import numpy as np
import pytest
import xarray as xr

from xrspatial.geotiff import (
GeoTIFFFallbackWarning,
open_geotiff,
to_geotiff,
write_vrt,
)


def _write_missing_source_vrt(path):
"""Write a VRT pointing at a non-existent source file.

Mirrors the helper in ``test_vrt_missing_sources_policy_1799.py`` so
the warn-vs-raise branches surface the same way through the
dispatcher as they do via the direct ``read_vrt`` call.
"""
path.write_text(
'<VRTDataset rasterXSize="2" rasterYSize="2">\n'
' <VRTRasterBand dataType="Byte" band="1">\n'
' <SimpleSource>\n'
' <SourceFilename relativeToVRT="1">missing.tif'
'</SourceFilename>\n'
' <SourceBand>1</SourceBand>\n'
' <SrcRect xOff="0" yOff="0" xSize="2" ySize="2"/>\n'
' <DstRect xOff="0" yOff="0" xSize="2" ySize="2"/>\n'
' </SimpleSource>\n'
' </VRTRasterBand>\n'
'</VRTDataset>\n'
)


def test_open_geotiff_accepts_missing_sources():
"""The signature exposes ``missing_sources`` so IDE autocomplete and
``inspect.signature`` see the kwarg without parsing the docstring.
"""
sig = inspect.signature(open_geotiff)
assert 'missing_sources' in sig.parameters


def test_open_geotiff_vrt_missing_sources_warn(tmp_path):
"""``missing_sources='warn'`` matches the historic behavior:
``GeoTIFFFallbackWarning`` is emitted and ``attrs['vrt_holes']`` is
populated. Going through the dispatcher must produce the same
result as calling ``read_vrt`` directly.
"""
vrt = tmp_path / "tmp_1810_missing_warn.vrt"
_write_missing_source_vrt(vrt)

with pytest.warns(GeoTIFFFallbackWarning, match="could not be read"):
da = open_geotiff(str(vrt), missing_sources='warn')

assert 'vrt_holes' in da.attrs
assert da.attrs['vrt_holes'][0]['source'].endswith('missing.tif')


def test_open_geotiff_vrt_missing_sources_raise(tmp_path):
"""``missing_sources='raise'`` propagates through the dispatcher and
fails fast instead of silently returning a partial mosaic.
"""
vrt = tmp_path / "tmp_1810_missing_raise.vrt"
_write_missing_source_vrt(vrt)

with pytest.raises((OSError, ValueError)):
open_geotiff(str(vrt), missing_sources='raise')


def test_open_geotiff_vrt_missing_sources_validates_policy(tmp_path):
"""The validator on ``read_vrt`` fires through the dispatcher path
so callers get the same parameter-named error from either entry
point.
"""
vrt = tmp_path / "tmp_1810_missing_bad_policy.vrt"
_write_missing_source_vrt(vrt)

with pytest.raises(ValueError, match="missing_sources"):
open_geotiff(str(vrt), missing_sources='ignore')


def test_open_geotiff_rejects_missing_sources_on_tif(tmp_path):
"""``missing_sources`` is VRT-only; passing it with a ``.tif`` source
raises rather than silently doing nothing. Mirrors the
``on_gpu_failure`` rejection pattern.
"""
arr = np.arange(16, dtype=np.uint16).reshape(4, 4)
da = xr.DataArray(
arr, dims=['y', 'x'],
coords={'y': np.arange(4, dtype=np.float64),
'x': np.arange(4, dtype=np.float64)},
attrs={'crs': 4326},
)
tif_path = tmp_path / "single_tile.tif"
to_geotiff(da, str(tif_path))

with pytest.raises(ValueError, match="missing_sources only applies"):
open_geotiff(str(tif_path), missing_sources='raise')


def test_open_geotiff_vrt_without_missing_sources_kwarg_still_works(tmp_path):
"""Omitting ``missing_sources`` is a no-op: behaviour matches the
pre-fix dispatcher (and the ``read_vrt`` default of ``'warn'``).
The existing two-tile VRT exercises the happy path so callers
upgrading from #1806 do not see a regression.
"""
arr_a = np.arange(16, dtype=np.uint16).reshape(4, 4)
da_a = xr.DataArray(
arr_a, dims=['y', 'x'],
coords={'y': np.array([0.5, 1.5, 2.5, 3.5]),
'x': np.array([0.5, 1.5, 2.5, 3.5])},
attrs={'crs': 4326},
)
tile_a = tmp_path / "tile_a.tif"
to_geotiff(da_a, str(tile_a))

arr_b = np.arange(16, 32, dtype=np.uint16).reshape(4, 4)
da_b = xr.DataArray(
arr_b, dims=['y', 'x'],
coords={'y': np.array([0.5, 1.5, 2.5, 3.5]),
'x': np.array([4.5, 5.5, 6.5, 7.5])},
attrs={'crs': 4326},
)
tile_b = tmp_path / "tile_b.tif"
to_geotiff(da_b, str(tile_b))

vrt_path = tmp_path / "mosaic_1810.vrt"
write_vrt(str(vrt_path), [str(tile_a), str(tile_b)])

da = open_geotiff(str(vrt_path))
assert da.shape == (4, 8)
Loading