diff --git a/.claude/sweep-api-consistency-state.csv b/.claude/sweep-api-consistency-state.csv index 76ec19a2..d1e60c54 100644 --- a/.claude/sweep-api-consistency-state.csv +++ b/.claude/sweep-api-consistency-state.csv @@ -1,3 +1,3 @@ module,last_inspected,issue,severity_max,categories_found,notes -geotiff,2026-05-13,1775,MEDIUM,3,"Sweep v4 (deep-sweep-api-consistency-geotiff-2026-05-13). 1 MEDIUM finding filed and fixed: #1775 reader trio dtype kwarg lacks str|np.dtype|None annotation (Cat 3). Annotation added on open_geotiff, read_geotiff_dask, read_geotiff_gpu, and read_vrt; pinning tests extended in test_signature_annotations_1654.py. Cross-sibling return-type drift surfaced for follow-up but not fixed (Cat 2): write_vrt returns str while to_geotiff and write_geotiff_gpu return None -- behaviour change deferred. Prior sweep findings (#1654 #1683 #1684 #1685 #1705 #1754) all confirmed fixed. cuda-validated." +geotiff,2026-05-13,1810,MEDIUM,5,"Sweep v5 (deep-sweep-api-consistency-geotiff-2026-05-13). 1 MEDIUM finding filed and fixed: #1810 open_geotiff dispatcher dropped missing_sources kwarg when routing to read_vrt (Cat 5, same class as #1561/#1605/#1685/#1795). Fix mirrors the on_gpu_failure pattern: sentinel default, forward to read_vrt for .vrt sources, reject for non-VRT sources. Regression test in test_open_geotiff_missing_sources_1810.py. Prior sweep findings (#1654 #1683 #1684 #1685 #1705 #1715 #1754 #1775) all confirmed fixed. Cross-sibling return-type drift (Cat 2): write_vrt returns str while to_geotiff and write_geotiff_gpu return None -- still deferred (LOW, callers do not substitute these writers). cuda-validated." reproject,2026-05-10,1570,HIGH,2;5,"Filed cross-module attrs['vertical_crs'] type collision (string vs EPSG int) vs xrspatial.geotiff. Fixed in PR (TBD): reproject now writes EPSG int and preserves friendly token under vertical_datum. MEDIUM kwarg-order drift (transform_precision vs chunk_size) and missing type hints vs geotiff documented but not fixed (cosmetic, kwarg-only)." diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index d3485919..11ffec2f 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -87,6 +87,12 @@ # None is itself a value a caller could supply alongside crs=. See # issue #1715. _CRS_WKT_DEPRECATED_SENTINEL = object() +# ``open_geotiff`` needs to tell "caller never set missing_sources" (default +# sentinel: skip forwarding so the read_vrt default applies, and reject the +# kwarg up front for non-VRT sources) from "caller set missing_sources=" +# (forward verbatim to read_vrt). Mirrors the on_gpu_failure pattern. See +# issue #1810. +_MISSING_SOURCES_SENTINEL = object() # Names of dims that ``to_geotiff`` / ``write_geotiff_gpu`` treat as the # non-spatial band axis. Used both to remap ``(band, y, x)`` inputs to @@ -754,6 +760,7 @@ def open_geotiff(source: str | BinaryIO, *, gpu: bool = False, max_pixels: int | None = None, on_gpu_failure: str = _ON_GPU_FAILURE_SENTINEL, + missing_sources: str = _MISSING_SOURCES_SENTINEL, ) -> xr.DataArray: """Read a GeoTIFF, COG, or VRT file into an xarray.DataArray. @@ -799,6 +806,14 @@ def open_geotiff(source: str | BinaryIO, *, Passing this kwarg with ``gpu=False`` raises ``ValueError`` because the policy only applies to the GPU pipeline. See ``read_geotiff_gpu`` for the full description. + missing_sources : {'warn', 'raise'}, optional + Forwarded to ``read_vrt`` when the source is a ``.vrt`` file. + ``'warn'`` preserves the historical behavior: emit + ``GeoTIFFFallbackWarning``, record ``attrs['vrt_holes']``, and + return a partial mosaic. ``'raise'`` fails immediately. Passing + this kwarg with a non-VRT source raises ``ValueError`` because + the policy only applies to the VRT pipeline. See ``read_vrt`` + for the full description. Returns ------- @@ -844,8 +859,24 @@ def open_geotiff(source: str | BinaryIO, *, "Pass gpu=True to enable the GPU pipeline, or drop " "on_gpu_failure to keep the default CPU path.") + # ``missing_sources`` is VRT-only. Reject it up front when the source + # is not a ``.vrt`` file so callers learn the policy is being ignored + # instead of getting a silent drop -- same pattern ``on_gpu_failure`` + # uses above for the GPU-only kwarg, and the same class of dispatcher + # silently-drops-backend-kwarg bug #1561 / #1605 / #1685 / #1795 fixed + # for the other VRT/GPU kwargs. See issue #1810. + missing_sources_passed = ( + missing_sources is not _MISSING_SOURCES_SENTINEL) + _is_vrt_source = ( + isinstance(source, str) and source.lower().endswith('.vrt')) + if missing_sources_passed and not _is_vrt_source: + raise ValueError( + "missing_sources only applies to VRT sources. " + "Pass a .vrt path to enable the VRT pipeline, or drop " + "missing_sources to keep the default GeoTIFF path.") + # VRT files (string paths only -- VRT XML references other files on disk) - if isinstance(source, str) and source.lower().endswith('.vrt'): + if _is_vrt_source: # ``read_vrt`` does not accept ``overview_level`` (the VRT XML # references its own source files; overview selection would need # to apply to each one). Silently dropping the kwarg was the same @@ -872,9 +903,12 @@ def open_geotiff(source: str | BinaryIO, *, "VRT reads do not go through the GPU decoder pipeline; " "drop the kwarg or call read_geotiff_gpu directly on a " ".tif source.") + vrt_kwargs = {} + if missing_sources_passed: + vrt_kwargs['missing_sources'] = missing_sources return read_vrt(source, dtype=dtype, window=window, band=band, name=name, chunks=chunks, gpu=gpu, - max_pixels=max_pixels) + max_pixels=max_pixels, **vrt_kwargs) # File-like buffers don't support the GPU or dask code paths because # those re-open the source by path from worker tasks or device-side diff --git a/xrspatial/geotiff/tests/test_open_geotiff_missing_sources_1810.py b/xrspatial/geotiff/tests/test_open_geotiff_missing_sources_1810.py new file mode 100644 index 00000000..5e23bacd --- /dev/null +++ b/xrspatial/geotiff/tests/test_open_geotiff_missing_sources_1810.py @@ -0,0 +1,146 @@ +"""Regression test for #1810: ``open_geotiff`` did not accept +``missing_sources`` and did not forward it to ``read_vrt`` when the +source was a VRT. + +The api-consistency sweep on 2026-05-13 flagged that ``read_vrt`` had +gained a ``missing_sources='warn'|'raise'`` policy kwarg (#1806) but +the documented dispatcher entry point ``open_geotiff`` did not expose +it. Callers wanting strict failure had to call ``read_vrt`` directly. +Same class of dispatcher-drops-backend-kwarg bug as #1561, #1605, +#1685, #1795. +""" +from __future__ import annotations + +import inspect + +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import ( + GeoTIFFFallbackWarning, + open_geotiff, + to_geotiff, + write_vrt, +) + + +def _write_missing_source_vrt(path): + """Write a VRT pointing at a non-existent source file. + + Mirrors the helper in ``test_vrt_missing_sources_policy_1799.py`` so + the warn-vs-raise branches surface the same way through the + dispatcher as they do via the direct ``read_vrt`` call. + """ + path.write_text( + '\n' + ' \n' + ' \n' + ' missing.tif' + '\n' + ' 1\n' + ' \n' + ' \n' + ' \n' + ' \n' + '\n' + ) + + +def test_open_geotiff_accepts_missing_sources(): + """The signature exposes ``missing_sources`` so IDE autocomplete and + ``inspect.signature`` see the kwarg without parsing the docstring. + """ + sig = inspect.signature(open_geotiff) + assert 'missing_sources' in sig.parameters + + +def test_open_geotiff_vrt_missing_sources_warn(tmp_path): + """``missing_sources='warn'`` matches the historic behavior: + ``GeoTIFFFallbackWarning`` is emitted and ``attrs['vrt_holes']`` is + populated. Going through the dispatcher must produce the same + result as calling ``read_vrt`` directly. + """ + vrt = tmp_path / "tmp_1810_missing_warn.vrt" + _write_missing_source_vrt(vrt) + + with pytest.warns(GeoTIFFFallbackWarning, match="could not be read"): + da = open_geotiff(str(vrt), missing_sources='warn') + + assert 'vrt_holes' in da.attrs + assert da.attrs['vrt_holes'][0]['source'].endswith('missing.tif') + + +def test_open_geotiff_vrt_missing_sources_raise(tmp_path): + """``missing_sources='raise'`` propagates through the dispatcher and + fails fast instead of silently returning a partial mosaic. + """ + vrt = tmp_path / "tmp_1810_missing_raise.vrt" + _write_missing_source_vrt(vrt) + + with pytest.raises((OSError, ValueError)): + open_geotiff(str(vrt), missing_sources='raise') + + +def test_open_geotiff_vrt_missing_sources_validates_policy(tmp_path): + """The validator on ``read_vrt`` fires through the dispatcher path + so callers get the same parameter-named error from either entry + point. + """ + vrt = tmp_path / "tmp_1810_missing_bad_policy.vrt" + _write_missing_source_vrt(vrt) + + with pytest.raises(ValueError, match="missing_sources"): + open_geotiff(str(vrt), missing_sources='ignore') + + +def test_open_geotiff_rejects_missing_sources_on_tif(tmp_path): + """``missing_sources`` is VRT-only; passing it with a ``.tif`` source + raises rather than silently doing nothing. Mirrors the + ``on_gpu_failure`` rejection pattern. + """ + arr = np.arange(16, dtype=np.uint16).reshape(4, 4) + da = xr.DataArray( + arr, dims=['y', 'x'], + coords={'y': np.arange(4, dtype=np.float64), + 'x': np.arange(4, dtype=np.float64)}, + attrs={'crs': 4326}, + ) + tif_path = tmp_path / "single_tile.tif" + to_geotiff(da, str(tif_path)) + + with pytest.raises(ValueError, match="missing_sources only applies"): + open_geotiff(str(tif_path), missing_sources='raise') + + +def test_open_geotiff_vrt_without_missing_sources_kwarg_still_works(tmp_path): + """Omitting ``missing_sources`` is a no-op: behaviour matches the + pre-fix dispatcher (and the ``read_vrt`` default of ``'warn'``). + The existing two-tile VRT exercises the happy path so callers + upgrading from #1806 do not see a regression. + """ + arr_a = np.arange(16, dtype=np.uint16).reshape(4, 4) + da_a = xr.DataArray( + arr_a, dims=['y', 'x'], + coords={'y': np.array([0.5, 1.5, 2.5, 3.5]), + 'x': np.array([0.5, 1.5, 2.5, 3.5])}, + attrs={'crs': 4326}, + ) + tile_a = tmp_path / "tile_a.tif" + to_geotiff(da_a, str(tile_a)) + + arr_b = np.arange(16, 32, dtype=np.uint16).reshape(4, 4) + da_b = xr.DataArray( + arr_b, dims=['y', 'x'], + coords={'y': np.array([0.5, 1.5, 2.5, 3.5]), + 'x': np.array([4.5, 5.5, 6.5, 7.5])}, + attrs={'crs': 4326}, + ) + tile_b = tmp_path / "tile_b.tif" + to_geotiff(da_b, str(tile_b)) + + vrt_path = tmp_path / "mosaic_1810.vrt" + write_vrt(str(vrt_path), [str(tile_a), str(tile_b)]) + + da = open_geotiff(str(vrt_path)) + assert da.shape == (4, 8)