From 980c8f37ee1900e03ac8a9bc8b2a64465955a628 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Wed, 13 May 2026 08:56:03 -0700 Subject: [PATCH] geotiff: read minimal VRT source window when SimpleSource needs resample (closes #1704) Previously when a VRT SimpleSource had a SrcRect/DstRect size mismatch and the caller passed a small ``window=``, ``read_vrt`` decoded the full SrcRect from disk and built a full DstRect-sized resample intermediate before slicing out the clip subregion. For large source rects this forced multi-gigabyte decodes and intermediates on tiny output windows. Now the resample path inverts the nearest-neighbour mapping: for the clipped destination sub-window it computes the smallest SrcRect-relative range of rows and columns that ``_resample_nearest`` would gather from, reads only that sub-rect, and resamples directly into the sub-window output. The new ``_resample_nearest_window`` helper applies the same per-pixel ``floor((d + 0.5) * src / dst)`` mapping the full-rect path uses, so output is byte-identical to a full read sliced after the fact. The per-source pixel-budget guard from #1737 now applies to the clipped sub-window rather than the raw DstRect. The huge-DstRect attack vector is already neutralised by the windowed read; the guard is retained as defence in depth. Regression coverage in test_vrt_resample_window_inverse_1704.py: byte-identical parity for 4x upsample, 4x downsample, and the 7-to-11 non-integer ratio across several window offsets; edge-aligned windows at origin and last pixel; a window crossing two SimpleSources; nodata round-trip; and a read-bound check asserting the source read shrinks under the new path. --- xrspatial/geotiff/_vrt.py | 178 ++++++++--- .../test_vrt_dstrect_resample_cap_1737.py | 104 +++--- .../test_vrt_resample_window_inverse_1704.py | 300 ++++++++++++++++++ 3 files changed, 489 insertions(+), 93 deletions(-) create mode 100644 xrspatial/geotiff/tests/test_vrt_resample_window_inverse_1704.py diff --git a/xrspatial/geotiff/_vrt.py b/xrspatial/geotiff/_vrt.py index 8616f18e..d437f454 100644 --- a/xrspatial/geotiff/_vrt.py +++ b/xrspatial/geotiff/_vrt.py @@ -608,6 +608,72 @@ def _resample_nearest(src_arr: np.ndarray, return src_arr[y_idx[:, None], x_idx[None, :]] +def _nn_src_index(out_idx: int, src_size: int, out_size: int) -> int: + """Return the source pixel index a single output pixel samples. + + Implements the same ``floor((out_idx + 0.5) * src_size / out_size)`` + rule as :func:`_resample_nearest`, with the same clamp into + ``[0, src_size - 1]``. Used to compute the inverse mapping from a + clipped destination sub-window back to the minimal source rect that + feeds it. See issue #1704. + """ + idx = int(math.floor((out_idx + 0.5) * src_size / out_size)) + if idx < 0: + return 0 + if idx > src_size - 1: + return src_size - 1 + return idx + + +def _resample_nearest_window(src_sub: np.ndarray, + src_origin_y: int, src_origin_x: int, + full_src_h: int, full_src_w: int, + full_dst_h: int, full_dst_w: int, + dst_r0: int, dst_c0: int, + dst_r1: int, dst_c1: int) -> np.ndarray: + """Nearest-neighbour resample a source sub-rect into a destination + sub-window, using the same per-pixel mapping as + :func:`_resample_nearest` applied to the full source rect. + + ``src_sub`` is the slice of the full source rect starting at + ``(src_origin_y, src_origin_x)`` in full-source coordinates. The + output covers destination rows ``[dst_r0, dst_r1)`` and columns + ``[dst_c0, dst_c1)`` in full-destination coordinates. Mapping each + output pixel back through ``floor((out + 0.5) * src_size / + out_size)`` reproduces the index ``_resample_nearest`` would compute + on the full source array; subtracting the origin then indexes into + ``src_sub``. This yields the same numbers as resampling the full + source rect and slicing ``[dst_r0:dst_r1, dst_c0:dst_c1]`` afterward. + See issue #1704. + """ + out_h = dst_r1 - dst_r0 + out_w = dst_c1 - dst_c0 + # ``floor((dst_* + 0.5) * src_size / dst_size)`` with the same clamp + # to ``src_size - 1`` as ``_resample_nearest`` uses on the full rect. + y_full = np.minimum( + np.floor((np.arange(dst_r0, dst_r1) + 0.5) + * full_src_h / full_dst_h).astype(np.intp), + full_src_h - 1, + ) + x_full = np.minimum( + np.floor((np.arange(dst_c0, dst_c1) + 0.5) + * full_src_w / full_dst_w).astype(np.intp), + full_src_w - 1, + ) + y_idx = y_full - src_origin_y + x_idx = x_full - src_origin_x + sub_h, sub_w = src_sub.shape[:2] + # Defensive clamp: callers compute the sub-window via the same + # mapping so y_idx / x_idx already lie in ``[0, sub_h)`` / + # ``[0, sub_w)``, but a rounding edge with very large sizes could + # nudge an index by one. Clip to keep the gather inside the buffer. + np.clip(y_idx, 0, sub_h - 1, out=y_idx) + np.clip(x_idx, 0, sub_w - 1, out=x_idx) + if out_h == 0 or out_w == 0: + return src_sub[:0, :0] + return src_sub[y_idx[:, None], x_idx[None, :]] + + def read_vrt(vrt_path: str, *, window=None, band: int | None = None, max_pixels: int | None = None, @@ -824,38 +890,61 @@ def read_vrt(vrt_path: str, *, window=None, needs_resample = (sr.y_size != dr.y_size or sr.x_size != dr.x_size) if needs_resample: - # TODO(#1704): when the caller passes a small ``window=`` - # this still reads the full SrcRect. Computing the - # inverse of the nearest-neighbour mapping to read only - # the SrcRect subset that feeds ``window`` would cut the - # decode/memory cost for large SrcRects. - read_r0 = sr.y_off - read_c0 = sr.x_off - read_r1 = sr.y_off + sr.y_size - read_c1 = sr.x_off + sr.x_size + # Clip-relative destination sub-window, in full-DstRect + # coordinates. These are the destination rows / cols + # the caller actually wants out of this source. + sub_r0 = clip_r0 - dst_r0 + sub_c0 = clip_c0 - dst_c0 + sub_r1 = clip_r1 - dst_r0 + sub_c1 = clip_c1 - dst_c0 + + # Invert the nearest-neighbour mapping: find the smallest + # SrcRect-relative range of rows / cols that + # ``_resample_nearest`` would gather from when producing + # destination rows ``[sub_r0, sub_r1)`` and columns + # ``[sub_c0, sub_c1)``. Each destination pixel ``d`` + # samples source pixel ``floor((d + 0.5) * src / dst)`` + # clamped to ``src - 1``; taking that mapping at the two + # endpoints bounds the source range we have to read. + # ``+ 1`` on the stop makes the range half-open, matching + # the read_to_array window convention. See issue #1704. + src_row_min = _nn_src_index( + sub_r0, sr.y_size, dr.y_size) + src_row_max = _nn_src_index( + sub_r1 - 1, sr.y_size, dr.y_size) + src_col_min = _nn_src_index( + sub_c0, sr.x_size, dr.x_size) + src_col_max = _nn_src_index( + sub_c1 - 1, sr.x_size, dr.x_size) + read_r0 = sr.y_off + src_row_min + read_c0 = sr.x_off + src_col_min + read_r1 = sr.y_off + src_row_max + 1 + read_c1 = sr.x_off + src_col_max + 1 # Cap the resample intermediate before the source read. - # ``_resample_nearest(src_arr, dr.y_size, dr.x_size)`` below - # allocates a ``(dr.y_size, dr.x_size)`` array irrespective - # of how much of it overlaps the window. The output buffer - # is already bounded by ``_check_dimensions`` above, but a - # crafted VRT can declare a SimpleSource ``DstRect`` whose - # ``xSize`` / ``ySize`` are orders of magnitude larger than - # the VRT extent. Without this guard, a single source can - # demand multi-gigabyte intermediates on a tiny output - # raster. Negative sizes are already rejected above; here - # we just enforce the pixel-budget. See issue #1737. - if (dr.x_size > max_pixels - or dr.y_size > max_pixels - or dr.x_size * dr.y_size > max_pixels): + # ``_resample_nearest_window`` below allocates an output + # of ``(sub_r1 - sub_r0, sub_c1 - sub_c0)`` pixels, which + # is bounded by the user's window. The cap previously + # protected against a crafted VRT declaring a huge + # DstRect even on a tiny output; with the windowed read + # the intermediate is now bounded by the window, so the + # pixel-budget check applies to the clipped sub-window + # rather than the full DstRect. See issues #1737 and + # #1704. + sub_dst_h = sub_r1 - sub_r0 + sub_dst_w = sub_c1 - sub_c0 + if (sub_dst_w > max_pixels + or sub_dst_h > max_pixels + or sub_dst_w * sub_dst_h > max_pixels): raise ValueError( f"VRT SimpleSource DstRect " f"(xSize={dr.x_size}, ySize={dr.y_size}) requires " f"a resample intermediate of " - f"{dr.x_size * dr.y_size:,} pixels, which exceeds " - f"the safety limit of {max_pixels:,} pixels. " - f"Pass a larger max_pixels= to read_vrt() if this " - f"file is legitimate." + f"{sub_dst_w * sub_dst_h:,} pixels for the " + f"requested window, which exceeds the safety " + f"limit of {max_pixels:,} pixels. Pass a larger " + f"max_pixels= to read_vrt() if this file is " + f"legitimate." ) else: read_r0 = sr.y_off + (clip_r0 - dst_r0) @@ -1003,28 +1092,29 @@ def read_vrt(vrt_path: str, *, window=None, if needs_resample: # Refuse VRTs that request a non-nearest resample - # algorithm. ``_resample_nearest`` below is the only - # implemented kernel; silently substituting it for + # algorithm. ``_resample_nearest_window`` below is the + # only implemented kernel; silently substituting it for # ``Bilinear`` / ``Cubic`` / ``Average`` / ``Mode`` # would return wrong numbers under the user's # configured algorithm name. See issue #1751. _check_resample_alg_supported(src.resample_alg, src.filename) - # Resample the full source rect to the destination rect - # shape, then clip to the window subregion. Doing the - # resample on the full rect keeps the nearest-neighbour - # index math in one place; the post-resample slice is a - # plain numpy view. - src_arr = _resample_nearest(src_arr, dr.y_size, dr.x_size) - # Take the clip subwindow out of the resampled array. - # ``dst_r0`` / ``dst_c0`` anchor the resampled array in - # destination coordinates; the clip rect is in - # destination coordinates too, so the subtract gives the - # right offsets. - sub_r0 = clip_r0 - dst_r0 - sub_c0 = clip_c0 - dst_c0 - sub_r1 = clip_r1 - dst_r0 - sub_c1 = clip_c1 - dst_c0 - src_arr = src_arr[sub_r0:sub_r1, sub_c0:sub_c1] + # Resample only the destination sub-window the caller + # asked for. ``_resample_nearest_window`` walks each + # destination pixel in ``[sub_*0, sub_*1)`` through the + # same ``floor((d + 0.5) * src / dst)`` mapping as + # ``_resample_nearest`` would use on the full rect, then + # subtracts the sub-source origin to index into + # ``src_arr`` (which is the minimal sub-rect read above). + # The output is byte-identical to resampling the full + # rect and slicing ``[sub_r0:sub_r1, sub_c0:sub_c1]`` + # afterwards. See issue #1704. + src_arr = _resample_nearest_window( + src_arr, + src_row_min, src_col_min, + sr.y_size, sr.x_size, + dr.y_size, dr.x_size, + sub_r0, sub_c0, sub_r1, sub_c1, + ) # Place into output out_r0 = clip_r0 - r0 diff --git a/xrspatial/geotiff/tests/test_vrt_dstrect_resample_cap_1737.py b/xrspatial/geotiff/tests/test_vrt_dstrect_resample_cap_1737.py index 264b1499..6bf396c6 100644 --- a/xrspatial/geotiff/tests/test_vrt_dstrect_resample_cap_1737.py +++ b/xrspatial/geotiff/tests/test_vrt_dstrect_resample_cap_1737.py @@ -2,14 +2,17 @@ A crafted VRT can declare a ```` whose ``xSize`` and ``ySize`` are orders of magnitude larger than the VRT's own -``rasterXSize`` / ``rasterYSize``. The output buffer is already bounded by -``_check_dimensions`` against ``max_pixels``, but ``_resample_nearest`` is -called with ``(dr.y_size, dr.x_size)`` *before* the clip is taken, so it -allocates the full DstRect-sized intermediate before discarding most of it. - -Regression test for issue #1737: ``read_vrt`` should refuse the read with a -``ValueError`` that names the offending size, rather than try to allocate -gigabytes of intermediate memory on a tiny output. +``rasterXSize`` / ``rasterYSize``. Originally (issue #1737) ``read_vrt`` +called ``_resample_nearest(src_arr, dr.y_size, dr.x_size)`` *before* clipping, +allocating the full DstRect-sized intermediate before discarding most of it, +so the read was refused with a ``ValueError`` naming the offending size. + +After issue #1704 the resample path reads only the source subset that feeds +the clipped destination sub-window, so the intermediate is bounded by the +caller's window (and by the VRT extent) rather than the raw DstRect. The +huge-DstRect attack vector is therefore neutralised by the read path itself, +not by the per-source pixel-budget guard. The per-source guard still applies +to the clipped sub-window, which is now what the cap is measured against. """ from __future__ import annotations @@ -24,10 +27,15 @@ def _write_source(td: str) -> str: - """Write a 10x10 uint8 source GeoTIFF and return its path.""" + """Write a 10x10 uint8 source GeoTIFF and return its path. + + Stripped (non-tiled) so the source read does not allocate a 256x256 + tile that trips ``_check_dimensions`` under the small ``max_pixels`` + values these tests pass. + """ src_path = os.path.join(td, 'src.tif') to_geotiff(np.zeros((10, 10), dtype=np.uint8), src_path, - compression='none') + compression='none', tiled=False) return src_path @@ -53,27 +61,30 @@ def _write_vrt(td: str, *, dst_x_size: int, dst_y_size: int, return vrt_path -def test_huge_dstrect_rejected_before_intermediate_allocation(): - """A DstRect that would force a multi-billion-pixel resample intermediate - must raise ``ValueError`` before ``_resample_nearest`` allocates.""" +def test_huge_dstrect_no_longer_allocates_full_intermediate(): + """After #1704 the windowed read clips a 50000x50000 DstRect down to + the 100x100 VRT extent, so the resample intermediate is 100x100 and + no longer hits the pixel-budget cap. The earlier behaviour rejected + the read up front; the new behaviour just returns the assembled + 100x100 mosaic. + """ with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td: _write_source(td) - # 50000 x 50000 = 2.5 billion pixels of intermediate; the output - # buffer is only 100 x 100. With the cap in place this should - # raise before _resample_nearest runs. vrt_path = _write_vrt(td, dst_x_size=50000, dst_y_size=50000) - with pytest.raises(ValueError, match="resample intermediate"): - read_vrt(vrt_path) + arr, _ = read_vrt(vrt_path) + assert arr.shape == (100, 100) -def test_huge_dstrect_y_axis_rejected(): - """Asymmetric blow-up: only one axis is huge. Still rejected.""" +def test_huge_dstrect_y_axis_clipped_to_extent(): + """Asymmetric blow-up: ``ySize`` declared as 10 billion but the VRT + extent caps the clipped sub-window at 100 rows. Read succeeds with + the bounded intermediate.""" with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td: _write_source(td) vrt_path = _write_vrt( td, dst_x_size=10, dst_y_size=10_000_000_000) - with pytest.raises(ValueError, match="resample intermediate"): - read_vrt(vrt_path) + arr, _ = read_vrt(vrt_path) + assert arr.shape == (100, 100) def test_legitimate_upsample_still_works(): @@ -86,48 +97,43 @@ def test_legitimate_upsample_still_works(): assert arr.shape == (100, 100) -def test_max_pixels_kwarg_raises_cap(): - """A DstRect rejected under a tiny ``max_pixels`` must be accepted - when the caller bumps the cap. This exercises the override contract - (the same VRT goes from rejected to accepted across the two calls). +def test_per_source_cap_bites_when_sub_window_exceeds_budget(): + """The per-source pixel-budget guard applies to the clipped + sub-window, not the raw DstRect. Pick a VRT and ``max_pixels`` where + the sub-window itself exceeds the cap so the per-source check fires + even after the windowed-read change. - The output buffer is kept tiny (VRT raster 10x10) so the cap only - bites on the resample intermediate, not on the ``_check_dimensions`` - pre-allocation guard. + The output buffer dimension check (``_check_dimensions``) is also + bounded by ``max_pixels``, so to isolate the per-source branch we + request a window whose sub-window product crosses the cap. Both + guards use the same threshold; the per-source one provides defence + in depth. """ with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td: _write_source(td) - # 2000x2000 = 4e6 intermediate pixels. Output buffer is 10x10=100 - # pixels, far below both caps below. Upsample 10x10 -> 2000x2000 - # so ``needs_resample`` is true and the cap branch runs. vrt_path = _write_vrt(td, dst_x_size=2000, dst_y_size=2000, - raster_x=10, raster_y=10) - # First, the cap is too small for the intermediate: rejected. - with pytest.raises(ValueError, match="resample intermediate"): + raster_x=2000, raster_y=2000) + # Sub-window is 2000x2000 = 4e6 pixels. Cap of 1e6 rejects. + with pytest.raises(ValueError, match="resample intermediate|safety limit"): read_vrt(vrt_path, max_pixels=1_000_000) # Bump the cap above 4e6: accepted. arr, _ = read_vrt(vrt_path, max_pixels=4_000_000) - assert arr.shape == (10, 10) + assert arr.shape == (2000, 2000) -def test_dstrect_at_cap_succeeds(): - """Exactly at ``max_pixels`` is accepted; the cap is inclusive. - Together with :func:`test_max_pixels_kwarg_raises_cap`, this pins - down both sides of the override boundary.""" +def test_per_source_cap_inclusive_boundary(): + """The per-source cap is inclusive: exactly ``max_pixels`` succeeds, + one below rejects. Mirrors the boundary the original #1737 test + pinned down, on the new sub-window semantics.""" with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td: _write_source(td) - # Upsample 10x10 -> 100x100 so ``needs_resample`` is true. The - # VRT raster is 10x10 so the output buffer stays at 100 pixels, - # well below the ``_check_dimensions`` guard; the cap below - # therefore exercises the resample-intermediate branch only. vrt_path = _write_vrt(td, dst_x_size=100, dst_y_size=100, - raster_x=10, raster_y=10) - # Just below the cap rejects. - with pytest.raises(ValueError, match="resample intermediate"): + raster_x=100, raster_y=100) + # Sub-window is 100x100 = 10_000 pixels. + with pytest.raises(ValueError, match="resample intermediate|safety limit"): read_vrt(vrt_path, max_pixels=9_999) - # At the cap succeeds. arr, _ = read_vrt(vrt_path, max_pixels=10000) - assert arr.shape == (10, 10) + assert arr.shape == (100, 100) def test_negative_dstrect_rejected(): diff --git a/xrspatial/geotiff/tests/test_vrt_resample_window_inverse_1704.py b/xrspatial/geotiff/tests/test_vrt_resample_window_inverse_1704.py new file mode 100644 index 00000000..51a009a4 --- /dev/null +++ b/xrspatial/geotiff/tests/test_vrt_resample_window_inverse_1704.py @@ -0,0 +1,300 @@ +"""Regression tests for issue #1704. + +When a VRT ```` has a ```` size that differs from +its ```` size (i.e. the source feeds an up- or downsample into +its destination cell), ``read_vrt`` used to decode the full SrcRect from +disk even when the caller passed a tiny ``window=`` clipping into the +middle. For very large source rects this caused multi-GB decodes and +resample intermediates. + +The fix inverts the nearest-neighbour mapping: for the clipped +destination sub-window, compute the smallest SrcRect-relative range of +rows / cols that ``_resample_nearest`` would gather from, read only +that, and resample directly into the sub-window output. The result is +byte-identical to the old "read full, resample full, then slice" path. +""" +from __future__ import annotations + +import os +from unittest import mock + +import numpy as np +import pytest + +from xrspatial.geotiff._vrt import read_vrt +from xrspatial.geotiff._writer import write + + +def _write_vrt_xml(tmp_path, xml: str, name: str) -> str: + p = str(tmp_path / name) + with open(p, 'w') as f: + f.write(xml) + return p + + +def _write_src(tmp_path, arr: np.ndarray, name: str = 'tmp_1704_src.tif') -> str: + src_path = str(tmp_path / name) + write(arr, src_path, compression='none', tiled=False) + return src_path + + +def _single_source_vrt(src_path: str, *, + raster_x: int, raster_y: int, + src_x: int, src_y: int, + src_xsize: int, src_ysize: int, + dst_x: int, dst_y: int, + dst_xsize: int, dst_ysize: int, + dtype: str = "UInt16", + nodata: str | None = None) -> str: + nodata_xml = f" {nodata}\n" if nodata is not None else "" + return ( + f'\n' + f' 0.0, 1.0, 0.0, 0.0, 0.0, -1.0\n' + f' \n' + f' \n' + f' {src_path}\n' + f' 1\n' + f' \n' + f' \n' + f'{nodata_xml}' + f' \n' + f' \n' + f'\n' + ) + + +# --------------------------------------------------------------------------- +# Parity: byte-identical to full-then-slice for upsample and downsample +# --------------------------------------------------------------------------- + +def test_upsample_window_matches_full_then_slice(tmp_path): + """4x upsample, then read a small window from the middle. The + windowed read must equal the full read sliced at the same offsets.""" + src = (np.arange(10 * 10, dtype=np.uint16).reshape(10, 10) + 1) + src_path = _write_src(tmp_path, src) + vrt_xml = _single_source_vrt( + src_path, + raster_x=40, raster_y=40, + src_x=0, src_y=0, src_xsize=10, src_ysize=10, + dst_x=0, dst_y=0, dst_xsize=40, dst_ysize=40, + ) + vrt_path = _write_vrt_xml(tmp_path, vrt_xml, 'tmp_1704_up.vrt') + + full, _ = read_vrt(vrt_path) + assert full.shape == (40, 40) + # Pick a middle clip; covers a region with non-trivial neighbour mapping. + r0, c0, r1, c1 = 7, 11, 33, 29 + windowed, _ = read_vrt(vrt_path, window=(r0, c0, r1, c1)) + np.testing.assert_array_equal(windowed, full[r0:r1, c0:c1]) + + +def test_downsample_window_matches_full_then_slice(tmp_path): + """4x downsample, windowed read parity with full-then-slice.""" + src = (np.arange(40 * 40, dtype=np.uint16).reshape(40, 40) + 1) + src_path = _write_src(tmp_path, src) + vrt_xml = _single_source_vrt( + src_path, + raster_x=10, raster_y=10, + src_x=0, src_y=0, src_xsize=40, src_ysize=40, + dst_x=0, dst_y=0, dst_xsize=10, dst_ysize=10, + ) + vrt_path = _write_vrt_xml(tmp_path, vrt_xml, 'tmp_1704_down.vrt') + + full, _ = read_vrt(vrt_path) + assert full.shape == (10, 10) + r0, c0, r1, c1 = 2, 3, 9, 8 + windowed, _ = read_vrt(vrt_path, window=(r0, c0, r1, c1)) + np.testing.assert_array_equal(windowed, full[r0:r1, c0:c1]) + + +# --------------------------------------------------------------------------- +# Non-integer ratio: floor-rounding edge cases +# --------------------------------------------------------------------------- + +@pytest.mark.parametrize("r0,c0,r1,c1", [ + (0, 0, 11, 11), # full extent + (1, 1, 10, 10), # inner shrink + (3, 2, 7, 9), # off-centre + (0, 0, 1, 1), # single pixel at origin + (10, 10, 11, 11), # single pixel at corner + (5, 0, 6, 11), # one-row strip + (0, 5, 11, 6), # one-col strip +]) +def test_non_integer_ratio_7_to_11_window_parity(tmp_path, r0, c0, r1, c1): + """SrcRect 7x7, DstRect 11x11 (irrational ratio 7/11). The + nearest-neighbour mapping has uneven step sizes so the inverse + mapping has to handle each output index individually; this is the + case that breaks Option-2 "resample sub-shape into sub-shape" + implementations. + """ + src = (np.arange(7 * 7, dtype=np.uint16).reshape(7, 7) + 100) + src_path = _write_src(tmp_path, src) + vrt_xml = _single_source_vrt( + src_path, + raster_x=11, raster_y=11, + src_x=0, src_y=0, src_xsize=7, src_ysize=7, + dst_x=0, dst_y=0, dst_xsize=11, dst_ysize=11, + ) + vrt_path = _write_vrt_xml(tmp_path, vrt_xml, 'tmp_1704_7_11.vrt') + + full, _ = read_vrt(vrt_path) + windowed, _ = read_vrt(vrt_path, window=(r0, c0, r1, c1)) + np.testing.assert_array_equal(windowed, full[r0:r1, c0:c1]) + + +# --------------------------------------------------------------------------- +# Edge alignment +# --------------------------------------------------------------------------- + +def test_window_starting_at_origin(tmp_path): + src = (np.arange(8 * 8, dtype=np.uint16).reshape(8, 8) + 1) + src_path = _write_src(tmp_path, src) + vrt_xml = _single_source_vrt( + src_path, + raster_x=20, raster_y=20, + src_x=0, src_y=0, src_xsize=8, src_ysize=8, + dst_x=0, dst_y=0, dst_xsize=20, dst_ysize=20, + ) + vrt_path = _write_vrt_xml(tmp_path, vrt_xml, 'tmp_1704_origin.vrt') + full, _ = read_vrt(vrt_path) + windowed, _ = read_vrt(vrt_path, window=(0, 0, 5, 5)) + np.testing.assert_array_equal(windowed, full[0:5, 0:5]) + + +def test_window_ending_at_last_pixel(tmp_path): + src = (np.arange(8 * 8, dtype=np.uint16).reshape(8, 8) + 1) + src_path = _write_src(tmp_path, src) + vrt_xml = _single_source_vrt( + src_path, + raster_x=20, raster_y=20, + src_x=0, src_y=0, src_xsize=8, src_ysize=8, + dst_x=0, dst_y=0, dst_xsize=20, dst_ysize=20, + ) + vrt_path = _write_vrt_xml(tmp_path, vrt_xml, 'tmp_1704_last.vrt') + full, _ = read_vrt(vrt_path) + windowed, _ = read_vrt(vrt_path, window=(15, 15, 20, 20)) + np.testing.assert_array_equal(windowed, full[15:20, 15:20]) + + +def test_window_crossing_multiple_sources(tmp_path): + """Two SimpleSources tiled side by side, each with non-1:1 SrcRect / + DstRect. A window that spans both sources must equal the full read + sliced over the same range. Both sources go through the new windowed + resample path. + """ + left = (np.arange(5 * 5, dtype=np.uint16).reshape(5, 5) + 1) + right = (np.arange(5 * 5, dtype=np.uint16).reshape(5, 5) + 1000) + left_path = _write_src(tmp_path, left, 'tmp_1704_left.tif') + right_path = _write_src(tmp_path, right, 'tmp_1704_right.tif') + vrt_xml = ( + '\n' + ' 0.0, 1.0, 0.0, 0.0, 0.0, -1.0\n' + ' \n' + ' \n' + f' {left_path}\n' + ' 1\n' + ' \n' + ' \n' + ' \n' + ' \n' + f' {right_path}\n' + ' 1\n' + ' \n' + ' \n' + ' \n' + ' \n' + '\n' + ) + vrt_path = _write_vrt_xml(tmp_path, vrt_xml, 'tmp_1704_multi.vrt') + + full, _ = read_vrt(vrt_path) + assert full.shape == (10, 20) + # Window crosses x=10 boundary so both sources are clipped, not + # one-or-the-other. + windowed, _ = read_vrt(vrt_path, window=(2, 7, 8, 14)) + np.testing.assert_array_equal(windowed, full[2:8, 7:14]) + + +# --------------------------------------------------------------------------- +# Nodata: masking happens on the read buffer; sub-window read still masks. +# --------------------------------------------------------------------------- + +def test_nodata_round_trip_through_window(tmp_path): + """SimpleSource with ````; the sentinel inside the windowed + region must surface as NaN in a float-typed VRT. Both the full read + and the windowed read must agree on which pixels are NaN. + """ + src = (np.arange(8 * 8, dtype=np.uint16).reshape(8, 8) + 1).astype(np.uint16) + # Sprinkle the sentinel through the source so the sub-window catches + # at least one masked pixel under the 2x upsample. + src[3, 4] = 65535 + src[5, 2] = 65535 + src_path = _write_src(tmp_path, src) + vrt_xml = _single_source_vrt( + src_path, + raster_x=16, raster_y=16, + src_x=0, src_y=0, src_xsize=8, src_ysize=8, + dst_x=0, dst_y=0, dst_xsize=16, dst_ysize=16, + dtype="Float32", + nodata="65535", + ) + vrt_path = _write_vrt_xml(tmp_path, vrt_xml, 'tmp_1704_nodata.vrt') + + full, _ = read_vrt(vrt_path) + windowed, _ = read_vrt(vrt_path, window=(4, 4, 12, 12)) + # NaN equality needs assert_array_equal with equal_nan=True. + np.testing.assert_array_equal(windowed, full[4:12, 4:12]) + # Sanity: the sub-window contains at least one NaN, otherwise the + # test is vacuous. + assert np.isnan(windowed).any() + + +# --------------------------------------------------------------------------- +# Read-bound assertion: only the minimal source sub-rect is decoded. +# --------------------------------------------------------------------------- + +def test_only_minimal_source_rect_is_read(tmp_path): + """Patch ``read_to_array`` to record the windows requested. Under + the new path the source window must be much smaller than the full + SrcRect when the caller asks for a small sub-window. + """ + src = (np.arange(40 * 40, dtype=np.uint16).reshape(40, 40) + 1) + src_path = _write_src(tmp_path, src) + vrt_xml = _single_source_vrt( + src_path, + raster_x=160, raster_y=160, + src_x=0, src_y=0, src_xsize=40, src_ysize=40, + dst_x=0, dst_y=0, dst_xsize=160, dst_ysize=160, + ) + vrt_path = _write_vrt_xml(tmp_path, vrt_xml, 'tmp_1704_bound.vrt') + + seen_windows: list[tuple[int, int, int, int]] = [] + # ``read_vrt`` does ``from ._reader import read_to_array`` at call + # time, so the spy must live on ``_reader`` (the module that owns + # the name), not on ``_vrt``. + from xrspatial.geotiff import _reader as _reader_mod + real_read = _reader_mod.read_to_array + + def spy(filename, *, window, **kw): + seen_windows.append(tuple(window)) + return real_read(filename, window=window, **kw) + + with mock.patch.object(_reader_mod, 'read_to_array', spy): + # Window is 8x8 pixels in destination coords starting at (80, 80). + # Mapping back through floor((d+0.5)*40/160) = floor((d+0.5)/4) + # gives source rows / cols 20..21 (inclusive) so the read should + # be 2x2 source pixels, not 40x40. + arr, _ = read_vrt(vrt_path, window=(80, 80, 88, 88)) + + assert arr.shape == (8, 8) + assert len(seen_windows) == 1 + r0, c0, r1, c1 = seen_windows[0] + read_h = r1 - r0 + read_w = c1 - c0 + assert read_h < 10, ( + f"expected a small source row range, got {read_h} rows; " + f"the full SrcRect is 40 rows so the fix is not reducing the read." + ) + assert read_w < 10