diff --git a/xrspatial/geotiff/_reader.py b/xrspatial/geotiff/_reader.py
index b3da32bd..c174c3fc 100644
--- a/xrspatial/geotiff/_reader.py
+++ b/xrspatial/geotiff/_reader.py
@@ -1560,9 +1560,14 @@ def _read_tiles(data: bytes, ifd: IFD, header: TIFFHeader,
raise ValueError(
f"Invalid tile dimensions: TileWidth={tw}, TileLength={th}")
- # Reject crafted tile dims that would force huge per-tile allocations.
- # A single tile's decoded bytes must also fit under the pixel budget.
- _check_dimensions(tw, th, samples, max_pixels)
+ # Reject crafted tile dims (e.g. TileWidth = 2**31). This guards the
+ # TIFF header against malformed values; it is not the caller's output
+ # budget. The output-window check below uses ``max_pixels`` and is
+ # what enforces the user's per-call memory cap. The source-read path
+ # under ``read_vrt`` (#1796) relies on that output check to honour a
+ # small caller ``max_pixels`` against a normal-tile source; see
+ # #1823.
+ _check_dimensions(tw, th, samples, MAX_PIXELS_DEFAULT)
# Per-tile compressed-byte cap (issue #1664). Same env var as the
# HTTP path. mmap slicing is bounded by the file size, but the slice
@@ -2016,10 +2021,14 @@ def _fetch_decode_cog_http_tiles(
# A windowed HTTP read of a multi-billion-pixel COG only allocates
# the window, so capping the full image would reject legitimate
# tiled reads. The full-image cap still applies for whole-file
- # reads (window is None). The single-tile budget always applies.
+ # reads (window is None). The per-tile dim check below guards the
+ # TIFF header against absurd ``TileWidth`` / ``TileLength`` values
+ # (e.g. 2**31) and uses ``MAX_PIXELS_DEFAULT`` so a caller's small
+ # ``max_pixels`` -- intended as an output-window budget -- does not
+ # reject normal 256x256 tiles. See #1823.
if window is None:
_check_dimensions(width, height, samples, max_pixels)
- _check_dimensions(tw, th, samples, max_pixels)
+ _check_dimensions(tw, th, samples, MAX_PIXELS_DEFAULT)
# Reject malformed TIFFs whose declared tile grid exceeds the supplied
# TileOffsets length. See issue #1219.
diff --git a/xrspatial/geotiff/_vrt.py b/xrspatial/geotiff/_vrt.py
index 8616f18e..cf23cdec 100644
--- a/xrspatial/geotiff/_vrt.py
+++ b/xrspatial/geotiff/_vrt.py
@@ -65,6 +65,53 @@ def _codec_decode_exceptions() -> tuple[type[BaseException], ...]:
_ALLOWED_ROOTS_ENV = 'XRSPATIAL_VRT_ALLOWED_ROOTS'
+# Cap on the VRT XML file read. VRTs are XML metadata only (pixel data lives
+# in source TIFFs); a 50k-source VRT is around 25 MB, so 64 MiB is a safe
+# default that still rejects pathological inputs without scanning the whole
+# file into memory.
+_MAX_XML_BYTES_ENV = 'XRSPATIAL_VRT_MAX_XML_BYTES'
+_DEFAULT_MAX_XML_BYTES = 64 * 1024 * 1024
+
+
+def _get_vrt_max_xml_bytes() -> int:
+ """Return the cap on VRT XML file size, in bytes."""
+ raw = os.environ.get(_MAX_XML_BYTES_ENV)
+ if raw is None or raw == '':
+ return _DEFAULT_MAX_XML_BYTES
+ try:
+ value = int(raw)
+ except (TypeError, ValueError):
+ raise ValueError(
+ f"{_MAX_XML_BYTES_ENV} must be a positive integer, got "
+ f"{raw!r}")
+ if value <= 0:
+ raise ValueError(
+ f"{_MAX_XML_BYTES_ENV} must be a positive integer, got "
+ f"{value}")
+ return value
+
+
+def _read_vrt_xml(vrt_path: str) -> str:
+ """Read a VRT XML file with a bounded total size."""
+ cap = _get_vrt_max_xml_bytes()
+ chunks = []
+ total = 0
+ with open(vrt_path, 'rb') as f:
+ while True:
+ remaining = cap + 1 - total
+ chunk = f.read(min(65536, remaining))
+ if not chunk:
+ break
+ total += len(chunk)
+ if total > cap:
+ raise ValueError(
+ f"VRT XML at {vrt_path!r} exceeds the "
+ f"{cap:,}-byte cap. Raise the cap by setting "
+ f"{_MAX_XML_BYTES_ENV} if this file is legitimate.")
+ chunks.append(chunk)
+ return b''.join(chunks).decode('utf-8')
+
+
def _allowed_source_roots() -> list[str]:
"""Return the operator-supplied allowlist of trusted source roots.
@@ -637,8 +684,7 @@ def read_vrt(vrt_path: str, *, window=None,
"""
from ._reader import PixelSafetyLimitError, read_to_array
- with open(vrt_path, 'r') as f:
- xml_str = f.read()
+ xml_str = _read_vrt_xml(vrt_path)
vrt_dir = os.path.dirname(os.path.abspath(vrt_path))
vrt = parse_vrt(xml_str, vrt_dir)
diff --git a/xrspatial/geotiff/tests/test_vrt_source_tile_check_1823.py b/xrspatial/geotiff/tests/test_vrt_source_tile_check_1823.py
new file mode 100644
index 00000000..b02dd2fb
--- /dev/null
+++ b/xrspatial/geotiff/tests/test_vrt_source_tile_check_1823.py
@@ -0,0 +1,113 @@
+"""Regression tests for #1823.
+
+PR #1803 forwarded the caller's ``max_pixels`` to ``read_to_array`` inside
+the VRT source loop so that a tiny VRT output could not force a huge
+source decode (#1796). The output-window check enforces that. A separate
+per-tile dimension check was incorrectly using the same ``max_pixels``
+value, so a caller setting ``max_pixels`` as an output budget (e.g.
+10,000) would also fail the per-tile sanity check on every normal source
+whose default tile size is 256x256 (= 65,536 pixels).
+
+The #1796 protection remains: the output-window check still catches a
+tiny VRT output that asks for a large source window.
+"""
+from __future__ import annotations
+
+import os
+import tempfile
+
+import numpy as np
+import pytest
+
+from xrspatial.geotiff import to_geotiff
+from xrspatial.geotiff._reader import PixelSafetyLimitError
+from xrspatial.geotiff._vrt import read_vrt
+
+
+def _write_normal_tile_source(td: str) -> str:
+ """10x10 uint8 source -- ``to_geotiff`` pads to a 256x256 tile."""
+ src = os.path.join(td, 'src.tif')
+ to_geotiff(np.zeros((10, 10), dtype=np.uint8), src, compression='none')
+ return src
+
+
+def _write_vrt(td: str, *, dst_x_size: int, dst_y_size: int,
+ raster_x: int = 100, raster_y: int = 100,
+ src_x_size: int = 10, src_y_size: int = 10) -> str:
+ vrt = os.path.join(td, 'mosaic.vrt')
+ xml = (
+ f'\n'
+ f' \n'
+ f' \n'
+ f' src.tif\n'
+ f' 1\n'
+ f' \n'
+ f' \n'
+ f' \n'
+ f' \n'
+ f'\n'
+ )
+ with open(vrt, 'w') as f:
+ f.write(xml)
+ return vrt
+
+
+class TestPerTileCheckDoesNotUseCallerBudget:
+ """Per-tile dim sanity must not reject normal 256x256 source tiles
+ when the caller's ``max_pixels`` is a small output-budget value."""
+
+ def test_normal_tile_source_with_small_max_pixels(self):
+ with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td:
+ _write_normal_tile_source(td)
+ vrt = _write_vrt(td, dst_x_size=100, dst_y_size=100)
+ arr, _ = read_vrt(vrt, max_pixels=10_000)
+ assert arr.shape == (100, 100)
+
+ def test_normal_tile_source_with_tiny_max_pixels(self):
+ """An output budget below a single tile must still succeed when
+ the requested output window itself fits."""
+ with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td:
+ _write_normal_tile_source(td)
+ # Output 5x5 = 25 pixels; max_pixels = 100 fits 25 with room.
+ vrt = _write_vrt(td, dst_x_size=5, dst_y_size=5,
+ raster_x=5, raster_y=5)
+ arr, _ = read_vrt(vrt, max_pixels=100)
+ assert arr.shape == (5, 5)
+
+
+class TestOutputWindowCheckStillEnforced:
+ """The output-window check at the source read still rejects an
+ over-budget read; the #1796 protection is preserved."""
+
+ def test_output_window_exceeds_max_pixels_still_rejected(self):
+ with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td:
+ src = os.path.join(td, 'src.tif')
+ to_geotiff(np.arange(16, dtype=np.uint8).reshape(4, 4),
+ src, compression='none')
+ vrt = _write_vrt(td, dst_x_size=1, dst_y_size=1,
+ raster_x=1, raster_y=1,
+ src_x_size=4, src_y_size=4)
+ # SrcRect 4x4 = 16 pixels > max_pixels=1 → output check fires.
+ with pytest.raises(ValueError, match="exceed"):
+ read_vrt(vrt, max_pixels=1)
+
+
+class TestPerTileCheckStillRejectsCraftedHeader:
+ """A pathological ``TileWidth``/``TileLength`` must still fail at
+ the per-tile sanity check, which uses ``MAX_PIXELS_DEFAULT``."""
+
+ def test_per_tile_check_caps_at_default(self, monkeypatch):
+ """Lower ``MAX_PIXELS_DEFAULT`` to verify the per-tile call site
+ is wired to it (rather than to the caller's budget)."""
+ from xrspatial.geotiff import _reader as reader_mod
+
+ monkeypatch.setattr(reader_mod, "MAX_PIXELS_DEFAULT", 100)
+ with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td:
+ _write_normal_tile_source(td)
+ vrt = _write_vrt(td, dst_x_size=100, dst_y_size=100)
+ # 256x256 tile > patched MAX_PIXELS_DEFAULT=100 → per-tile
+ # check fires regardless of caller's max_pixels (1e9 here).
+ with pytest.raises(PixelSafetyLimitError, match="65,536"):
+ read_vrt(vrt, max_pixels=1_000_000_000)
diff --git a/xrspatial/geotiff/tests/test_vrt_xml_size_cap_1815.py b/xrspatial/geotiff/tests/test_vrt_xml_size_cap_1815.py
new file mode 100644
index 00000000..dc8f992d
--- /dev/null
+++ b/xrspatial/geotiff/tests/test_vrt_xml_size_cap_1815.py
@@ -0,0 +1,94 @@
+"""VRT XML reads must be bounded to avoid unbounded memory allocation.
+
+Regression test for issue #1815: ``read_vrt`` previously called
+``f.read()`` on the VRT XML path with no size limit, so a multi-gigabyte
+XML file would consume all that memory before parsing. The fix adds a
+64 MiB default cap, overridable via ``XRSPATIAL_VRT_MAX_XML_BYTES``.
+"""
+from __future__ import annotations
+
+import os
+
+import numpy as np
+import pytest
+
+from xrspatial.geotiff import to_geotiff
+from xrspatial.geotiff._vrt import read_vrt
+
+
+def _write_source(td: str) -> str:
+ src_path = os.path.join(td, 'tmp_1815_src.tif')
+ to_geotiff(np.zeros((10, 10), dtype=np.uint8), src_path,
+ compression='none')
+ return src_path
+
+
+def _write_vrt(td: str, *, pad_bytes: int = 0) -> str:
+ """Write a VRT, optionally padded with a large XML comment."""
+ vrt_path = os.path.join(td, 'tmp_1815_mosaic.vrt')
+ comment = ''
+ if pad_bytes > 0:
+ comment = '\n'
+ vrt_xml = (
+ '\n'
+ + comment +
+ ' \n'
+ ' \n'
+ ' '
+ 'tmp_1815_src.tif\n'
+ ' 1\n'
+ ' \n'
+ ' \n'
+ ' \n'
+ ' \n'
+ '\n'
+ )
+ with open(vrt_path, 'w') as f:
+ f.write(vrt_xml)
+ return vrt_path
+
+
+def test_small_vrt_parses_under_default_cap(tmp_path):
+ """A normal-sized VRT parses successfully with the default cap."""
+ td = str(tmp_path)
+ _write_source(td)
+ vrt_path = _write_vrt(td)
+ arr, _ = read_vrt(vrt_path)
+ assert arr.shape == (10, 10)
+
+
+def test_oversized_vrt_raises_value_error(tmp_path, monkeypatch):
+ """A VRT padded past the cap raises ValueError naming the cap and env var."""
+ td = str(tmp_path)
+ _write_source(td)
+ # Set a small cap (1 KiB) and pad the comment past it.
+ monkeypatch.setenv('XRSPATIAL_VRT_MAX_XML_BYTES', '1024')
+ vrt_path = _write_vrt(td, pad_bytes=4096)
+ with pytest.raises(ValueError) as exc_info:
+ read_vrt(vrt_path)
+ msg = str(exc_info.value)
+ assert 'XRSPATIAL_VRT_MAX_XML_BYTES' in msg
+ assert '1,024' in msg
+
+
+def test_raising_cap_lets_padded_vrt_parse(tmp_path, monkeypatch):
+ """Setting the env var higher allows a padded VRT to parse."""
+ td = str(tmp_path)
+ _write_source(td)
+ vrt_path = _write_vrt(td, pad_bytes=4096)
+ # Default cap of 64 MiB is more than enough; verify with an explicit
+ # higher cap too.
+ monkeypatch.setenv('XRSPATIAL_VRT_MAX_XML_BYTES', str(1024 * 1024))
+ arr, _ = read_vrt(vrt_path)
+ assert arr.shape == (10, 10)
+
+
+@pytest.mark.parametrize('bad_value', ['not_a_number', '0', '-1', '-1024'])
+def test_invalid_cap_raises_value_error(tmp_path, monkeypatch, bad_value):
+ """Non-numeric, zero, or negative cap values produce a clear error."""
+ td = str(tmp_path)
+ _write_source(td)
+ vrt_path = _write_vrt(td)
+ monkeypatch.setenv('XRSPATIAL_VRT_MAX_XML_BYTES', bad_value)
+ with pytest.raises(ValueError, match='XRSPATIAL_VRT_MAX_XML_BYTES'):
+ read_vrt(vrt_path)