From 881ca5c65955ed09123f19679707e26f2d96b0c5 Mon Sep 17 00:00:00 2001 From: basilalshukaili Date: Tue, 2 Jun 2026 20:36:54 +0000 Subject: [PATCH] fix: raise ValueError for reversed page ranges in expand_page_range A reversed page range like '5-3' would either raise a confusing 'No valid page numbers or ranges found' error (when it was the only entry) or silently drop the pages when mixed with valid ranges (e.g. ['1-3', '7-5', '8'] returned [1, 2, 3, 8] instead of raising). Add an explicit check that start <= end and raise a clear ValueError with a descriptive message identifying the problematic range. Also adds three new tests covering: - reversed range alone raises ValueError - reversed range mixed with valid ranges raises ValueError (no silent drop) - equal start and end (e.g. '3-3') is valid and returns [3] --- haystack/utils/misc.py | 7 +++++-- ...-page-range-reversed-range-8a3f2d1e4b9c5f7a.yaml | 7 +++++++ test/utils/test_misc.py | 13 +++++++++++++ 3 files changed, 25 insertions(+), 2 deletions(-) create mode 100644 releasenotes/notes/fix-expand-page-range-reversed-range-8a3f2d1e4b9c5f7a.yaml diff --git a/haystack/utils/misc.py b/haystack/utils/misc.py index c349ea199d..85cf8df001 100644 --- a/haystack/utils/misc.py +++ b/haystack/utils/misc.py @@ -61,8 +61,11 @@ def expand_page_range(page_range: list[str | int]) -> list[int]: if not parts[0].isdigit() or not parts[1].isdigit(): msg = "range must be a string in the format 'start-end'" raise ValueError(f"Invalid page range: {page} - {msg}") - start, end = parts - expanded_page_range.extend(range(int(start), int(end) + 1)) + start, end = int(parts[0]), int(parts[1]) + if start > end: + msg = "start must be less than or equal to end" + raise ValueError(f"Invalid page range: '{parts[0]}-{parts[1]}' - {msg}") + expanded_page_range.extend(range(start, end + 1)) else: msg = "range must be a string in the format 'start-end' or an integer" diff --git a/releasenotes/notes/fix-expand-page-range-reversed-range-8a3f2d1e4b9c5f7a.yaml b/releasenotes/notes/fix-expand-page-range-reversed-range-8a3f2d1e4b9c5f7a.yaml new file mode 100644 index 0000000000..31d4ef5238 --- /dev/null +++ b/releasenotes/notes/fix-expand-page-range-reversed-range-8a3f2d1e4b9c5f7a.yaml @@ -0,0 +1,7 @@ +--- +fixes: + - | + Fixed `expand_page_range` silently dropping pages when a reversed range (e.g. '7-5') appeared + alongside valid entries. Mixed inputs like ['1-3', '7-5', '8'] previously returned [1, 2, 3, 8] + with no warning, losing pages 5-7. A reversed range now raises `ValueError` with a descriptive + message identifying the offending range. diff --git a/test/utils/test_misc.py b/test/utils/test_misc.py index 2e241a872d..6250f643b3 100644 --- a/test/utils/test_misc.py +++ b/test/utils/test_misc.py @@ -195,3 +195,16 @@ def test_invalid_string_raises_value_error(self): def test_malformed_range_with_multiple_hyphens_raises_value_error(self): with pytest.raises(ValueError, match="Invalid page range"): expand_page_range(["1-3", "5-10-15"]) + + def test_reversed_range_alone_raises_value_error(self): + with pytest.raises(ValueError, match="Invalid page range.*start must be less than or equal to end"): + expand_page_range(["5-3"]) + + def test_reversed_range_mixed_raises_value_error(self): + # Previously, a reversed range mixed with valid entries silently dropped the reversed range. + # e.g. ["1-3", "7-5", "8"] would return [1, 2, 3, 8], losing pages 5-7 with no error. + with pytest.raises(ValueError, match="Invalid page range.*start must be less than or equal to end"): + expand_page_range(["1-3", "7-5", "8"]) + + def test_equal_start_end_is_valid(self): + assert expand_page_range(["3-3"]) == [3]