diff --git a/python/cudf_polars/cudf_polars/testing/asserts.py b/python/cudf_polars/cudf_polars/testing/asserts.py index 973272f0112..69985bda6d9 100644 --- a/python/cudf_polars/cudf_polars/testing/asserts.py +++ b/python/cudf_polars/cudf_polars/testing/asserts.py @@ -33,7 +33,6 @@ def assert_gpu_result_equal( engine: GPUEngine, collect_kwargs: CollectKwargs | None = None, polars_collect_kwargs: CollectKwargs | None = None, - cudf_collect_kwargs: CollectKwargs | None = None, check_row_order: bool = True, check_column_order: bool = True, check_dtypes: bool = True, @@ -59,10 +58,6 @@ def assert_gpu_result_equal( Keyword arguments to pass to collect for execution on polars CPU. Overrides kwargs in collect_kwargs. Useful for controlling optimization settings. - cudf_collect_kwargs - Keyword arguments to pass to collect for execution on cudf-polars. - Overrides kwargs in collect_kwargs. - Useful for controlling optimization settings. check_row_order Expect rows to be in same order check_column_order @@ -86,14 +81,13 @@ def assert_gpu_result_equal( NotImplementedError If GPU collection failed in some way. """ - final_polars_collect_kwargs, final_cudf_collect_kwargs = _process_kwargs( - collect_kwargs, polars_collect_kwargs, cudf_collect_kwargs - ) + gpu_kwargs = collect_kwargs or {} + cpu_kwargs = gpu_kwargs | (polars_collect_kwargs or {}) # These keywords are correct, but mypy doesn't see that. # the 'misc' is for 'error: Keywords must be strings' - expect = lazydf.collect(**final_polars_collect_kwargs) # type: ignore[misc, call-overload] - got = lazydf.collect(**final_cudf_collect_kwargs, engine=engine) # type: ignore[misc, call-overload] + expect = lazydf.collect(**cpu_kwargs) # type: ignore[misc, call-overload] + got = lazydf.collect(**gpu_kwargs, engine=engine) # type: ignore[misc, call-overload] # In multi-rank SPMD mode each rank holds only its local slice; gather the # full result on every rank so each rank can compare against the CPU result. if ( @@ -158,102 +152,6 @@ def assert_ir_translation_raises(q: pl.LazyFrame, *exceptions: type[Exception]) raise AssertionError(f"Translation DID NOT RAISE {exceptions}") -def _process_kwargs( - collect_kwargs: CollectKwargs | None, - polars_collect_kwargs: CollectKwargs | None, - cudf_collect_kwargs: CollectKwargs | None, -) -> tuple[CollectKwargs, CollectKwargs]: - if collect_kwargs is None: - collect_kwargs = {} - final_polars_collect_kwargs = collect_kwargs.copy() - final_cudf_collect_kwargs = collect_kwargs.copy() - if polars_collect_kwargs is not None: # pragma: no cover; not currently used - final_polars_collect_kwargs.update(polars_collect_kwargs) - if cudf_collect_kwargs is not None: # pragma: no cover; not currently used - final_cudf_collect_kwargs.update(cudf_collect_kwargs) - return final_polars_collect_kwargs, final_cudf_collect_kwargs - - -def assert_collect_raises( - lazydf: pl.LazyFrame, - *, - polars_except: type[Exception] | tuple[type[Exception], ...], - cudf_except: type[Exception] | tuple[type[Exception], ...], - collect_kwargs: CollectKwargs | None = None, - polars_collect_kwargs: CollectKwargs | None = None, - cudf_collect_kwargs: CollectKwargs | None = None, -) -> None: - """ - Assert that collecting the result of a query raises the expected exceptions. - - Parameters - ---------- - lazydf - frame to collect. - collect_kwargs - Common keyword arguments to pass to collect for both polars CPU and - cudf-polars. - Useful for controlling optimization settings. - polars_except - Exception or exceptions polars CPU is expected to raise. If - an empty tuple ``()``, CPU is expected to succeed without raising. - cudf_except - Exception or exceptions polars GPU is expected to raise. If - an empty tuple ``()``, GPU is expected to succeed without raising. - collect_kwargs - Common keyword arguments to pass to collect for both polars CPU and - cudf-polars. - Useful for controlling optimization settings. - polars_collect_kwargs - Keyword arguments to pass to collect for execution on polars CPU. - Overrides kwargs in collect_kwargs. - Useful for controlling optimization settings. - cudf_collect_kwargs - Keyword arguments to pass to collect for execution on cudf-polars. - Overrides kwargs in collect_kwargs. - Useful for controlling optimization settings. - - Returns - ------- - None - If both sides raise the expected exceptions. - - Raises - ------ - AssertionError - If either side did not raise the expected exceptions. - """ - final_polars_collect_kwargs, final_cudf_collect_kwargs = _process_kwargs( - collect_kwargs, polars_collect_kwargs, cudf_collect_kwargs - ) - - try: - lazydf.collect(**final_polars_collect_kwargs) # type: ignore[misc, call-overload] - except polars_except: - pass - except Exception as e: - raise AssertionError( - f"CPU execution RAISED {type(e)}, EXPECTED {polars_except}" - ) from e - else: - if polars_except != (): - raise AssertionError(f"CPU execution DID NOT RAISE {polars_except}") - - # TODO: https://github.com/rapidsai/cudf/issues/22346 - engine = GPUEngine(executor="in-memory", raise_on_fail=True) - try: - lazydf.collect(**final_cudf_collect_kwargs, engine=engine) # type: ignore[misc, call-overload] - except cudf_except: - pass - except Exception as e: - raise AssertionError( - f"GPU execution RAISED {type(e)}, EXPECTED {cudf_except}" - ) from e - else: - if cudf_except != (): - raise AssertionError(f"GPU execution DID NOT RAISE {cudf_except}") - - def _resolve_sink_format(path: Path) -> str: """Returns valid sink format for assert utilities.""" suffix = path.suffix.lower() diff --git a/python/cudf_polars/tests/expressions/test_casting.py b/python/cudf_polars/tests/expressions/test_casting.py index 6a0c472f7f2..1fbc1aa1476 100644 --- a/python/cudf_polars/tests/expressions/test_casting.py +++ b/python/cudf_polars/tests/expressions/test_casting.py @@ -9,7 +9,6 @@ import polars as pl from cudf_polars.testing.asserts import ( - assert_collect_raises, assert_gpu_result_equal, assert_ir_translation_raises, ) @@ -69,12 +68,10 @@ def test_cast_strict_false_string_to_numeric(engine: pl.GPUEngine, dtype, strict df = pl.LazyFrame({"c0": ["1969-12-08 17:00:01", "1", None]}) query = df.with_columns(pl.col("c0").cast(dtype, strict=strict)) if strict: - cudf_except = pl.exceptions.InvalidOperationError - assert_collect_raises( - query, - polars_except=pl.exceptions.InvalidOperationError, - cudf_except=cudf_except, - ) + with pytest.raises(pl.exceptions.InvalidOperationError): + query.collect() + with pytest.raises(pl.exceptions.InvalidOperationError): + query.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True)) else: assert_gpu_result_equal(query, engine=engine) diff --git a/python/cudf_polars/tests/expressions/test_datetime_basic.py b/python/cudf_polars/tests/expressions/test_datetime_basic.py index 9f6fd36c578..c42d1bc2515 100644 --- a/python/cudf_polars/tests/expressions/test_datetime_basic.py +++ b/python/cudf_polars/tests/expressions/test_datetime_basic.py @@ -11,7 +11,6 @@ from cudf_polars.dsl.expr import TemporalFunction from cudf_polars.testing.asserts import ( - assert_collect_raises, assert_gpu_result_equal, assert_ir_translation_raises, ) @@ -379,11 +378,10 @@ def test_datetime_from_integer(engine: pl.GPUEngine, datetime_dtype, integer_dty df = pl.LazyFrame({"data": pl.Series(values, dtype=integer_dtype)}) q = df.select(pl.col("data").cast(datetime_dtype).alias("datetime_from_int")) if integer_dtype == pl.UInt64(): - assert_collect_raises( - q, - cudf_except=pl.exceptions.ComputeError, - polars_except=pl.exceptions.InvalidOperationError, - ) + with pytest.raises(pl.exceptions.InvalidOperationError): + q.collect() + with pytest.raises(pl.exceptions.ComputeError): + q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True)) else: assert_gpu_result_equal(q, engine=engine) diff --git a/python/cudf_polars/tests/expressions/test_stringfunction.py b/python/cudf_polars/tests/expressions/test_stringfunction.py index f834ba62acb..321e6d6964e 100644 --- a/python/cudf_polars/tests/expressions/test_stringfunction.py +++ b/python/cudf_polars/tests/expressions/test_stringfunction.py @@ -10,7 +10,6 @@ from cudf_polars import execute_with_cudf from cudf_polars.testing.asserts import ( - assert_collect_raises, assert_gpu_result_equal, assert_ir_translation_raises, ) @@ -303,12 +302,10 @@ def test_to_datetime( if outcome == "translation_error": assert_ir_translation_raises(q, NotImplementedError) elif outcome == "collect_error": - cudf_exc = pl.exceptions.InvalidOperationError - assert_collect_raises( - q, - polars_except=pl.exceptions.InvalidOperationError, - cudf_except=cudf_exc, - ) + with pytest.raises(pl.exceptions.InvalidOperationError): + q.collect() + with pytest.raises(pl.exceptions.InvalidOperationError): + q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True)) else: assert_gpu_result_equal(q, engine=engine) @@ -453,11 +450,10 @@ def test_invalid_regex_raises(): q = df.select(pl.col("a").str.contains(r"ab)", strict=True)) - assert_collect_raises( - q, - polars_except=pl.exceptions.ComputeError, - cudf_except=pl.exceptions.ComputeError, - ) + with pytest.raises(pl.exceptions.ComputeError): + q.collect() + with pytest.raises(pl.exceptions.ComputeError): + q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True)) @pytest.mark.parametrize("pattern", ["a{1000}", "a(?i:B)", ""]) @@ -508,11 +504,10 @@ def test_string_from_float(engine: pl.GPUEngine, request, str_from_float_data): def test_string_to_numeric_invalid(numeric_type): df = pl.LazyFrame({"a": ["a", "b", "c"]}) q = df.select(pl.col("a").cast(numeric_type)) - assert_collect_raises( - q, - polars_except=pl.exceptions.InvalidOperationError, - cudf_except=pl.exceptions.InvalidOperationError, - ) + with pytest.raises(pl.exceptions.InvalidOperationError): + q.collect() + with pytest.raises(pl.exceptions.InvalidOperationError): + q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True)) @pytest.mark.parametrize("ignore_nulls", [False, True]) @@ -548,11 +543,10 @@ def test_string_zfill(engine: pl.GPUEngine, fill, input_strings): q = ldf.select(pl.col("a").str.zfill(fill)) if fill is not None and fill < 0: - assert_collect_raises( - q, - polars_except=pl.exceptions.InvalidOperationError, - cudf_except=pl.exceptions.InvalidOperationError, - ) + with pytest.raises(pl.exceptions.InvalidOperationError): + q.collect() + with pytest.raises(pl.exceptions.InvalidOperationError): + q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True)) else: assert_gpu_result_equal(q, engine=engine) @@ -588,11 +582,9 @@ def test_string_zfill_column(engine: pl.GPUEngine, fill): ).lazy() q = ldf.select(pl.col("input_strings").str.zfill(pl.col("fill"))) if fill is not None and fill < 0: - assert_collect_raises( - q, - polars_except=pl.exceptions.InvalidOperationError, - cudf_except=(), - ) + with pytest.raises(pl.exceptions.InvalidOperationError): + q.collect() + q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True)) else: assert_gpu_result_equal(q, engine=engine) @@ -600,11 +592,9 @@ def test_string_zfill_column(engine: pl.GPUEngine, fill): def test_string_zfill_forbidden_chars(): ldf = pl.LazyFrame({"a": ["Café", "345", "東京", None]}) q = ldf.select(pl.col("a").str.zfill(3)) - assert_collect_raises( - q, - polars_except=(), - cudf_except=pl.exceptions.InvalidOperationError, - ) + q.collect() + with pytest.raises(pl.exceptions.InvalidOperationError): + q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True)) @pytest.mark.parametrize( diff --git a/python/cudf_polars/tests/testing/test_asserts.py b/python/cudf_polars/tests/testing/test_asserts.py index d58b5af502c..dded6078d96 100644 --- a/python/cudf_polars/tests/testing/test_asserts.py +++ b/python/cudf_polars/tests/testing/test_asserts.py @@ -16,7 +16,6 @@ assert_tpch_result_equal, ) from cudf_polars.testing.asserts import ( - assert_collect_raises, assert_gpu_result_equal, assert_ir_translation_raises, assert_sink_ir_translation_raises, @@ -55,55 +54,6 @@ class E(Exception): assert_ir_translation_raises(unsupported, E) -def test_collect_assert_raises(): - df = pl.LazyFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]}) - - with pytest.raises(AssertionError, match="CPU execution DID NOT RAISE"): - # This should raise, because polars CPU can run this query, - # but we expect an error. - assert_collect_raises( - df, - polars_except=pl.exceptions.InvalidOperationError, - cudf_except=(), - ) - - with pytest.raises(AssertionError, match="GPU execution DID NOT RAISE"): - # This should raise, because polars GPU can run this query, - # but we expect an error. - assert_collect_raises( - df, - polars_except=(), - cudf_except=pl.exceptions.InvalidOperationError, - ) - - # Here's an invalid query that gets caught at IR optimisation time. - q = df.select(pl.col("a") * pl.col("b")) - - # This exception is raised in preprocessing, so is the same for - # both CPU and GPU engines. - assert_collect_raises( - q, - polars_except=pl.exceptions.InvalidOperationError, - cudf_except=pl.exceptions.InvalidOperationError, - ) - - with pytest.raises(AssertionError, match="GPU execution RAISED"): - # This should raise because the expected GPU error is wrong - assert_collect_raises( - q, - polars_except=pl.exceptions.InvalidOperationError, - cudf_except=NotImplementedError, - ) - - with pytest.raises(AssertionError, match="CPU execution RAISED"): - # This should raise because the expected CPU error is wrong - assert_collect_raises( - q, - polars_except=NotImplementedError, - cudf_except=pl.exceptions.InvalidOperationError, - ) - - def test_sink_ir_translation_raises_bad_extension(): df = pl.LazyFrame({"a": [1, 2, 3]}) # Should raise because ".foo" is not a recognized file extension