Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 4 additions & 106 deletions python/cudf_polars/cudf_polars/testing/asserts.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ def assert_gpu_result_equal(
engine: GPUEngine,
collect_kwargs: CollectKwargs | None = None,
polars_collect_kwargs: CollectKwargs | None = None,
cudf_collect_kwargs: CollectKwargs | None = None,
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

never used

check_row_order: bool = True,
check_column_order: bool = True,
check_dtypes: bool = True,
Expand All @@ -59,10 +58,6 @@ def assert_gpu_result_equal(
Keyword arguments to pass to collect for execution on polars CPU.
Overrides kwargs in collect_kwargs.
Useful for controlling optimization settings.
cudf_collect_kwargs
Keyword arguments to pass to collect for execution on cudf-polars.
Overrides kwargs in collect_kwargs.
Useful for controlling optimization settings.
check_row_order
Expect rows to be in same order
check_column_order
Expand All @@ -86,14 +81,13 @@ def assert_gpu_result_equal(
NotImplementedError
If GPU collection failed in some way.
"""
final_polars_collect_kwargs, final_cudf_collect_kwargs = _process_kwargs(
collect_kwargs, polars_collect_kwargs, cudf_collect_kwargs
)
gpu_kwargs = collect_kwargs or {}
cpu_kwargs = gpu_kwargs | (polars_collect_kwargs or {})

# These keywords are correct, but mypy doesn't see that.
# the 'misc' is for 'error: Keywords must be strings'
expect = lazydf.collect(**final_polars_collect_kwargs) # type: ignore[misc, call-overload]
got = lazydf.collect(**final_cudf_collect_kwargs, engine=engine) # type: ignore[misc, call-overload]
expect = lazydf.collect(**cpu_kwargs) # type: ignore[misc, call-overload]
got = lazydf.collect(**gpu_kwargs, engine=engine) # type: ignore[misc, call-overload]
# In multi-rank SPMD mode each rank holds only its local slice; gather the
# full result on every rank so each rank can compare against the CPU result.
if (
Expand Down Expand Up @@ -158,102 +152,6 @@ def assert_ir_translation_raises(q: pl.LazyFrame, *exceptions: type[Exception])
raise AssertionError(f"Translation DID NOT RAISE {exceptions}")


def _process_kwargs(
collect_kwargs: CollectKwargs | None,
polars_collect_kwargs: CollectKwargs | None,
cudf_collect_kwargs: CollectKwargs | None,
) -> tuple[CollectKwargs, CollectKwargs]:
if collect_kwargs is None:
collect_kwargs = {}
final_polars_collect_kwargs = collect_kwargs.copy()
final_cudf_collect_kwargs = collect_kwargs.copy()
if polars_collect_kwargs is not None: # pragma: no cover; not currently used
final_polars_collect_kwargs.update(polars_collect_kwargs)
if cudf_collect_kwargs is not None: # pragma: no cover; not currently used
final_cudf_collect_kwargs.update(cudf_collect_kwargs)
return final_polars_collect_kwargs, final_cudf_collect_kwargs
Comment on lines -161 to -174
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Inlining, the function is only called once now.



def assert_collect_raises(
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Inlining the function is simpler to understand IMO. We now just do:

-        assert_collect_raises(
-            q,
-            cudf_except=pl.exceptions.ComputeError,
-            polars_except=pl.exceptions.InvalidOperationError,
-        )
+        with pytest.raises(pl.exceptions.InvalidOperationError):
+            q.collect()
+        with pytest.raises(pl.exceptions.ComputeError):
+            q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True))

lazydf: pl.LazyFrame,
*,
polars_except: type[Exception] | tuple[type[Exception], ...],
cudf_except: type[Exception] | tuple[type[Exception], ...],
collect_kwargs: CollectKwargs | None = None,
polars_collect_kwargs: CollectKwargs | None = None,
cudf_collect_kwargs: CollectKwargs | None = None,
) -> None:
"""
Assert that collecting the result of a query raises the expected exceptions.

Parameters
----------
lazydf
frame to collect.
collect_kwargs
Common keyword arguments to pass to collect for both polars CPU and
cudf-polars.
Useful for controlling optimization settings.
polars_except
Exception or exceptions polars CPU is expected to raise. If
an empty tuple ``()``, CPU is expected to succeed without raising.
cudf_except
Exception or exceptions polars GPU is expected to raise. If
an empty tuple ``()``, GPU is expected to succeed without raising.
collect_kwargs
Common keyword arguments to pass to collect for both polars CPU and
cudf-polars.
Useful for controlling optimization settings.
polars_collect_kwargs
Keyword arguments to pass to collect for execution on polars CPU.
Overrides kwargs in collect_kwargs.
Useful for controlling optimization settings.
cudf_collect_kwargs
Keyword arguments to pass to collect for execution on cudf-polars.
Overrides kwargs in collect_kwargs.
Useful for controlling optimization settings.

Returns
-------
None
If both sides raise the expected exceptions.

Raises
------
AssertionError
If either side did not raise the expected exceptions.
"""
final_polars_collect_kwargs, final_cudf_collect_kwargs = _process_kwargs(
collect_kwargs, polars_collect_kwargs, cudf_collect_kwargs
)

try:
lazydf.collect(**final_polars_collect_kwargs) # type: ignore[misc, call-overload]
except polars_except:
pass
except Exception as e:
raise AssertionError(
f"CPU execution RAISED {type(e)}, EXPECTED {polars_except}"
) from e
else:
if polars_except != ():
raise AssertionError(f"CPU execution DID NOT RAISE {polars_except}")

# TODO: https://github.com/rapidsai/cudf/issues/22346
engine = GPUEngine(executor="in-memory", raise_on_fail=True)
try:
lazydf.collect(**final_cudf_collect_kwargs, engine=engine) # type: ignore[misc, call-overload]
except cudf_except:
pass
except Exception as e:
raise AssertionError(
f"GPU execution RAISED {type(e)}, EXPECTED {cudf_except}"
) from e
else:
if cudf_except != ():
raise AssertionError(f"GPU execution DID NOT RAISE {cudf_except}")


def _resolve_sink_format(path: Path) -> str:
"""Returns valid sink format for assert utilities."""
suffix = path.suffix.lower()
Expand Down
11 changes: 4 additions & 7 deletions python/cudf_polars/tests/expressions/test_casting.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import polars as pl

from cudf_polars.testing.asserts import (
assert_collect_raises,
assert_gpu_result_equal,
assert_ir_translation_raises,
)
Expand Down Expand Up @@ -69,12 +68,10 @@ def test_cast_strict_false_string_to_numeric(engine: pl.GPUEngine, dtype, strict
df = pl.LazyFrame({"c0": ["1969-12-08 17:00:01", "1", None]})
query = df.with_columns(pl.col("c0").cast(dtype, strict=strict))
if strict:
cudf_except = pl.exceptions.InvalidOperationError
assert_collect_raises(
query,
polars_except=pl.exceptions.InvalidOperationError,
cudf_except=cudf_except,
)
with pytest.raises(pl.exceptions.InvalidOperationError):
query.collect()
with pytest.raises(pl.exceptions.InvalidOperationError):
query.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True))
Comment thread
mroeschke marked this conversation as resolved.
else:
assert_gpu_result_equal(query, engine=engine)

Expand Down
10 changes: 4 additions & 6 deletions python/cudf_polars/tests/expressions/test_datetime_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@

from cudf_polars.dsl.expr import TemporalFunction
from cudf_polars.testing.asserts import (
assert_collect_raises,
assert_gpu_result_equal,
assert_ir_translation_raises,
)
Expand Down Expand Up @@ -379,11 +378,10 @@ def test_datetime_from_integer(engine: pl.GPUEngine, datetime_dtype, integer_dty
df = pl.LazyFrame({"data": pl.Series(values, dtype=integer_dtype)})
q = df.select(pl.col("data").cast(datetime_dtype).alias("datetime_from_int"))
if integer_dtype == pl.UInt64():
assert_collect_raises(
q,
cudf_except=pl.exceptions.ComputeError,
polars_except=pl.exceptions.InvalidOperationError,
)
with pytest.raises(pl.exceptions.InvalidOperationError):
q.collect()
with pytest.raises(pl.exceptions.ComputeError):
q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True))
else:
assert_gpu_result_equal(q, engine=engine)

Expand Down
54 changes: 22 additions & 32 deletions python/cudf_polars/tests/expressions/test_stringfunction.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@

from cudf_polars import execute_with_cudf
from cudf_polars.testing.asserts import (
assert_collect_raises,
assert_gpu_result_equal,
assert_ir_translation_raises,
)
Expand Down Expand Up @@ -303,12 +302,10 @@ def test_to_datetime(
if outcome == "translation_error":
assert_ir_translation_raises(q, NotImplementedError)
elif outcome == "collect_error":
cudf_exc = pl.exceptions.InvalidOperationError
assert_collect_raises(
q,
polars_except=pl.exceptions.InvalidOperationError,
cudf_except=cudf_exc,
)
with pytest.raises(pl.exceptions.InvalidOperationError):
q.collect()
with pytest.raises(pl.exceptions.InvalidOperationError):
q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True))
else:
assert_gpu_result_equal(q, engine=engine)

Expand Down Expand Up @@ -453,11 +450,10 @@ def test_invalid_regex_raises():

q = df.select(pl.col("a").str.contains(r"ab)", strict=True))

assert_collect_raises(
q,
polars_except=pl.exceptions.ComputeError,
cudf_except=pl.exceptions.ComputeError,
)
with pytest.raises(pl.exceptions.ComputeError):
q.collect()
with pytest.raises(pl.exceptions.ComputeError):
q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True))


@pytest.mark.parametrize("pattern", ["a{1000}", "a(?i:B)", ""])
Expand Down Expand Up @@ -508,11 +504,10 @@ def test_string_from_float(engine: pl.GPUEngine, request, str_from_float_data):
def test_string_to_numeric_invalid(numeric_type):
df = pl.LazyFrame({"a": ["a", "b", "c"]})
q = df.select(pl.col("a").cast(numeric_type))
assert_collect_raises(
q,
polars_except=pl.exceptions.InvalidOperationError,
cudf_except=pl.exceptions.InvalidOperationError,
)
with pytest.raises(pl.exceptions.InvalidOperationError):
q.collect()
with pytest.raises(pl.exceptions.InvalidOperationError):
q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True))


@pytest.mark.parametrize("ignore_nulls", [False, True])
Expand Down Expand Up @@ -548,11 +543,10 @@ def test_string_zfill(engine: pl.GPUEngine, fill, input_strings):
q = ldf.select(pl.col("a").str.zfill(fill))

if fill is not None and fill < 0:
assert_collect_raises(
q,
polars_except=pl.exceptions.InvalidOperationError,
cudf_except=pl.exceptions.InvalidOperationError,
)
with pytest.raises(pl.exceptions.InvalidOperationError):
q.collect()
with pytest.raises(pl.exceptions.InvalidOperationError):
q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True))
else:
assert_gpu_result_equal(q, engine=engine)

Expand Down Expand Up @@ -588,23 +582,19 @@ def test_string_zfill_column(engine: pl.GPUEngine, fill):
).lazy()
q = ldf.select(pl.col("input_strings").str.zfill(pl.col("fill")))
if fill is not None and fill < 0:
assert_collect_raises(
q,
polars_except=pl.exceptions.InvalidOperationError,
cudf_except=(),
)
with pytest.raises(pl.exceptions.InvalidOperationError):
q.collect()
q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True))
Comment thread
coderabbitai[bot] marked this conversation as resolved.
else:
assert_gpu_result_equal(q, engine=engine)


def test_string_zfill_forbidden_chars():
ldf = pl.LazyFrame({"a": ["Café", "345", "東京", None]})
q = ldf.select(pl.col("a").str.zfill(3))
assert_collect_raises(
q,
polars_except=(),
cudf_except=pl.exceptions.InvalidOperationError,
)
q.collect()
with pytest.raises(pl.exceptions.InvalidOperationError):
q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True))


@pytest.mark.parametrize(
Expand Down
50 changes: 0 additions & 50 deletions python/cudf_polars/tests/testing/test_asserts.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
assert_tpch_result_equal,
)
from cudf_polars.testing.asserts import (
assert_collect_raises,
assert_gpu_result_equal,
assert_ir_translation_raises,
assert_sink_ir_translation_raises,
Expand Down Expand Up @@ -55,55 +54,6 @@ class E(Exception):
assert_ir_translation_raises(unsupported, E)


def test_collect_assert_raises():
df = pl.LazyFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]})

with pytest.raises(AssertionError, match="CPU execution DID NOT RAISE"):
# This should raise, because polars CPU can run this query,
# but we expect an error.
assert_collect_raises(
df,
polars_except=pl.exceptions.InvalidOperationError,
cudf_except=(),
)

with pytest.raises(AssertionError, match="GPU execution DID NOT RAISE"):
# This should raise, because polars GPU can run this query,
# but we expect an error.
assert_collect_raises(
df,
polars_except=(),
cudf_except=pl.exceptions.InvalidOperationError,
)

# Here's an invalid query that gets caught at IR optimisation time.
q = df.select(pl.col("a") * pl.col("b"))

# This exception is raised in preprocessing, so is the same for
# both CPU and GPU engines.
assert_collect_raises(
q,
polars_except=pl.exceptions.InvalidOperationError,
cudf_except=pl.exceptions.InvalidOperationError,
)

with pytest.raises(AssertionError, match="GPU execution RAISED"):
# This should raise because the expected GPU error is wrong
assert_collect_raises(
q,
polars_except=pl.exceptions.InvalidOperationError,
cudf_except=NotImplementedError,
)

with pytest.raises(AssertionError, match="CPU execution RAISED"):
# This should raise because the expected CPU error is wrong
assert_collect_raises(
q,
polars_except=NotImplementedError,
cudf_except=pl.exceptions.InvalidOperationError,
)


def test_sink_ir_translation_raises_bad_extension():
df = pl.LazyFrame({"a": [1, 2, 3]})
# Should raise because ".foo" is not a recognized file extension
Expand Down
Loading