Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2,062 changes: 2,051 additions & 11 deletions python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py

Large diffs are not rendered by default.

1,997 changes: 0 additions & 1,997 deletions python/cudf_polars/cudf_polars/experimental/benchmarks/utils_new_frontends.py

This file was deleted.

80 changes: 0 additions & 80 deletions python/cudf_polars/cudf_polars/testing/asserts.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,86 +174,6 @@ def _process_kwargs(
return final_polars_collect_kwargs, final_cudf_collect_kwargs


def assert_collect_raises(
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Inlining it instead of calling assert_collect_raises is simpler IMO.

lazydf: pl.LazyFrame,
*,
polars_except: type[Exception] | tuple[type[Exception], ...],
cudf_except: type[Exception] | tuple[type[Exception], ...],
collect_kwargs: CollectKwargs | None = None,
polars_collect_kwargs: CollectKwargs | None = None,
cudf_collect_kwargs: CollectKwargs | None = None,
) -> None:
"""
Assert that collecting the result of a query raises the expected exceptions.

Parameters
----------
lazydf
frame to collect.
collect_kwargs
Common keyword arguments to pass to collect for both polars CPU and
cudf-polars.
Useful for controlling optimization settings.
polars_except
Exception or exceptions polars CPU is expected to raise. If
an empty tuple ``()``, CPU is expected to succeed without raising.
cudf_except
Exception or exceptions polars GPU is expected to raise. If
an empty tuple ``()``, GPU is expected to succeed without raising.
collect_kwargs
Common keyword arguments to pass to collect for both polars CPU and
cudf-polars.
Useful for controlling optimization settings.
polars_collect_kwargs
Keyword arguments to pass to collect for execution on polars CPU.
Overrides kwargs in collect_kwargs.
Useful for controlling optimization settings.
cudf_collect_kwargs
Keyword arguments to pass to collect for execution on cudf-polars.
Overrides kwargs in collect_kwargs.
Useful for controlling optimization settings.

Returns
-------
None
If both sides raise the expected exceptions.

Raises
------
AssertionError
If either side did not raise the expected exceptions.
"""
final_polars_collect_kwargs, final_cudf_collect_kwargs = _process_kwargs(
collect_kwargs, polars_collect_kwargs, cudf_collect_kwargs
)

try:
lazydf.collect(**final_polars_collect_kwargs) # type: ignore[misc, call-overload]
except polars_except:
pass
except Exception as e:
raise AssertionError(
f"CPU execution RAISED {type(e)}, EXPECTED {polars_except}"
) from e
else:
if polars_except != ():
raise AssertionError(f"CPU execution DID NOT RAISE {polars_except}")

# TODO: https://github.com/rapidsai/cudf/issues/22346
engine = GPUEngine(executor="in-memory", raise_on_fail=True)
try:
lazydf.collect(**final_cudf_collect_kwargs, engine=engine) # type: ignore[misc, call-overload]
except cudf_except:
pass
except Exception as e:
raise AssertionError(
f"GPU execution RAISED {type(e)}, EXPECTED {cudf_except}"
) from e
else:
if cudf_except != ():
raise AssertionError(f"GPU execution DID NOT RAISE {cudf_except}")


def _resolve_sink_format(path: Path) -> str:
"""Returns valid sink format for assert utilities."""
suffix = path.suffix.lower()
Expand Down
11 changes: 4 additions & 7 deletions python/cudf_polars/tests/expressions/test_casting.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import polars as pl

from cudf_polars.testing.asserts import (
assert_collect_raises,
assert_gpu_result_equal,
assert_ir_translation_raises,
)
Expand Down Expand Up @@ -69,12 +68,10 @@ def test_cast_strict_false_string_to_numeric(engine: pl.GPUEngine, dtype, strict
df = pl.LazyFrame({"c0": ["1969-12-08 17:00:01", "1", None]})
query = df.with_columns(pl.col("c0").cast(dtype, strict=strict))
if strict:
cudf_except = pl.exceptions.InvalidOperationError
assert_collect_raises(
query,
polars_except=pl.exceptions.InvalidOperationError,
cudf_except=cudf_except,
)
with pytest.raises(pl.exceptions.InvalidOperationError):
query.collect()
with pytest.raises(pl.exceptions.InvalidOperationError):
query.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True))
else:
assert_gpu_result_equal(query, engine=engine)

Expand Down
10 changes: 4 additions & 6 deletions python/cudf_polars/tests/expressions/test_datetime_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@

from cudf_polars.dsl.expr import TemporalFunction
from cudf_polars.testing.asserts import (
assert_collect_raises,
assert_gpu_result_equal,
assert_ir_translation_raises,
)
Expand Down Expand Up @@ -379,11 +378,10 @@ def test_datetime_from_integer(engine: pl.GPUEngine, datetime_dtype, integer_dty
df = pl.LazyFrame({"data": pl.Series(values, dtype=integer_dtype)})
q = df.select(pl.col("data").cast(datetime_dtype).alias("datetime_from_int"))
if integer_dtype == pl.UInt64():
assert_collect_raises(
q,
cudf_except=pl.exceptions.ComputeError,
polars_except=pl.exceptions.InvalidOperationError,
)
with pytest.raises(pl.exceptions.InvalidOperationError):
q.collect()
with pytest.raises(pl.exceptions.ComputeError):
q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True))
else:
assert_gpu_result_equal(q, engine=engine)

Expand Down
54 changes: 22 additions & 32 deletions python/cudf_polars/tests/expressions/test_stringfunction.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@

from cudf_polars import execute_with_cudf
from cudf_polars.testing.asserts import (
assert_collect_raises,
assert_gpu_result_equal,
assert_ir_translation_raises,
)
Expand Down Expand Up @@ -303,12 +302,10 @@ def test_to_datetime(
if outcome == "translation_error":
assert_ir_translation_raises(q, NotImplementedError)
elif outcome == "collect_error":
cudf_exc = pl.exceptions.InvalidOperationError
assert_collect_raises(
q,
polars_except=pl.exceptions.InvalidOperationError,
cudf_except=cudf_exc,
)
with pytest.raises(pl.exceptions.InvalidOperationError):
q.collect()
with pytest.raises(pl.exceptions.InvalidOperationError):
q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True))
else:
assert_gpu_result_equal(q, engine=engine)

Expand Down Expand Up @@ -453,11 +450,10 @@ def test_invalid_regex_raises():

q = df.select(pl.col("a").str.contains(r"ab)", strict=True))

assert_collect_raises(
q,
polars_except=pl.exceptions.ComputeError,
cudf_except=pl.exceptions.ComputeError,
)
with pytest.raises(pl.exceptions.ComputeError):
q.collect()
with pytest.raises(pl.exceptions.ComputeError):
q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True))


@pytest.mark.parametrize("pattern", ["a{1000}", "a(?i:B)", ""])
Expand Down Expand Up @@ -508,11 +504,10 @@ def test_string_from_float(engine: pl.GPUEngine, request, str_from_float_data):
def test_string_to_numeric_invalid(numeric_type):
df = pl.LazyFrame({"a": ["a", "b", "c"]})
q = df.select(pl.col("a").cast(numeric_type))
assert_collect_raises(
q,
polars_except=pl.exceptions.InvalidOperationError,
cudf_except=pl.exceptions.InvalidOperationError,
)
with pytest.raises(pl.exceptions.InvalidOperationError):
q.collect()
with pytest.raises(pl.exceptions.InvalidOperationError):
q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True))


@pytest.mark.parametrize("ignore_nulls", [False, True])
Expand Down Expand Up @@ -548,11 +543,10 @@ def test_string_zfill(engine: pl.GPUEngine, fill, input_strings):
q = ldf.select(pl.col("a").str.zfill(fill))

if fill is not None and fill < 0:
assert_collect_raises(
q,
polars_except=pl.exceptions.InvalidOperationError,
cudf_except=pl.exceptions.InvalidOperationError,
)
with pytest.raises(pl.exceptions.InvalidOperationError):
q.collect()
with pytest.raises(pl.exceptions.InvalidOperationError):
q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True))
else:
assert_gpu_result_equal(q, engine=engine)

Expand Down Expand Up @@ -588,23 +582,19 @@ def test_string_zfill_column(engine: pl.GPUEngine, fill):
).lazy()
q = ldf.select(pl.col("input_strings").str.zfill(pl.col("fill")))
if fill is not None and fill < 0:
assert_collect_raises(
q,
polars_except=pl.exceptions.InvalidOperationError,
cudf_except=(),
)
with pytest.raises(pl.exceptions.InvalidOperationError):
q.collect()
q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True))
else:
Comment on lines +585 to 588
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Wrap the GPU collect call in pytest.raises for the negative-fill branch.

The GPU call is currently unguarded in this branch, so it can fail the test unexpectedly instead of asserting the intended exception behavior.

Proposed fix
     if fill is not None and fill < 0:
         with pytest.raises(pl.exceptions.InvalidOperationError):
             q.collect()
-        q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True))
+        with pytest.raises(pl.exceptions.InvalidOperationError):
+            q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True))
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
with pytest.raises(pl.exceptions.InvalidOperationError):
q.collect()
q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True))
else:
with pytest.raises(pl.exceptions.InvalidOperationError):
q.collect()
with pytest.raises(pl.exceptions.InvalidOperationError):
q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True))
else:
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@python/cudf_polars/tests/expressions/test_stringfunction.py` around lines 585
- 588, The GPU collect call in the negative-fill branch should be wrapped with
pytest.raises to assert the expected exception instead of letting it fail the
test; update the block around
q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True)) to call
it inside pytest.raises(pl.exceptions.InvalidOperationError) (matching the
earlier CPU assertion) so both the plain q.collect() and the GPU q.collect(...)
are asserted to raise InvalidOperationError.

assert_gpu_result_equal(q, engine=engine)


def test_string_zfill_forbidden_chars():
ldf = pl.LazyFrame({"a": ["Café", "345", "東京", None]})
q = ldf.select(pl.col("a").str.zfill(3))
assert_collect_raises(
q,
polars_except=(),
cudf_except=pl.exceptions.InvalidOperationError,
)
q.collect()
with pytest.raises(pl.exceptions.InvalidOperationError):
q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True))


@pytest.mark.parametrize(
Expand Down
7 changes: 3 additions & 4 deletions python/cudf_polars/tests/test_tracing.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,9 @@ def test_trace_basic(
assert b"frames_input" in result
assert b"total_bytes_output" in result
assert b"total_bytes_input" in result
# TODO: With rapidsmpf are the rmm fields not supposed to be logged?
assert b"rmm_total_bytes_output" not in result
assert b"rmm_total_bytes_input" not in result
assert b"rmm_current_bytes_output" not in result
assert b"rmm_total_bytes_output" in result
assert b"rmm_total_bytes_input" in result
assert b"rmm_current_bytes_output" in result
assert b"overhead_duration" in result


Expand Down
50 changes: 0 additions & 50 deletions python/cudf_polars/tests/testing/test_asserts.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
assert_tpch_result_equal,
)
from cudf_polars.testing.asserts import (
assert_collect_raises,
assert_gpu_result_equal,
assert_ir_translation_raises,
assert_sink_ir_translation_raises,
Expand Down Expand Up @@ -55,55 +54,6 @@ class E(Exception):
assert_ir_translation_raises(unsupported, E)


def test_collect_assert_raises():
df = pl.LazyFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]})

with pytest.raises(AssertionError, match="CPU execution DID NOT RAISE"):
# This should raise, because polars CPU can run this query,
# but we expect an error.
assert_collect_raises(
df,
polars_except=pl.exceptions.InvalidOperationError,
cudf_except=(),
)

with pytest.raises(AssertionError, match="GPU execution DID NOT RAISE"):
# This should raise, because polars GPU can run this query,
# but we expect an error.
assert_collect_raises(
df,
polars_except=(),
cudf_except=pl.exceptions.InvalidOperationError,
)

# Here's an invalid query that gets caught at IR optimisation time.
q = df.select(pl.col("a") * pl.col("b"))

# This exception is raised in preprocessing, so is the same for
# both CPU and GPU engines.
assert_collect_raises(
q,
polars_except=pl.exceptions.InvalidOperationError,
cudf_except=pl.exceptions.InvalidOperationError,
)

with pytest.raises(AssertionError, match="GPU execution RAISED"):
# This should raise because the expected GPU error is wrong
assert_collect_raises(
q,
polars_except=pl.exceptions.InvalidOperationError,
cudf_except=NotImplementedError,
)

with pytest.raises(AssertionError, match="CPU execution RAISED"):
# This should raise because the expected CPU error is wrong
assert_collect_raises(
q,
polars_except=NotImplementedError,
cudf_except=pl.exceptions.InvalidOperationError,
)


def test_sink_ir_translation_raises_bad_extension():
df = pl.LazyFrame({"a": [1, 2, 3]})
# Should raise because ".foo" is not a recognized file extension
Expand Down
Loading