Skip to content
Closed
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2,062 changes: 2,051 additions & 11 deletions python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py

Large diffs are not rendered by default.

1,997 changes: 0 additions & 1,997 deletions python/cudf_polars/cudf_polars/experimental/benchmarks/utils_new_frontends.py

This file was deleted.

127 changes: 2 additions & 125 deletions python/cudf_polars/cudf_polars/testing/asserts.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,11 @@
"assert_sink_result_equal",
]

# Will be overriden by `conftest.py` with the value from the `--executor`
# command-line argument.
DEFAULT_EXECUTOR = "in-memory"


def assert_gpu_result_equal(
lazydf: pl.LazyFrame,
*,
engine: GPUEngine | None = None,
engine: GPUEngine,
Comment thread
madsbk marked this conversation as resolved.
collect_kwargs: CollectKwargs | None = None,
polars_collect_kwargs: CollectKwargs | None = None,
cudf_collect_kwargs: CollectKwargs | None = None,
Expand All @@ -45,7 +41,6 @@ def assert_gpu_result_equal(
rtol: float = 1e-05,
atol: float = 1e-08,
categorical_as_str: bool = False,
executor: str | None = None,
) -> None:
"""
Assert that collection of a lazyframe on GPU produces correct results.
Expand Down Expand Up @@ -83,9 +78,6 @@ def assert_gpu_result_equal(
Absolute tolerance for float comparisons
categorical_as_str
Decat categoricals to strings before comparing
executor
The executor configuration to pass to `GPUEngine`. If not specified
uses the module level `Executor` attribute.

Raises
------
Expand All @@ -94,7 +86,6 @@ def assert_gpu_result_equal(
NotImplementedError
If GPU collection failed in some way.
"""
engine = engine or get_default_engine(executor)
final_polars_collect_kwargs, final_cudf_collect_kwargs = _process_kwargs(
collect_kwargs, polars_collect_kwargs, cudf_collect_kwargs
)
Expand Down Expand Up @@ -167,35 +158,6 @@ def assert_ir_translation_raises(q: pl.LazyFrame, *exceptions: type[Exception])
raise AssertionError(f"Translation DID NOT RAISE {exceptions}")


def get_default_engine(
executor: str | None = None,
) -> GPUEngine:
"""
Get the default engine used for testing.

Parameters
----------
executor
The executor configuration to pass to `GPUEngine`. If not specified
uses the module level `Executor` attribute.

Returns
-------
engine
A polars GPUEngine configured with the default settings for tests.

See Also
--------
assert_gpu_result_equal
assert_sink_result_equal
"""
executor = executor or DEFAULT_EXECUTOR
return GPUEngine(
raise_on_fail=True,
executor=executor,
)


def _process_kwargs(
collect_kwargs: CollectKwargs | None,
polars_collect_kwargs: CollectKwargs | None,
Expand All @@ -212,86 +174,6 @@ def _process_kwargs(
return final_polars_collect_kwargs, final_cudf_collect_kwargs


def assert_collect_raises(
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Inlining it instead of calling assert_collect_raises is simpler IMO.

lazydf: pl.LazyFrame,
*,
polars_except: type[Exception] | tuple[type[Exception], ...],
cudf_except: type[Exception] | tuple[type[Exception], ...],
collect_kwargs: CollectKwargs | None = None,
polars_collect_kwargs: CollectKwargs | None = None,
cudf_collect_kwargs: CollectKwargs | None = None,
) -> None:
"""
Assert that collecting the result of a query raises the expected exceptions.

Parameters
----------
lazydf
frame to collect.
collect_kwargs
Common keyword arguments to pass to collect for both polars CPU and
cudf-polars.
Useful for controlling optimization settings.
polars_except
Exception or exceptions polars CPU is expected to raise. If
an empty tuple ``()``, CPU is expected to succeed without raising.
cudf_except
Exception or exceptions polars GPU is expected to raise. If
an empty tuple ``()``, GPU is expected to succeed without raising.
collect_kwargs
Common keyword arguments to pass to collect for both polars CPU and
cudf-polars.
Useful for controlling optimization settings.
polars_collect_kwargs
Keyword arguments to pass to collect for execution on polars CPU.
Overrides kwargs in collect_kwargs.
Useful for controlling optimization settings.
cudf_collect_kwargs
Keyword arguments to pass to collect for execution on cudf-polars.
Overrides kwargs in collect_kwargs.
Useful for controlling optimization settings.

Returns
-------
None
If both sides raise the expected exceptions.

Raises
------
AssertionError
If either side did not raise the expected exceptions.
"""
final_polars_collect_kwargs, final_cudf_collect_kwargs = _process_kwargs(
collect_kwargs, polars_collect_kwargs, cudf_collect_kwargs
)

try:
lazydf.collect(**final_polars_collect_kwargs) # type: ignore[misc, call-overload]
except polars_except:
pass
except Exception as e:
raise AssertionError(
f"CPU execution RAISED {type(e)}, EXPECTED {polars_except}"
) from e
else:
if polars_except != ():
raise AssertionError(f"CPU execution DID NOT RAISE {polars_except}")

# TODO: https://github.com/rapidsai/cudf/issues/22346
engine = GPUEngine(executor="in-memory", raise_on_fail=True)
try:
lazydf.collect(**final_cudf_collect_kwargs, engine=engine) # type: ignore[misc, call-overload]
except cudf_except:
pass
except Exception as e:
raise AssertionError(
f"GPU execution RAISED {type(e)}, EXPECTED {cudf_except}"
) from e
else:
if cudf_except != ():
raise AssertionError(f"GPU execution DID NOT RAISE {cudf_except}")


def _resolve_sink_format(path: Path) -> str:
"""Returns valid sink format for assert utilities."""
suffix = path.suffix.lower()
Expand All @@ -311,10 +193,9 @@ def assert_sink_result_equal(
lazydf: pl.LazyFrame,
path: str | Path,
*,
engine: str | GPUEngine | None = None,
engine: GPUEngine,
read_kwargs: dict | None = None,
write_kwargs: dict | None = None,
executor: str | None = None,
) -> None:
"""
Assert that writing a LazyFrame via sink produces the same output.
Expand All @@ -332,9 +213,6 @@ def assert_sink_result_equal(
Optional keyword arguments to pass to the corresponding `pl.read_*` function.
write_kwargs
Optional keyword arguments to pass to the corresponding `sink_*` function.
executor
The executor configuration to pass to `GPUEngine`. If not specified
uses the module level `Executor` attribute.

Raises
------
Expand All @@ -343,7 +221,6 @@ def assert_sink_result_equal(
ValueError
If the file extension is not one of the supported formats.
"""
engine = engine or get_default_engine(executor)
path = Path(path)
read_kwargs = read_kwargs or {}
write_kwargs = write_kwargs or {}
Expand Down
14 changes: 0 additions & 14 deletions python/cudf_polars/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,19 +272,7 @@ def engine_raise_on_fail() -> pl.GPUEngine:
return pl.GPUEngine(executor="in-memory", raise_on_fail=True)


def pytest_addoption(parser):
parser.addoption(
"--executor",
action="store",
default="streaming",
choices=("in-memory", "streaming"),
help="Executor to use for GPUEngine.",
)


def pytest_configure(config):
import cudf_polars.testing.asserts

config.addinivalue_line(
"markers",
"skip_on_streaming_engine(reason, *, engine=None): skip the test for "
Expand All @@ -302,8 +290,6 @@ def pytest_configure(config):
# apply globally rather than per-module.
config.addinivalue_line("filterwarnings", "ignore::ResourceWarning")

cudf_polars.testing.asserts.DEFAULT_EXECUTOR = config.getoption("--executor")


def pytest_collection_modifyitems(items):
"""Apply ``skip_on_streaming_engine`` markers to streaming ``engine`` items."""
Expand Down
11 changes: 4 additions & 7 deletions python/cudf_polars/tests/expressions/test_casting.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import polars as pl

from cudf_polars.testing.asserts import (
assert_collect_raises,
assert_gpu_result_equal,
assert_ir_translation_raises,
)
Expand Down Expand Up @@ -69,12 +68,10 @@ def test_cast_strict_false_string_to_numeric(engine: pl.GPUEngine, dtype, strict
df = pl.LazyFrame({"c0": ["1969-12-08 17:00:01", "1", None]})
query = df.with_columns(pl.col("c0").cast(dtype, strict=strict))
if strict:
cudf_except = pl.exceptions.InvalidOperationError
assert_collect_raises(
query,
polars_except=pl.exceptions.InvalidOperationError,
cudf_except=cudf_except,
)
with pytest.raises(pl.exceptions.InvalidOperationError):
query.collect()
with pytest.raises(pl.exceptions.InvalidOperationError):
query.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True))
else:
assert_gpu_result_equal(query, engine=engine)

Expand Down
10 changes: 4 additions & 6 deletions python/cudf_polars/tests/expressions/test_datetime_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@

from cudf_polars.dsl.expr import TemporalFunction
from cudf_polars.testing.asserts import (
assert_collect_raises,
assert_gpu_result_equal,
assert_ir_translation_raises,
)
Expand Down Expand Up @@ -379,11 +378,10 @@ def test_datetime_from_integer(engine: pl.GPUEngine, datetime_dtype, integer_dty
df = pl.LazyFrame({"data": pl.Series(values, dtype=integer_dtype)})
q = df.select(pl.col("data").cast(datetime_dtype).alias("datetime_from_int"))
if integer_dtype == pl.UInt64():
assert_collect_raises(
q,
cudf_except=pl.exceptions.ComputeError,
polars_except=pl.exceptions.InvalidOperationError,
)
with pytest.raises(pl.exceptions.InvalidOperationError):
q.collect()
with pytest.raises(pl.exceptions.ComputeError):
q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True))
else:
assert_gpu_result_equal(q, engine=engine)

Expand Down
54 changes: 22 additions & 32 deletions python/cudf_polars/tests/expressions/test_stringfunction.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@

from cudf_polars import execute_with_cudf
from cudf_polars.testing.asserts import (
assert_collect_raises,
assert_gpu_result_equal,
assert_ir_translation_raises,
)
Expand Down Expand Up @@ -303,12 +302,10 @@ def test_to_datetime(
if outcome == "translation_error":
assert_ir_translation_raises(q, NotImplementedError)
elif outcome == "collect_error":
cudf_exc = pl.exceptions.InvalidOperationError
assert_collect_raises(
q,
polars_except=pl.exceptions.InvalidOperationError,
cudf_except=cudf_exc,
)
with pytest.raises(pl.exceptions.InvalidOperationError):
q.collect()
with pytest.raises(pl.exceptions.InvalidOperationError):
q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True))
else:
assert_gpu_result_equal(q, engine=engine)

Expand Down Expand Up @@ -453,11 +450,10 @@ def test_invalid_regex_raises():

q = df.select(pl.col("a").str.contains(r"ab)", strict=True))

assert_collect_raises(
q,
polars_except=pl.exceptions.ComputeError,
cudf_except=pl.exceptions.ComputeError,
)
with pytest.raises(pl.exceptions.ComputeError):
q.collect()
with pytest.raises(pl.exceptions.ComputeError):
q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True))


@pytest.mark.parametrize("pattern", ["a{1000}", "a(?i:B)", ""])
Expand Down Expand Up @@ -508,11 +504,10 @@ def test_string_from_float(engine: pl.GPUEngine, request, str_from_float_data):
def test_string_to_numeric_invalid(numeric_type):
df = pl.LazyFrame({"a": ["a", "b", "c"]})
q = df.select(pl.col("a").cast(numeric_type))
assert_collect_raises(
q,
polars_except=pl.exceptions.InvalidOperationError,
cudf_except=pl.exceptions.InvalidOperationError,
)
with pytest.raises(pl.exceptions.InvalidOperationError):
q.collect()
with pytest.raises(pl.exceptions.InvalidOperationError):
q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True))


@pytest.mark.parametrize("ignore_nulls", [False, True])
Expand Down Expand Up @@ -548,11 +543,10 @@ def test_string_zfill(engine: pl.GPUEngine, fill, input_strings):
q = ldf.select(pl.col("a").str.zfill(fill))

if fill is not None and fill < 0:
assert_collect_raises(
q,
polars_except=pl.exceptions.InvalidOperationError,
cudf_except=pl.exceptions.InvalidOperationError,
)
with pytest.raises(pl.exceptions.InvalidOperationError):
q.collect()
with pytest.raises(pl.exceptions.InvalidOperationError):
q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True))
else:
assert_gpu_result_equal(q, engine=engine)

Expand Down Expand Up @@ -588,23 +582,19 @@ def test_string_zfill_column(engine: pl.GPUEngine, fill):
).lazy()
q = ldf.select(pl.col("input_strings").str.zfill(pl.col("fill")))
if fill is not None and fill < 0:
assert_collect_raises(
q,
polars_except=pl.exceptions.InvalidOperationError,
cudf_except=(),
)
with pytest.raises(pl.exceptions.InvalidOperationError):
q.collect()
q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True))
else:
Comment on lines +585 to 588
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Wrap the GPU collect call in pytest.raises for the negative-fill branch.

The GPU call is currently unguarded in this branch, so it can fail the test unexpectedly instead of asserting the intended exception behavior.

Proposed fix
     if fill is not None and fill < 0:
         with pytest.raises(pl.exceptions.InvalidOperationError):
             q.collect()
-        q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True))
+        with pytest.raises(pl.exceptions.InvalidOperationError):
+            q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True))
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
with pytest.raises(pl.exceptions.InvalidOperationError):
q.collect()
q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True))
else:
with pytest.raises(pl.exceptions.InvalidOperationError):
q.collect()
with pytest.raises(pl.exceptions.InvalidOperationError):
q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True))
else:
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@python/cudf_polars/tests/expressions/test_stringfunction.py` around lines 585
- 588, The GPU collect call in the negative-fill branch should be wrapped with
pytest.raises to assert the expected exception instead of letting it fail the
test; update the block around
q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True)) to call
it inside pytest.raises(pl.exceptions.InvalidOperationError) (matching the
earlier CPU assertion) so both the plain q.collect() and the GPU q.collect(...)
are asserted to raise InvalidOperationError.

assert_gpu_result_equal(q, engine=engine)


def test_string_zfill_forbidden_chars():
ldf = pl.LazyFrame({"a": ["Café", "345", "東京", None]})
q = ldf.select(pl.col("a").str.zfill(3))
assert_collect_raises(
q,
polars_except=(),
cudf_except=pl.exceptions.InvalidOperationError,
)
q.collect()
with pytest.raises(pl.exceptions.InvalidOperationError):
q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True))


@pytest.mark.parametrize(
Expand Down
4 changes: 3 additions & 1 deletion python/cudf_polars/tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,9 @@ def raise_unimplemented(self, *args):
),
):
# And ensure that collecting issues the correct warning.
assert_gpu_result_equal(q)
assert_gpu_result_equal(
q, engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True)
)


def test_unsupported_config_raises():
Expand Down
Loading
Loading