-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Cleanup the legacy engines code path #22488
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
b92a9e9
4c95b90
e5b1aba
0a10efd
79bd93c
e43dff3
129d910
076c7db
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
This file was deleted.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,15 +26,11 @@ | |
| "assert_sink_result_equal", | ||
| ] | ||
|
|
||
| # Will be overridden by `conftest.py` with the value from the `--executor` | ||
| # command-line argument. | ||
| DEFAULT_EXECUTOR = "in-memory" | ||
|
|
||
|
|
||
| def assert_gpu_result_equal( | ||
| lazydf: pl.LazyFrame, | ||
| *, | ||
| engine: GPUEngine | None = None, | ||
| engine: GPUEngine, | ||
| collect_kwargs: CollectKwargs | None = None, | ||
| polars_collect_kwargs: CollectKwargs | None = None, | ||
| cudf_collect_kwargs: CollectKwargs | None = None, | ||
|
|
@@ -45,7 +41,6 @@ def assert_gpu_result_equal( | |
| rtol: float = 1e-05, | ||
| atol: float = 1e-08, | ||
| categorical_as_str: bool = False, | ||
| executor: str | None = None, | ||
| ) -> None: | ||
| """ | ||
| Assert that collection of a lazyframe on GPU produces correct results. | ||
|
|
@@ -83,9 +78,6 @@ def assert_gpu_result_equal( | |
| Absolute tolerance for float comparisons | ||
| categorical_as_str | ||
| Decat categoricals to strings before comparing | ||
| executor | ||
| The executor configuration to pass to `GPUEngine`. If not specified, | ||
| uses the module-level `DEFAULT_EXECUTOR` attribute. | ||
|
|
||
| Raises | ||
| ------ | ||
|
|
@@ -94,7 +86,6 @@ def assert_gpu_result_equal( | |
| NotImplementedError | ||
| If GPU collection failed in some way. | ||
| """ | ||
| engine = engine or get_default_engine(executor) | ||
| final_polars_collect_kwargs, final_cudf_collect_kwargs = _process_kwargs( | ||
| collect_kwargs, polars_collect_kwargs, cudf_collect_kwargs | ||
| ) | ||
|
|
@@ -167,35 +158,6 @@ def assert_ir_translation_raises(q: pl.LazyFrame, *exceptions: type[Exception]) | |
| raise AssertionError(f"Translation DID NOT RAISE {exceptions}") | ||
|
|
||
|
|
||
| def get_default_engine( | ||
| executor: str | None = None, | ||
| ) -> GPUEngine: | ||
| """ | ||
| Get the default engine used for testing. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| executor | ||
| The executor configuration to pass to `GPUEngine`. If not specified, | ||
| uses the module-level `DEFAULT_EXECUTOR` attribute. | ||
|
|
||
| Returns | ||
| ------- | ||
| engine | ||
| A polars GPUEngine configured with the default settings for tests. | ||
|
|
||
| See Also | ||
| -------- | ||
| assert_gpu_result_equal | ||
| assert_sink_result_equal | ||
| """ | ||
| executor = executor or DEFAULT_EXECUTOR | ||
| return GPUEngine( | ||
| raise_on_fail=True, | ||
| executor=executor, | ||
| ) | ||
|
|
||
|
|
||
| def _process_kwargs( | ||
| collect_kwargs: CollectKwargs | None, | ||
| polars_collect_kwargs: CollectKwargs | None, | ||
|
|
@@ -212,86 +174,6 @@ def _process_kwargs( | |
| return final_polars_collect_kwargs, final_cudf_collect_kwargs | ||
|
|
||
|
|
||
| def assert_collect_raises( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Inlining it instead of calling |
||
| lazydf: pl.LazyFrame, | ||
| *, | ||
| polars_except: type[Exception] | tuple[type[Exception], ...], | ||
| cudf_except: type[Exception] | tuple[type[Exception], ...], | ||
| collect_kwargs: CollectKwargs | None = None, | ||
| polars_collect_kwargs: CollectKwargs | None = None, | ||
| cudf_collect_kwargs: CollectKwargs | None = None, | ||
| ) -> None: | ||
| """ | ||
| Assert that collecting the result of a query raises the expected exceptions. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| lazydf | ||
| frame to collect. | ||
| collect_kwargs | ||
| Common keyword arguments to pass to collect for both polars CPU and | ||
| cudf-polars. | ||
| Useful for controlling optimization settings. | ||
| polars_except | ||
| Exception or exceptions polars CPU is expected to raise. If | ||
| an empty tuple ``()``, CPU is expected to succeed without raising. | ||
| cudf_except | ||
| Exception or exceptions polars GPU is expected to raise. If | ||
| an empty tuple ``()``, GPU is expected to succeed without raising. | ||
| collect_kwargs | ||
| Common keyword arguments to pass to collect for both polars CPU and | ||
| cudf-polars. | ||
| Useful for controlling optimization settings. | ||
| polars_collect_kwargs | ||
| Keyword arguments to pass to collect for execution on polars CPU. | ||
| Overrides kwargs in collect_kwargs. | ||
| Useful for controlling optimization settings. | ||
| cudf_collect_kwargs | ||
| Keyword arguments to pass to collect for execution on cudf-polars. | ||
| Overrides kwargs in collect_kwargs. | ||
| Useful for controlling optimization settings. | ||
|
|
||
| Returns | ||
| ------- | ||
| None | ||
| If both sides raise the expected exceptions. | ||
|
|
||
| Raises | ||
| ------ | ||
| AssertionError | ||
| If either side did not raise the expected exceptions. | ||
| """ | ||
| final_polars_collect_kwargs, final_cudf_collect_kwargs = _process_kwargs( | ||
| collect_kwargs, polars_collect_kwargs, cudf_collect_kwargs | ||
| ) | ||
|
|
||
| try: | ||
| lazydf.collect(**final_polars_collect_kwargs) # type: ignore[misc, call-overload] | ||
| except polars_except: | ||
| pass | ||
| except Exception as e: | ||
| raise AssertionError( | ||
| f"CPU execution RAISED {type(e)}, EXPECTED {polars_except}" | ||
| ) from e | ||
| else: | ||
| if polars_except != (): | ||
| raise AssertionError(f"CPU execution DID NOT RAISE {polars_except}") | ||
|
|
||
| # TODO: https://github.com/rapidsai/cudf/issues/22346 | ||
| engine = GPUEngine(executor="in-memory", raise_on_fail=True) | ||
| try: | ||
| lazydf.collect(**final_cudf_collect_kwargs, engine=engine) # type: ignore[misc, call-overload] | ||
| except cudf_except: | ||
| pass | ||
| except Exception as e: | ||
| raise AssertionError( | ||
| f"GPU execution RAISED {type(e)}, EXPECTED {cudf_except}" | ||
| ) from e | ||
| else: | ||
| if cudf_except != (): | ||
| raise AssertionError(f"GPU execution DID NOT RAISE {cudf_except}") | ||
|
|
||
|
|
||
| def _resolve_sink_format(path: Path) -> str: | ||
| """Returns valid sink format for assert utilities.""" | ||
| suffix = path.suffix.lower() | ||
|
|
@@ -311,10 +193,9 @@ def assert_sink_result_equal( | |
| lazydf: pl.LazyFrame, | ||
| path: str | Path, | ||
| *, | ||
| engine: str | GPUEngine | None = None, | ||
| engine: GPUEngine, | ||
| read_kwargs: dict | None = None, | ||
| write_kwargs: dict | None = None, | ||
| executor: str | None = None, | ||
| ) -> None: | ||
| """ | ||
| Assert that writing a LazyFrame via sink produces the same output. | ||
|
|
@@ -332,9 +213,6 @@ def assert_sink_result_equal( | |
| Optional keyword arguments to pass to the corresponding `pl.read_*` function. | ||
| write_kwargs | ||
| Optional keyword arguments to pass to the corresponding `sink_*` function. | ||
| executor | ||
| The executor configuration to pass to `GPUEngine`. If not specified, | ||
| uses the module-level `DEFAULT_EXECUTOR` attribute. | ||
|
|
||
| Raises | ||
| ------ | ||
|
|
@@ -343,7 +221,6 @@ def assert_sink_result_equal( | |
| ValueError | ||
| If the file extension is not one of the supported formats. | ||
| """ | ||
| engine = engine or get_default_engine(executor) | ||
| path = Path(path) | ||
| read_kwargs = read_kwargs or {} | ||
| write_kwargs = write_kwargs or {} | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -10,7 +10,6 @@ | |||||||||||||||||||
|
|
||||||||||||||||||||
| from cudf_polars import execute_with_cudf | ||||||||||||||||||||
| from cudf_polars.testing.asserts import ( | ||||||||||||||||||||
| assert_collect_raises, | ||||||||||||||||||||
| assert_gpu_result_equal, | ||||||||||||||||||||
| assert_ir_translation_raises, | ||||||||||||||||||||
| ) | ||||||||||||||||||||
|
|
@@ -303,12 +302,10 @@ def test_to_datetime( | |||||||||||||||||||
| if outcome == "translation_error": | ||||||||||||||||||||
| assert_ir_translation_raises(q, NotImplementedError) | ||||||||||||||||||||
| elif outcome == "collect_error": | ||||||||||||||||||||
| cudf_exc = pl.exceptions.InvalidOperationError | ||||||||||||||||||||
| assert_collect_raises( | ||||||||||||||||||||
| q, | ||||||||||||||||||||
| polars_except=pl.exceptions.InvalidOperationError, | ||||||||||||||||||||
| cudf_except=cudf_exc, | ||||||||||||||||||||
| ) | ||||||||||||||||||||
| with pytest.raises(pl.exceptions.InvalidOperationError): | ||||||||||||||||||||
| q.collect() | ||||||||||||||||||||
| with pytest.raises(pl.exceptions.InvalidOperationError): | ||||||||||||||||||||
| q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True)) | ||||||||||||||||||||
| else: | ||||||||||||||||||||
| assert_gpu_result_equal(q, engine=engine) | ||||||||||||||||||||
|
|
||||||||||||||||||||
|
|
@@ -453,11 +450,10 @@ def test_invalid_regex_raises(): | |||||||||||||||||||
|
|
||||||||||||||||||||
| q = df.select(pl.col("a").str.contains(r"ab)", strict=True)) | ||||||||||||||||||||
|
|
||||||||||||||||||||
| assert_collect_raises( | ||||||||||||||||||||
| q, | ||||||||||||||||||||
| polars_except=pl.exceptions.ComputeError, | ||||||||||||||||||||
| cudf_except=pl.exceptions.ComputeError, | ||||||||||||||||||||
| ) | ||||||||||||||||||||
| with pytest.raises(pl.exceptions.ComputeError): | ||||||||||||||||||||
| q.collect() | ||||||||||||||||||||
| with pytest.raises(pl.exceptions.ComputeError): | ||||||||||||||||||||
| q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True)) | ||||||||||||||||||||
|
|
||||||||||||||||||||
|
|
||||||||||||||||||||
| @pytest.mark.parametrize("pattern", ["a{1000}", "a(?i:B)", ""]) | ||||||||||||||||||||
|
|
@@ -508,11 +504,10 @@ def test_string_from_float(engine: pl.GPUEngine, request, str_from_float_data): | |||||||||||||||||||
| def test_string_to_numeric_invalid(numeric_type): | ||||||||||||||||||||
| df = pl.LazyFrame({"a": ["a", "b", "c"]}) | ||||||||||||||||||||
| q = df.select(pl.col("a").cast(numeric_type)) | ||||||||||||||||||||
| assert_collect_raises( | ||||||||||||||||||||
| q, | ||||||||||||||||||||
| polars_except=pl.exceptions.InvalidOperationError, | ||||||||||||||||||||
| cudf_except=pl.exceptions.InvalidOperationError, | ||||||||||||||||||||
| ) | ||||||||||||||||||||
| with pytest.raises(pl.exceptions.InvalidOperationError): | ||||||||||||||||||||
| q.collect() | ||||||||||||||||||||
| with pytest.raises(pl.exceptions.InvalidOperationError): | ||||||||||||||||||||
| q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True)) | ||||||||||||||||||||
|
|
||||||||||||||||||||
|
|
||||||||||||||||||||
| @pytest.mark.parametrize("ignore_nulls", [False, True]) | ||||||||||||||||||||
|
|
@@ -548,11 +543,10 @@ def test_string_zfill(engine: pl.GPUEngine, fill, input_strings): | |||||||||||||||||||
| q = ldf.select(pl.col("a").str.zfill(fill)) | ||||||||||||||||||||
|
|
||||||||||||||||||||
| if fill is not None and fill < 0: | ||||||||||||||||||||
| assert_collect_raises( | ||||||||||||||||||||
| q, | ||||||||||||||||||||
| polars_except=pl.exceptions.InvalidOperationError, | ||||||||||||||||||||
| cudf_except=pl.exceptions.InvalidOperationError, | ||||||||||||||||||||
| ) | ||||||||||||||||||||
| with pytest.raises(pl.exceptions.InvalidOperationError): | ||||||||||||||||||||
| q.collect() | ||||||||||||||||||||
| with pytest.raises(pl.exceptions.InvalidOperationError): | ||||||||||||||||||||
| q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True)) | ||||||||||||||||||||
| else: | ||||||||||||||||||||
| assert_gpu_result_equal(q, engine=engine) | ||||||||||||||||||||
|
|
||||||||||||||||||||
|
|
@@ -588,23 +582,19 @@ def test_string_zfill_column(engine: pl.GPUEngine, fill): | |||||||||||||||||||
| ).lazy() | ||||||||||||||||||||
| q = ldf.select(pl.col("input_strings").str.zfill(pl.col("fill"))) | ||||||||||||||||||||
| if fill is not None and fill < 0: | ||||||||||||||||||||
| assert_collect_raises( | ||||||||||||||||||||
| q, | ||||||||||||||||||||
| polars_except=pl.exceptions.InvalidOperationError, | ||||||||||||||||||||
| cudf_except=(), | ||||||||||||||||||||
| ) | ||||||||||||||||||||
| with pytest.raises(pl.exceptions.InvalidOperationError): | ||||||||||||||||||||
| q.collect() | ||||||||||||||||||||
| q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True)) | ||||||||||||||||||||
| else: | ||||||||||||||||||||
|
Comment on lines
+585
to
588
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Wrap the GPU collect call in `pytest.raises`. The GPU call is currently unguarded in this branch, so it can fail the test unexpectedly instead of asserting the intended exception behavior. Proposed fix:
  if fill is not None and fill < 0:
      with pytest.raises(pl.exceptions.InvalidOperationError):
          q.collect()
-     q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True))
+     with pytest.raises(pl.exceptions.InvalidOperationError):
+         q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True))
📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||||||||||||||||
| assert_gpu_result_equal(q, engine=engine) | ||||||||||||||||||||
|
|
||||||||||||||||||||
|
|
||||||||||||||||||||
| def test_string_zfill_forbidden_chars(): | ||||||||||||||||||||
| ldf = pl.LazyFrame({"a": ["Café", "345", "東京", None]}) | ||||||||||||||||||||
| q = ldf.select(pl.col("a").str.zfill(3)) | ||||||||||||||||||||
| assert_collect_raises( | ||||||||||||||||||||
| q, | ||||||||||||||||||||
| polars_except=(), | ||||||||||||||||||||
| cudf_except=pl.exceptions.InvalidOperationError, | ||||||||||||||||||||
| ) | ||||||||||||||||||||
| q.collect() | ||||||||||||||||||||
| with pytest.raises(pl.exceptions.InvalidOperationError): | ||||||||||||||||||||
| q.collect(engine=pl.GPUEngine(executor="in-memory", raise_on_fail=True)) | ||||||||||||||||||||
|
|
||||||||||||||||||||
|
|
||||||||||||||||||||
| @pytest.mark.parametrize( | ||||||||||||||||||||
|
|
||||||||||||||||||||
Uh oh!
There was an error while loading. Please reload this page.